#include <linux/hash.h>
#include <linux/bitops.h>
#include <linux/lockdep.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_hdrs.h>
#include <rdma/opa_addr.h>
#include <rdma/uverbs_ioctl.h>
#include "qp.h"
#include "vt.h"
#include "trace.h"

#define RVT_RWQ_COUNT_THRESHOLD 16

static void rvt_rc_timeout(struct timer_list *t);
static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
			 enum ib_qp_type type);

/*
 * Translation table from the 5-bit IBTA RNR NAK timer code to the
 * corresponding delay in microseconds (see rvt_rnr_tbl_to_usec()).
 */
static const u32 ib_rvt_rnr_table[32] = {
	655360,
	10,
	20,
	30,
	40,
	60,
	80,
	120,
	160,
	240,
	320,
	480,
	640,
	960,
	1280,
	1920,
	2560,
	3840,
	5120,
	7680,
	10240,
	15360,
	20480,
	30720,
	40960,
	61440,
	81920,
	122880,
	163840,
	245760,
	327680,
	491520
};

/*
 * ib_rvt_state_ops - map each QP state to the post/process/flush
 * operations that are legal in that state.  Exported so drivers can
 * perform the same checks as the rdmavt core.
 */
const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
	[IB_QPS_RESET] = 0,
	[IB_QPS_INIT] = RVT_POST_RECV_OK,
	[IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK,
	[IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
	    RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK |
	    RVT_PROCESS_NEXT_SEND_OK,
	[IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
	    RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK,
	[IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
	    RVT_POST_SEND_OK | RVT_FLUSH_SEND,
	[IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV |
	    RVT_POST_SEND_OK | RVT_FLUSH_SEND,
};
EXPORT_SYMBOL(ib_rvt_state_ops);

/* Return the size of the last level cache (in KiB) on the boot CPU. */
static int rvt_wss_llc_size(void)
{
	/* assume the boot CPU value applies to all CPUs */
	return boot_cpu_data.x86_cache_size;
}

/* Copy with cache-bypassing stores so the copy does not pollute the LLC. */
static void cacheless_memcpy(void *dst, void *src, size_t n)
{
	/*
	 * Use the x86 cacheless copy routine; the __user cast only quiets
	 * sparse, the copy stays within kernel memory.
	 */
	__copy_user_nocache(dst, (void __user *)src, n, 0);
}
0102
0103 void rvt_wss_exit(struct rvt_dev_info *rdi)
0104 {
0105 struct rvt_wss *wss = rdi->wss;
0106
0107 if (!wss)
0108 return;
0109
0110
0111 kfree(wss->entries);
0112 wss->entries = NULL;
0113 kfree(rdi->wss);
0114 rdi->wss = NULL;
0115 }

/**
 * rvt_wss_init - allocate and size the working set size (wss) table
 * @rdi: rvt dev struct
 *
 * Only allocated when the SGE copy mode is RVT_SGE_COPY_ADAPTIVE;
 * otherwise rdi->wss stays NULL.
 *
 * Return: 0 on success, -ENOMEM on allocation failure.
 */
0122 int rvt_wss_init(struct rvt_dev_info *rdi)
0123 {
0124 unsigned int sge_copy_mode = rdi->dparms.sge_copy_mode;
0125 unsigned int wss_threshold = rdi->dparms.wss_threshold;
0126 unsigned int wss_clean_period = rdi->dparms.wss_clean_period;
0127 long llc_size;
0128 long llc_bits;
0129 long table_size;
0130 long table_bits;
0131 struct rvt_wss *wss;
0132 int node = rdi->dparms.node;
0133
0134 if (sge_copy_mode != RVT_SGE_COPY_ADAPTIVE) {
0135 rdi->wss = NULL;
0136 return 0;
0137 }
0138
0139 rdi->wss = kzalloc_node(sizeof(*rdi->wss), GFP_KERNEL, node);
0140 if (!rdi->wss)
0141 return -ENOMEM;
0142 wss = rdi->wss;
0143
0144
0145 if (wss_threshold < 1 || wss_threshold > 100)
0146 wss_threshold = 80;
0147
0148
0149 if (wss_clean_period > 1000000)
0150 wss_clean_period = 256;
0151
0152
0153 if (wss_clean_period == 0)
0154 wss_clean_period = 1;

	/*
	 * The table covers the LLC: round the LLC size up to a power of
	 * two and track one bit per page.
	 */
0160 llc_size = rvt_wss_llc_size() * 1024;
0161 table_size = roundup_pow_of_two(llc_size);
0162
0163
0164 llc_bits = llc_size / PAGE_SIZE;
0165 table_bits = table_size / PAGE_SIZE;
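	/* one bit per page; the mask below relies on table_bits being a power of two */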
0166 wss->pages_mask = table_bits - 1;
0167 wss->num_entries = table_bits / BITS_PER_LONG;
0168
0169 wss->threshold = (llc_bits * wss_threshold) / 100;
0170 if (wss->threshold == 0)
0171 wss->threshold = 1;
0172
0173 wss->clean_period = wss_clean_period;
0174 atomic_set(&wss->clean_counter, wss_clean_period);
0175
0176 wss->entries = kcalloc_node(wss->num_entries, sizeof(*wss->entries),
0177 GFP_KERNEL, node);
0178 if (!wss->entries) {
0179 rvt_wss_exit(rdi);
0180 return -ENOMEM;
0181 }
0182
0183 return 0;
0184 }

/*
 * Advance the clean counter.  When the clean period has expired,
 * clean an entry.
 *
 * This is implemented with atomics rather than a lock: the counter is
 * decremented on every insertion and, when it hits zero, one table
 * entry is cleared and its population count is subtracted from
 * total_count.  Cleaning a single entry per period spreads the cost of
 * aging the whole table over many calls.
 */
0201 static void wss_advance_clean_counter(struct rvt_wss *wss)
0202 {
0203 int entry;
0204 int weight;
0205 unsigned long bits;
0206
0207
0208 if (atomic_dec_and_test(&wss->clean_counter)) {

		/*
		 * Only the caller whose decrement hits exactly zero gets
		 * here and resets the counter; racing callers may drive it
		 * slightly negative, which the reset simply overwrites.
		 */
0222 atomic_set(&wss->clean_counter, wss->clean_period);
0223
0224
0225
0226
0227
0228
0229
0230 entry = (atomic_inc_return(&wss->clean_entry) - 1)
0231 & (wss->num_entries - 1);
0232
0233
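		/*
		 * Atomically clear the entry being aged and subtract the
		 * number of pages it tracked from the working set total.
		 */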
0234 bits = xchg(&wss->entries[entry], 0);
0235 weight = hweight64((u64)bits);
0236
0237 if (weight)
0238 atomic_sub(weight, &wss->total_count);
0239 }
0240 }

/*
 * Insert the page containing @address into the working set; bump
 * total_count if it was not already present, then advance the clean
 * counter.
 */
0245 static void wss_insert(struct rvt_wss *wss, void *address)
0246 {
0247 u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss->pages_mask;
0248 u32 entry = page / BITS_PER_LONG;
0249 u32 nr = page & (BITS_PER_LONG - 1);
0250
0251 if (!test_and_set_bit(nr, &wss->entries[entry]))
0252 atomic_inc(&wss->total_count);
0253
0254 wss_advance_clean_counter(wss);
0255 }

/*
 * Return true if the working set has reached the configured percentage
 * of the last level cache (measured in pages).
 */
0260 static inline bool wss_exceeds_threshold(struct rvt_wss *wss)
0261 {
0262 return atomic_read(&wss->total_count) >= wss->threshold;
0263 }
0264
0265 static void get_map_page(struct rvt_qpn_table *qpt,
0266 struct rvt_qpn_map *map)
0267 {
0268 unsigned long page = get_zeroed_page(GFP_KERNEL);

	/*
	 * Another caller may have raced us and installed a page already;
	 * check under the lock and free the duplicate (free_page() is a
	 * no-op if the allocation above failed).
	 */
0274 spin_lock(&qpt->lock);
0275 if (map->page)
0276 free_page(page);
0277 else
0278 map->page = (void *)page;
0279 spin_unlock(&qpt->lock);
0280 }

/**
 * init_qpn_table - initialize the QP number table for a device
 * @rdi: rvt dev struct
 * @qpt: the QPN table
 *
 * Return: 0 on success, -EINVAL or -ENOMEM on failure.
 */
0287 static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt)
0288 {
0289 u32 offset, i;
0290 struct rvt_qpn_map *map;
0291 int ret = 0;
0292
0293 if (!(rdi->dparms.qpn_res_end >= rdi->dparms.qpn_res_start))
0294 return -EINVAL;
0295
0296 spin_lock_init(&qpt->lock);
0297
0298 qpt->last = rdi->dparms.qpn_start;
0299 qpt->incr = rdi->dparms.qpn_inc << rdi->dparms.qos_shift;

	/*
	 * Drivers reserve a QPN range ([qpn_res_start, qpn_res_end]) for
	 * non-verbs use; the loop below marks those bits busy so
	 * alloc_qpn() never hands them out.
	 */
0309 qpt->nmaps = rdi->dparms.qpn_res_start / RVT_BITS_PER_PAGE;
0310
0311
0312 offset = rdi->dparms.qpn_res_start & RVT_BITS_PER_PAGE_MASK;
0313
0314
0315 map = &qpt->map[qpt->nmaps];
0316
0317 rvt_pr_info(rdi, "Reserving QPNs from 0x%x to 0x%x for non-verbs use\n",
0318 rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end);
0319 for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) {
0320 if (!map->page) {
0321 get_map_page(qpt, map);
0322 if (!map->page) {
0323 ret = -ENOMEM;
0324 break;
0325 }
0326 }
0327 set_bit(offset, map->page);
0328 offset++;
0329 if (offset == RVT_BITS_PER_PAGE) {
0330
0331 qpt->nmaps++;
0332 map++;
0333 offset = 0;
0334 }
0335 }
0336 return ret;
0337 }

/**
 * free_qpn_table - free the QP number table for a device
 * @qpt: the QPN table
 */
0343 static void free_qpn_table(struct rvt_qpn_table *qpt)
0344 {
0345 int i;
0346
0347 for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
0348 free_page((unsigned long)qpt->map[i].page);
0349 }

/**
 * rvt_driver_qp_init - initialize the per-device QP resources
 * @rdi: rvt dev struct
 *
 * Return: 0 on success, -EINVAL for bad parameters or missing driver
 * callbacks, -ENOMEM on allocation failure.
 */
0357 int rvt_driver_qp_init(struct rvt_dev_info *rdi)
0358 {
0359 int i;
0360 int ret = -ENOMEM;
0361
0362 if (!rdi->dparms.qp_table_size)
0363 return -EINVAL;
0364
0365
0366
0367
0368
0369 if (!rdi->driver_f.free_all_qps ||
0370 !rdi->driver_f.qp_priv_alloc ||
0371 !rdi->driver_f.qp_priv_free ||
0372 !rdi->driver_f.notify_qp_reset ||
0373 !rdi->driver_f.notify_restart_rc)
0374 return -EINVAL;
0375
0376
0377 rdi->qp_dev = kzalloc_node(sizeof(*rdi->qp_dev), GFP_KERNEL,
0378 rdi->dparms.node);
0379 if (!rdi->qp_dev)
0380 return -ENOMEM;
0381
0382
0383 rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size;
0384 rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size);
0385 rdi->qp_dev->qp_table =
0386 kmalloc_array_node(rdi->qp_dev->qp_table_size,
0387 sizeof(*rdi->qp_dev->qp_table),
0388 GFP_KERNEL, rdi->dparms.node);
0389 if (!rdi->qp_dev->qp_table)
0390 goto no_qp_table;
0391
0392 for (i = 0; i < rdi->qp_dev->qp_table_size; i++)
0393 RCU_INIT_POINTER(rdi->qp_dev->qp_table[i], NULL);
0394
0395 spin_lock_init(&rdi->qp_dev->qpt_lock);
0396
0397
0398 if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table))
0399 goto fail_table;
0400
0401 spin_lock_init(&rdi->n_qps_lock);
0402
0403 return 0;
0404
0405 fail_table:
0406 kfree(rdi->qp_dev->qp_table);
0407 free_qpn_table(&rdi->qp_dev->qpn_table);
0408
0409 no_qp_table:
0410 kfree(rdi->qp_dev);
0411
0412 return ret;
0413 }

/**
 * rvt_free_qp_cb - callback used by rvt_qp_iter() at device teardown
 * @qp: the qp to reset
 * @v: a 64-bit value holding a pointer to the in-use counter
 *
 * Resets the qp and bumps the count of QPs found still in use.
 */
0423 static void rvt_free_qp_cb(struct rvt_qp *qp, u64 v)
0424 {
0425 unsigned int *qp_inuse = (unsigned int *)v;
0426 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
0427
0428
0429 rvt_reset_qp(rdi, qp, qp->ibqp.qp_type);
0430
0431
0432 (*qp_inuse)++;
0433 }

/**
 * rvt_free_all_qps - reset every QP and count those still in use
 * @rdi: rvt device info structure
 *
 * Return: the number of QPs (including multicast attachments) that were
 * still in use.
 */
0443 static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi)
0444 {
0445 unsigned int qp_inuse = 0;
0446
0447 qp_inuse += rvt_mcast_tree_empty(rdi);
0448
0449 rvt_qp_iter(rdi, (u64)&qp_inuse, rvt_free_qp_cb);
0450
0451 return qp_inuse;
0452 }

/**
 * rvt_qp_exit - clean up qps on device exit
 * @rdi: rvt dev structure
 *
 * Check for qp leaks and free resources.
 */
0460 void rvt_qp_exit(struct rvt_dev_info *rdi)
0461 {
0462 u32 qps_inuse = rvt_free_all_qps(rdi);
0463
0464 if (qps_inuse)
0465 rvt_pr_err(rdi, "QP memory leak! %u still in use\n",
0466 qps_inuse);
0467 if (!rdi->qp_dev)
0468 return;
0469
0470 kfree(rdi->qp_dev->qp_table);
0471 free_qpn_table(&rdi->qp_dev->qpn_table);
0472 kfree(rdi->qp_dev);
0473 }
0474
0475 static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
0476 struct rvt_qpn_map *map, unsigned off)
0477 {
0478 return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
0479 }

/**
 * alloc_qpn - allocate the next available QPN, or 0/1 for SMI/GSI QPs
 * @rdi: rvt device info structure
 * @qpt: queue pair number table pointer
 * @type: the QP type
 * @port_num: IB port number, 1 based, comes from core
 * @exclude_prefix: prefix of special queue pair number being allocated
 *
 * Return: the allocated QPN, or a negative errno on failure.
 */
0492 static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
0493 enum ib_qp_type type, u8 port_num, u8 exclude_prefix)
0494 {
0495 u32 i, offset, max_scan, qpn;
0496 struct rvt_qpn_map *map;
0497 u32 ret;
0498 u32 max_qpn = exclude_prefix == RVT_AIP_QP_PREFIX ?
0499 RVT_AIP_QPN_MAX : RVT_QPN_MAX;
0500
0501 if (rdi->driver_f.alloc_qpn)
0502 return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num);
0503
0504 if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
0505 unsigned n;
0506
0507 ret = type == IB_QPT_GSI;
0508 n = 1 << (ret + 2 * (port_num - 1));
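		/*
		 * QP0 (SMI) and QP1 (GSI) are tracked as per-port bits in
		 * qpt->flags; the bit being set means that special QPN is
		 * already in use.
		 */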
0509 spin_lock(&qpt->lock);
0510 if (qpt->flags & n)
0511 ret = -EINVAL;
0512 else
0513 qpt->flags |= n;
0514 spin_unlock(&qpt->lock);
0515 goto bail;
0516 }
0517
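	/*
	 * Start the search just past the last allocated QPN; on wrap,
	 * restart near the bottom of the space with the low bit of the
	 * previous QPN toggled.
	 */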
0518 qpn = qpt->last + qpt->incr;
0519 if (qpn >= max_qpn)
0520 qpn = qpt->incr | ((qpt->last & 1) ^ 1);
0521
0522 offset = qpn & RVT_BITS_PER_PAGE_MASK;
0523 map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
0524 max_scan = qpt->nmaps - !offset;
0525 for (i = 0;;) {
0526 if (unlikely(!map->page)) {
0527 get_map_page(qpt, map);
0528 if (unlikely(!map->page))
0529 break;
0530 }
0531 do {
0532 if (!test_and_set_bit(offset, map->page)) {
0533 qpt->last = qpn;
0534 ret = qpn;
0535 goto bail;
0536 }
0537 offset += qpt->incr;

			/*
			 * offset may now be past the end of the page; that
			 * is fine, the while condition below catches it and
			 * the outer loop moves on to the next map.
			 */
0542 qpn = mk_qpn(qpt, map, offset);
0543 } while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX);

		/*
		 * Scan all of the existing map pages before growing the
		 * table, to keep the number of allocated bitmap pages to a
		 * minimum.
		 */
0549 if (++i > max_scan) {
0550 if (qpt->nmaps == RVT_QPNMAP_ENTRIES)
0551 break;
0552 map = &qpt->map[qpt->nmaps++];
0553
0554 offset = qpt->incr | (offset & 1);
0555 } else if (map < &qpt->map[qpt->nmaps]) {
0556 ++map;
0557
0558 offset = qpt->incr | (offset & 1);
0559 } else {
0560 map = &qpt->map[0];
0561
0562 offset = qpt->incr | ((offset & 1) ^ 1);
0563 }
0564
0565 WARN_ON(rdi->dparms.qos_shift > 1 &&
0566 offset & ((BIT(rdi->dparms.qos_shift - 1) - 1) << 1));
0567 qpn = mk_qpn(qpt, map, offset);
0568 }
0569
0570 ret = -ENOMEM;
0571
0572 bail:
0573 return ret;
0574 }

/**
 * rvt_clear_mr_refs - drop MR references held by a QP
 * @qp: the QP
 * @clr_sends: also release references held by pending send WQEs
 */
0581 static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
0582 {
0583 unsigned n;
0584 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
0585
0586 if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
0587 rvt_put_ss(&qp->s_rdma_read_sge);
0588
0589 rvt_put_ss(&qp->r_sge);
0590
0591 if (clr_sends) {
0592 while (qp->s_last != qp->s_head) {
0593 struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);
0594
0595 rvt_put_qp_swqe(qp, wqe);
0596 if (++qp->s_last >= qp->s_size)
0597 qp->s_last = 0;
0598 smp_wmb();
0599 }
0600 if (qp->s_rdma_mr) {
0601 rvt_put_mr(qp->s_rdma_mr);
0602 qp->s_rdma_mr = NULL;
0603 }
0604 }
0605
0606 for (n = 0; qp->s_ack_queue && n < rvt_max_atomic(rdi); n++) {
0607 struct rvt_ack_entry *e = &qp->s_ack_queue[n];
0608
0609 if (e->rdma_sge.mr) {
0610 rvt_put_mr(e->rdma_sge.mr);
0611 e->rdma_sge.mr = NULL;
0612 }
0613 }
0614 }

/**
 * rvt_swqe_has_lkey - return true if any SGE of the send WQE uses lkey
 * @wqe: the send wqe
 * @lkey: the lkey
 */
0623 static bool rvt_swqe_has_lkey(struct rvt_swqe *wqe, u32 lkey)
0624 {
0625 int i;
0626
0627 for (i = 0; i < wqe->wr.num_sge; i++) {
0628 struct rvt_sge *sge = &wqe->sg_list[i];
0629
0630 if (rvt_mr_has_lkey(sge->mr, lkey))
0631 return true;
0632 }
0633 return false;
0634 }

/**
 * rvt_qp_sends_has_lkey - return true if any pending send WQE (or the
 * in-progress RDMA MR) references lkey
 * @qp: the QP
 * @lkey: the lkey
 */
0641 static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey)
0642 {
0643 u32 s_last = qp->s_last;
0644
0645 while (s_last != qp->s_head) {
0646 struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, s_last);
0647
0648 if (rvt_swqe_has_lkey(wqe, lkey))
0649 return true;
0650
0651 if (++s_last >= qp->s_size)
0652 s_last = 0;
0653 }
0654 if (qp->s_rdma_mr)
0655 if (rvt_mr_has_lkey(qp->s_rdma_mr, lkey))
0656 return true;
0657 return false;
0658 }

/**
 * rvt_qp_acks_has_lkey - return true if the ack queue references lkey
 * @qp: the QP
 * @lkey: the lkey
 */
0665 static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey)
0666 {
0667 int i;
0668 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
0669
0670 for (i = 0; qp->s_ack_queue && i < rvt_max_atomic(rdi); i++) {
0671 struct rvt_ack_entry *e = &qp->s_ack_queue[i];
0672
0673 if (rvt_mr_has_lkey(e->rdma_sge.mr, lkey))
0674 return true;
0675 }
0676 return false;
0677 }

/**
 * rvt_qp_mr_clean - handle deregistration of an lkey still in use
 * @qp: the qp
 * @lkey: the lkey being deregistered
 *
 * If the lkey is referenced by the receive, send, or ack state of the
 * QP, move the QP to the error state so the references are dropped.
 */
0690 void rvt_qp_mr_clean(struct rvt_qp *qp, u32 lkey)
0691 {
0692 bool lastwqe = false;
0693
0694 if (qp->ibqp.qp_type == IB_QPT_SMI ||
0695 qp->ibqp.qp_type == IB_QPT_GSI)
0696
0697 return;
0698 spin_lock_irq(&qp->r_lock);
0699 spin_lock(&qp->s_hlock);
0700 spin_lock(&qp->s_lock);
0701
0702 if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
0703 goto check_lwqe;
0704
0705 if (rvt_ss_has_lkey(&qp->r_sge, lkey) ||
0706 rvt_qp_sends_has_lkey(qp, lkey) ||
0707 rvt_qp_acks_has_lkey(qp, lkey))
0708 lastwqe = rvt_error_qp(qp, IB_WC_LOC_PROT_ERR);
0709 check_lwqe:
0710 spin_unlock(&qp->s_lock);
0711 spin_unlock(&qp->s_hlock);
0712 spin_unlock_irq(&qp->r_lock);
0713 if (lastwqe) {
0714 struct ib_event ev;
0715
0716 ev.device = qp->ibqp.device;
0717 ev.element.qp = &qp->ibqp;
0718 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
0719 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
0720 }
0721 }

/**
 * rvt_remove_qp - remove a QP from the device's QP table
 * @rdi: rvt dev struct
 * @qp: the QP to remove
 *
 * Removes the QP from the hash table (or the per-port QP0/QP1 slots) so
 * it cannot be found asynchronously by the receive path; the table's
 * reference is dropped after an RCU grace period.
 */
0731 static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
0732 {
0733 struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
0734 u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
0735 unsigned long flags;
0736 int removed = 1;
0737
0738 spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
0739
0740 if (rcu_dereference_protected(rvp->qp[0],
0741 lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
0742 RCU_INIT_POINTER(rvp->qp[0], NULL);
0743 } else if (rcu_dereference_protected(rvp->qp[1],
0744 lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
0745 RCU_INIT_POINTER(rvp->qp[1], NULL);
0746 } else {
0747 struct rvt_qp *q;
0748 struct rvt_qp __rcu **qpp;
0749
0750 removed = 0;
0751 qpp = &rdi->qp_dev->qp_table[n];
0752 for (; (q = rcu_dereference_protected(*qpp,
0753 lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL;
0754 qpp = &q->next) {
0755 if (q == qp) {
0756 RCU_INIT_POINTER(*qpp,
0757 rcu_dereference_protected(qp->next,
0758 lockdep_is_held(&rdi->qp_dev->qpt_lock)));
0759 removed = 1;
0760 trace_rvt_qpremove(qp, n);
0761 break;
0762 }
0763 }
0764 }
0765
0766 spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
0767 if (removed) {
0768 synchronize_rcu();
0769 rvt_put_qp(qp);
0770 }
0771 }

/**
 * rvt_alloc_rq - allocate a receive queue for kernel or user QPs/SRQs
 * @rq: receive queue data structure
 * @size: total size of the WQE buffer in bytes
 * @node: NUMA node for the allocation
 * @udata: non-NULL when the queue is mapped to user space
 *
 * Return: 0 on success, -ENOMEM on allocation failure.
 */
0785 int rvt_alloc_rq(struct rvt_rq *rq, u32 size, int node,
0786 struct ib_udata *udata)
0787 {
0788 if (udata) {
0789 rq->wq = vmalloc_user(sizeof(struct rvt_rwq) + size);
0790 if (!rq->wq)
0791 goto bail;
0792
0793 rq->kwq = kzalloc_node(sizeof(*rq->kwq), GFP_KERNEL, node);
0794 if (!rq->kwq)
0795 goto bail;
0796 rq->kwq->curr_wq = rq->wq->wq;
0797 } else {
0798
0799 rq->kwq =
0800 vzalloc_node(sizeof(struct rvt_krwq) + size, node);
0801 if (!rq->kwq)
0802 goto bail;
0803 rq->kwq->curr_wq = rq->kwq->wq;
0804 }
0805
0806 spin_lock_init(&rq->kwq->p_lock);
0807 spin_lock_init(&rq->kwq->c_lock);
0808 return 0;
0809 bail:
0810 rvt_free_rq(rq);
0811 return -ENOMEM;
0812 }

/**
 * rvt_init_qp - initialize the QP state to the reset state
 * @rdi: rvt dev struct
 * @qp: the QP to init or reinit
 * @type: the QP type
 *
 * Called from both rvt_create_qp() and the reset path; the reset path
 * holds the QP locks while doing so.
 */
0825 static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
0826 enum ib_qp_type type)
0827 {
0828 qp->remote_qpn = 0;
0829 qp->qkey = 0;
0830 qp->qp_access_flags = 0;
0831 qp->s_flags &= RVT_S_SIGNAL_REQ_WR;
0832 qp->s_hdrwords = 0;
0833 qp->s_wqe = NULL;
0834 qp->s_draining = 0;
0835 qp->s_next_psn = 0;
0836 qp->s_last_psn = 0;
0837 qp->s_sending_psn = 0;
0838 qp->s_sending_hpsn = 0;
0839 qp->s_psn = 0;
0840 qp->r_psn = 0;
0841 qp->r_msn = 0;
0842 if (type == IB_QPT_RC) {
0843 qp->s_state = IB_OPCODE_RC_SEND_LAST;
0844 qp->r_state = IB_OPCODE_RC_SEND_LAST;
0845 } else {
0846 qp->s_state = IB_OPCODE_UC_SEND_LAST;
0847 qp->r_state = IB_OPCODE_UC_SEND_LAST;
0848 }
0849 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
0850 qp->r_nak_state = 0;
0851 qp->r_aflags = 0;
0852 qp->r_flags = 0;
0853 qp->s_head = 0;
0854 qp->s_tail = 0;
0855 qp->s_cur = 0;
0856 qp->s_acked = 0;
0857 qp->s_last = 0;
0858 qp->s_ssn = 1;
0859 qp->s_lsn = 0;
0860 qp->s_mig_state = IB_MIG_MIGRATED;
0861 qp->r_head_ack_queue = 0;
0862 qp->s_tail_ack_queue = 0;
0863 qp->s_acked_ack_queue = 0;
0864 qp->s_num_rd_atomic = 0;
0865 qp->r_sge.num_sge = 0;
0866 atomic_set(&qp->s_reserved_used, 0);
0867 }

/**
 * _rvt_reset_qp - initialize the qp state to the reset state
 * @rdi: rvt dev struct
 * @qp: the QP to reset
 * @type: the QP type
 *
 * r_lock, s_hlock, and s_lock must be held by the caller.
 */
0877 static void _rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
0878 enum ib_qp_type type)
0879 __must_hold(&qp->s_lock)
0880 __must_hold(&qp->s_hlock)
0881 __must_hold(&qp->r_lock)
0882 {
0883 lockdep_assert_held(&qp->r_lock);
0884 lockdep_assert_held(&qp->s_hlock);
0885 lockdep_assert_held(&qp->s_lock);
0886 if (qp->state != IB_QPS_RESET) {
0887 qp->state = IB_QPS_RESET;
0888
0889
0890 rdi->driver_f.flush_qp_waiters(qp);
0891 rvt_stop_rc_timers(qp);
0892 qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
0893 spin_unlock(&qp->s_lock);
0894 spin_unlock(&qp->s_hlock);
0895 spin_unlock_irq(&qp->r_lock);
0896
0897
0898 rdi->driver_f.stop_send_queue(qp);
0899 rvt_del_timers_sync(qp);
0900
0901 rdi->driver_f.quiesce_qp(qp);
0902
0903
0904 rvt_remove_qp(rdi, qp);
0905
0906
0907 spin_lock_irq(&qp->r_lock);
0908 spin_lock(&qp->s_hlock);
0909 spin_lock(&qp->s_lock);
0910
0911 rvt_clear_mr_refs(qp, 1);
0912
0913
0914
0915
0916 rdi->driver_f.notify_qp_reset(qp);
0917 }
0918 rvt_init_qp(rdi, qp, type);
0919 lockdep_assert_held(&qp->r_lock);
0920 lockdep_assert_held(&qp->s_hlock);
0921 lockdep_assert_held(&qp->s_lock);
0922 }

/**
 * rvt_reset_qp - initialize the qp state to the reset state
 * @rdi: the device info
 * @qp: the QP to reset
 * @type: the QP type
 *
 * Wrapper that acquires r_lock, s_hlock, and s_lock before calling
 * _rvt_reset_qp().
 */
0933 static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
0934 enum ib_qp_type type)
0935 {
0936 spin_lock_irq(&qp->r_lock);
0937 spin_lock(&qp->s_hlock);
0938 spin_lock(&qp->s_lock);
0939 _rvt_reset_qp(rdi, qp, type);
0940 spin_unlock(&qp->s_lock);
0941 spin_unlock(&qp->s_hlock);
0942 spin_unlock_irq(&qp->r_lock);
0943 }

/**
 * rvt_free_qpn - free a qpn from the bit map
 * @qpt: QP table
 * @qpn: queue pair number to free
 */
0950 static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
0951 {
0952 struct rvt_qpn_map *map;
0953
0954 if ((qpn & RVT_AIP_QP_PREFIX_MASK) == RVT_AIP_QP_BASE)
0955 qpn &= RVT_AIP_QP_SUFFIX;
0956
0957 map = qpt->map + (qpn & RVT_QPN_MASK) / RVT_BITS_PER_PAGE;
0958 if (map->page)
0959 clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
0960 }

/*
 * get_allowed_ops - map the QP type to the base RC/UC/UD opcode group
 * used when validating posted work requests.
 */
0966 static u8 get_allowed_ops(enum ib_qp_type type)
0967 {
0968 return type == IB_QPT_RC ? IB_OPCODE_RC : type == IB_QPT_UC ?
0969 IB_OPCODE_UC : IB_OPCODE_UD;
0970 }

/**
 * free_ud_wq_attr - free the per-swqe AH attribute cache of a UD QP
 * @qp: a QP with allowed_ops already set
 */
0979 static void free_ud_wq_attr(struct rvt_qp *qp)
0980 {
0981 struct rvt_swqe *wqe;
0982 int i;
0983
0984 for (i = 0; qp->allowed_ops == IB_OPCODE_UD && i < qp->s_size; i++) {
0985 wqe = rvt_get_swqe_ptr(qp, i);
0986 kfree(wqe->ud_wr.attr);
0987 wqe->ud_wr.attr = NULL;
0988 }
0989 }

/**
 * alloc_ud_wq_attr - allocate the per-swqe AH attribute cache for UD QPs
 * @qp: a QP with allowed_ops already set
 * @node: NUMA node for the allocations
 *
 * Return: 0 on success, -ENOMEM on failure (partial allocations are
 * freed).
 */
0999 static int alloc_ud_wq_attr(struct rvt_qp *qp, int node)
1000 {
1001 struct rvt_swqe *wqe;
1002 int i;
1003
1004 for (i = 0; qp->allowed_ops == IB_OPCODE_UD && i < qp->s_size; i++) {
1005 wqe = rvt_get_swqe_ptr(qp, i);
1006 wqe->ud_wr.attr = kzalloc_node(sizeof(*wqe->ud_wr.attr),
1007 GFP_KERNEL, node);
1008 if (!wqe->ud_wr.attr) {
1009 free_ud_wq_attr(qp);
1010 return -ENOMEM;
1011 }
1012 }
1013
1014 return 0;
1015 }

/**
 * rvt_create_qp - create a queue pair for a device
 * @ibqp: the queue pair
 * @init_attr: the attributes of the queue pair
 * @udata: user data for libibverbs.so
 *
 * Queue pair creation is mostly an rvt issue.  However, drivers have
 * their own unique idea of what queue pair numbers mean, so the
 * driver's alloc_qpn()/qp_priv_init() hooks are consulted.
 *
 * Return: 0 on success, otherwise an errno.
 *
 * Called by the ib_create_qp() core verbs function.
 */
1031 int rvt_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
1032 struct ib_udata *udata)
1033 {
1034 struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1035 int ret = -ENOMEM;
1036 struct rvt_swqe *swq = NULL;
1037 size_t sz;
1038 size_t sg_list_sz = 0;
1039 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1040 void *priv = NULL;
1041 size_t sqsize;
1042 u8 exclude_prefix = 0;
1043
1044 if (!rdi)
1045 return -EINVAL;
1046
1047 if (init_attr->create_flags & ~IB_QP_CREATE_NETDEV_USE)
1048 return -EOPNOTSUPP;
1049
1050 if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge ||
1051 init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr)
1052 return -EINVAL;
1053
1054
1055 if (!init_attr->srq) {
1056 if (init_attr->cap.max_recv_sge >
1057 rdi->dparms.props.max_recv_sge ||
1058 init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
1059 return -EINVAL;
1060
1061 if (init_attr->cap.max_send_sge +
1062 init_attr->cap.max_send_wr +
1063 init_attr->cap.max_recv_sge +
1064 init_attr->cap.max_recv_wr == 0)
1065 return -EINVAL;
1066 }
1067 sqsize =
1068 init_attr->cap.max_send_wr + 1 +
1069 rdi->dparms.reserved_operations;
1070 switch (init_attr->qp_type) {
1071 case IB_QPT_SMI:
1072 case IB_QPT_GSI:
1073 if (init_attr->port_num == 0 ||
1074 init_attr->port_num > ibqp->device->phys_port_cnt)
1075 return -EINVAL;
1076 fallthrough;
1077 case IB_QPT_UC:
1078 case IB_QPT_RC:
1079 case IB_QPT_UD:
1080 sz = struct_size(swq, sg_list, init_attr->cap.max_send_sge);
1081 swq = vzalloc_node(array_size(sz, sqsize), rdi->dparms.node);
1082 if (!swq)
1083 return -ENOMEM;
1084
1085 if (init_attr->srq) {
1086 struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq);
1087
1088 if (srq->rq.max_sge > 1)
1089 sg_list_sz = sizeof(*qp->r_sg_list) *
1090 (srq->rq.max_sge - 1);
1091 } else if (init_attr->cap.max_recv_sge > 1)
1092 sg_list_sz = sizeof(*qp->r_sg_list) *
1093 (init_attr->cap.max_recv_sge - 1);
1094 qp->r_sg_list =
1095 kzalloc_node(sg_list_sz, GFP_KERNEL, rdi->dparms.node);
1096 if (!qp->r_sg_list)
1097 goto bail_qp;
1098 qp->allowed_ops = get_allowed_ops(init_attr->qp_type);
1099
1100 RCU_INIT_POINTER(qp->next, NULL);
1101 if (init_attr->qp_type == IB_QPT_RC) {
1102 qp->s_ack_queue =
1103 kcalloc_node(rvt_max_atomic(rdi),
1104 sizeof(*qp->s_ack_queue),
1105 GFP_KERNEL,
1106 rdi->dparms.node);
1107 if (!qp->s_ack_queue)
1108 goto bail_qp;
1109 }
1110
1111 timer_setup(&qp->s_timer, rvt_rc_timeout, 0);
1112 hrtimer_init(&qp->s_rnr_timer, CLOCK_MONOTONIC,
1113 HRTIMER_MODE_REL);
1114 qp->s_rnr_timer.function = rvt_rc_rnr_retry;

		/*
		 * The driver sets up its private QP structure and does any
		 * initialization that is needed.
		 */
1120 priv = rdi->driver_f.qp_priv_alloc(rdi, qp);
1121 if (IS_ERR(priv)) {
1122 ret = PTR_ERR(priv);
1123 goto bail_qp;
1124 }
1125 qp->priv = priv;
1126 qp->timeout_jiffies =
1127 usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
1128 1000UL);
1129 if (init_attr->srq) {
1130 sz = 0;
1131 } else {
1132 qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
1133 qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
1134 sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
1135 sizeof(struct rvt_rwqe);
1136 ret = rvt_alloc_rq(&qp->r_rq, qp->r_rq.size * sz,
1137 rdi->dparms.node, udata);
1138 if (ret)
1139 goto bail_driver_priv;
1140 }

		/*
		 * ib_create_qp() has already initialized most of qp->ibqp;
		 * qp_num is filled in once a QPN is allocated below.
		 */
1146 spin_lock_init(&qp->r_lock);
1147 spin_lock_init(&qp->s_hlock);
1148 spin_lock_init(&qp->s_lock);
1149 atomic_set(&qp->refcount, 0);
1150 atomic_set(&qp->local_ops_pending, 0);
1151 init_waitqueue_head(&qp->wait);
1152 INIT_LIST_HEAD(&qp->rspwait);
1153 qp->state = IB_QPS_RESET;
1154 qp->s_wq = swq;
1155 qp->s_size = sqsize;
1156 qp->s_avail = init_attr->cap.max_send_wr;
1157 qp->s_max_sge = init_attr->cap.max_send_sge;
1158 if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
1159 qp->s_flags = RVT_S_SIGNAL_REQ_WR;
1160 ret = alloc_ud_wq_attr(qp, rdi->dparms.node);
1161 if (ret)
1162 goto bail_rq_rvt;
1163
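		/*
		 * QPs created for netdev (accelerated IP) use are allocated
		 * from the dedicated RVT_AIP prefix range.
		 */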
1164 if (init_attr->create_flags & IB_QP_CREATE_NETDEV_USE)
1165 exclude_prefix = RVT_AIP_QP_PREFIX;
1166
1167 ret = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
1168 init_attr->qp_type,
1169 init_attr->port_num,
1170 exclude_prefix);
1171 if (ret < 0)
1172 goto bail_rq_wq;
1173
1174 qp->ibqp.qp_num = ret;
1175 if (init_attr->create_flags & IB_QP_CREATE_NETDEV_USE)
1176 qp->ibqp.qp_num |= RVT_AIP_QP_BASE;
1177 qp->port_num = init_attr->port_num;
1178 rvt_init_qp(rdi, qp, init_attr->qp_type);
1179 if (rdi->driver_f.qp_priv_init) {
1180 ret = rdi->driver_f.qp_priv_init(rdi, qp, init_attr);
1181 if (ret)
1182 goto bail_rq_wq;
1183 }
1184 break;
1185
1186 default:
1187
1188 return -EOPNOTSUPP;
1189 }
1190
1191 init_attr->cap.max_inline_data = 0;

	/*
	 * Return the address of the RWQ as the mmap offset; see rvt_mmap()
	 * for details.
	 */
1197 if (udata && udata->outlen >= sizeof(__u64)) {
1198 if (!qp->r_rq.wq) {
1199 __u64 offset = 0;
1200
1201 ret = ib_copy_to_udata(udata, &offset,
1202 sizeof(offset));
1203 if (ret)
1204 goto bail_qpn;
1205 } else {
1206 u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;
1207
1208 qp->ip = rvt_create_mmap_info(rdi, s, udata,
1209 qp->r_rq.wq);
1210 if (IS_ERR(qp->ip)) {
1211 ret = PTR_ERR(qp->ip);
1212 goto bail_qpn;
1213 }
1214
1215 ret = ib_copy_to_udata(udata, &qp->ip->offset,
1216 sizeof(qp->ip->offset));
1217 if (ret)
1218 goto bail_ip;
1219 }
1220 qp->pid = current->pid;
1221 }
1222
1223 spin_lock(&rdi->n_qps_lock);
1224 if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) {
1225 spin_unlock(&rdi->n_qps_lock);
1226 ret = -ENOMEM;
1227 goto bail_ip;
1228 }
1229
1230 rdi->n_qps_allocated++;

	/*
	 * busy_jiffies scales with the number of RC QPs and is added to
	 * every RC retry timeout, giving heavily loaded devices extra
	 * slack before a retry fires.
	 */
1240 if (init_attr->qp_type == IB_QPT_RC) {
1241 rdi->n_rc_qps++;
1242 rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
1243 }
1244 spin_unlock(&rdi->n_qps_lock);
1245
1246 if (qp->ip) {
1247 spin_lock_irq(&rdi->pending_lock);
1248 list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps);
1249 spin_unlock_irq(&rdi->pending_lock);
1250 }
1251
1252 return 0;
1253
1254 bail_ip:
1255 if (qp->ip)
1256 kref_put(&qp->ip->ref, rvt_release_mmap_info);
1257
1258 bail_qpn:
1259 rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
1260
1261 bail_rq_wq:
1262 free_ud_wq_attr(qp);
1263
1264 bail_rq_rvt:
1265 rvt_free_rq(&qp->r_rq);
1266
1267 bail_driver_priv:
1268 rdi->driver_f.qp_priv_free(rdi, qp);
1269
1270 bail_qp:
1271 kfree(qp->s_ack_queue);
1272 kfree(qp->r_sg_list);
1273 vfree(swq);
1274 return ret;
1275 }

/**
 * rvt_error_qp - put a QP into the error state
 * @qp: the QP to put into the error state
 * @err: the receive completion error to signal if a RWQE is active
 *
 * Flushes both send and receive work queues.
 *
 * The QP r_lock and s_lock should be held and interrupts disabled.
 * If we are already in error state, just return.
 *
 * Return: true if last WQE event should be generated.
 */
1288 int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
1289 {
1290 struct ib_wc wc;
1291 int ret = 0;
1292 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
1293
1294 lockdep_assert_held(&qp->r_lock);
1295 lockdep_assert_held(&qp->s_lock);
1296 if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
1297 goto bail;
1298
1299 qp->state = IB_QPS_ERR;
1300
1301 if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
1302 qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
1303 del_timer(&qp->s_timer);
1304 }
1305
1306 if (qp->s_flags & RVT_S_ANY_WAIT_SEND)
1307 qp->s_flags &= ~RVT_S_ANY_WAIT_SEND;
1308
1309 rdi->driver_f.notify_error_qp(qp);
1310
1311
1312 if (READ_ONCE(qp->s_last) != qp->s_head)
1313 rdi->driver_f.schedule_send(qp);
1314
1315 rvt_clear_mr_refs(qp, 0);
1316
1317 memset(&wc, 0, sizeof(wc));
1318 wc.qp = &qp->ibqp;
1319 wc.opcode = IB_WC_RECV;
1320
1321 if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) {
1322 wc.wr_id = qp->r_wr_id;
1323 wc.status = err;
1324 rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
1325 }
1326 wc.status = IB_WC_WR_FLUSH_ERR;
1327
1328 if (qp->r_rq.kwq) {
1329 u32 head;
1330 u32 tail;
1331 struct rvt_rwq *wq = NULL;
1332 struct rvt_krwq *kwq = NULL;
1333
1334 spin_lock(&qp->r_rq.kwq->c_lock);
1335
1336 if (qp->ip) {
1337 wq = qp->r_rq.wq;
1338 head = RDMA_READ_UAPI_ATOMIC(wq->head);
1339 tail = RDMA_READ_UAPI_ATOMIC(wq->tail);
1340 } else {
1341 kwq = qp->r_rq.kwq;
1342 head = kwq->head;
1343 tail = kwq->tail;
1344 }
1345
1346 if (head >= qp->r_rq.size)
1347 head = 0;
1348 if (tail >= qp->r_rq.size)
1349 tail = 0;
1350 while (tail != head) {
1351 wc.wr_id = rvt_get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
1352 if (++tail >= qp->r_rq.size)
1353 tail = 0;
1354 rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
1355 }
1356 if (qp->ip)
1357 RDMA_WRITE_UAPI_ATOMIC(wq->tail, tail);
1358 else
1359 kwq->tail = tail;
1360 spin_unlock(&qp->r_rq.kwq->c_lock);
1361 } else if (qp->ibqp.event_handler) {
1362 ret = 1;
1363 }
1364
1365 bail:
1366 return ret;
1367 }
1368 EXPORT_SYMBOL(rvt_error_qp);

/*
 * rvt_insert_qp - put the QP into the hash table (or the per-port
 * QP0/QP1 slot) so the receive path can find it.  The table holds a
 * reference to the QP.
 */
1374 static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
1375 {
1376 struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
1377 unsigned long flags;
1378
1379 rvt_get_qp(qp);
1380 spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
1381
1382 if (qp->ibqp.qp_num <= 1) {
1383 rcu_assign_pointer(rvp->qp[qp->ibqp.qp_num], qp);
1384 } else {
1385 u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
1386
1387 qp->next = rdi->qp_dev->qp_table[n];
1388 rcu_assign_pointer(rdi->qp_dev->qp_table[n], qp);
1389 trace_rvt_qpinsert(qp, n);
1390 }
1391
1392 spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
1393 }

/**
 * rvt_modify_qp - modify the attributes of a queue pair
 * @ibqp: the queue pair whose attributes we're modifying
 * @attr: the new attributes
 * @attr_mask: the mask of attributes to modify
 * @udata: user data for libibverbs.so
 *
 * Return: 0 on success, otherwise returns an errno.
 */
1404 int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1405 int attr_mask, struct ib_udata *udata)
1406 {
1407 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1408 struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1409 enum ib_qp_state cur_state, new_state;
1410 struct ib_event ev;
1411 int lastwqe = 0;
1412 int mig = 0;
1413 int pmtu = 0;
1414 int opa_ah;
1415
1416 if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
1417 return -EOPNOTSUPP;
1418
1419 spin_lock_irq(&qp->r_lock);
1420 spin_lock(&qp->s_hlock);
1421 spin_lock(&qp->s_lock);
1422
1423 cur_state = attr_mask & IB_QP_CUR_STATE ?
1424 attr->cur_qp_state : qp->state;
1425 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1426 opa_ah = rdma_cap_opa_ah(ibqp->device, qp->port_num);
1427
1428 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
1429 attr_mask))
1430 goto inval;
1431
1432 if (rdi->driver_f.check_modify_qp &&
1433 rdi->driver_f.check_modify_qp(qp, attr, attr_mask, udata))
1434 goto inval;
1435
1436 if (attr_mask & IB_QP_AV) {
1437 if (opa_ah) {
1438 if (rdma_ah_get_dlid(&attr->ah_attr) >=
1439 opa_get_mcast_base(OPA_MCAST_NR))
1440 goto inval;
1441 } else {
1442 if (rdma_ah_get_dlid(&attr->ah_attr) >=
1443 be16_to_cpu(IB_MULTICAST_LID_BASE))
1444 goto inval;
1445 }
1446
1447 if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr))
1448 goto inval;
1449 }
1450
1451 if (attr_mask & IB_QP_ALT_PATH) {
1452 if (opa_ah) {
1453 if (rdma_ah_get_dlid(&attr->alt_ah_attr) >=
1454 opa_get_mcast_base(OPA_MCAST_NR))
1455 goto inval;
1456 } else {
1457 if (rdma_ah_get_dlid(&attr->alt_ah_attr) >=
1458 be16_to_cpu(IB_MULTICAST_LID_BASE))
1459 goto inval;
1460 }
1461
1462 if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
1463 goto inval;
1464 if (attr->alt_pkey_index >= rvt_get_npkeys(rdi))
1465 goto inval;
1466 }
1467
1468 if (attr_mask & IB_QP_PKEY_INDEX)
1469 if (attr->pkey_index >= rvt_get_npkeys(rdi))
1470 goto inval;
1471
1472 if (attr_mask & IB_QP_MIN_RNR_TIMER)
1473 if (attr->min_rnr_timer > 31)
1474 goto inval;
1475
1476 if (attr_mask & IB_QP_PORT)
1477 if (qp->ibqp.qp_type == IB_QPT_SMI ||
1478 qp->ibqp.qp_type == IB_QPT_GSI ||
1479 attr->port_num == 0 ||
1480 attr->port_num > ibqp->device->phys_port_cnt)
1481 goto inval;
1482
1483 if (attr_mask & IB_QP_DEST_QPN)
1484 if (attr->dest_qp_num > RVT_QPN_MASK)
1485 goto inval;
1486
1487 if (attr_mask & IB_QP_RETRY_CNT)
1488 if (attr->retry_cnt > 7)
1489 goto inval;
1490
1491 if (attr_mask & IB_QP_RNR_RETRY)
1492 if (attr->rnr_retry > 7)
1493 goto inval;

	/*
	 * The driver validates and converts the requested path MTU; a
	 * negative return value means the MTU is not usable on this
	 * device/QP.
	 */
1503 if (attr_mask & IB_QP_PATH_MTU) {
1504 pmtu = rdi->driver_f.get_pmtu_from_attr(rdi, qp, attr);
1505 if (pmtu < 0)
1506 goto inval;
1507 }
1508
1509 if (attr_mask & IB_QP_PATH_MIG_STATE) {
1510 if (attr->path_mig_state == IB_MIG_REARM) {
1511 if (qp->s_mig_state == IB_MIG_ARMED)
1512 goto inval;
1513 if (new_state != IB_QPS_RTS)
1514 goto inval;
1515 } else if (attr->path_mig_state == IB_MIG_MIGRATED) {
1516 if (qp->s_mig_state == IB_MIG_REARM)
1517 goto inval;
1518 if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
1519 goto inval;
1520 if (qp->s_mig_state == IB_MIG_ARMED)
1521 mig = 1;
1522 } else {
1523 goto inval;
1524 }
1525 }
1526
1527 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1528 if (attr->max_dest_rd_atomic > rdi->dparms.max_rdma_atomic)
1529 goto inval;
1530
1531 switch (new_state) {
1532 case IB_QPS_RESET:
1533 if (qp->state != IB_QPS_RESET)
1534 _rvt_reset_qp(rdi, qp, ibqp->qp_type);
1535 break;
1536
1537 case IB_QPS_RTR:
1538
1539 qp->r_flags &= ~RVT_R_COMM_EST;
1540 qp->state = new_state;
1541 break;
1542
1543 case IB_QPS_SQD:
1544 qp->s_draining = qp->s_last != qp->s_cur;
1545 qp->state = new_state;
1546 break;
1547
1548 case IB_QPS_SQE:
1549 if (qp->ibqp.qp_type == IB_QPT_RC)
1550 goto inval;
1551 qp->state = new_state;
1552 break;
1553
1554 case IB_QPS_ERR:
1555 lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1556 break;
1557
1558 default:
1559 qp->state = new_state;
1560 break;
1561 }
1562
1563 if (attr_mask & IB_QP_PKEY_INDEX)
1564 qp->s_pkey_index = attr->pkey_index;
1565
1566 if (attr_mask & IB_QP_PORT)
1567 qp->port_num = attr->port_num;
1568
1569 if (attr_mask & IB_QP_DEST_QPN)
1570 qp->remote_qpn = attr->dest_qp_num;
1571
1572 if (attr_mask & IB_QP_SQ_PSN) {
1573 qp->s_next_psn = attr->sq_psn & rdi->dparms.psn_modify_mask;
1574 qp->s_psn = qp->s_next_psn;
1575 qp->s_sending_psn = qp->s_next_psn;
1576 qp->s_last_psn = qp->s_next_psn - 1;
1577 qp->s_sending_hpsn = qp->s_last_psn;
1578 }
1579
1580 if (attr_mask & IB_QP_RQ_PSN)
1581 qp->r_psn = attr->rq_psn & rdi->dparms.psn_modify_mask;
1582
1583 if (attr_mask & IB_QP_ACCESS_FLAGS)
1584 qp->qp_access_flags = attr->qp_access_flags;
1585
1586 if (attr_mask & IB_QP_AV) {
1587 rdma_replace_ah_attr(&qp->remote_ah_attr, &attr->ah_attr);
1588 qp->s_srate = rdma_ah_get_static_rate(&attr->ah_attr);
1589 qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
1590 }
1591
1592 if (attr_mask & IB_QP_ALT_PATH) {
1593 rdma_replace_ah_attr(&qp->alt_ah_attr, &attr->alt_ah_attr);
1594 qp->s_alt_pkey_index = attr->alt_pkey_index;
1595 }
1596
1597 if (attr_mask & IB_QP_PATH_MIG_STATE) {
1598 qp->s_mig_state = attr->path_mig_state;
1599 if (mig) {
1600 qp->remote_ah_attr = qp->alt_ah_attr;
1601 qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
1602 qp->s_pkey_index = qp->s_alt_pkey_index;
1603 }
1604 }
1605
1606 if (attr_mask & IB_QP_PATH_MTU) {
1607 qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu);
1608 qp->log_pmtu = ilog2(qp->pmtu);
1609 }
1610
1611 if (attr_mask & IB_QP_RETRY_CNT) {
1612 qp->s_retry_cnt = attr->retry_cnt;
1613 qp->s_retry = attr->retry_cnt;
1614 }
1615
1616 if (attr_mask & IB_QP_RNR_RETRY) {
1617 qp->s_rnr_retry_cnt = attr->rnr_retry;
1618 qp->s_rnr_retry = attr->rnr_retry;
1619 }
1620
1621 if (attr_mask & IB_QP_MIN_RNR_TIMER)
1622 qp->r_min_rnr_timer = attr->min_rnr_timer;
1623
1624 if (attr_mask & IB_QP_TIMEOUT) {
1625 qp->timeout = attr->timeout;
1626 qp->timeout_jiffies = rvt_timeout_to_jiffies(qp->timeout);
1627 }
1628
1629 if (attr_mask & IB_QP_QKEY)
1630 qp->qkey = attr->qkey;
1631
1632 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1633 qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
1634
1635 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
1636 qp->s_max_rd_atomic = attr->max_rd_atomic;
1637
1638 if (rdi->driver_f.modify_qp)
1639 rdi->driver_f.modify_qp(qp, attr, attr_mask, udata);
1640
1641 spin_unlock(&qp->s_lock);
1642 spin_unlock(&qp->s_hlock);
1643 spin_unlock_irq(&qp->r_lock);
1644
1645 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1646 rvt_insert_qp(rdi, qp);
1647
1648 if (lastwqe) {
1649 ev.device = qp->ibqp.device;
1650 ev.element.qp = &qp->ibqp;
1651 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1652 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1653 }
1654 if (mig) {
1655 ev.device = qp->ibqp.device;
1656 ev.element.qp = &qp->ibqp;
1657 ev.event = IB_EVENT_PATH_MIG;
1658 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1659 }
1660 return 0;
1661
1662 inval:
1663 spin_unlock(&qp->s_lock);
1664 spin_unlock(&qp->s_hlock);
1665 spin_unlock_irq(&qp->r_lock);
1666 return -EINVAL;
1667 }

/**
 * rvt_destroy_qp - destroy a queue pair
 * @ibqp: the queue pair to destroy
 * @udata: unused by the driver
 *
 * Note that this can be called while the QP is actively sending or
 * receiving!
 *
 * Return: always 0.
 */
1679 int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
1680 {
1681 struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1682 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1683
1684 rvt_reset_qp(rdi, qp, ibqp->qp_type);
1685
1686 wait_event(qp->wait, !atomic_read(&qp->refcount));
1687
1688 rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
1689
1690 spin_lock(&rdi->n_qps_lock);
1691 rdi->n_qps_allocated--;
1692 if (qp->ibqp.qp_type == IB_QPT_RC) {
1693 rdi->n_rc_qps--;
1694 rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
1695 }
1696 spin_unlock(&rdi->n_qps_lock);
1697
1698 if (qp->ip)
1699 kref_put(&qp->ip->ref, rvt_release_mmap_info);
1700 kvfree(qp->r_rq.kwq);
1701 rdi->driver_f.qp_priv_free(rdi, qp);
1702 kfree(qp->s_ack_queue);
1703 kfree(qp->r_sg_list);
1704 rdma_destroy_ah_attr(&qp->remote_ah_attr);
1705 rdma_destroy_ah_attr(&qp->alt_ah_attr);
1706 free_ud_wq_attr(qp);
1707 vfree(qp->s_wq);
1708 return 0;
1709 }

/**
 * rvt_query_qp - query the attributes of a queue pair
 * @ibqp: IB qp to query
 * @attr: attr struct to fill in
 * @attr_mask: attr mask ignored
 * @init_attr: struct to fill in
 *
 * Return: always 0
 */
1720 int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1721 int attr_mask, struct ib_qp_init_attr *init_attr)
1722 {
1723 struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1724 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1725
1726 attr->qp_state = qp->state;
1727 attr->cur_qp_state = attr->qp_state;
1728 attr->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu);
1729 attr->path_mig_state = qp->s_mig_state;
1730 attr->qkey = qp->qkey;
1731 attr->rq_psn = qp->r_psn & rdi->dparms.psn_mask;
1732 attr->sq_psn = qp->s_next_psn & rdi->dparms.psn_mask;
1733 attr->dest_qp_num = qp->remote_qpn;
1734 attr->qp_access_flags = qp->qp_access_flags;
1735 attr->cap.max_send_wr = qp->s_size - 1 -
1736 rdi->dparms.reserved_operations;
1737 attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
1738 attr->cap.max_send_sge = qp->s_max_sge;
1739 attr->cap.max_recv_sge = qp->r_rq.max_sge;
1740 attr->cap.max_inline_data = 0;
1741 attr->ah_attr = qp->remote_ah_attr;
1742 attr->alt_ah_attr = qp->alt_ah_attr;
1743 attr->pkey_index = qp->s_pkey_index;
1744 attr->alt_pkey_index = qp->s_alt_pkey_index;
1745 attr->en_sqd_async_notify = 0;
1746 attr->sq_draining = qp->s_draining;
1747 attr->max_rd_atomic = qp->s_max_rd_atomic;
1748 attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
1749 attr->min_rnr_timer = qp->r_min_rnr_timer;
1750 attr->port_num = qp->port_num;
1751 attr->timeout = qp->timeout;
1752 attr->retry_cnt = qp->s_retry_cnt;
1753 attr->rnr_retry = qp->s_rnr_retry_cnt;
1754 attr->alt_port_num =
1755 rdma_ah_get_port_num(&qp->alt_ah_attr);
1756 attr->alt_timeout = qp->alt_timeout;
1757
1758 init_attr->event_handler = qp->ibqp.event_handler;
1759 init_attr->qp_context = qp->ibqp.qp_context;
1760 init_attr->send_cq = qp->ibqp.send_cq;
1761 init_attr->recv_cq = qp->ibqp.recv_cq;
1762 init_attr->srq = qp->ibqp.srq;
1763 init_attr->cap = attr->cap;
1764 if (qp->s_flags & RVT_S_SIGNAL_REQ_WR)
1765 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
1766 else
1767 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
1768 init_attr->qp_type = qp->ibqp.qp_type;
1769 init_attr->port_num = qp->port_num;
1770 return 0;
1771 }

/**
 * rvt_post_recv - post a receive on a queue pair
 * @ibqp: the QP to post the receive on
 * @wr: the WR to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 *
 * Return: 0 on success otherwise errno
 */
1783 int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1784 const struct ib_recv_wr **bad_wr)
1785 {
1786 struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1787 struct rvt_krwq *wq = qp->r_rq.kwq;
1788 unsigned long flags;
1789 int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) &&
1790 !qp->ibqp.srq;
1791
1792
1793 if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) {
1794 *bad_wr = wr;
1795 return -EINVAL;
1796 }
1797
1798 for (; wr; wr = wr->next) {
1799 struct rvt_rwqe *wqe;
1800 u32 next;
1801 int i;
1802
1803 if ((unsigned)wr->num_sge > qp->r_rq.max_sge) {
1804 *bad_wr = wr;
1805 return -EINVAL;
1806 }
1807
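		/*
		 * The queue is full when advancing head would make it equal
		 * to tail; one slot is always left unused.
		 */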
1808 spin_lock_irqsave(&qp->r_rq.kwq->p_lock, flags);
1809 next = wq->head + 1;
1810 if (next >= qp->r_rq.size)
1811 next = 0;
1812 if (next == READ_ONCE(wq->tail)) {
1813 spin_unlock_irqrestore(&qp->r_rq.kwq->p_lock, flags);
1814 *bad_wr = wr;
1815 return -ENOMEM;
1816 }
1817 if (unlikely(qp_err_flush)) {
1818 struct ib_wc wc;
1819
1820 memset(&wc, 0, sizeof(wc));
1821 wc.qp = &qp->ibqp;
1822 wc.opcode = IB_WC_RECV;
1823 wc.wr_id = wr->wr_id;
1824 wc.status = IB_WC_WR_FLUSH_ERR;
1825 rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
1826 } else {
1827 wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head);
1828 wqe->wr_id = wr->wr_id;
1829 wqe->num_sge = wr->num_sge;
1830 for (i = 0; i < wr->num_sge; i++) {
1831 wqe->sg_list[i].addr = wr->sg_list[i].addr;
1832 wqe->sg_list[i].length = wr->sg_list[i].length;
1833 wqe->sg_list[i].lkey = wr->sg_list[i].lkey;
1834 }

			/*
			 * Make sure the queue entry is fully written before
			 * publishing the new head index.
			 */
1839 smp_store_release(&wq->head, next);
1840 }
1841 spin_unlock_irqrestore(&qp->r_rq.kwq->p_lock, flags);
1842 }
1843 return 0;
1844 }

/**
 * rvt_qp_valid_operation - validate the operation in a send WR
 * @qp: the rvt_qp
 * @post_parms: the driver's post parameter table
 * @wr: the work request
 *
 * Checks that the opcode is supported by this QP type, that privileged
 * operations only come from kernel PDs, and that atomic operations have
 * a properly sized and aligned first SGE.  For UD QPs the PD of the AH
 * must match the QP's PD and the copied length is that of ib_ud_wr.
 *
 * Return: a negative errno, or the number of bytes of the WR to copy
 * into the swqe.
 */
1864 static inline int rvt_qp_valid_operation(
1865 struct rvt_qp *qp,
1866 const struct rvt_operation_params *post_parms,
1867 const struct ib_send_wr *wr)
1868 {
1869 int len;
1870
1871 if (wr->opcode >= RVT_OPERATION_MAX || !post_parms[wr->opcode].length)
1872 return -EINVAL;
1873 if (!(post_parms[wr->opcode].qpt_support & BIT(qp->ibqp.qp_type)))
1874 return -EINVAL;
1875 if ((post_parms[wr->opcode].flags & RVT_OPERATION_PRIV) &&
1876 ibpd_to_rvtpd(qp->ibqp.pd)->user)
1877 return -EINVAL;
1878 if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC_SGE &&
1879 (wr->num_sge == 0 ||
1880 wr->sg_list[0].length < sizeof(u64) ||
1881 wr->sg_list[0].addr & (sizeof(u64) - 1)))
1882 return -EINVAL;
1883 if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC &&
1884 !qp->s_max_rd_atomic)
1885 return -EINVAL;
1886 len = post_parms[wr->opcode].length;
1887
1888 if (qp->ibqp.qp_type != IB_QPT_UC &&
1889 qp->ibqp.qp_type != IB_QPT_RC) {
1890 if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
1891 return -EINVAL;
1892 len = sizeof(struct ib_ud_wr);
1893 }
1894 return len;
1895 }

/**
 * rvt_qp_is_avail - determine whether the send queue has room
 * @qp: the qp
 * @rdi: the rdmavt device
 * @reserved_op: true if this is a reserved operation
 *
 * Reserved operations consume only the reserved slots; normal
 * operations recompute s_avail from s_head/s_last when the cached
 * value reaches zero.
 *
 * Return: 0 if there is space, -ENOMEM otherwise.
 */
1911 static inline int rvt_qp_is_avail(
1912 struct rvt_qp *qp,
1913 struct rvt_dev_info *rdi,
1914 bool reserved_op)
1915 {
1916 u32 slast;
1917 u32 avail;
1918 u32 reserved_used;
1919
1920
1921 smp_mb__before_atomic();
1922 if (unlikely(reserved_op)) {
1923
1924 reserved_used = atomic_read(&qp->s_reserved_used);
1925 if (reserved_used >= rdi->dparms.reserved_operations)
1926 return -ENOMEM;
1927 return 0;
1928 }
1929
1930 if (likely(qp->s_avail))
1931 return 0;
1932
1933 slast = smp_load_acquire(&qp->s_last);
1934 if (qp->s_head >= slast)
1935 avail = qp->s_size - (qp->s_head - slast);
1936 else
1937 avail = slast - qp->s_head;
1938
1939 reserved_used = atomic_read(&qp->s_reserved_used);
1940 avail = avail - 1 -
1941 (rdi->dparms.reserved_operations - reserved_used);
1942
1943 if ((s32)avail <= 0)
1944 return -ENOMEM;
1945 qp->s_avail = avail;
1946 if (WARN_ON(qp->s_avail >
1947 (qp->s_size - 1 - rdi->dparms.reserved_operations)))
1948 rvt_pr_err(rdi,
1949 "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u",
1950 qp->ibqp.qp_num, qp->s_size, qp->s_avail,
1951 qp->s_head, qp->s_tail, qp->s_cur,
1952 qp->s_acked, qp->s_last);
1953 return 0;
1954 }

/**
 * rvt_post_one_wr - post one RC, UC, or UD send work request
 * @qp: the QP to post on
 * @wr: the work request to send
 * @call_send: kick the send engine into gear
 */
1962 static int rvt_post_one_wr(struct rvt_qp *qp,
1963 const struct ib_send_wr *wr,
1964 bool *call_send)
1965 {
1966 struct rvt_swqe *wqe;
1967 u32 next;
1968 int i;
1969 int j;
1970 int acc;
1971 struct rvt_lkey_table *rkt;
1972 struct rvt_pd *pd;
1973 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
1974 u8 log_pmtu;
1975 int ret;
1976 size_t cplen;
1977 bool reserved_op;
1978 int local_ops_delayed = 0;
1979
1980 BUILD_BUG_ON(IB_QPT_MAX >= (sizeof(u32) * BITS_PER_BYTE));
1981
1982
1983 if (unlikely(wr->num_sge > qp->s_max_sge))
1984 return -EINVAL;
1985
1986 ret = rvt_qp_valid_operation(qp, rdi->post_parms, wr);
1987 if (ret < 0)
1988 return ret;
1989 cplen = ret;

	/*
	 * Local operations (MR registration and invalidation) are performed
	 * inline where possible; they are queued as WQEs only when a
	 * signaled completion is requested or (for invalidate) when they
	 * must be fenced behind prior work.
	 */
2002 if ((rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL)) {
2003 switch (wr->opcode) {
2004 case IB_WR_REG_MR:
2005 ret = rvt_fast_reg_mr(qp,
2006 reg_wr(wr)->mr,
2007 reg_wr(wr)->key,
2008 reg_wr(wr)->access);
2009 if (ret || !(wr->send_flags & IB_SEND_SIGNALED))
2010 return ret;
2011 break;
2012 case IB_WR_LOCAL_INV:
2013 if ((wr->send_flags & IB_SEND_FENCE) ||
2014 atomic_read(&qp->local_ops_pending)) {
2015 local_ops_delayed = 1;
2016 } else {
2017 ret = rvt_invalidate_rkey(
2018 qp, wr->ex.invalidate_rkey);
2019 if (ret || !(wr->send_flags & IB_SEND_SIGNALED))
2020 return ret;
2021 }
2022 break;
2023 default:
2024 return -EINVAL;
2025 }
2026 }
2027
2028 reserved_op = rdi->post_parms[wr->opcode].flags &
2029 RVT_OPERATION_USE_RESERVE;
2030
2031 ret = rvt_qp_is_avail(qp, rdi, reserved_op);
2032 if (ret)
2033 return ret;
2034 next = qp->s_head + 1;
2035 if (next >= qp->s_size)
2036 next = 0;
2037
2038 rkt = &rdi->lkey_table;
2039 pd = ibpd_to_rvtpd(qp->ibqp.pd);
2040 wqe = rvt_get_swqe_ptr(qp, qp->s_head);
2041
2042
2043 memcpy(&wqe->wr, wr, cplen);
2044
2045 wqe->length = 0;
2046 j = 0;
2047 if (wr->num_sge) {
2048 struct rvt_sge *last_sge = NULL;
2049
2050 acc = wr->opcode >= IB_WR_RDMA_READ ?
2051 IB_ACCESS_LOCAL_WRITE : 0;
2052 for (i = 0; i < wr->num_sge; i++) {
2053 u32 length = wr->sg_list[i].length;
2054
2055 if (length == 0)
2056 continue;
2057 ret = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], last_sge,
2058 &wr->sg_list[i], acc);
2059 if (unlikely(ret < 0))
2060 goto bail_inval_free;
2061 wqe->length += length;
2062 if (ret)
2063 last_sge = &wqe->sg_list[j];
2064 j += ret;
2065 }
2066 wqe->wr.num_sge = j;
2067 }
2068

	/*
	 * UD QPs take their MTU from the AH rather than the QP, and cache
	 * a copy of the AH attributes in the swqe.
	 */
2074 log_pmtu = qp->log_pmtu;
2075 if (qp->allowed_ops == IB_OPCODE_UD) {
2076 struct rvt_ah *ah = rvt_get_swqe_ah(wqe);
2077
2078 log_pmtu = ah->log_pmtu;
2079 rdma_copy_ah_attr(wqe->ud_wr.attr, &ah->attr);
2080 }
2081
2082 if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) {
2083 if (local_ops_delayed)
2084 atomic_inc(&qp->local_ops_pending);
2085 else
2086 wqe->wr.send_flags |= RVT_SEND_COMPLETION_ONLY;
2087 wqe->ssn = 0;
2088 wqe->psn = 0;
2089 wqe->lpsn = 0;
2090 } else {
2091 wqe->ssn = qp->s_ssn++;
2092 wqe->psn = qp->s_next_psn;
2093 wqe->lpsn = wqe->psn +
2094 (wqe->length ?
2095 ((wqe->length - 1) >> log_pmtu) :
2096 0);
2097 }
2098
2099
2100 if (rdi->driver_f.setup_wqe) {
2101 ret = rdi->driver_f.setup_wqe(qp, wqe, call_send);
2102 if (ret < 0)
2103 goto bail_inval_free_ref;
2104 }
2105
2106 if (!(rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL))
2107 qp->s_next_psn = wqe->lpsn + 1;
2108
2109 if (unlikely(reserved_op)) {
2110 wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
2111 rvt_qp_wqe_reserve(qp, wqe);
2112 } else {
2113 wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
2114 qp->s_avail--;
2115 }
2116 trace_rvt_post_one_wr(qp, wqe, wr->num_sge);
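	/*
	 * Order the WQE writes before publishing the new head index to the
	 * send engine.
	 */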
2117 smp_wmb();
2118 qp->s_head = next;
2119
2120 return 0;
2121
2122 bail_inval_free_ref:
2123 if (qp->allowed_ops == IB_OPCODE_UD)
2124 rdma_destroy_ah_attr(wqe->ud_wr.attr);
2125 bail_inval_free:
2126
2127 while (j) {
2128 struct rvt_sge *sge = &wqe->sg_list[--j];
2129
2130 rvt_put_mr(sge->mr);
2131 }
2132 return ret;
2133 }

/**
 * rvt_post_send - post a send on a queue pair
 * @ibqp: the QP to post the send on
 * @wr: the list of work requests to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 *
 * Return: 0 on success else errno
 */
2145 int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
2146 const struct ib_send_wr **bad_wr)
2147 {
2148 struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
2149 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
2150 unsigned long flags = 0;
2151 bool call_send;
2152 unsigned nreq = 0;
2153 int err = 0;
2154
2155 spin_lock_irqsave(&qp->s_hlock, flags);

	/* Ensure the QP state allows posting sends. */
2161 if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) {
2162 spin_unlock_irqrestore(&qp->s_hlock, flags);
2163 return -EINVAL;
2164 }

	/*
	 * A single WR posted on an empty send queue can be handed straight
	 * to the driver's do_send() instead of scheduling the send engine.
	 */
2171 call_send = qp->s_head == READ_ONCE(qp->s_last) && !wr->next;
2172
2173 for (; wr; wr = wr->next) {
2174 err = rvt_post_one_wr(qp, wr, &call_send);
2175 if (unlikely(err)) {
2176 *bad_wr = wr;
2177 goto bail;
2178 }
2179 nreq++;
2180 }
2181 bail:
2182 spin_unlock_irqrestore(&qp->s_hlock, flags);
2183 if (nreq) {
2184
2185
2186
2187
2188 if (nreq == 1 && call_send)
2189 rdi->driver_f.do_send(qp);
2190 else
2191 rdi->driver_f.schedule_send_no_lock(qp);
2192 }
2193 return err;
2194 }

/**
 * rvt_post_srq_recv - post a receive on a shared receive queue
 * @ibsrq: the SRQ to post the receive on
 * @wr: the list of work requests to post
 * @bad_wr: A pointer to the first WR to cause a problem is put here
 *
 * This may be called from interrupt context.
 *
 * Return: 0 on success else errno
 */
2206 int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
2207 const struct ib_recv_wr **bad_wr)
2208 {
2209 struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
2210 struct rvt_krwq *wq;
2211 unsigned long flags;
2212
2213 for (; wr; wr = wr->next) {
2214 struct rvt_rwqe *wqe;
2215 u32 next;
2216 int i;
2217
2218 if ((unsigned)wr->num_sge > srq->rq.max_sge) {
2219 *bad_wr = wr;
2220 return -EINVAL;
2221 }
2222
2223 spin_lock_irqsave(&srq->rq.kwq->p_lock, flags);
2224 wq = srq->rq.kwq;
2225 next = wq->head + 1;
2226 if (next >= srq->rq.size)
2227 next = 0;
2228 if (next == READ_ONCE(wq->tail)) {
2229 spin_unlock_irqrestore(&srq->rq.kwq->p_lock, flags);
2230 *bad_wr = wr;
2231 return -ENOMEM;
2232 }
2233
2234 wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head);
2235 wqe->wr_id = wr->wr_id;
2236 wqe->num_sge = wr->num_sge;
2237 for (i = 0; i < wr->num_sge; i++) {
2238 wqe->sg_list[i].addr = wr->sg_list[i].addr;
2239 wqe->sg_list[i].length = wr->sg_list[i].length;
2240 wqe->sg_list[i].lkey = wr->sg_list[i].lkey;
2241 }
2242
2243 smp_store_release(&wq->head, next);
2244 spin_unlock_irqrestore(&srq->rq.kwq->p_lock, flags);
2245 }
2246 return 0;
2247 }

/*
 * rvt_cast_sge - view a struct rvt_wqe_sge as a struct ib_sge; the
 * BUILD_BUG_ONs below verify that the two layouts stay compatible.
 */
2254 static struct ib_sge *rvt_cast_sge(struct rvt_wqe_sge *sge)
2255 {
2256 BUILD_BUG_ON(offsetof(struct ib_sge, addr) !=
2257 offsetof(struct rvt_wqe_sge, addr));
2258 BUILD_BUG_ON(offsetof(struct ib_sge, length) !=
2259 offsetof(struct rvt_wqe_sge, length));
2260 BUILD_BUG_ON(offsetof(struct ib_sge, lkey) !=
2261 offsetof(struct rvt_wqe_sge, lkey));
2262 return (struct ib_sge *)sge;
2263 }

/**
 * init_sge - set up the receive SGE state for a receive WQE
 * @qp: the QP
 * @wqe: the receive WQE being consumed
 *
 * Return: 1 on success; 0 if an lkey failed validation, in which case a
 * flush completion with IB_WC_LOC_PROT_ERR is generated.
 */
2269 static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
2270 {
2271 int i, j, ret;
2272 struct ib_wc wc;
2273 struct rvt_lkey_table *rkt;
2274 struct rvt_pd *pd;
2275 struct rvt_sge_state *ss;
2276 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
2277
2278 rkt = &rdi->lkey_table;
2279 pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
2280 ss = &qp->r_sge;
2281 ss->sg_list = qp->r_sg_list;
2282 qp->r_len = 0;
2283 for (i = j = 0; i < wqe->num_sge; i++) {
2284 if (wqe->sg_list[i].length == 0)
2285 continue;
2286
2287 ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
2288 NULL, rvt_cast_sge(&wqe->sg_list[i]),
2289 IB_ACCESS_LOCAL_WRITE);
2290 if (unlikely(ret <= 0))
2291 goto bad_lkey;
2292 qp->r_len += wqe->sg_list[i].length;
2293 j++;
2294 }
2295 ss->num_sge = j;
2296 ss->total_len = qp->r_len;
2297 return 1;
2298
2299 bad_lkey:
2300 while (j) {
2301 struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
2302
2303 rvt_put_mr(sge->mr);
2304 }
2305 ss->num_sge = 0;
2306 memset(&wc, 0, sizeof(wc));
2307 wc.wr_id = wqe->wr_id;
2308 wc.status = IB_WC_LOC_PROT_ERR;
2309 wc.opcode = IB_WC_RECV;
2310 wc.qp = &qp->ibqp;
2311
2312 rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
2313 return 0;
2314 }

/**
 * get_rvt_head - read the head index of a receive queue
 * @rq: the receive queue
 * @ip: the mmap info pointer; non-NULL means the queue lives in
 *	user-mapped memory
 *
 * Return: the current head index.
 */
2323 static inline u32 get_rvt_head(struct rvt_rq *rq, void *ip)
2324 {
2325 u32 head;
2326
2327 if (ip)
2328 head = RDMA_READ_UAPI_ATOMIC(rq->wq->head);
2329 else
2330 head = rq->kwq->head;
2331
2332 return head;
2333 }

/**
 * rvt_get_rwqe - copy the next RWQE into the QP's RWQE
 * @qp: the QP
 * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
 *
 * Can be called from interrupt level.
 *
 * Return: -1 if there is a local error, 0 if no RWQE is available,
 * otherwise return 1.
 */
2345 int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only)
2346 {
2347 unsigned long flags;
2348 struct rvt_rq *rq;
2349 struct rvt_krwq *kwq = NULL;
2350 struct rvt_rwq *wq;
2351 struct rvt_srq *srq;
2352 struct rvt_rwqe *wqe;
2353 void (*handler)(struct ib_event *, void *);
2354 u32 tail;
2355 u32 head;
2356 int ret;
2357 void *ip = NULL;
2358
2359 if (qp->ibqp.srq) {
2360 srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
2361 handler = srq->ibsrq.event_handler;
2362 rq = &srq->rq;
2363 ip = srq->ip;
2364 } else {
2365 srq = NULL;
2366 handler = NULL;
2367 rq = &qp->r_rq;
2368 ip = qp->ip;
2369 }
2370
2371 spin_lock_irqsave(&rq->kwq->c_lock, flags);
2372 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
2373 ret = 0;
2374 goto unlock;
2375 }
2376 kwq = rq->kwq;
2377 if (ip) {
2378 wq = rq->wq;
2379 tail = RDMA_READ_UAPI_ATOMIC(wq->tail);
2380 } else {
2381 tail = kwq->tail;
2382 }
2383
2384
2385 if (tail >= rq->size)
2386 tail = 0;
2387
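	/*
	 * kwq->count caches the number of posted-but-unconsumed WQEs so the
	 * (possibly user-mapped) head index does not have to be read on
	 * every call; refresh it only when it runs low.
	 */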
2388 if (kwq->count < RVT_RWQ_COUNT_THRESHOLD) {
2389 head = get_rvt_head(rq, ip);
2390 kwq->count = rvt_get_rq_count(rq, head, tail);
2391 }
2392 if (unlikely(kwq->count == 0)) {
2393 ret = 0;
2394 goto unlock;
2395 }
2396
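	/*
	 * Order the count/head checks above before reading the WQE contents
	 * below.
	 */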
2397 smp_rmb();
2398 wqe = rvt_get_rwqe_ptr(rq, tail);
2399
2400
2401
2402
2403
2404 if (++tail >= rq->size)
2405 tail = 0;
2406 if (ip)
2407 RDMA_WRITE_UAPI_ATOMIC(wq->tail, tail);
2408 else
2409 kwq->tail = tail;
2410 if (!wr_id_only && !init_sge(qp, wqe)) {
2411 ret = -1;
2412 goto unlock;
2413 }
2414 qp->r_wr_id = wqe->wr_id;
2415
2416 kwq->count--;
2417 ret = 1;
2418 set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
2419 if (handler) {

		/*
		 * Only recompute the exact count (which requires reading the
		 * possibly user-mapped head) when the cached count suggests
		 * the SRQ may have dropped below its limit.
		 */
2424 if (kwq->count < srq->limit) {
2425 kwq->count =
2426 rvt_get_rq_count(rq,
2427 get_rvt_head(rq, ip), tail);
2428 if (kwq->count < srq->limit) {
2429 struct ib_event ev;
2430
2431 srq->limit = 0;
2432 spin_unlock_irqrestore(&rq->kwq->c_lock, flags);
2433 ev.device = qp->ibqp.device;
2434 ev.element.srq = qp->ibqp.srq;
2435 ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
2436 handler(&ev, srq->ibsrq.srq_context);
2437 goto bail;
2438 }
2439 }
2440 }
2441 unlock:
2442 spin_unlock_irqrestore(&rq->kwq->c_lock, flags);
2443 bail:
2444 return ret;
2445 }
2446 EXPORT_SYMBOL(rvt_get_rwqe);

/**
 * rvt_comm_est - mark the QP connection as established and deliver the
 * IB_EVENT_COMM_EST event to the consumer, if one is registered
 * @qp: the QP
 */
2452 void rvt_comm_est(struct rvt_qp *qp)
2453 {
2454 qp->r_flags |= RVT_R_COMM_EST;
2455 if (qp->ibqp.event_handler) {
2456 struct ib_event ev;
2457
2458 ev.device = qp->ibqp.device;
2459 ev.element.qp = &qp->ibqp;
2460 ev.event = IB_EVENT_COMM_EST;
2461 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
2462 }
2463 }
2464 EXPORT_SYMBOL(rvt_comm_est);
2465
2466 void rvt_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
2467 {
2468 unsigned long flags;
2469 int lastwqe;
2470
2471 spin_lock_irqsave(&qp->s_lock, flags);
2472 lastwqe = rvt_error_qp(qp, err);
2473 spin_unlock_irqrestore(&qp->s_lock, flags);
2474
2475 if (lastwqe) {
2476 struct ib_event ev;
2477
2478 ev.device = qp->ibqp.device;
2479 ev.element.qp = &qp->ibqp;
2480 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
2481 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
2482 }
2483 }
2484 EXPORT_SYMBOL(rvt_rc_error);

/*
 * rvt_rnr_tbl_to_usec - convert a 5-bit RNR NAK timer code (the AETH
 * credit field) into microseconds.
 */
2491 unsigned long rvt_rnr_tbl_to_usec(u32 index)
2492 {
2493 return ib_rvt_rnr_table[(index & IB_AETH_CREDIT_MASK)];
2494 }
2495 EXPORT_SYMBOL(rvt_rnr_tbl_to_usec);
2496
2497 static inline unsigned long rvt_aeth_to_usec(u32 aeth)
2498 {
2499 return ib_rvt_rnr_table[(aeth >> IB_AETH_CREDIT_SHIFT) &
2500 IB_AETH_CREDIT_MASK];
2501 }

/**
 * rvt_add_retry_timer_ext - add/start the RC retry timer on the QP
 * @qp: the QP
 * @shift: left shift applied to the base timeout for multi-packet waits
 */
2509 void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift)
2510 {
2511 struct ib_qp *ibqp = &qp->ibqp;
2512 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
2513
2514 lockdep_assert_held(&qp->s_lock);
2515 qp->s_flags |= RVT_S_TIMER;
2516
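	/*
	 * The expiry is the QP's timeout (optionally scaled by @shift) plus
	 * busy_jiffies, which grows with the number of RC QPs on the device.
	 */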
2517 qp->s_timer.expires = jiffies + rdi->busy_jiffies +
2518 (qp->timeout_jiffies << shift);
2519 add_timer(&qp->s_timer);
2520 }
2521 EXPORT_SYMBOL(rvt_add_retry_timer_ext);

/**
 * rvt_add_rnr_timer - add/start an RNR timer on the QP
 * @qp: the QP
 * @aeth: the AETH carrying the RNR timeout code
 */
2528 void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth)
2529 {
2530 u32 to;
2531
2532 lockdep_assert_held(&qp->s_lock);
2533 qp->s_flags |= RVT_S_WAIT_RNR;
2534 to = rvt_aeth_to_usec(aeth);
2535 trace_rvt_rnrnak_add(qp, to);
2536 hrtimer_start(&qp->s_rnr_timer,
2537 ns_to_ktime(1000 * to), HRTIMER_MODE_REL_PINNED);
2538 }
2539 EXPORT_SYMBOL(rvt_add_rnr_timer);
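
/*
 * Illustrative sketch, not part of rdmavt: arming the RNR timer from a
 * driver's ACK-processing path after decoding an RNR NAK.  The function
 * name is hypothetical; the point is that the s_lock must be held around
 * the call, as the lockdep assertion above enforces.
 */
static __maybe_unused void example_handle_rnr_nak(struct rvt_qp *qp, u32 aeth)
{
	unsigned long flags;

	spin_lock_irqsave(&qp->s_lock, flags);
	rvt_add_rnr_timer(qp, aeth);
	spin_unlock_irqrestore(&qp->s_lock, flags);
}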

/**
 * rvt_stop_rc_timers - stop all timers
 * @qp: the QP
 *
 * Stop any pending retry or RNR timers; the caller must hold the s_lock.
 */
2546 void rvt_stop_rc_timers(struct rvt_qp *qp)
2547 {
2548 lockdep_assert_held(&qp->s_lock);
2549
2550 if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
2551 qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
2552 del_timer(&qp->s_timer);
2553 hrtimer_try_to_cancel(&qp->s_rnr_timer);
2554 }
2555 }
2556 EXPORT_SYMBOL(rvt_stop_rc_timers);

/**
 * rvt_stop_rnr_timer - stop an RNR timer
 * @qp: the QP
 *
 * Clear the RNR-wait state on the QP.  This is only called from the RNR
 * hrtimer handler, so the hrtimer itself does not need to be canceled
 * here.  The caller must hold the s_lock.
 */
2565 static void rvt_stop_rnr_timer(struct rvt_qp *qp)
2566 {
2567 lockdep_assert_held(&qp->s_lock);
2568
2569 if (qp->s_flags & RVT_S_WAIT_RNR) {
2570 qp->s_flags &= ~RVT_S_WAIT_RNR;
2571 trace_rvt_rnrnak_stop(qp, 0);
2572 }
2573 }
2574

/**
 * rvt_del_timers_sync - wait for any timeout routines to exit
 * @qp: the QP
 */
2579 void rvt_del_timers_sync(struct rvt_qp *qp)
2580 {
2581 del_timer_sync(&qp->s_timer);
2582 hrtimer_cancel(&qp->s_rnr_timer);
2583 }
2584 EXPORT_SYMBOL(rvt_del_timers_sync);

/*
 * This is called from s_timer for missing responses.
 */
2589 static void rvt_rc_timeout(struct timer_list *t)
2590 {
2591 struct rvt_qp *qp = from_timer(qp, t, s_timer);
2592 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
2593 unsigned long flags;
2594
2595 spin_lock_irqsave(&qp->r_lock, flags);
2596 spin_lock(&qp->s_lock);
2597 if (qp->s_flags & RVT_S_TIMER) {
2598 struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
2599
2600 qp->s_flags &= ~RVT_S_TIMER;
2601 rvp->n_rc_timeouts++;
2602 del_timer(&qp->s_timer);
2603 trace_rvt_rc_timeout(qp, qp->s_last_psn + 1);
2604 if (rdi->driver_f.notify_restart_rc)
2605 rdi->driver_f.notify_restart_rc(qp,
2606 qp->s_last_psn + 1,
2607 1);
2608 rdi->driver_f.schedule_send(qp);
2609 }
2610 spin_unlock(&qp->s_lock);
2611 spin_unlock_irqrestore(&qp->r_lock, flags);
2612 }

/*
 * This is the RNR hrtimer handler; it restarts the send engine
 * once the RNR NAK wait has expired.
 */
2617 enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t)
2618 {
2619 struct rvt_qp *qp = container_of(t, struct rvt_qp, s_rnr_timer);
2620 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
2621 unsigned long flags;
2622
2623 spin_lock_irqsave(&qp->s_lock, flags);
2624 rvt_stop_rnr_timer(qp);
2625 trace_rvt_rnrnak_timeout(qp, 0);
2626 rdi->driver_f.schedule_send(qp);
2627 spin_unlock_irqrestore(&qp->s_lock, flags);
2628 return HRTIMER_NORESTART;
2629 }
2630 EXPORT_SYMBOL(rvt_rc_rnr_retry);

/**
 * rvt_qp_iter_init - initialize a QP iterator
 * @rdi: rvt devinfo
 * @v: a 64-bit value passed through to the callback
 * @cb: user-defined callback
 *
 * This returns an iterator suitable for iterating QPs
 * in the system.
 *
 * The @cb is a user-defined callback and @v is a 64-bit
 * value passed to and relevant for processing in the
 * @cb.  An example use case would be to alter QP processing
 * based on criteria not part of the rvt_qp.
 *
 * Use cases that require memory allocation to succeed
 * must preallocate appropriately.
 *
 * Return: a pointer to an rvt_qp_iter or NULL
 */
2651 struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi,
2652 u64 v,
2653 void (*cb)(struct rvt_qp *qp, u64 v))
2654 {
2655 struct rvt_qp_iter *i;
2656
2657 i = kzalloc(sizeof(*i), GFP_KERNEL);
2658 if (!i)
2659 return NULL;
2660
2661 i->rdi = rdi;
2662
2663 i->specials = rdi->ibdev.phys_port_cnt * 2;
2664 i->v = v;
2665 i->cb = cb;
2666
2667 return i;
2668 }
2669 EXPORT_SYMBOL(rvt_qp_iter_init);

/**
 * rvt_qp_iter_next - return the next QP in iter
 * @iter: the iterator
 *
 * Fine-grained QP iterator suitable for use
 * with debugfs seq_file mechanisms.
 *
 * Updates iter->qp with the current QP when the return
 * value is 0.
 *
 * Return: 0 - iter->qp is valid, 1 - no more QPs
 */
2683 int rvt_qp_iter_next(struct rvt_qp_iter *iter)
2684 __must_hold(RCU)
2685 {
2686 int n = iter->n;
2687 int ret = 1;
2688 struct rvt_qp *pqp = iter->qp;
2689 struct rvt_qp *qp;
2690 struct rvt_dev_info *rdi = iter->rdi;

/*
 * The approach is to consider the special qps
 * as additional table entries before the
 * real hash table.  Since the qp code sets
 * the qp->next hash link to NULL, this works just fine.
 *
 * iter->specials is 2 * # ports
 *
 * n = 0..iter->specials is the special qp indices
 *
 * n = iter->specials..rdi->qp_dev->qp_table_size+iter->specials are
 * the potential hash bucket entries
 */
2706 for (; n < rdi->qp_dev->qp_table_size + iter->specials; n++) {
2707 if (pqp) {
2708 qp = rcu_dereference(pqp->next);
2709 } else {
2710 if (n < iter->specials) {
2711 struct rvt_ibport *rvp;
2712 int pidx;
2713
2714 pidx = n % rdi->ibdev.phys_port_cnt;
2715 rvp = rdi->ports[pidx];
2716 qp = rcu_dereference(rvp->qp[n & 1]);
2717 } else {
2718 qp = rcu_dereference(
2719 rdi->qp_dev->qp_table[
2720 (n - iter->specials)]);
2721 }
2722 }
2723 pqp = qp;
2724 if (qp) {
2725 iter->qp = qp;
2726 iter->n = n;
2727 return 0;
2728 }
2729 }
2730 return ret;
2731 }
2732 EXPORT_SYMBOL(rvt_qp_iter_next);
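
/*
 * Illustrative sketch, not part of rdmavt: driving the iterator by hand,
 * e.g. for a debugfs-style dump.  The function name and the print
 * callback are hypothetical; the callback must not sleep since it runs
 * under rcu_read_lock() here.
 */
static __maybe_unused void example_dump_qps(struct rvt_dev_info *rdi,
					    void (*print)(struct rvt_qp *qp, u64 v))
{
	struct rvt_qp_iter *iter = rvt_qp_iter_init(rdi, 0, print);

	if (!iter)
		return;

	rcu_read_lock();
	while (!rvt_qp_iter_next(iter))
		iter->cb(iter->qp, iter->v);
	rcu_read_unlock();
	kfree(iter);
}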

/**
 * rvt_qp_iter - iterate all QPs
 * @rdi: rvt devinfo
 * @v: a 64-bit value passed through to the callback
 * @cb: a callback
 *
 * This provides a way for iterating all QPs.
 *
 * The @cb is a user-defined callback and @v is a 64-bit
 * value passed to and relevant for processing in the
 * @cb.  An example use case would be to alter QP processing
 * based on criteria not part of the rvt_qp.
 *
 * The code has an internal iterator to simplify
 * non seq_file use cases.
 */
2750 void rvt_qp_iter(struct rvt_dev_info *rdi,
2751 u64 v,
2752 void (*cb)(struct rvt_qp *qp, u64 v))
2753 {
2754 int ret;
2755 struct rvt_qp_iter i = {
2756 .rdi = rdi,
2757 .specials = rdi->ibdev.phys_port_cnt * 2,
2758 .v = v,
2759 .cb = cb
2760 };
2761
2762 rcu_read_lock();
2763 do {
2764 ret = rvt_qp_iter_next(&i);
2765 if (!ret) {
2766 rvt_get_qp(i.qp);
2767 rcu_read_unlock();
2768 i.cb(i.qp, i.v);
2769 rcu_read_lock();
2770 rvt_put_qp(i.qp);
2771 }
2772 } while (!ret);
2773 rcu_read_unlock();
2774 }
2775 EXPORT_SYMBOL(rvt_qp_iter);
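
/*
 * Illustrative sketch, not part of rdmavt: a callback suitable for
 * rvt_qp_iter().  Packing an atomic counter into the opaque @v value is
 * a hypothetical convention of this example, not something rdmavt
 * requires; the names are made up.
 */
static __maybe_unused void example_count_rc_qp(struct rvt_qp *qp, u64 v)
{
	atomic_t *count = (atomic_t *)(uintptr_t)v;

	if (qp->ibqp.qp_type == IB_QPT_RC)
		atomic_inc(count);
}

static __maybe_unused void example_count_rc_qps(struct rvt_dev_info *rdi,
						atomic_t *count)
{
	rvt_qp_iter(rdi, (uintptr_t)count, example_count_rc_qp);
}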

/*
 * This should be called with s_lock held.
 */
2780 void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
2781 enum ib_wc_status status)
2782 {
2783 u32 old_last, last;
2784 struct rvt_dev_info *rdi;
2785
2786 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
2787 return;
2788 rdi = ib_to_rvt(qp->ibqp.device);
2789
2790 old_last = qp->s_last;
2791 trace_rvt_qp_send_completion(qp, wqe, old_last);
2792 last = rvt_qp_complete_swqe(qp, wqe, rdi->wc_opcode[wqe->wr.opcode],
2793 status);
2794 if (qp->s_acked == old_last)
2795 qp->s_acked = last;
2796 if (qp->s_cur == old_last)
2797 qp->s_cur = last;
2798 if (qp->s_tail == old_last)
2799 qp->s_tail = last;
2800 if (qp->state == IB_QPS_SQD && last == qp->s_cur)
2801 qp->s_draining = 0;
2802 }
2803 EXPORT_SYMBOL(rvt_send_complete);
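
/*
 * Illustrative sketch, not part of rdmavt: retiring the oldest send WQE
 * from a driver's ACK-processing path.  The function name is
 * hypothetical, the s_lock is assumed held, and a real driver would
 * first verify that an outstanding WQE exists (s_last != s_head).
 */
static __maybe_unused void example_retire_oldest_swqe(struct rvt_qp *qp)
{
	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);

	lockdep_assert_held(&qp->s_lock);
	rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
}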

/**
 * rvt_copy_sge - copy data to SGE memory
 * @qp: associated QP
 * @ss: the SGE state
 * @data: the data to copy
 * @length: the length of the data
 * @release: boolean to release MR
 * @copy_last: do a separate copy of the last 8 bytes
 */
2814 void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss,
2815 void *data, u32 length,
2816 bool release, bool copy_last)
2817 {
2818 struct rvt_sge *sge = &ss->sge;
2819 int i;
2820 bool in_last = false;
2821 bool cacheless_copy = false;
2822 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
2823 struct rvt_wss *wss = rdi->wss;
2824 unsigned int sge_copy_mode = rdi->dparms.sge_copy_mode;
2825
2826 if (sge_copy_mode == RVT_SGE_COPY_CACHELESS) {
2827 cacheless_copy = length >= PAGE_SIZE;
2828 } else if (sge_copy_mode == RVT_SGE_COPY_ADAPTIVE) {
2829 if (length >= PAGE_SIZE) {
/*
 * NOTE: this *assumes*:
 * o The first vaddr is the dest.
 * o If multiple pages, then vaddr is sequential.
 */
2835 wss_insert(wss, sge->vaddr);
2836 if (length >= (2 * PAGE_SIZE))
2837 wss_insert(wss, (sge->vaddr + PAGE_SIZE));
2838
2839 cacheless_copy = wss_exceeds_threshold(wss);
2840 } else {
2841 wss_advance_clean_counter(wss);
2842 }
2843 }
2844
2845 if (copy_last) {
2846 if (length > 8) {
2847 length -= 8;
2848 } else {
2849 copy_last = false;
2850 in_last = true;
2851 }
2852 }
2853
2854 again:
2855 while (length) {
2856 u32 len = rvt_get_sge_length(sge, length);
2857
2858 WARN_ON_ONCE(len == 0);
2859 if (unlikely(in_last)) {
/* enforce byte transfer ordering */
2861 for (i = 0; i < len; i++)
2862 ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
2863 } else if (cacheless_copy) {
2864 cacheless_memcpy(sge->vaddr, data, len);
2865 } else {
2866 memcpy(sge->vaddr, data, len);
2867 }
2868 rvt_update_sge(ss, len, release);
2869 data += len;
2870 length -= len;
2871 }
2872
2873 if (copy_last) {
2874 copy_last = false;
2875 in_last = true;
2876 length = 8;
2877 goto again;
2878 }
2879 }
2880 EXPORT_SYMBOL(rvt_copy_sge);
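
/*
 * Illustrative sketch, not part of rdmavt: copying one MTU of incoming
 * payload into the receive SGE state, the way a driver's receive path
 * might.  release=true drops MR references as segments complete;
 * copy_last=false here, since the loopback code below only requests the
 * separate last-8-byte copy for RDMA WRITEs to user QPs.  The function
 * name and parameters are hypothetical.
 */
static __maybe_unused void example_copy_payload(struct rvt_qp *qp,
						void *data, u32 pmtu)
{
	rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
}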
2881
2882 static enum ib_wc_status loopback_qp_drop(struct rvt_ibport *rvp,
2883 struct rvt_qp *sqp)
2884 {
2885 rvp->n_pkt_drops++;
/*
 * For RC, the requester would time out and retry, so shortcut
 * the timeouts and just signal too many retries.
 */
2890 return sqp->ibqp.qp_type == IB_QPT_RC ?
2891 IB_WC_RETRY_EXC_ERR : IB_WC_SUCCESS;
2892 }

/**
 * rvt_ruc_loopback - handle UC and RC loopback requests
 * @sqp: the sending QP
 *
 * This is called from the driver's send engine to forward a WQE addressed
 * to a QP on the same port (loopback).  Although the send engine keeps
 * this single threaded, it still has to protect against post_send().
 * Receive interrupts are not a concern since this is a connected
 * protocol and all packets pass through here.
 */
2904 void rvt_ruc_loopback(struct rvt_qp *sqp)
2905 {
2906 struct rvt_ibport *rvp = NULL;
2907 struct rvt_dev_info *rdi = ib_to_rvt(sqp->ibqp.device);
2908 struct rvt_qp *qp;
2909 struct rvt_swqe *wqe;
2910 struct rvt_sge *sge;
2911 unsigned long flags;
2912 struct ib_wc wc;
2913 u64 sdata;
2914 atomic64_t *maddr;
2915 enum ib_wc_status send_status;
2916 bool release;
2917 int ret;
2918 bool copy_last = false;
2919 int local_ops = 0;
2920
2921 rcu_read_lock();
2922 rvp = rdi->ports[sqp->port_num - 1];

/*
 * Note that we check the responder QP state after
 * checking the requester's state.
 */

2929 qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), rvp,
2930 sqp->remote_qpn);
2931
2932 spin_lock_irqsave(&sqp->s_lock, flags);

/* Return if we are already busy processing a work request. */
2935 if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
2936 !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
2937 goto unlock;
2938
2939 sqp->s_flags |= RVT_S_BUSY;
2940
2941 again:
2942 if (sqp->s_last == READ_ONCE(sqp->s_head))
2943 goto clr_busy;
2944 wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);

/* Return if it is not OK to start a new work request. */
2947 if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
2948 if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
2949 goto clr_busy;
2950
2951 send_status = IB_WC_WR_FLUSH_ERR;
2952 goto flush_send;
2953 }

/*
 * We can rely on the entry not changing without the s_lock
 * being held until we update s_last.
 * We increment s_cur to indicate s_last is in progress.
 */
2960 if (sqp->s_last == sqp->s_cur) {
2961 if (++sqp->s_cur >= sqp->s_size)
2962 sqp->s_cur = 0;
2963 }
2964 spin_unlock_irqrestore(&sqp->s_lock, flags);
2965
2966 if (!qp) {
2967 send_status = loopback_qp_drop(rvp, sqp);
2968 goto serr_no_r_lock;
2969 }
2970 spin_lock_irqsave(&qp->r_lock, flags);
2971 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
2972 qp->ibqp.qp_type != sqp->ibqp.qp_type) {
2973 send_status = loopback_qp_drop(rvp, sqp);
2974 goto serr;
2975 }
2976
2977 memset(&wc, 0, sizeof(wc));
2978 send_status = IB_WC_SUCCESS;
2979
2980 release = true;
2981 sqp->s_sge.sge = wqe->sg_list[0];
2982 sqp->s_sge.sg_list = wqe->sg_list + 1;
2983 sqp->s_sge.num_sge = wqe->wr.num_sge;
2984 sqp->s_len = wqe->length;
2985 switch (wqe->wr.opcode) {
2986 case IB_WR_REG_MR:
2987 goto send_comp;
2988
2989 case IB_WR_LOCAL_INV:
2990 if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
2991 if (rvt_invalidate_rkey(sqp,
2992 wqe->wr.ex.invalidate_rkey))
2993 send_status = IB_WC_LOC_PROT_ERR;
2994 local_ops = 1;
2995 }
2996 goto send_comp;
2997
2998 case IB_WR_SEND_WITH_INV:
2999 case IB_WR_SEND_WITH_IMM:
3000 case IB_WR_SEND:
3001 ret = rvt_get_rwqe(qp, false);
3002 if (ret < 0)
3003 goto op_err;
3004 if (!ret)
3005 goto rnr_nak;
3006 if (wqe->length > qp->r_len)
3007 goto inv_err;
3008 switch (wqe->wr.opcode) {
3009 case IB_WR_SEND_WITH_INV:
3010 if (!rvt_invalidate_rkey(qp,
3011 wqe->wr.ex.invalidate_rkey)) {
3012 wc.wc_flags = IB_WC_WITH_INVALIDATE;
3013 wc.ex.invalidate_rkey =
3014 wqe->wr.ex.invalidate_rkey;
3015 }
3016 break;
3017 case IB_WR_SEND_WITH_IMM:
3018 wc.wc_flags = IB_WC_WITH_IMM;
3019 wc.ex.imm_data = wqe->wr.ex.imm_data;
3020 break;
3021 default:
3022 break;
3023 }
3024 break;
3025
3026 case IB_WR_RDMA_WRITE_WITH_IMM:
3027 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
3028 goto inv_err;
3029 wc.wc_flags = IB_WC_WITH_IMM;
3030 wc.ex.imm_data = wqe->wr.ex.imm_data;
3031 ret = rvt_get_rwqe(qp, true);
3032 if (ret < 0)
3033 goto op_err;
3034 if (!ret)
3035 goto rnr_nak;
/* skip copy_last set and qp_access_flags recheck */
3037 goto do_write;
3038 case IB_WR_RDMA_WRITE:
3039 copy_last = rvt_is_user_qp(qp);
3040 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
3041 goto inv_err;
3042 do_write:
3043 if (wqe->length == 0)
3044 break;
3045 if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
3046 wqe->rdma_wr.remote_addr,
3047 wqe->rdma_wr.rkey,
3048 IB_ACCESS_REMOTE_WRITE)))
3049 goto acc_err;
3050 qp->r_sge.sg_list = NULL;
3051 qp->r_sge.num_sge = 1;
3052 qp->r_sge.total_len = wqe->length;
3053 break;
3054
3055 case IB_WR_RDMA_READ:
3056 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
3057 goto inv_err;
3058 if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
3059 wqe->rdma_wr.remote_addr,
3060 wqe->rdma_wr.rkey,
3061 IB_ACCESS_REMOTE_READ)))
3062 goto acc_err;
3063 release = false;
3064 sqp->s_sge.sg_list = NULL;
3065 sqp->s_sge.num_sge = 1;
3066 qp->r_sge.sge = wqe->sg_list[0];
3067 qp->r_sge.sg_list = wqe->sg_list + 1;
3068 qp->r_sge.num_sge = wqe->wr.num_sge;
3069 qp->r_sge.total_len = wqe->length;
3070 break;
3071
3072 case IB_WR_ATOMIC_CMP_AND_SWP:
3073 case IB_WR_ATOMIC_FETCH_AND_ADD:
3074 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
3075 goto inv_err;
3076 if (unlikely(wqe->atomic_wr.remote_addr & (sizeof(u64) - 1)))
3077 goto inv_err;
3078 if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
3079 wqe->atomic_wr.remote_addr,
3080 wqe->atomic_wr.rkey,
3081 IB_ACCESS_REMOTE_ATOMIC)))
3082 goto acc_err;
3083
3084 maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
3085 sdata = wqe->atomic_wr.compare_add;
3086 *(u64 *)sqp->s_sge.sge.vaddr =
3087 (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
3088 (u64)atomic64_add_return(sdata, maddr) - sdata :
3089 (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
3090 sdata, wqe->atomic_wr.swap);
3091 rvt_put_mr(qp->r_sge.sge.mr);
3092 qp->r_sge.num_sge = 0;
3093 goto send_comp;
3094
3095 default:
3096 send_status = IB_WC_LOC_QP_OP_ERR;
3097 goto serr;
3098 }
3099
3100 sge = &sqp->s_sge.sge;
3101 while (sqp->s_len) {
3102 u32 len = rvt_get_sge_length(sge, sqp->s_len);
3103
3104 WARN_ON_ONCE(len == 0);
3105 rvt_copy_sge(qp, &qp->r_sge, sge->vaddr,
3106 len, release, copy_last);
3107 rvt_update_sge(&sqp->s_sge, len, !release);
3108 sqp->s_len -= len;
3109 }
3110 if (release)
3111 rvt_put_ss(&qp->r_sge);
3112
3113 if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
3114 goto send_comp;
3115
3116 if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
3117 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
3118 else
3119 wc.opcode = IB_WC_RECV;
3120 wc.wr_id = qp->r_wr_id;
3121 wc.status = IB_WC_SUCCESS;
3122 wc.byte_len = wqe->length;
3123 wc.qp = &qp->ibqp;
3124 wc.src_qp = qp->remote_qpn;
3125 wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
3126 wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
3127 wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
3129 rvt_recv_cq(qp, &wc, wqe->wr.send_flags & IB_SEND_SOLICITED);
3130
3131 send_comp:
3132 spin_unlock_irqrestore(&qp->r_lock, flags);
3133 spin_lock_irqsave(&sqp->s_lock, flags);
3134 rvp->n_loop_pkts++;
3135 flush_send:
3136 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
3137 spin_lock(&sqp->r_lock);
3138 rvt_send_complete(sqp, wqe, send_status);
3139 spin_unlock(&sqp->r_lock);
3140 if (local_ops) {
3141 atomic_dec(&sqp->local_ops_pending);
3142 local_ops = 0;
3143 }
3144 goto again;
3145
3146 rnr_nak:
/* Handle RNR NAK */
3148 if (qp->ibqp.qp_type == IB_QPT_UC)
3149 goto send_comp;
3150 rvp->n_rnr_naks++;
/*
 * Note: we don't need the s_lock held since the BUSY flag
 * makes this single threaded.
 */
3155 if (sqp->s_rnr_retry == 0) {
3156 send_status = IB_WC_RNR_RETRY_EXC_ERR;
3157 goto serr;
3158 }
3159 if (sqp->s_rnr_retry_cnt < 7)
3160 sqp->s_rnr_retry--;
3161 spin_unlock_irqrestore(&qp->r_lock, flags);
3162 spin_lock_irqsave(&sqp->s_lock, flags);
3163 if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
3164 goto clr_busy;
3165 rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
3166 IB_AETH_CREDIT_SHIFT);
3167 goto clr_busy;
3168
3169 op_err:
3170 send_status = IB_WC_REM_OP_ERR;
3171 wc.status = IB_WC_LOC_QP_OP_ERR;
3172 goto err;
3173
3174 inv_err:
3175 send_status =
3176 sqp->ibqp.qp_type == IB_QPT_RC ?
3177 IB_WC_REM_INV_REQ_ERR :
3178 IB_WC_SUCCESS;
3179 wc.status = IB_WC_LOC_QP_OP_ERR;
3180 goto err;
3181
3182 acc_err:
3183 send_status = IB_WC_REM_ACCESS_ERR;
3184 wc.status = IB_WC_LOC_PROT_ERR;
3185 err:
/* responder goes to error state */
3187 rvt_rc_error(qp, wc.status);
3188
3189 serr:
3190 spin_unlock_irqrestore(&qp->r_lock, flags);
3191 serr_no_r_lock:
3192 spin_lock_irqsave(&sqp->s_lock, flags);
3193 spin_lock(&sqp->r_lock);
3194 rvt_send_complete(sqp, wqe, send_status);
3195 spin_unlock(&sqp->r_lock);
3196 if (sqp->ibqp.qp_type == IB_QPT_RC) {
3197 int lastwqe;
3198
3199 spin_lock(&sqp->r_lock);
3200 lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
3201 spin_unlock(&sqp->r_lock);
3202
3203 sqp->s_flags &= ~RVT_S_BUSY;
3204 spin_unlock_irqrestore(&sqp->s_lock, flags);
3205 if (lastwqe) {
3206 struct ib_event ev;
3207
3208 ev.device = sqp->ibqp.device;
3209 ev.element.qp = &sqp->ibqp;
3210 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
3211 sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
3212 }
3213 goto done;
3214 }
3215 clr_busy:
3216 sqp->s_flags &= ~RVT_S_BUSY;
3217 unlock:
3218 spin_unlock_irqrestore(&sqp->s_lock, flags);
3219 done:
3220 rcu_read_unlock();
3221 }
3222 EXPORT_SYMBOL(rvt_ruc_loopback);