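// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Work Requests exploiting Infiniband API
 *
 * Work requests (WR) of type ib_post_send or ib_post_recv respectively
 * are submitted to either RC SQ or RC RQ respectively
 * (reliably connected send/receive queue)
 * and become work queue entries (WQEs).
 * While an SQ WR/WQE is pending, we track it until transmission completion.
 * Through a send or receive completion queue (CQ) respectively,
 * we get completion queue entries (CQEs) [aka work completions (WCs)].
 * Since the CQ callback is called from IRQ context, we split work by using
 * bottom halves implemented by tasklets.
 *
 * SMC uses this to exchange LLC (link layer control)
 * and CDC (connection data control) messages.
 */
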
#include <linux/atomic.h>
#include <linux/hashtable.h>
#include <linux/wait.h>
#include <rdma/ib_verbs.h>
#include <asm/div64.h>

#include "smc.h"
#include "smc_wr.h"

#define SMC_WR_MAX_POLL_CQE 10	/* max. # of compl. queue elements in 1 poll */

#define SMC_WR_RX_HASH_BITS 4
static DEFINE_HASHTABLE(smc_wr_rx_hash, SMC_WR_RX_HASH_BITS);
static DEFINE_SPINLOCK(smc_wr_rx_hash_lock);

struct smc_wr_tx_pend {	/* control data for a pending send request */
	u64			wr_id;		/* work request id sent */
	smc_wr_tx_handler	handler;
	enum ib_wc_status	wc_status;	/* CQE status */
	struct smc_link		*link;
	u32			idx;
	struct smc_wr_tx_pend_priv priv;
	u8			compl_requested;
};
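
/******************************** send queue *********************************/

/*------------------------------- completion --------------------------------*/

/* returns true if at least one tx work request is pending on the given link */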
static inline bool smc_wr_is_tx_pend(struct smc_link *link)
{
	return !bitmap_empty(link->wr_tx_mask, link->wr_tx_cnt);
}
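
/* wait till all pending tx work requests on the given link are completed */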
void smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
{
	wait_event(link->wr_tx_wait, !smc_wr_is_tx_pend(link));
}

static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id)
{
	u32 i;

	for (i = 0; i < link->wr_tx_cnt; i++) {
		if (link->wr_tx_pends[i].wr_id == wr_id)
			return i;
	}
	return link->wr_tx_cnt;
}

static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
{
	struct smc_wr_tx_pend pnd_snd;
	struct smc_link *link;
	u32 pnd_snd_idx;

	link = wc->qp->qp_context;

	if (wc->opcode == IB_WC_REG_MR) {
		if (wc->status)
			link->wr_reg_state = FAILED;
		else
			link->wr_reg_state = CONFIRMED;
		smc_wr_wakeup_reg_wait(link);
		return;
	}

	pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
	if (pnd_snd_idx == link->wr_tx_cnt) {
		if (link->lgr->smc_version != SMC_V2 ||
		    link->wr_tx_v2_pend->wr_id != wc->wr_id)
			return;
		link->wr_tx_v2_pend->wc_status = wc->status;
		memcpy(&pnd_snd, link->wr_tx_v2_pend, sizeof(pnd_snd));
		/* clear the full struct smc_wr_tx_pend including .priv */
		memset(link->wr_tx_v2_pend, 0,
		       sizeof(*link->wr_tx_v2_pend));
		memset(link->lgr->wr_tx_buf_v2, 0,
		       sizeof(*link->lgr->wr_tx_buf_v2));
	} else {
		link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
		if (link->wr_tx_pends[pnd_snd_idx].compl_requested)
			complete(&link->wr_tx_compl[pnd_snd_idx]);
		memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx],
		       sizeof(pnd_snd));
		/* clear the full struct smc_wr_tx_pend including .priv */
		memset(&link->wr_tx_pends[pnd_snd_idx], 0,
		       sizeof(link->wr_tx_pends[pnd_snd_idx]));
		memset(&link->wr_tx_bufs[pnd_snd_idx], 0,
		       sizeof(link->wr_tx_bufs[pnd_snd_idx]));
		if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
			return;
	}

	if (wc->status) {
		if (link->lgr->smc_version == SMC_V2) {
			memset(link->wr_tx_v2_pend, 0,
			       sizeof(*link->wr_tx_v2_pend));
			memset(link->lgr->wr_tx_buf_v2, 0,
			       sizeof(*link->lgr->wr_tx_buf_v2));
		}
		/* terminate link */
		smcr_link_down_cond_sched(link);
	}
	if (pnd_snd.handler)
		pnd_snd.handler(&pnd_snd.priv, link, wc->status);
	wake_up(&link->wr_tx_wait);
}

static void smc_wr_tx_tasklet_fn(struct tasklet_struct *t)
{
	struct smc_ib_device *dev = from_tasklet(dev, t, send_tasklet);
	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
	int i = 0, rc;
	int polled = 0;

again:
	polled++;
	do {
		memset(&wc, 0, sizeof(wc));
		rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
		if (polled == 1) {
			ib_req_notify_cq(dev->roce_cq_send,
					 IB_CQ_NEXT_COMP |
					 IB_CQ_REPORT_MISSED_EVENTS);
		}
		if (!rc)
			break;
		for (i = 0; i < rc; i++)
			smc_wr_tx_process_cqe(&wc[i]);
	} while (rc > 0);
	if (polled == 1)
		goto again;
}

void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;

	tasklet_schedule(&dev->send_tasklet);
}
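
/*---------------------------- request submission ---------------------------*/
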
static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
{
	*idx = link->wr_tx_cnt;
	if (!smc_link_sendable(link))
		return -ENOLINK;
	for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) {
		if (!test_and_set_bit(*idx, link->wr_tx_mask))
			return 0;
	}
	*idx = link->wr_tx_cnt;
	return -EBUSY;
}
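
/**
 * smc_wr_tx_get_free_slot() - returns buffer for message assembly,
 *			and sets info for pending transmit tracking
 * @link:		Pointer to smc_link used to later send the message.
 * @handler:		Send completion handler function pointer.
 * @wr_buf:		Out value returns pointer to message buffer.
 * @wr_rdma_buf:	Out value returns pointer to rdma work request.
 * @wr_pend_priv:	Out value returns pointer serving as handler context.
 *
 * Return: 0 on success, or -errno on error.
 */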
int smc_wr_tx_get_free_slot(struct smc_link *link,
			    smc_wr_tx_handler handler,
			    struct smc_wr_buf **wr_buf,
			    struct smc_rdma_wr **wr_rdma_buf,
			    struct smc_wr_tx_pend_priv **wr_pend_priv)
{
	struct smc_link_group *lgr = smc_get_lgr(link);
	struct smc_wr_tx_pend *wr_pend;
	u32 idx = link->wr_tx_cnt;
	struct ib_send_wr *wr_ib;
	u64 wr_id;
	int rc;

	*wr_buf = NULL;
	*wr_pend_priv = NULL;
	if (in_softirq() || lgr->terminating) {
		rc = smc_wr_tx_get_free_slot_index(link, &idx);
		if (rc)
			return rc;
	} else {
		rc = wait_event_interruptible_timeout(
			link->wr_tx_wait,
			!smc_link_sendable(link) ||
			lgr->terminating ||
			(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
			SMC_WR_TX_WAIT_FREE_SLOT_TIME);
		if (!rc) {
			/* timeout - terminate link */
			smcr_link_down_cond_sched(link);
			return -EPIPE;
		}
		if (idx == link->wr_tx_cnt)
			return -EPIPE;
	}
	wr_id = smc_wr_tx_get_next_wr_id(link);
	wr_pend = &link->wr_tx_pends[idx];
	wr_pend->wr_id = wr_id;
	wr_pend->handler = handler;
	wr_pend->link = link;
	wr_pend->idx = idx;
	wr_ib = &link->wr_tx_ibs[idx];
	wr_ib->wr_id = wr_id;
	*wr_buf = &link->wr_tx_bufs[idx];
	if (wr_rdma_buf)
		*wr_rdma_buf = &link->wr_tx_rdmas[idx];
	*wr_pend_priv = &wr_pend->priv;
	return 0;
}

int smc_wr_tx_get_v2_slot(struct smc_link *link,
			  smc_wr_tx_handler handler,
			  struct smc_wr_v2_buf **wr_buf,
			  struct smc_wr_tx_pend_priv **wr_pend_priv)
{
	struct smc_wr_tx_pend *wr_pend;
	struct ib_send_wr *wr_ib;
	u64 wr_id;

	if (link->wr_tx_v2_pend->idx == link->wr_tx_cnt)
		return -EBUSY;

	*wr_buf = NULL;
	*wr_pend_priv = NULL;
	wr_id = smc_wr_tx_get_next_wr_id(link);
	wr_pend = link->wr_tx_v2_pend;
	wr_pend->wr_id = wr_id;
	wr_pend->handler = handler;
	wr_pend->link = link;
	wr_pend->idx = link->wr_tx_cnt;
	wr_ib = link->wr_tx_v2_ib;
	wr_ib->wr_id = wr_id;
	*wr_buf = link->lgr->wr_tx_buf_v2;
	*wr_pend_priv = &wr_pend->priv;
	return 0;
}

int smc_wr_tx_put_slot(struct smc_link *link,
		       struct smc_wr_tx_pend_priv *wr_pend_priv)
{
	struct smc_wr_tx_pend *pend;

	pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv);
	if (pend->idx < link->wr_tx_cnt) {
		u32 idx = pend->idx;

		/* clear the full struct smc_wr_tx_pend including .priv */
		memset(&link->wr_tx_pends[idx], 0,
		       sizeof(link->wr_tx_pends[idx]));
		memset(&link->wr_tx_bufs[idx], 0,
		       sizeof(link->wr_tx_bufs[idx]));
		test_and_clear_bit(idx, link->wr_tx_mask);
		wake_up(&link->wr_tx_wait);
		return 1;
	} else if (link->lgr->smc_version == SMC_V2 &&
		   pend->idx == link->wr_tx_cnt) {
		/* large v2 buffer: clear pend struct and buffer themselves,
		 * not the pointers to them
		 */
		memset(link->wr_tx_v2_pend, 0,
		       sizeof(*link->wr_tx_v2_pend));
		memset(link->lgr->wr_tx_buf_v2, 0,
		       sizeof(*link->lgr->wr_tx_buf_v2));
		return 1;
	}

	return 0;
}
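
/* Send prepared WR slot via ib_post_send.
 * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
 */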
int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
{
	struct smc_wr_tx_pend *pend;
	int rc;

	ib_req_notify_cq(link->smcibdev->roce_cq_send,
			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	pend = container_of(priv, struct smc_wr_tx_pend, priv);
	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
	if (rc) {
		smc_wr_tx_put_slot(link, priv);
		smcr_link_down_cond_sched(link);
	}
	return rc;
}

int smc_wr_tx_v2_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
		      int len)
{
	int rc;

	link->wr_tx_v2_ib->sg_list[0].length = len;
	ib_req_notify_cq(link->smcibdev->roce_cq_send,
			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	rc = ib_post_send(link->roce_qp, link->wr_tx_v2_ib, NULL);
	if (rc) {
		smc_wr_tx_put_slot(link, priv);
		smcr_link_down_cond_sched(link);
	}
	return rc;
}
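
/* Send prepared WR slot via ib_post_send and wait for send completion
 * notification.
 * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
 */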
int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
			unsigned long timeout)
{
	struct smc_wr_tx_pend *pend;
	u32 pnd_idx;
	int rc;

	pend = container_of(priv, struct smc_wr_tx_pend, priv);
	pend->compl_requested = 1;
	pnd_idx = pend->idx;
	init_completion(&link->wr_tx_compl[pnd_idx]);

	rc = smc_wr_tx_send(link, priv);
	if (rc)
		return rc;
	/* wait for completion by smc_wr_tx_process_cqe() */
	rc = wait_for_completion_interruptible_timeout(
					&link->wr_tx_compl[pnd_idx], timeout);
	if (rc <= 0)
		rc = -ENODATA;
	if (rc > 0)
		rc = 0;
	return rc;
}
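
/* Register a memory region and wait for result. */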
int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
{
	int rc;

	ib_req_notify_cq(link->smcibdev->roce_cq_send,
			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	link->wr_reg_state = POSTED;
	link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;
	link->wr_reg.mr = mr;
	link->wr_reg.key = mr->rkey;
	rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, NULL);
	if (rc)
		return rc;

	atomic_inc(&link->wr_reg_refcnt);
	rc = wait_event_interruptible_timeout(link->wr_reg_wait,
					      (link->wr_reg_state != POSTED),
					      SMC_WR_REG_MR_WAIT_TIME);
	if (atomic_dec_and_test(&link->wr_reg_refcnt))
		wake_up_all(&link->wr_reg_wait);
	if (!rc) {
		/* timeout - terminate link */
		smcr_link_down_cond_sched(link);
		return -EPIPE;
	}
	if (rc == -ERESTARTSYS)
		return -EINTR;
	switch (link->wr_reg_state) {
	case CONFIRMED:
		rc = 0;
		break;
	case FAILED:
		rc = -EIO;
		break;
	case POSTED:
		rc = -EPIPE;
		break;
	}
	return rc;
}
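
/****************************** receive queue ********************************/
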
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
{
	struct smc_wr_rx_handler *h_iter;
	int rc = 0;

	spin_lock(&smc_wr_rx_hash_lock);
	hash_for_each_possible(smc_wr_rx_hash, h_iter, list, handler->type) {
		if (h_iter->type == handler->type) {
			rc = -EEXIST;
			goto out_unlock;
		}
	}
	hash_add(smc_wr_rx_hash, &handler->list, handler->type);
out_unlock:
	spin_unlock(&smc_wr_rx_hash_lock);
	return rc;
}
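
/* Demultiplex a received work request based on the message type to its handler.
 * Relies on smc_wr_rx_hash having been completely filled before any IB WRs,
 * and not being modified anymore afterwards.
 */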
static inline void smc_wr_rx_demultiplex(struct ib_wc *wc)
{
	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
	struct smc_wr_rx_handler *handler;
	struct smc_wr_rx_hdr *wr_rx;
	u64 temp_wr_id;
	u32 index;

	if (wc->byte_len < sizeof(*wr_rx))
		return; /* short message */
	temp_wr_id = wc->wr_id;
	index = do_div(temp_wr_id, link->wr_rx_cnt);
	wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[index];
	hash_for_each_possible(smc_wr_rx_hash, handler, list, wr_rx->type) {
		if (handler->type == wr_rx->type)
			handler->handler(wc, wr_rx);
	}
}

static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
{
	struct smc_link *link;
	int i;

	for (i = 0; i < num; i++) {
		link = wc[i].qp->qp_context;
		link->wr_rx_id_compl = wc[i].wr_id;
		if (wc[i].status == IB_WC_SUCCESS) {
			link->wr_rx_tstamp = jiffies;
			smc_wr_rx_demultiplex(&wc[i]);
			smc_wr_rx_post(link); /* refill WR RX */
		} else {
			/* handle status errors */
			switch (wc[i].status) {
			case IB_WC_RETRY_EXC_ERR:
			case IB_WC_RNR_RETRY_EXC_ERR:
			case IB_WC_WR_FLUSH_ERR:
				smcr_link_down_cond_sched(link);
				if (link->wr_rx_id_compl == link->wr_rx_id)
					wake_up(&link->wr_rx_empty_wait);
				break;
			default:
				smc_wr_rx_post(link); /* refill WR RX */
				break;
			}
		}
	}
}

static void smc_wr_rx_tasklet_fn(struct tasklet_struct *t)
{
	struct smc_ib_device *dev = from_tasklet(dev, t, recv_tasklet);
	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
	int polled = 0;
	int rc;

again:
	polled++;
	do {
		memset(&wc, 0, sizeof(wc));
		rc = ib_poll_cq(dev->roce_cq_recv, SMC_WR_MAX_POLL_CQE, wc);
		if (polled == 1) {
			ib_req_notify_cq(dev->roce_cq_recv,
					 IB_CQ_SOLICITED_MASK
					 | IB_CQ_REPORT_MISSED_EVENTS);
		}
		if (!rc)
			break;
		smc_wr_rx_process_cqes(&wc[0], rc);
	} while (rc > 0);
	if (polled == 1)
		goto again;
}

void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;

	tasklet_schedule(&dev->recv_tasklet);
}

int smc_wr_rx_post_init(struct smc_link *link)
{
	u32 i;
	int rc = 0;

	for (i = 0; i < link->wr_rx_cnt; i++)
		rc = smc_wr_rx_post(link);
	return rc;
}
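
/***************************** init, exit, misc ******************************/
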
void smc_wr_remember_qp_attr(struct smc_link *lnk)
{
	struct ib_qp_attr *attr = &lnk->qp_attr;
	struct ib_qp_init_attr init_attr;

	memset(attr, 0, sizeof(*attr));
	memset(&init_attr, 0, sizeof(init_attr));
	ib_query_qp(lnk->roce_qp, attr,
		    IB_QP_STATE |
		    IB_QP_CUR_STATE |
		    IB_QP_PKEY_INDEX |
		    IB_QP_PORT |
		    IB_QP_QKEY |
		    IB_QP_AV |
		    IB_QP_PATH_MTU |
		    IB_QP_TIMEOUT |
		    IB_QP_RETRY_CNT |
		    IB_QP_RNR_RETRY |
		    IB_QP_RQ_PSN |
		    IB_QP_ALT_PATH |
		    IB_QP_MIN_RNR_TIMER |
		    IB_QP_SQ_PSN |
		    IB_QP_PATH_MIG_STATE |
		    IB_QP_CAP |
		    IB_QP_DEST_QPN,
		    &init_attr);

	lnk->wr_tx_cnt = min_t(size_t, SMC_WR_BUF_CNT,
			       lnk->qp_attr.cap.max_send_wr);
	lnk->wr_rx_cnt = min_t(size_t, SMC_WR_BUF_CNT * 3,
			       lnk->qp_attr.cap.max_recv_wr);
}

static void smc_wr_init_sge(struct smc_link *lnk)
{
	int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1;
	bool send_inline = (lnk->qp_attr.cap.max_inline_data > SMC_WR_TX_SIZE);
	u32 i;

	for (i = 0; i < lnk->wr_tx_cnt; i++) {
		lnk->wr_tx_sges[i].addr = send_inline ? (uintptr_t)(&lnk->wr_tx_bufs[i]) :
			lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
		lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
		lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey =
			lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey =
			lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey =
			lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey =
			lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_ibs[i].next = NULL;
		lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i];
		lnk->wr_tx_ibs[i].num_sge = 1;
		lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
		lnk->wr_tx_ibs[i].send_flags =
			IB_SEND_SIGNALED | IB_SEND_SOLICITED;
		if (send_inline)
			lnk->wr_tx_ibs[i].send_flags |= IB_SEND_INLINE;
		lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE;
		lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE;
		lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list =
			lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge;
		lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list =
			lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge;
	}

	if (lnk->lgr->smc_version == SMC_V2) {
		lnk->wr_tx_v2_sge->addr = lnk->wr_tx_v2_dma_addr;
		lnk->wr_tx_v2_sge->length = SMC_WR_BUF_V2_SIZE;
		lnk->wr_tx_v2_sge->lkey = lnk->roce_pd->local_dma_lkey;

		lnk->wr_tx_v2_ib->next = NULL;
		lnk->wr_tx_v2_ib->sg_list = lnk->wr_tx_v2_sge;
		lnk->wr_tx_v2_ib->num_sge = 1;
		lnk->wr_tx_v2_ib->opcode = IB_WR_SEND;
		lnk->wr_tx_v2_ib->send_flags =
			IB_SEND_SIGNALED | IB_SEND_SOLICITED;
	}

	/* With SMC-Rv2 there can be messages larger than SMC_WR_TX_SIZE.
	 * Each ib_recv_wr gets 2 sges, the second one is a spillover buffer
	 * and the same buffer for all sges. When a larger message arrived then
	 * the content of the first small sge is copied to the beginning of
	 * the larger spillover buffer, allowing easy data mapping.
	 */
	for (i = 0; i < lnk->wr_rx_cnt; i++) {
		int x = i * sges_per_buf;

		lnk->wr_rx_sges[x].addr =
			lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE;
		lnk->wr_rx_sges[x].length = SMC_WR_TX_SIZE;
		lnk->wr_rx_sges[x].lkey = lnk->roce_pd->local_dma_lkey;
		if (lnk->lgr->smc_version == SMC_V2) {
			lnk->wr_rx_sges[x + 1].addr =
				lnk->wr_rx_v2_dma_addr + SMC_WR_TX_SIZE;
			lnk->wr_rx_sges[x + 1].length =
				SMC_WR_BUF_V2_SIZE - SMC_WR_TX_SIZE;
			lnk->wr_rx_sges[x + 1].lkey =
				lnk->roce_pd->local_dma_lkey;
		}
		lnk->wr_rx_ibs[i].next = NULL;
		lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[x];
		lnk->wr_rx_ibs[i].num_sge = sges_per_buf;
	}
	lnk->wr_reg.wr.next = NULL;
	lnk->wr_reg.wr.num_sge = 0;
	lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED;
	lnk->wr_reg.wr.opcode = IB_WR_REG_MR;
	lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
}

void smc_wr_free_link(struct smc_link *lnk)
{
	struct ib_device *ibdev;

	if (!lnk->smcibdev)
		return;
	ibdev = lnk->smcibdev->ibdev;

	smc_wr_drain_cq(lnk);
	smc_wr_wakeup_reg_wait(lnk);
	smc_wr_wakeup_tx_wait(lnk);

	smc_wr_tx_wait_no_pending_sends(lnk);
	wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt)));
	wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt)));

	if (lnk->wr_rx_dma_addr) {
		ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
				    SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
				    DMA_FROM_DEVICE);
		lnk->wr_rx_dma_addr = 0;
	}
	if (lnk->wr_rx_v2_dma_addr) {
		ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr,
				    SMC_WR_BUF_V2_SIZE,
				    DMA_FROM_DEVICE);
		lnk->wr_rx_v2_dma_addr = 0;
	}
	if (lnk->wr_tx_dma_addr) {
		ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr,
				    SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
				    DMA_TO_DEVICE);
		lnk->wr_tx_dma_addr = 0;
	}
	if (lnk->wr_tx_v2_dma_addr) {
		ib_dma_unmap_single(ibdev, lnk->wr_tx_v2_dma_addr,
				    SMC_WR_BUF_V2_SIZE,
				    DMA_TO_DEVICE);
		lnk->wr_tx_v2_dma_addr = 0;
	}
}

void smc_wr_free_lgr_mem(struct smc_link_group *lgr)
{
	if (lgr->smc_version < SMC_V2)
		return;

	kfree(lgr->wr_rx_buf_v2);
	lgr->wr_rx_buf_v2 = NULL;
	kfree(lgr->wr_tx_buf_v2);
	lgr->wr_tx_buf_v2 = NULL;
}

void smc_wr_free_link_mem(struct smc_link *lnk)
{
	kfree(lnk->wr_tx_v2_ib);
	lnk->wr_tx_v2_ib = NULL;
	kfree(lnk->wr_tx_v2_sge);
	lnk->wr_tx_v2_sge = NULL;
	kfree(lnk->wr_tx_v2_pend);
	lnk->wr_tx_v2_pend = NULL;
	kfree(lnk->wr_tx_compl);
	lnk->wr_tx_compl = NULL;
	kfree(lnk->wr_tx_pends);
	lnk->wr_tx_pends = NULL;
	bitmap_free(lnk->wr_tx_mask);
	lnk->wr_tx_mask = NULL;
	kfree(lnk->wr_tx_sges);
	lnk->wr_tx_sges = NULL;
	kfree(lnk->wr_tx_rdma_sges);
	lnk->wr_tx_rdma_sges = NULL;
	kfree(lnk->wr_rx_sges);
	lnk->wr_rx_sges = NULL;
	kfree(lnk->wr_tx_rdmas);
	lnk->wr_tx_rdmas = NULL;
	kfree(lnk->wr_rx_ibs);
	lnk->wr_rx_ibs = NULL;
	kfree(lnk->wr_tx_ibs);
	lnk->wr_tx_ibs = NULL;
	kfree(lnk->wr_tx_bufs);
	lnk->wr_tx_bufs = NULL;
	kfree(lnk->wr_rx_bufs);
	lnk->wr_rx_bufs = NULL;
}

int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr)
{
	if (lgr->smc_version < SMC_V2)
		return 0;

	lgr->wr_rx_buf_v2 = kzalloc(SMC_WR_BUF_V2_SIZE, GFP_KERNEL);
	if (!lgr->wr_rx_buf_v2)
		return -ENOMEM;
	lgr->wr_tx_buf_v2 = kzalloc(SMC_WR_BUF_V2_SIZE, GFP_KERNEL);
	if (!lgr->wr_tx_buf_v2) {
		kfree(lgr->wr_rx_buf_v2);
		return -ENOMEM;
	}
	return 0;
}

int smc_wr_alloc_link_mem(struct smc_link *link)
{
	int sges_per_buf = link->lgr->smc_version == SMC_V2 ? 2 : 1;

	/* allocate link related memory */
	link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL);
	if (!link->wr_tx_bufs)
		goto no_mem;
	link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, SMC_WR_BUF_SIZE,
				   GFP_KERNEL);
	if (!link->wr_rx_bufs)
		goto no_mem_wr_tx_bufs;
	link->wr_tx_ibs = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_ibs[0]),
				  GFP_KERNEL);
	if (!link->wr_tx_ibs)
		goto no_mem_wr_rx_bufs;
	link->wr_rx_ibs = kcalloc(SMC_WR_BUF_CNT * 3,
				  sizeof(link->wr_rx_ibs[0]),
				  GFP_KERNEL);
	if (!link->wr_rx_ibs)
		goto no_mem_wr_tx_ibs;
	link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT,
				    sizeof(link->wr_tx_rdmas[0]),
				    GFP_KERNEL);
	if (!link->wr_tx_rdmas)
		goto no_mem_wr_rx_ibs;
	link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT,
					sizeof(link->wr_tx_rdma_sges[0]),
					GFP_KERNEL);
	if (!link->wr_tx_rdma_sges)
		goto no_mem_wr_tx_rdmas;
	link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]),
				   GFP_KERNEL);
	if (!link->wr_tx_sges)
		goto no_mem_wr_tx_rdma_sges;
	link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
				   sizeof(link->wr_rx_sges[0]) * sges_per_buf,
				   GFP_KERNEL);
	if (!link->wr_rx_sges)
		goto no_mem_wr_tx_sges;
	link->wr_tx_mask = bitmap_zalloc(SMC_WR_BUF_CNT, GFP_KERNEL);
	if (!link->wr_tx_mask)
		goto no_mem_wr_rx_sges;
	link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT,
				    sizeof(link->wr_tx_pends[0]),
				    GFP_KERNEL);
	if (!link->wr_tx_pends)
		goto no_mem_wr_tx_mask;
	link->wr_tx_compl = kcalloc(SMC_WR_BUF_CNT,
				    sizeof(link->wr_tx_compl[0]),
				    GFP_KERNEL);
	if (!link->wr_tx_compl)
		goto no_mem_wr_tx_pends;

	if (link->lgr->smc_version == SMC_V2) {
		link->wr_tx_v2_ib = kzalloc(sizeof(*link->wr_tx_v2_ib),
					    GFP_KERNEL);
		if (!link->wr_tx_v2_ib)
			goto no_mem_tx_compl;
		link->wr_tx_v2_sge = kzalloc(sizeof(*link->wr_tx_v2_sge),
					     GFP_KERNEL);
		if (!link->wr_tx_v2_sge)
			goto no_mem_v2_ib;
		link->wr_tx_v2_pend = kzalloc(sizeof(*link->wr_tx_v2_pend),
					      GFP_KERNEL);
		if (!link->wr_tx_v2_pend)
			goto no_mem_v2_sge;
	}
	return 0;

no_mem_v2_sge:
	kfree(link->wr_tx_v2_sge);
no_mem_v2_ib:
	kfree(link->wr_tx_v2_ib);
no_mem_tx_compl:
	kfree(link->wr_tx_compl);
no_mem_wr_tx_pends:
	kfree(link->wr_tx_pends);
no_mem_wr_tx_mask:
	bitmap_free(link->wr_tx_mask);
no_mem_wr_rx_sges:
	kfree(link->wr_rx_sges);
no_mem_wr_tx_sges:
	kfree(link->wr_tx_sges);
no_mem_wr_tx_rdma_sges:
	kfree(link->wr_tx_rdma_sges);
no_mem_wr_tx_rdmas:
	kfree(link->wr_tx_rdmas);
no_mem_wr_rx_ibs:
	kfree(link->wr_rx_ibs);
no_mem_wr_tx_ibs:
	kfree(link->wr_tx_ibs);
no_mem_wr_rx_bufs:
	kfree(link->wr_rx_bufs);
no_mem_wr_tx_bufs:
	kfree(link->wr_tx_bufs);
no_mem:
	return -ENOMEM;
}

void smc_wr_remove_dev(struct smc_ib_device *smcibdev)
{
	tasklet_kill(&smcibdev->recv_tasklet);
	tasklet_kill(&smcibdev->send_tasklet);
}

void smc_wr_add_dev(struct smc_ib_device *smcibdev)
{
	tasklet_setup(&smcibdev->recv_tasklet, smc_wr_rx_tasklet_fn);
	tasklet_setup(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn);
}

int smc_wr_create_link(struct smc_link *lnk)
{
	struct ib_device *ibdev = lnk->smcibdev->ibdev;
	int rc = 0;

	smc_wr_tx_set_wr_id(&lnk->wr_tx_id, 0);
	lnk->wr_rx_id = 0;
	lnk->wr_rx_dma_addr = ib_dma_map_single(
		ibdev, lnk->wr_rx_bufs, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
		DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(ibdev, lnk->wr_rx_dma_addr)) {
		lnk->wr_rx_dma_addr = 0;
		rc = -EIO;
		goto out;
	}
	if (lnk->lgr->smc_version == SMC_V2) {
		lnk->wr_rx_v2_dma_addr = ib_dma_map_single(ibdev,
			lnk->lgr->wr_rx_buf_v2, SMC_WR_BUF_V2_SIZE,
			DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(ibdev, lnk->wr_rx_v2_dma_addr)) {
			lnk->wr_rx_v2_dma_addr = 0;
			rc = -EIO;
			goto dma_unmap;
		}
		lnk->wr_tx_v2_dma_addr = ib_dma_map_single(ibdev,
			lnk->lgr->wr_tx_buf_v2, SMC_WR_BUF_V2_SIZE,
			DMA_TO_DEVICE);
		if (ib_dma_mapping_error(ibdev, lnk->wr_tx_v2_dma_addr)) {
			lnk->wr_tx_v2_dma_addr = 0;
			rc = -EIO;
			goto dma_unmap;
		}
	}
	lnk->wr_tx_dma_addr = ib_dma_map_single(
		ibdev, lnk->wr_tx_bufs, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
		DMA_TO_DEVICE);
	if (ib_dma_mapping_error(ibdev, lnk->wr_tx_dma_addr)) {
		rc = -EIO;
		goto dma_unmap;
	}
	smc_wr_init_sge(lnk);
	bitmap_zero(lnk->wr_tx_mask, SMC_WR_BUF_CNT);
	init_waitqueue_head(&lnk->wr_tx_wait);
	atomic_set(&lnk->wr_tx_refcnt, 0);
	init_waitqueue_head(&lnk->wr_reg_wait);
	atomic_set(&lnk->wr_reg_refcnt, 0);
	init_waitqueue_head(&lnk->wr_rx_empty_wait);
	return rc;

dma_unmap:
	if (lnk->wr_rx_v2_dma_addr) {
		ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr,
				    SMC_WR_BUF_V2_SIZE,
				    DMA_FROM_DEVICE);
		lnk->wr_rx_v2_dma_addr = 0;
	}
	if (lnk->wr_tx_v2_dma_addr) {
		ib_dma_unmap_single(ibdev, lnk->wr_tx_v2_dma_addr,
				    SMC_WR_BUF_V2_SIZE,
				    DMA_TO_DEVICE);
		lnk->wr_tx_v2_dma_addr = 0;
	}
	ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
			    SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
			    DMA_FROM_DEVICE);
	lnk->wr_rx_dma_addr = 0;
out:
	return rc;
}