#include <rdma/ib_cm.h>
#include <net/dst.h>
#include <net/icmp.h>
#include <linux/icmpv6.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/moduleparam.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>

#include "ipoib.h"

int ipoib_max_conn_qp = 128;

module_param_named(max_nonsrq_conn_qp, ipoib_max_conn_qp, int, 0444);
MODULE_PARM_DESC(max_nonsrq_conn_qp,
		 "Max number of connected-mode QPs per interface "
		 "(applied only if shared receive queue is not available)");

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
static int data_debug_level;

module_param_named(cm_data_debug_level, data_debug_level, int, 0644);
MODULE_PARM_DESC(cm_data_debug_level,
		 "Enable data path debug tracing for connected mode if > 0");
#endif

#define IPOIB_CM_IETF_ID 0x1000000000000000ULL

#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ)
#define IPOIB_CM_RX_TIMEOUT     (2 * 256 * HZ)
#define IPOIB_CM_RX_DELAY       (3 * 256 * HZ)
#define IPOIB_CM_RX_UPDATE_MASK (0x3)

#define IPOIB_CM_RX_RESERVE     (ALIGN(IPOIB_HARD_LEN, 16) - IPOIB_ENCAP_LEN)

static struct ib_qp_attr ipoib_cm_err_attr = {
	.qp_state = IB_QPS_ERR
};

#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff

static struct ib_send_wr ipoib_cm_rx_drain_wr = {
	.opcode = IB_WR_SEND,
};

static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
			       const struct ib_cm_event *event);

static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
				  u64 mapping[IPOIB_CM_RX_SG])
{
	int i;

	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);

	for (i = 0; i < frags; ++i)
		ib_dma_unmap_page(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
}

static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	int i, ret;

	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;

	for (i = 0; i < priv->cm.num_frags; ++i)
		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];

	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, NULL);
	if (unlikely(ret)) {
		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
		ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
				      priv->cm.srq_ring[id].mapping);
		dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
		priv->cm.srq_ring[id].skb = NULL;
	}

	return ret;
}

static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
					struct ipoib_cm_rx *rx,
					struct ib_recv_wr *wr,
					struct ib_sge *sge, int id)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	int i, ret;

	wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;

	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
		sge[i].addr = rx->rx_ring[id].mapping[i];

	ret = ib_post_recv(rx->qp, wr, NULL);
	if (unlikely(ret)) {
		ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
		ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
				      rx->rx_ring[id].mapping);
		dev_kfree_skb_any(rx->rx_ring[id].skb);
		rx->rx_ring[id].skb = NULL;
	}

	return ret;
}

static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
					     struct ipoib_cm_rx_buf *rx_ring,
					     int id, int frags,
					     u64 mapping[IPOIB_CM_RX_SG],
					     gfp_t gfp)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct sk_buff *skb;
	int i;

	skb = dev_alloc_skb(ALIGN(IPOIB_CM_HEAD_SIZE + IPOIB_PSEUDO_LEN, 16));
	if (unlikely(!skb))
		return NULL;

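	/*
	 * Reserve headroom so that the IP header ends up 16-byte aligned
	 * (IPoIB prepends a 4 byte encapsulation header).
	 */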
	skb_reserve(skb, IPOIB_CM_RX_RESERVE);

	mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
				       DMA_FROM_DEVICE);
	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
		dev_kfree_skb_any(skb);
		return NULL;
	}

	for (i = 0; i < frags; i++) {
		struct page *page = alloc_page(gfp);

		if (!page)
			goto partial_error;
		skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);

		mapping[i + 1] = ib_dma_map_page(priv->ca, page,
						 0, PAGE_SIZE, DMA_FROM_DEVICE);
		if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
			goto partial_error;
	}

	rx_ring[id].skb = skb;
	return skb;

partial_error:

	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);

	for (; i > 0; --i)
		ib_dma_unmap_page(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);

	dev_kfree_skb_any(skb);
	return NULL;
}

static void ipoib_cm_free_rx_ring(struct net_device *dev,
				  struct ipoib_cm_rx_buf *rx_ring)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	int i;

	for (i = 0; i < ipoib_recvq_size; ++i)
		if (rx_ring[i].skb) {
			ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
					      rx_ring[i].mapping);
			dev_kfree_skb_any(rx_ring[i].skb);
		}

	vfree(rx_ring);
}

static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
{
	struct ipoib_cm_rx *p;

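	/*
	 * Post at most one drain WR at a time: bail out if there is
	 * nothing to flush or if a previous drain has not completed yet.
	 */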
	if (list_empty(&priv->cm.rx_flush_list) ||
	    !list_empty(&priv->cm.rx_drain_list))
		return;

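	/*
	 * QPs on the flush list are already in the error state, so the
	 * drain send posted below completes immediately as a "flush error"
	 * WC, which tells us the receive queue has been fully drained.
	 */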
	p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
	ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
	if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, NULL))
		ipoib_warn(priv, "failed to post drain wr\n");

	list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
}

static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
{
	struct ipoib_cm_rx *p = ctx;
	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
	unsigned long flags;

	if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
		return;

	spin_lock_irqsave(&priv->lock, flags);
	list_move(&p->list, &priv->cm.rx_flush_list);
	p->state = IPOIB_CM_RX_FLUSH;
	ipoib_cm_start_rx_drain(priv);
	spin_unlock_irqrestore(&priv->lock, flags);
}

static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
					   struct ipoib_cm_rx *p)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ib_qp_init_attr attr = {
		.event_handler = ipoib_cm_rx_event_handler,
		.send_cq = priv->recv_cq,
		.recv_cq = priv->recv_cq,
		.srq = priv->cm.srq,
		.cap.max_send_wr = 1,
		.cap.max_send_sge = 1,
		.sq_sig_type = IB_SIGNAL_ALL_WR,
		.qp_type = IB_QPT_RC,
		.qp_context = p,
	};

	if (!ipoib_cm_has_srq(dev)) {
		attr.cap.max_recv_wr = ipoib_recvq_size;
		attr.cap.max_recv_sge = IPOIB_CM_RX_SG;
	}

	return ib_create_qp(priv->pd, &attr);
}

static int ipoib_cm_modify_rx_qp(struct net_device *dev,
				 struct ib_cm_id *cm_id, struct ib_qp *qp,
				 unsigned int psn)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
		return ret;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret);
		return ret;
	}
	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
		return ret;
	}
	qp_attr.rq_psn = psn;
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
		return ret;
	}

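	/*
	 * Also move the RX QP to RTS: the drain WR posted from
	 * ipoib_cm_start_rx_drain() only reliably generates a flush
	 * completion on some HCA firmware if the QP has passed through
	 * RTS. A failure here is not treated as fatal, which is why the
	 * error paths below return 0.
	 */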
	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
		return 0;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
		return 0;
	}

	return 0;
}

static void ipoib_cm_init_rx_wr(struct net_device *dev,
				struct ib_recv_wr *wr,
				struct ib_sge *sge)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	int i;

	for (i = 0; i < priv->cm.num_frags; ++i)
		sge[i].lkey = priv->pd->local_dma_lkey;

	sge[0].length = IPOIB_CM_HEAD_SIZE;
	for (i = 1; i < priv->cm.num_frags; ++i)
		sge[i].length = PAGE_SIZE;

	wr->next = NULL;
	wr->sg_list = sge;
	wr->num_sge = priv->cm.num_frags;
}

static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
				   struct ipoib_cm_rx *rx)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct {
		struct ib_recv_wr wr;
		struct ib_sge sge[IPOIB_CM_RX_SG];
	} *t;
	int ret;
	int i;

	rx->rx_ring = vzalloc(array_size(ipoib_recvq_size,
					 sizeof(*rx->rx_ring)));
	if (!rx->rx_ring)
		return -ENOMEM;

	t = kmalloc(sizeof(*t), GFP_KERNEL);
	if (!t) {
		ret = -ENOMEM;
		goto err_free_1;
	}

	ipoib_cm_init_rx_wr(dev, &t->wr, t->sge);

	spin_lock_irq(&priv->lock);

	if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) {
		spin_unlock_irq(&priv->lock);
		ib_send_cm_rej(cm_id, IB_CM_REJ_NO_QP, NULL, 0, NULL, 0);
		ret = -EINVAL;
		goto err_free;
	} else
		++priv->cm.nonsrq_conn_qp;

	spin_unlock_irq(&priv->lock);

	for (i = 0; i < ipoib_recvq_size; ++i) {
		if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1,
					   rx->rx_ring[i].mapping,
					   GFP_KERNEL)) {
			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
			ret = -ENOMEM;
			goto err_count;
		}
		ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i);
		if (ret) {
			ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq "
				   "failed for buf %d\n", i);
			ret = -EIO;
			goto err_count;
		}
	}

	rx->recv_count = ipoib_recvq_size;

	kfree(t);

	return 0;

err_count:
	spin_lock_irq(&priv->lock);
	--priv->cm.nonsrq_conn_qp;
	spin_unlock_irq(&priv->lock);

err_free:
	kfree(t);

err_free_1:
	ipoib_cm_free_rx_ring(dev, rx->rx_ring);

	return ret;
}

static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
			     struct ib_qp *qp,
			     const struct ib_cm_req_event_param *req,
			     unsigned int psn)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ipoib_cm_data data = {};
	struct ib_cm_rep_param rep = {};

	data.qpn = cpu_to_be32(priv->qp->qp_num);
	data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);

	rep.private_data = &data;
	rep.private_data_len = sizeof(data);
	rep.flow_control = 0;
	rep.rnr_retry_count = req->rnr_retry_count;
	rep.srq = ipoib_cm_has_srq(dev);
	rep.qp_num = qp->qp_num;
	rep.starting_psn = psn;
	return ib_send_cm_rep(cm_id, &rep);
}

static int ipoib_cm_req_handler(struct ib_cm_id *cm_id,
				const struct ib_cm_event *event)
{
	struct net_device *dev = cm_id->context;
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ipoib_cm_rx *p;
	unsigned int psn;
	int ret;

	ipoib_dbg(priv, "REQ arrived\n");
	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return -ENOMEM;
	p->dev = dev;
	p->id = cm_id;
	cm_id->context = p;
	p->state = IPOIB_CM_RX_LIVE;
	p->jiffies = jiffies;
	INIT_LIST_HEAD(&p->list);

	p->qp = ipoib_cm_create_rx_qp(dev, p);
	if (IS_ERR(p->qp)) {
		ret = PTR_ERR(p->qp);
		goto err_qp;
	}

	psn = prandom_u32() & 0xffffff;
	ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
	if (ret)
		goto err_modify;

	if (!ipoib_cm_has_srq(dev)) {
		ret = ipoib_cm_nonsrq_init_rx(dev, cm_id, p);
		if (ret)
			goto err_modify;
	}

	spin_lock_irq(&priv->lock);
	queue_delayed_work(priv->wq,
			   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
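	/*
	 * Add this entry to the head of the passive_ids list, but do not
	 * re-add it if IB_EVENT_QP_LAST_WQE_REACHED has already moved it
	 * to the flush list.
	 */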
	p->jiffies = jiffies;
	if (p->state == IPOIB_CM_RX_LIVE)
		list_move(&p->list, &priv->cm.passive_ids);
	spin_unlock_irq(&priv->lock);

	ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn);
	if (ret) {
		ipoib_warn(priv, "failed to send REP: %d\n", ret);
		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
			ipoib_warn(priv, "unable to move qp to error state\n");
	}
	return 0;

err_modify:
	ib_destroy_qp(p->qp);
err_qp:
	kfree(p);
	return ret;
}

static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
			       const struct ib_cm_event *event)
{
	struct ipoib_cm_rx *p;
	struct ipoib_dev_priv *priv;

	switch (event->event) {
	case IB_CM_REQ_RECEIVED:
		return ipoib_cm_req_handler(cm_id, event);
	case IB_CM_DREQ_RECEIVED:
		ib_send_cm_drep(cm_id, NULL, 0);
		fallthrough;
	case IB_CM_REJ_RECEIVED:
		p = cm_id->context;
		priv = ipoib_priv(p->dev);
		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
			ipoib_warn(priv, "unable to move qp to error state\n");
		fallthrough;
	default:
		return 0;
	}
}

static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
			  unsigned int length, struct sk_buff *toskb)
{
	int i, num_frags;
	unsigned int size;

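	/* Claim the part of the payload that landed in the linear (header) buffer. */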
	size = min(length, hdr_space);
	skb->tail += size;
	skb->len += size;
	length -= size;

	num_frags = skb_shinfo(skb)->nr_frags;
	for (i = 0; i < num_frags; i++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		if (length == 0) {
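			/* This page holds no data; hand it over to the replacement skb. */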
			skb_fill_page_desc(toskb, i, skb_frag_page(frag),
					   0, PAGE_SIZE);
			--skb_shinfo(skb)->nr_frags;
		} else {
			size = min_t(unsigned int, length, PAGE_SIZE);

			skb_frag_size_set(frag, size);
			skb->data_len += size;
			skb->truesize += size;
			skb->len += size;
			length -= size;
		}
	}
}

void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ipoib_cm_rx_buf *rx_ring;
	unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
	struct sk_buff *skb, *newskb;
	struct ipoib_cm_rx *p;
	unsigned long flags;
	u64 mapping[IPOIB_CM_RX_SG];
	int frags;
	int has_srq;
	struct sk_buff *small_skb;

	ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
		       wr_id, wc->status);

	if (unlikely(wr_id >= ipoib_recvq_size)) {
		if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {
			spin_lock_irqsave(&priv->lock, flags);
			list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
			ipoib_cm_start_rx_drain(priv);
			queue_work(priv->wq, &priv->cm.rx_reap_task);
			spin_unlock_irqrestore(&priv->lock, flags);
		} else
			ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
				   wr_id, ipoib_recvq_size);
		return;
	}

	p = wc->qp->qp_context;

	has_srq = ipoib_cm_has_srq(dev);
	rx_ring = has_srq ? priv->cm.srq_ring : p->rx_ring;

	skb = rx_ring[wr_id].skb;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		ipoib_dbg(priv,
			  "cm recv error (status=%d, wrid=%d vend_err %#x)\n",
			  wc->status, wr_id, wc->vendor_err);
		++dev->stats.rx_dropped;
		if (has_srq)
			goto repost;
		else {
			if (!--p->recv_count) {
				spin_lock_irqsave(&priv->lock, flags);
				list_move(&p->list, &priv->cm.rx_reap_list);
				spin_unlock_irqrestore(&priv->lock, flags);
				queue_work(priv->wq, &priv->cm.rx_reap_task);
			}
			return;
		}
	}

	if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) {
		if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
			spin_lock_irqsave(&priv->lock, flags);
			p->jiffies = jiffies;
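			/*
			 * Move this entry to the head of the list, but do
			 * not re-add it if it has already been moved off
			 * the passive_ids list.
			 */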
			if (p->state == IPOIB_CM_RX_LIVE)
				list_move(&p->list, &priv->cm.passive_ids);
			spin_unlock_irqrestore(&priv->lock, flags);
		}
	}

	if (wc->byte_len < IPOIB_CM_COPYBREAK) {
		int dlen = wc->byte_len;

		small_skb = dev_alloc_skb(dlen + IPOIB_CM_RX_RESERVE);
		if (small_skb) {
			skb_reserve(small_skb, IPOIB_CM_RX_RESERVE);
			ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0],
						   dlen, DMA_FROM_DEVICE);
			skb_copy_from_linear_data(skb, small_skb->data, dlen);
			ib_dma_sync_single_for_device(priv->ca, rx_ring[wr_id].mapping[0],
						      dlen, DMA_FROM_DEVICE);
			skb_put(small_skb, dlen);
			skb = small_skb;
			goto copied;
		}
	}

	frags = PAGE_ALIGN(wc->byte_len -
			   min_t(u32, wc->byte_len, IPOIB_CM_HEAD_SIZE)) /
		PAGE_SIZE;

	newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags,
				       mapping, GFP_ATOMIC);
	if (unlikely(!newskb)) {
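		/*
		 * If we can't allocate a new RX buffer, dump
		 * this packet and reuse the old buffer.
		 */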
		ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
		++dev->stats.rx_dropped;
		goto repost;
	}

	ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping);
	memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof(*mapping));

	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
		       wc->byte_len, wc->slid);

	skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);

copied:
	skb->protocol = ((struct ipoib_header *) skb->data)->proto;
	skb_add_pseudo_hdr(skb);

	++dev->stats.rx_packets;
	dev->stats.rx_bytes += skb->len;

	skb->dev = dev;

	skb->pkt_type = PACKET_HOST;
	netif_receive_skb(skb);

repost:
	if (has_srq) {
		if (unlikely(ipoib_cm_post_receive_srq(dev, wr_id)))
			ipoib_warn(priv, "ipoib_cm_post_receive_srq failed "
				   "for buf %d\n", wr_id);
	} else {
		if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p,
							  &priv->cm.rx_wr,
							  priv->cm.rx_sge,
							  wr_id))) {
			--p->recv_count;
			ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed "
				   "for buf %d\n", wr_id);
		}
	}
}

static inline int post_send(struct ipoib_dev_priv *priv,
			    struct ipoib_cm_tx *tx,
			    unsigned int wr_id,
			    struct ipoib_tx_buf *tx_req)
{
	ipoib_build_sge(priv, tx_req);

	priv->tx_wr.wr.wr_id = wr_id | IPOIB_OP_CM;

	return ib_post_send(tx->qp, &priv->tx_wr.wr, NULL);
}

void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ipoib_tx_buf *tx_req;
	int rc;
	unsigned int usable_sge = tx->max_send_sge - !!skb_headlen(skb);

	if (unlikely(skb->len > tx->mtu)) {
		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
			   skb->len, tx->mtu);
		++dev->stats.tx_dropped;
		++dev->stats.tx_errors;
		ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN);
		return;
	}
	if (skb_shinfo(skb)->nr_frags > usable_sge) {
		if (skb_linearize(skb) < 0) {
			ipoib_warn(priv, "skb could not be linearized\n");
			++dev->stats.tx_dropped;
			++dev->stats.tx_errors;
			dev_kfree_skb_any(skb);
			return;
		}

		if (skb_shinfo(skb)->nr_frags > usable_sge) {
			ipoib_warn(priv, "too many frags after skb linearize\n");
			++dev->stats.tx_dropped;
			++dev->stats.tx_errors;
			dev_kfree_skb_any(skb);
			return;
		}
	}
	ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
		       tx->tx_head, skb->len, tx->qp->qp_num);

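	/*
	 * We put the skb into the tx_ring _before_ we call post_send()
	 * because it's entirely possible that the completion handler will
	 * run before we execute anything after the post_send().  That
	 * means we have to make sure everything is properly recorded and
	 * our state is consistent before we call post_send().
	 */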
	tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
	tx_req->skb = skb;

	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
		++dev->stats.tx_errors;
		dev_kfree_skb_any(skb);
		return;
	}

	if ((priv->global_tx_head - priv->global_tx_tail) ==
	    ipoib_sendq_size - 1) {
		ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
			  tx->qp->qp_num);
		netif_stop_queue(dev);
	}

	skb_orphan(skb);
	skb_dst_drop(skb);

	if (netif_queue_stopped(dev)) {
		rc = ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
				      IB_CQ_REPORT_MISSED_EVENTS);
		if (unlikely(rc < 0))
			ipoib_warn(priv, "IPoIB/CM:request notify on send CQ failed\n");
		else if (rc)
			napi_schedule(&priv->send_napi);
	}

	rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req);
	if (unlikely(rc)) {
		ipoib_warn(priv, "IPoIB/CM:post_send failed, error %d\n", rc);
		++dev->stats.tx_errors;
		ipoib_dma_unmap_tx(priv, tx_req);
		dev_kfree_skb_any(skb);

		if (netif_queue_stopped(dev))
			netif_wake_queue(dev);
	} else {
		netif_trans_update(dev);
		++tx->tx_head;
		++priv->global_tx_head;
	}
}

void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ipoib_cm_tx *tx = wc->qp->qp_context;
	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
	struct ipoib_tx_buf *tx_req;
	unsigned long flags;

	ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
		       wr_id, wc->status);

	if (unlikely(wr_id >= ipoib_sendq_size)) {
		ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n",
			   wr_id, ipoib_sendq_size);
		return;
	}

	tx_req = &tx->tx_ring[wr_id];

	ipoib_dma_unmap_tx(priv, tx_req);

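	/* Note: TX stats are updated for every completion, successful or not. */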
	++dev->stats.tx_packets;
	dev->stats.tx_bytes += tx_req->skb->len;

	dev_kfree_skb_any(tx_req->skb);

	netif_tx_lock(dev);

	++tx->tx_tail;
	++priv->global_tx_tail;

	if (unlikely(netif_queue_stopped(dev) &&
		     ((priv->global_tx_head - priv->global_tx_tail) <=
		      ipoib_sendq_size >> 1) &&
		     test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
		netif_wake_queue(dev);

	if (wc->status != IB_WC_SUCCESS &&
	    wc->status != IB_WC_WR_FLUSH_ERR) {
		struct ipoib_neigh *neigh;

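		/*
		 * RNR/retry-exceeded errors are part of the normal
		 * connection life cycle, so log them at debug level only;
		 * anything else gets a warning.
		 */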
		if (wc->status == IB_WC_RNR_RETRY_EXC_ERR ||
		    wc->status == IB_WC_RETRY_EXC_ERR)
			ipoib_dbg(priv,
				  "%s: failed cm send event (status=%d, wrid=%d vend_err %#x)\n",
				  __func__, wc->status, wr_id, wc->vendor_err);
		else
			ipoib_warn(priv,
				   "%s: failed cm send event (status=%d, wrid=%d vend_err %#x)\n",
				   __func__, wc->status, wr_id, wc->vendor_err);

		spin_lock_irqsave(&priv->lock, flags);
		neigh = tx->neigh;

		if (neigh) {
			neigh->cm = NULL;
			ipoib_neigh_free(neigh);

			tx->neigh = NULL;
		}

		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
			list_move(&tx->list, &priv->cm.reap_list);
			queue_work(priv->wq, &priv->cm.reap_task);
		}

		clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);

		spin_unlock_irqrestore(&priv->lock, flags);
	}

	netif_tx_unlock(dev);
}

int ipoib_cm_dev_open(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	int ret;

	if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
		return 0;

	priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
	if (IS_ERR(priv->cm.id)) {
		pr_warn("%s: failed to create CM ID\n", priv->ca->name);
		ret = PTR_ERR(priv->cm.id);
		goto err_cm;
	}

	ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
			   0);
	if (ret) {
		pr_warn("%s: failed to listen on ID 0x%llx\n", priv->ca->name,
			IPOIB_CM_IETF_ID | priv->qp->qp_num);
		goto err_listen;
	}

	return 0;

err_listen:
	ib_destroy_cm_id(priv->cm.id);
err_cm:
	priv->cm.id = NULL;
	return ret;
}

static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ipoib_cm_rx *rx, *n;
	LIST_HEAD(list);

	spin_lock_irq(&priv->lock);
	list_splice_init(&priv->cm.rx_reap_list, &list);
	spin_unlock_irq(&priv->lock);

	list_for_each_entry_safe(rx, n, &list, list) {
		ib_destroy_cm_id(rx->id);
		ib_destroy_qp(rx->qp);
		if (!ipoib_cm_has_srq(dev)) {
			ipoib_cm_free_rx_ring(priv->dev, rx->rx_ring);
			spin_lock_irq(&priv->lock);
			--priv->cm.nonsrq_conn_qp;
			spin_unlock_irq(&priv->lock);
		}
		kfree(rx);
	}
}

void ipoib_cm_dev_stop(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ipoib_cm_rx *p;
	unsigned long begin;
	int ret;

	if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id)
		return;

	ib_destroy_cm_id(priv->cm.id);
	priv->cm.id = NULL;

	spin_lock_irq(&priv->lock);
	while (!list_empty(&priv->cm.passive_ids)) {
		p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
		list_move(&p->list, &priv->cm.rx_error_list);
		p->state = IPOIB_CM_RX_ERROR;
		spin_unlock_irq(&priv->lock);
		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
		if (ret)
			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
		spin_lock_irq(&priv->lock);
	}

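	/* Wait for all RX QPs to be drained. */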
	begin = jiffies;

	while (!list_empty(&priv->cm.rx_error_list) ||
	       !list_empty(&priv->cm.rx_flush_list) ||
	       !list_empty(&priv->cm.rx_drain_list)) {
		if (time_after(jiffies, begin + 5 * HZ)) {
			ipoib_warn(priv, "RX drain timing out\n");

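			/*
			 * Assume the HW is wedged and just free up everything.
			 */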
			list_splice_init(&priv->cm.rx_flush_list,
					 &priv->cm.rx_reap_list);
			list_splice_init(&priv->cm.rx_error_list,
					 &priv->cm.rx_reap_list);
			list_splice_init(&priv->cm.rx_drain_list,
					 &priv->cm.rx_reap_list);
			break;
		}
		spin_unlock_irq(&priv->lock);
		usleep_range(1000, 2000);
		ipoib_drain_cq(dev);
		spin_lock_irq(&priv->lock);
	}

	spin_unlock_irq(&priv->lock);

	ipoib_cm_free_rx_reap_list(dev);

	cancel_delayed_work(&priv->cm.stale_task);
}

static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id,
				const struct ib_cm_event *event)
{
	struct ipoib_cm_tx *p = cm_id->context;
	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
	struct ipoib_cm_data *data = event->private_data;
	struct sk_buff_head skqueue;
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;
	struct sk_buff *skb;

	p->mtu = be32_to_cpu(data->mtu);

	if (p->mtu <= IPOIB_ENCAP_LEN) {
		ipoib_warn(priv, "Rejecting connection: mtu %d <= %d\n",
			   p->mtu, IPOIB_ENCAP_LEN);
		return -EINVAL;
	}

	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
		return ret;
	}

	qp_attr.rq_psn = 0;
	ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
		return ret;
	}

	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
		return ret;
	}
	ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
		return ret;
	}

	skb_queue_head_init(&skqueue);

	netif_tx_lock_bh(p->dev);
	spin_lock_irq(&priv->lock);
	set_bit(IPOIB_FLAG_OPER_UP, &p->flags);
	if (p->neigh)
		while ((skb = __skb_dequeue(&p->neigh->queue)))
			__skb_queue_tail(&skqueue, skb);
	spin_unlock_irq(&priv->lock);
	netif_tx_unlock_bh(p->dev);

	while ((skb = __skb_dequeue(&skqueue))) {
		skb->dev = p->dev;
		ret = dev_queue_xmit(skb);
		if (ret)
			ipoib_warn(priv, "%s:dev_queue_xmit failed to re-queue packet, ret:%d\n",
				   __func__, ret);
	}

	ret = ib_send_cm_rtu(cm_id, NULL, 0);
	if (ret) {
		ipoib_warn(priv, "failed to send RTU: %d\n", ret);
		return ret;
	}
	return 0;
}

static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ib_qp_init_attr attr = {
		.send_cq = priv->send_cq,
		.recv_cq = priv->recv_cq,
		.srq = priv->cm.srq,
		.cap.max_send_wr = ipoib_sendq_size,
		.cap.max_send_sge = 1,
		.sq_sig_type = IB_SIGNAL_ALL_WR,
		.qp_type = IB_QPT_RC,
		.qp_context = tx,
		.create_flags = 0
	};
	struct ib_qp *tx_qp;

	if (dev->features & NETIF_F_SG)
		attr.cap.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge,
					      MAX_SKB_FRAGS + 1);

	tx_qp = ib_create_qp(priv->pd, &attr);
	tx->max_send_sge = attr.cap.max_send_sge;
	return tx_qp;
}

static int ipoib_cm_send_req(struct net_device *dev,
			     struct ib_cm_id *id, struct ib_qp *qp,
			     u32 qpn,
			     struct sa_path_rec *pathrec)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ipoib_cm_data data = {};
	struct ib_cm_req_param req = {};

	data.qpn = cpu_to_be32(priv->qp->qp_num);
	data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);

	req.primary_path = pathrec;
	req.alternate_path = NULL;
	req.service_id = cpu_to_be64(IPOIB_CM_IETF_ID | qpn);
	req.qp_num = qp->qp_num;
	req.qp_type = qp->qp_type;
	req.private_data = &data;
	req.private_data_len = sizeof(data);
	req.flow_control = 0;

	req.starting_psn = 0;

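	/*
	 * Pick some arbitrary defaults here; we could make these
	 * module parameters if anyone cared about setting them.
	 */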
	req.responder_resources = 4;
	req.remote_cm_response_timeout = 20;
	req.local_cm_response_timeout = 20;
	req.retry_count = 0;
	req.rnr_retry_count = 0;
	req.max_cm_retries = 15;
	req.srq = ipoib_cm_has_srq(dev);
	return ib_send_cm_req(id, &req);
}

static int ipoib_cm_modify_tx_init(struct net_device *dev,
				   struct ib_cm_id *cm_id, struct ib_qp *qp)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.pkey_index = priv->pkey_index;
	qp_attr.qp_state = IB_QPS_INIT;
	qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
	qp_attr.port_num = priv->port;
	qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;

	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify tx QP to INIT: %d\n", ret);
		return ret;
	}
	return 0;
}

static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
			    struct sa_path_rec *pathrec)
{
	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
	unsigned int noio_flag;
	int ret;

	noio_flag = memalloc_noio_save();
	p->tx_ring = vzalloc(array_size(ipoib_sendq_size, sizeof(*p->tx_ring)));
	if (!p->tx_ring) {
		memalloc_noio_restore(noio_flag);
		ret = -ENOMEM;
		goto err_tx;
	}

	p->qp = ipoib_cm_create_tx_qp(p->dev, p);
	memalloc_noio_restore(noio_flag);
	if (IS_ERR(p->qp)) {
		ret = PTR_ERR(p->qp);
		ipoib_warn(priv, "failed to create tx qp: %d\n", ret);
		goto err_qp;
	}

	p->id = ib_create_cm_id(priv->ca, ipoib_cm_tx_handler, p);
	if (IS_ERR(p->id)) {
		ret = PTR_ERR(p->id);
		ipoib_warn(priv, "failed to create tx cm id: %d\n", ret);
		goto err_id;
	}

	ret = ipoib_cm_modify_tx_init(p->dev, p->id, p->qp);
	if (ret) {
		ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret);
		goto err_modify_send;
	}

	ret = ipoib_cm_send_req(p->dev, p->id, p->qp, qpn, pathrec);
	if (ret) {
		ipoib_warn(priv, "failed to send cm req: %d\n", ret);
		goto err_modify_send;
	}

	ipoib_dbg(priv, "Request connection 0x%x for gid %pI6 qpn 0x%x\n",
		  p->qp->qp_num, pathrec->dgid.raw, qpn);

	return 0;

err_modify_send:
	ib_destroy_cm_id(p->id);
err_id:
	p->id = NULL;
	ib_destroy_qp(p->qp);
err_qp:
	p->qp = NULL;
	vfree(p->tx_ring);
err_tx:
	return ret;
}

static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
{
	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
	struct ipoib_tx_buf *tx_req;
	unsigned long begin;

	ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
		  p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);

	if (p->id)
		ib_destroy_cm_id(p->id);

	if (p->tx_ring) {
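		/* Wait for all sends to complete */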
		begin = jiffies;
		while ((int) p->tx_tail - (int) p->tx_head < 0) {
			if (time_after(jiffies, begin + 5 * HZ)) {
				ipoib_warn(priv, "timing out; %d sends not completed\n",
					   p->tx_head - p->tx_tail);
				goto timeout;
			}

			usleep_range(1000, 2000);
		}
	}

timeout:

	while ((int) p->tx_tail - (int) p->tx_head < 0) {
		tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
		ipoib_dma_unmap_tx(priv, tx_req);
		dev_kfree_skb_any(tx_req->skb);
		netif_tx_lock_bh(p->dev);
		++p->tx_tail;
		++priv->global_tx_tail;
		if (unlikely((priv->global_tx_head - priv->global_tx_tail) <=
			     ipoib_sendq_size >> 1) &&
		    netif_queue_stopped(p->dev) &&
		    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
			netif_wake_queue(p->dev);
		netif_tx_unlock_bh(p->dev);
	}

	if (p->qp)
		ib_destroy_qp(p->qp);

	vfree(p->tx_ring);
	kfree(p);
}

static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
			       const struct ib_cm_event *event)
{
	struct ipoib_cm_tx *tx = cm_id->context;
	struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
	struct net_device *dev = priv->dev;
	struct ipoib_neigh *neigh;
	unsigned long flags;
	int ret;

	switch (event->event) {
	case IB_CM_DREQ_RECEIVED:
		ipoib_dbg(priv, "DREQ received.\n");
		ib_send_cm_drep(cm_id, NULL, 0);
		break;
	case IB_CM_REP_RECEIVED:
		ipoib_dbg(priv, "REP received.\n");
		ret = ipoib_cm_rep_handler(cm_id, event);
		if (ret)
			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
				       NULL, 0, NULL, 0);
		break;
	case IB_CM_REQ_ERROR:
	case IB_CM_REJ_RECEIVED:
	case IB_CM_TIMEWAIT_EXIT:
		ipoib_dbg(priv, "CM error %d.\n", event->event);
		netif_tx_lock_bh(dev);
		spin_lock_irqsave(&priv->lock, flags);
		neigh = tx->neigh;

		if (neigh) {
			neigh->cm = NULL;
			ipoib_neigh_free(neigh);

			tx->neigh = NULL;
		}

		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
			list_move(&tx->list, &priv->cm.reap_list);
			queue_work(priv->wq, &priv->cm.reap_task);
		}

		spin_unlock_irqrestore(&priv->lock, flags);
		netif_tx_unlock_bh(dev);
		break;
	default:
		break;
	}

	return 0;
}

struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
				       struct ipoib_neigh *neigh)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ipoib_cm_tx *tx;

	tx = kzalloc(sizeof(*tx), GFP_ATOMIC);
	if (!tx)
		return NULL;

	neigh->cm = tx;
	tx->neigh = neigh;
	tx->dev = dev;
	list_add(&tx->list, &priv->cm.start_list);
	set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
	queue_work(priv->wq, &priv->cm.start_task);
	return tx;
}

void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
{
	struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
	unsigned long flags;

	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
		spin_lock_irqsave(&priv->lock, flags);
		list_move(&tx->list, &priv->cm.reap_list);
		queue_work(priv->wq, &priv->cm.reap_task);
		ipoib_dbg(priv, "Reap connection for gid %pI6\n",
			  tx->neigh->daddr + 4);
		tx->neigh = NULL;
		spin_unlock_irqrestore(&priv->lock, flags);
	}
}

#define QPN_AND_OPTIONS_OFFSET	4

static void ipoib_cm_tx_start(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.start_task);
	struct net_device *dev = priv->dev;
	struct ipoib_neigh *neigh;
	struct ipoib_cm_tx *p;
	unsigned long flags;
	struct ipoib_path *path;
	int ret;

	struct sa_path_rec pathrec;
	u32 qpn;

	netif_tx_lock_bh(dev);
	spin_lock_irqsave(&priv->lock, flags);

	while (!list_empty(&priv->cm.start_list)) {
		p = list_entry(priv->cm.start_list.next, typeof(*p), list);
		list_del_init(&p->list);
		neigh = p->neigh;

		qpn = IPOIB_QPN(neigh->daddr);
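		/*
		 * As long as the lookup is done under both netif_tx_lock
		 * and priv->lock, finding the path means it is still valid.
		 */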
		path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET);
		if (!path) {
			pr_info("%s ignore not valid path %pI6\n",
				__func__,
				neigh->daddr + QPN_AND_OPTIONS_OFFSET);
			goto free_neigh;
		}
		memcpy(&pathrec, &path->pathrec, sizeof(pathrec));

		spin_unlock_irqrestore(&priv->lock, flags);
		netif_tx_unlock_bh(dev);

		ret = ipoib_cm_tx_init(p, qpn, &pathrec);

		netif_tx_lock_bh(dev);
		spin_lock_irqsave(&priv->lock, flags);

		if (ret) {
free_neigh:
			neigh = p->neigh;
			if (neigh) {
				neigh->cm = NULL;
				ipoib_neigh_free(neigh);
			}
			list_del(&p->list);
			kfree(p);
		}
	}

	spin_unlock_irqrestore(&priv->lock, flags);
	netif_tx_unlock_bh(dev);
}

static void ipoib_cm_tx_reap(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.reap_task);
	struct net_device *dev = priv->dev;
	struct ipoib_cm_tx *p;
	unsigned long flags;

	netif_tx_lock_bh(dev);
	spin_lock_irqsave(&priv->lock, flags);

	while (!list_empty(&priv->cm.reap_list)) {
		p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
		list_del_init(&p->list);
		spin_unlock_irqrestore(&priv->lock, flags);
		netif_tx_unlock_bh(dev);
		ipoib_cm_tx_destroy(p);
		netif_tx_lock_bh(dev);
		spin_lock_irqsave(&priv->lock, flags);
	}

	spin_unlock_irqrestore(&priv->lock, flags);
	netif_tx_unlock_bh(dev);
}

static void ipoib_cm_skb_reap(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.skb_task);
	struct net_device *dev = priv->dev;
	struct sk_buff *skb;
	unsigned long flags;
	unsigned int mtu = priv->mcast_mtu;

	netif_tx_lock_bh(dev);
	spin_lock_irqsave(&priv->lock, flags);

	while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
		spin_unlock_irqrestore(&priv->lock, flags);
		netif_tx_unlock_bh(dev);

		if (skb->protocol == htons(ETH_P_IP)) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		}
#endif
		dev_kfree_skb_any(skb);

		netif_tx_lock_bh(dev);
		spin_lock_irqsave(&priv->lock, flags);
	}

	spin_unlock_irqrestore(&priv->lock, flags);
	netif_tx_unlock_bh(dev);
}

void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
			   unsigned int mtu)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	int e = skb_queue_empty(&priv->cm.skb_queue);

	skb_dst_update_pmtu(skb, mtu);

	skb_queue_tail(&priv->cm.skb_queue, skb);
	if (e)
		queue_work(priv->wq, &priv->cm.skb_task);
}

static void ipoib_cm_rx_reap(struct work_struct *work)
{
	ipoib_cm_free_rx_reap_list(container_of(work, struct ipoib_dev_priv,
						cm.rx_reap_task)->dev);
}

static void ipoib_cm_stale_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.stale_task.work);
	struct ipoib_cm_rx *p;
	int ret;

	spin_lock_irq(&priv->lock);
	while (!list_empty(&priv->cm.passive_ids)) {
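		/*
		 * The passive_ids list is kept in LRU order: start from the
		 * tail and stop at the first recently used entry.
		 */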
		p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
		if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
			break;
		list_move(&p->list, &priv->cm.rx_error_list);
		p->state = IPOIB_CM_RX_ERROR;
		spin_unlock_irq(&priv->lock);
		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
		if (ret)
			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
		spin_lock_irq(&priv->lock);
	}

	if (!list_empty(&priv->cm.passive_ids))
		queue_delayed_work(priv->wq,
				   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
	spin_unlock_irq(&priv->lock);
}

static ssize_t mode_show(struct device *d, struct device_attribute *attr,
			 char *buf)
{
	struct net_device *dev = to_net_dev(d);
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
		return sysfs_emit(buf, "connected\n");
	else
		return sysfs_emit(buf, "datagram\n");
}

static ssize_t mode_store(struct device *d, struct device_attribute *attr,
			  const char *buf, size_t count)
{
	struct net_device *dev = to_net_dev(d);
	int ret;

	if (!rtnl_trylock()) {
		return restart_syscall();
	}

	if (dev->reg_state != NETREG_REGISTERED) {
		rtnl_unlock();
		return -EPERM;
	}

	ret = ipoib_set_mode(dev, buf);

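	/*
	 * ipoib_set_mode() normally returns with the rtnl lock still held;
	 * when it returns -EBUSY the lock is no longer held, so only
	 * unlock for other return values.
	 */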
	if (ret != -EBUSY)
		rtnl_unlock();

	return (!ret || ret == -EBUSY) ? count : ret;
}

static DEVICE_ATTR_RW(mode);

int ipoib_cm_add_mode_attr(struct net_device *dev)
{
	return device_create_file(&dev->dev, &dev_attr_mode);
}

static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ib_srq_init_attr srq_init_attr = {
		.srq_type = IB_SRQT_BASIC,
		.attr = {
			.max_wr  = ipoib_recvq_size,
			.max_sge = max_sge
		}
	};

	priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
	if (IS_ERR(priv->cm.srq)) {
		if (PTR_ERR(priv->cm.srq) != -EOPNOTSUPP)
			pr_warn("%s: failed to allocate SRQ, error %ld\n",
				priv->ca->name, PTR_ERR(priv->cm.srq));
		priv->cm.srq = NULL;
		return;
	}

	priv->cm.srq_ring = vzalloc(array_size(ipoib_recvq_size,
					       sizeof(*priv->cm.srq_ring)));
	if (!priv->cm.srq_ring) {
		ib_destroy_srq(priv->cm.srq);
		priv->cm.srq = NULL;
		return;
	}
}

int ipoib_cm_dev_init(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	int max_srq_sge, i;
	u8 addr;

	INIT_LIST_HEAD(&priv->cm.passive_ids);
	INIT_LIST_HEAD(&priv->cm.reap_list);
	INIT_LIST_HEAD(&priv->cm.start_list);
	INIT_LIST_HEAD(&priv->cm.rx_error_list);
	INIT_LIST_HEAD(&priv->cm.rx_flush_list);
	INIT_LIST_HEAD(&priv->cm.rx_drain_list);
	INIT_LIST_HEAD(&priv->cm.rx_reap_list);
	INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
	INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);
	INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap);
	INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap);
	INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task);

	skb_queue_head_init(&priv->cm.skb_queue);

	ipoib_dbg(priv, "max_srq_sge=%d\n", priv->ca->attrs.max_srq_sge);

	max_srq_sge = min_t(int, IPOIB_CM_RX_SG, priv->ca->attrs.max_srq_sge);
	ipoib_cm_create_srq(dev, max_srq_sge);
	if (ipoib_cm_has_srq(dev)) {
		priv->cm.max_cm_mtu = max_srq_sge * PAGE_SIZE - 0x10;
		priv->cm.num_frags = max_srq_sge;
		ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
			  priv->cm.max_cm_mtu, priv->cm.num_frags);
	} else {
		priv->cm.max_cm_mtu = IPOIB_CM_MTU;
		priv->cm.num_frags = IPOIB_CM_RX_SG;
	}

	ipoib_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge);

	if (ipoib_cm_has_srq(dev)) {
		for (i = 0; i < ipoib_recvq_size; ++i) {
			if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
						   priv->cm.num_frags - 1,
						   priv->cm.srq_ring[i].mapping,
						   GFP_KERNEL)) {
				ipoib_warn(priv, "failed to allocate "
					   "receive buffer %d\n", i);
				ipoib_cm_dev_cleanup(dev);
				return -ENOMEM;
			}

			if (ipoib_cm_post_receive_srq(dev, i)) {
				ipoib_warn(priv, "ipoib_cm_post_receive_srq "
					   "failed for buf %d\n", i);
				ipoib_cm_dev_cleanup(dev);
				return -EIO;
			}
		}
	}

	addr = IPOIB_FLAGS_RC;
	dev_addr_mod(dev, 0, &addr, 1);
	return 0;
}

void ipoib_cm_dev_cleanup(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	if (!priv->cm.srq)
		return;

	ipoib_dbg(priv, "Cleanup ipoib connected mode.\n");

	ib_destroy_srq(priv->cm.srq);
	priv->cm.srq = NULL;
	if (!priv->cm.srq_ring)
		return;

	ipoib_cm_free_rx_ring(dev, priv->cm.srq_ring);
	priv->cm.srq_ring = NULL;
}