#include <linux/delay.h>
#include <linux/moduleparam.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>

#include <linux/ip.h>
#include <linux/tcp.h>
#include <rdma/ib_cache.h>

#include "ipoib.h"

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
static int data_debug_level;

module_param(data_debug_level, int, 0644);
MODULE_PARM_DESC(data_debug_level,
		 "Enable data path debug tracing if > 0");
#endif

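/*
 * Allocate an ipoib_ah and create the underlying IB address handle for it.
 * On failure the wrapper is freed and the ERR_PTR from rdma_create_ah() is
 * propagated to the caller.
 */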
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
				 struct ib_pd *pd, struct rdma_ah_attr *attr)
{
	struct ipoib_ah *ah;
	struct ib_ah *vah;

	ah = kmalloc(sizeof(*ah), GFP_KERNEL);
	if (!ah)
		return ERR_PTR(-ENOMEM);

	ah->dev = dev;
	ah->last_send = 0;
	kref_init(&ah->ref);

	vah = rdma_create_ah(pd, attr, RDMA_CREATE_AH_SLEEPABLE);
	if (IS_ERR(vah)) {
		kfree(ah);
		ah = (struct ipoib_ah *)vah;
	} else {
		ah->ah = vah;
		ipoib_dbg(ipoib_priv(dev), "Created ah %p\n", ah->ah);
	}

	return ah;
}

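/*
 * kref release callback for an address handle.  The AH is not destroyed
 * here; it is moved to the dead_ahs list and freed later by the AH reaper
 * once all sends that referenced it have completed.
 */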
void ipoib_free_ah(struct kref *kref)
{
	struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref);
	struct ipoib_dev_priv *priv = ipoib_priv(ah->dev);

	unsigned long flags;

	spin_lock_irqsave(&priv->lock, flags);
	list_add_tail(&ah->list, &priv->dead_ahs);
	spin_unlock_irqrestore(&priv->lock, flags);
}

static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
				  u64 mapping[IPOIB_UD_RX_SG])
{
	ib_dma_unmap_single(priv->ca, mapping[0],
			    IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
			    DMA_FROM_DEVICE);
}

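/*
 * Repost the receive buffer in ring slot @id.  If the post fails, the
 * buffer is unmapped and freed so the slot can be refilled later.
 */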
static int ipoib_ib_post_receive(struct net_device *dev, int id)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	int ret;

	priv->rx_wr.wr_id = id | IPOIB_OP_RECV;
	priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0];
	priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1];

	ret = ib_post_recv(priv->qp, &priv->rx_wr, NULL);
	if (unlikely(ret)) {
		ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
		ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping);
		dev_kfree_skb_any(priv->rx_ring[id].skb);
		priv->rx_ring[id].skb = NULL;
	}

	return ret;
}

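/*
 * Allocate and DMA-map a fresh receive skb for ring slot @id.  Returns the
 * skb on success or NULL if allocation or mapping failed.
 */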
static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct sk_buff *skb;
	int buf_size;
	u64 *mapping;

	buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);

	skb = dev_alloc_skb(buf_size + IPOIB_HARD_LEN);
	if (unlikely(!skb))
		return NULL;

	/*
	 * Reserve room for the pseudo header that will be pushed in front of
	 * the payload later, so that the IP header following the GRH and the
	 * IPoIB encapsulation header ends up properly aligned.
	 */
	skb_reserve(skb, sizeof(struct ipoib_pseudo_header));

	mapping = priv->rx_ring[id].mapping;
	mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size,
				       DMA_FROM_DEVICE);
	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
		goto error;

	priv->rx_ring[id].skb = skb;
	return skb;
error:
	dev_kfree_skb_any(skb);
	return NULL;
}

static int ipoib_ib_post_receives(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	int i;

	for (i = 0; i < ipoib_recvq_size; ++i) {
		if (!ipoib_alloc_rx_skb(dev, i)) {
			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
			return -ENOMEM;
		}
		if (ipoib_ib_post_receive(dev, i)) {
			ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
			return -EIO;
		}
	}

	return 0;
}

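/*
 * Handle a single UD receive completion: validate the work request id,
 * hand the filled skb to the network stack and repost a fresh buffer in
 * its ring slot.
 */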
static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
	struct sk_buff *skb;
	u64 mapping[IPOIB_UD_RX_SG];
	union ib_gid *dgid;
	union ib_gid *sgid;

	ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n",
		       wr_id, wc->status);

	if (unlikely(wr_id >= ipoib_recvq_size)) {
		ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n",
			   wr_id, ipoib_recvq_size);
		return;
	}

	skb = priv->rx_ring[wr_id].skb;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			ipoib_warn(priv,
				   "failed recv event (status=%d, wrid=%d vend_err %#x)\n",
				   wc->status, wr_id, wc->vendor_err);
		ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping);
		dev_kfree_skb_any(skb);
		priv->rx_ring[wr_id].skb = NULL;
		return;
	}

	memcpy(mapping, priv->rx_ring[wr_id].mapping,
	       IPOIB_UD_RX_SG * sizeof(*mapping));

	/*
	 * If we can't allocate a replacement receive buffer, drop this
	 * packet and repost the old buffer instead.
	 */
	if (unlikely(!ipoib_alloc_rx_skb(dev, wr_id))) {
		++dev->stats.rx_dropped;
		goto repost;
	}

	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
		       wc->byte_len, wc->slid);

	ipoib_ud_dma_unmap_rx(priv, mapping);

	skb_put(skb, wc->byte_len);

	/* The first byte of the destination GID is 0xff for multicast. */
	dgid = &((struct ib_grh *)skb->data)->dgid;

	if (!(wc->wc_flags & IB_WC_GRH) || dgid->raw[0] != 0xff)
		skb->pkt_type = PACKET_HOST;
	else if (memcmp(dgid, dev->broadcast + 4, sizeof(union ib_gid)) == 0)
		skb->pkt_type = PACKET_BROADCAST;
	else
		skb->pkt_type = PACKET_MULTICAST;

	sgid = &((struct ib_grh *)skb->data)->sgid;

	/*
	 * Drop packets that this interface sent itself, i.e. multicast
	 * packets that the HCA has replicated back to us.
	 */
	if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num) {
		int need_repost = 1;

		if ((wc->wc_flags & IB_WC_GRH) &&
		    sgid->global.interface_id != priv->local_gid.global.interface_id)
			need_repost = 0;

		if (need_repost) {
			dev_kfree_skb_any(skb);
			goto repost;
		}
	}

	skb_pull(skb, IB_GRH_BYTES);

	skb->protocol = ((struct ipoib_header *) skb->data)->proto;
	skb_add_pseudo_hdr(skb);

	++dev->stats.rx_packets;
	dev->stats.rx_bytes += skb->len;
	if (skb->pkt_type == PACKET_MULTICAST)
		dev->stats.multicast++;

	skb->dev = dev;
	if ((dev->features & NETIF_F_RXCSUM) &&
	    likely(wc->wc_flags & IB_WC_IP_CSUM_OK))
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	napi_gro_receive(&priv->recv_napi, skb);

repost:
	if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
		ipoib_warn(priv, "ipoib_ib_post_receive failed "
			   "for buf %d\n", wr_id);
}

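/*
 * DMA-map the linear head and all page fragments of a TX skb.  On a
 * partial failure every mapping created so far is undone before -EIO is
 * returned.
 */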
int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req)
{
	struct sk_buff *skb = tx_req->skb;
	u64 *mapping = tx_req->mapping;
	int i;
	int off;

	if (skb_headlen(skb)) {
		mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
					       DMA_TO_DEVICE);
		if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
			return -EIO;

		off = 1;
	} else
		off = 0;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		mapping[i + off] = ib_dma_map_page(ca,
						   skb_frag_page(frag),
						   skb_frag_off(frag),
						   skb_frag_size(frag),
						   DMA_TO_DEVICE);
		if (unlikely(ib_dma_mapping_error(ca, mapping[i + off])))
			goto partial_error;
	}
	return 0;

partial_error:
	for (; i > 0; --i) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];

		ib_dma_unmap_page(ca, mapping[i - !off], skb_frag_size(frag), DMA_TO_DEVICE);
	}

	if (off)
		ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);

	return -EIO;
}

void ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv,
			struct ipoib_tx_buf *tx_req)
{
	struct sk_buff *skb = tx_req->skb;
	u64 *mapping = tx_req->mapping;
	int i;
	int off;

	if (skb_headlen(skb)) {
		ib_dma_unmap_single(priv->ca, mapping[0], skb_headlen(skb),
				    DMA_TO_DEVICE);
		off = 1;
	} else
		off = 0;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		ib_dma_unmap_page(priv->ca, mapping[i + off],
				  skb_frag_size(frag), DMA_TO_DEVICE);
	}
}

/*
 * A send completion error can move the UD QP into the SQE state.  This
 * work function checks the QP state and, if it is SQE, moves the QP back
 * to RTS so the data path keeps working.
 */
static void ipoib_qp_state_validate_work(struct work_struct *work)
{
	struct ipoib_qp_state_validate *qp_work =
		container_of(work, struct ipoib_qp_state_validate, work);

	struct ipoib_dev_priv *priv = qp_work->priv;
	struct ib_qp_attr qp_attr;
	struct ib_qp_init_attr query_init_attr;
	int ret;

	ret = ib_query_qp(priv->qp, &qp_attr, IB_QP_STATE, &query_init_attr);
	if (ret) {
		ipoib_warn(priv, "%s: Failed to query QP ret: %d\n",
			   __func__, ret);
		goto free_res;
	}
	pr_info("%s: QP: 0x%x is in state: %d\n",
		__func__, priv->qp->qp_num, qp_attr.qp_state);

	/* Currently only the SQE->RTS transition is recovered. */
	if (qp_attr.qp_state == IB_QPS_SQE) {
		qp_attr.qp_state = IB_QPS_RTS;

		ret = ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE);
		if (ret) {
			pr_warn("failed(%d) modify QP:0x%x SQE->RTS\n",
				ret, priv->qp->qp_num);
			goto free_res;
		}
		pr_info("%s: QP: 0x%x moved from IB_QPS_SQE to IB_QPS_RTS\n",
			__func__, priv->qp->qp_num);
	} else {
		pr_warn("QP (%d) will stay in state: %d\n",
			priv->qp->qp_num, qp_attr.qp_state);
	}

free_res:
	kfree(qp_work);
}

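/*
 * Handle a single UD send completion: unmap and free the skb, advance the
 * TX tail, wake the netdev queue if it was stopped, and schedule QP state
 * validation on a completion error.
 */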
static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	unsigned int wr_id = wc->wr_id;
	struct ipoib_tx_buf *tx_req;

	ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
		       wr_id, wc->status);

	if (unlikely(wr_id >= ipoib_sendq_size)) {
		ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
			   wr_id, ipoib_sendq_size);
		return;
	}

	tx_req = &priv->tx_ring[wr_id];

	ipoib_dma_unmap_tx(priv, tx_req);

	++dev->stats.tx_packets;
	dev->stats.tx_bytes += tx_req->skb->len;

	dev_kfree_skb_any(tx_req->skb);

	++priv->tx_tail;
	++priv->global_tx_tail;

	if (unlikely(netif_queue_stopped(dev) &&
		     ((priv->global_tx_head - priv->global_tx_tail) <=
		      ipoib_sendq_size >> 1) &&
		     test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
		netif_wake_queue(dev);

	if (wc->status != IB_WC_SUCCESS &&
	    wc->status != IB_WC_WR_FLUSH_ERR) {
		struct ipoib_qp_state_validate *qp_work;

		ipoib_warn(priv,
			   "failed send event (status=%d, wrid=%d vend_err %#x)\n",
			   wc->status, wr_id, wc->vendor_err);
		qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC);
		if (!qp_work)
			return;

		INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work);
		qp_work->priv = priv;
		queue_work(priv->wq, &qp_work->work);
	}
}

static int poll_tx(struct ipoib_dev_priv *priv)
{
	int n, i;
	struct ib_wc *wc;

	n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);
	for (i = 0; i < n; ++i) {
		wc = priv->send_wc + i;
		if (wc->wr_id & IPOIB_OP_CM)
			ipoib_cm_handle_tx_wc(priv->dev, wc);
		else
			ipoib_ib_handle_tx_wc(priv->dev, wc);
	}
	return n == MAX_SEND_CQE;
}

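/*
 * NAPI receive poll routine: drain up to @budget completions from the
 * receive CQ, dispatching each to the UD or connected-mode handler, and
 * re-arm the CQ once fewer completions than the budget remain.
 */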
int ipoib_rx_poll(struct napi_struct *napi, int budget)
{
	struct ipoib_dev_priv *priv =
		container_of(napi, struct ipoib_dev_priv, recv_napi);
	struct net_device *dev = priv->dev;
	int done;
	int t;
	int n, i;

	done = 0;

poll_more:
	while (done < budget) {
		int max = (budget - done);

		t = min(IPOIB_NUM_WC, max);
		n = ib_poll_cq(priv->recv_cq, t, priv->ibwc);

		for (i = 0; i < n; i++) {
			struct ib_wc *wc = priv->ibwc + i;

			if (wc->wr_id & IPOIB_OP_RECV) {
				++done;
				if (wc->wr_id & IPOIB_OP_CM)
					ipoib_cm_handle_rx_wc(dev, wc);
				else
					ipoib_ib_handle_rx_wc(dev, wc);
			} else {
				pr_warn("%s: Got unexpected wqe id\n", __func__);
			}
		}

		if (n != t)
			break;
	}

	if (done < budget) {
		napi_complete(napi);
		if (unlikely(ib_req_notify_cq(priv->recv_cq,
					      IB_CQ_NEXT_COMP |
					      IB_CQ_REPORT_MISSED_EVENTS)) &&
		    napi_reschedule(napi))
			goto poll_more;
	}

	return done;
}

int ipoib_tx_poll(struct napi_struct *napi, int budget)
{
	struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv,
						   send_napi);
	struct net_device *dev = priv->dev;
	int n, i;
	struct ib_wc *wc;

poll_more:
	n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);

	for (i = 0; i < n; i++) {
		wc = priv->send_wc + i;
		if (wc->wr_id & IPOIB_OP_CM)
			ipoib_cm_handle_tx_wc(dev, wc);
		else
			ipoib_ib_handle_tx_wc(dev, wc);
	}

	if (n < budget) {
		napi_complete(napi);
		if (unlikely(ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
					      IB_CQ_REPORT_MISSED_EVENTS)) &&
		    napi_reschedule(napi))
			goto poll_more;
	}
	return n < 0 ? 0 : n;
}

void ipoib_ib_rx_completion(struct ib_cq *cq, void *ctx_ptr)
{
	struct ipoib_dev_priv *priv = ctx_ptr;

	napi_schedule(&priv->recv_napi);
}

void ipoib_ib_tx_completion(struct ib_cq *cq, void *ctx_ptr)
{
	struct ipoib_dev_priv *priv = ctx_ptr;

	napi_schedule(&priv->send_napi);
}

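/*
 * Build the scatter/gather list for @tx_req and post it on the UD QP,
 * using an LSO work request when a TSO header was supplied in @head.
 */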
static inline int post_send(struct ipoib_dev_priv *priv,
			    unsigned int wr_id,
			    struct ib_ah *address, u32 dqpn,
			    struct ipoib_tx_buf *tx_req,
			    void *head, int hlen)
{
	struct sk_buff *skb = tx_req->skb;

	ipoib_build_sge(priv, tx_req);

	priv->tx_wr.wr.wr_id = wr_id;
	priv->tx_wr.remote_qpn = dqpn;
	priv->tx_wr.ah = address;

	if (head) {
		priv->tx_wr.mss = skb_shinfo(skb)->gso_size;
		priv->tx_wr.header = head;
		priv->tx_wr.hlen = hlen;
		priv->tx_wr.wr.opcode = IB_WR_LSO;
	} else
		priv->tx_wr.wr.opcode = IB_WR_SEND;

	return ib_post_send(priv->qp, &priv->tx_wr.wr, NULL);
}

int ipoib_send(struct net_device *dev, struct sk_buff *skb,
	       struct ib_ah *address, u32 dqpn)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ipoib_tx_buf *tx_req;
	int hlen, rc;
	void *phead;
	unsigned int usable_sge = priv->max_send_sge - !!skb_headlen(skb);

	if (skb_is_gso(skb)) {
		hlen = skb_tcp_all_headers(skb);
		phead = skb->data;
		if (unlikely(!skb_pull(skb, hlen))) {
			ipoib_warn(priv, "linear data too small\n");
			++dev->stats.tx_dropped;
			++dev->stats.tx_errors;
			dev_kfree_skb_any(skb);
			return -1;
		}
	} else {
		if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
			ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
				   skb->len, priv->mcast_mtu + IPOIB_ENCAP_LEN);
			++dev->stats.tx_dropped;
			++dev->stats.tx_errors;
			ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
			return -1;
		}
		phead = NULL;
		hlen = 0;
	}
	if (skb_shinfo(skb)->nr_frags > usable_sge) {
		if (skb_linearize(skb) < 0) {
			ipoib_warn(priv, "skb could not be linearized\n");
			++dev->stats.tx_dropped;
			++dev->stats.tx_errors;
			dev_kfree_skb_any(skb);
			return -1;
		}
		/* Linearizing may still leave too many frags for our SGE list. */
		if (skb_shinfo(skb)->nr_frags > usable_sge) {
			ipoib_warn(priv, "too many frags after skb linearize\n");
			++dev->stats.tx_dropped;
			++dev->stats.tx_errors;
			dev_kfree_skb_any(skb);
			return -1;
		}
	}

	ipoib_dbg_data(priv,
		       "sending packet, length=%d address=%p dqpn=0x%06x\n",
		       skb->len, address, dqpn);

	/*
	 * Record the skb in the tx_ring _before_ calling post_send(),
	 * because the completion handler can run before anything after the
	 * post_send() executes, so all of our state must be consistent
	 * first.
	 */
	tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
	tx_req->skb = skb;
	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
		++dev->stats.tx_errors;
		dev_kfree_skb_any(skb);
		return -1;
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		priv->tx_wr.wr.send_flags |= IB_SEND_IP_CSUM;
	else
		priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;

	if ((priv->global_tx_head - priv->global_tx_tail) ==
	    ipoib_sendq_size - 1) {
		ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
		netif_stop_queue(dev);
	}

	skb_orphan(skb);
	skb_dst_drop(skb);

	if (netif_queue_stopped(dev))
		if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
				     IB_CQ_REPORT_MISSED_EVENTS) < 0)
			ipoib_warn(priv, "request notify on send CQ failed\n");

	rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
		       address, dqpn, tx_req, phead, hlen);
	if (unlikely(rc)) {
		ipoib_warn(priv, "post_send failed, error %d\n", rc);
		++dev->stats.tx_errors;
		ipoib_dma_unmap_tx(priv, tx_req);
		dev_kfree_skb_any(skb);
		if (netif_queue_stopped(dev))
			netif_wake_queue(dev);
		rc = 0;
	} else {
		netif_trans_update(dev);

		rc = priv->tx_head;
		++priv->tx_head;
		++priv->global_tx_head;
	}
	return rc;
}

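/*
 * Destroy and free any dead address handles whose last referencing send
 * has already completed; AHs are only ever destroyed here.
 */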
static void ipoib_reap_dead_ahs(struct ipoib_dev_priv *priv)
{
	struct ipoib_ah *ah, *tah;
	unsigned long flags;

	netif_tx_lock_bh(priv->dev);
	spin_lock_irqsave(&priv->lock, flags);

	list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
		if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
			list_del(&ah->list);
			rdma_destroy_ah(ah->ah, 0);
			kfree(ah);
		}

	spin_unlock_irqrestore(&priv->lock, flags);
	netif_tx_unlock_bh(priv->dev);
}

void ipoib_reap_ah(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, ah_reap_task.work);

	ipoib_reap_dead_ahs(priv);

	if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
		queue_delayed_work(priv->wq, &priv->ah_reap_task,
				   round_jiffies_relative(HZ));
}

static void ipoib_start_ah_reaper(struct ipoib_dev_priv *priv)
{
	clear_bit(IPOIB_STOP_REAPER, &priv->flags);
	queue_delayed_work(priv->wq, &priv->ah_reap_task,
			   round_jiffies_relative(HZ));
}

static void ipoib_stop_ah_reaper(struct ipoib_dev_priv *priv)
{
	set_bit(IPOIB_STOP_REAPER, &priv->flags);
	cancel_delayed_work(&priv->ah_reap_task);
	/*
	 * After stopping the reaper, callers go through ipoib_reap_dead_ahs()
	 * once the data path is quiesced, which does the final flush of the
	 * dead_ahs list.
	 */
}

static int recvs_pending(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	int pending = 0;
	int i;

	for (i = 0; i < ipoib_recvq_size; ++i)
		if (priv->rx_ring[i].skb)
			++pending;

	return pending;
}

static void check_qp_movement_and_print(struct ipoib_dev_priv *priv,
					struct ib_qp *qp,
					enum ib_qp_state new_state)
{
	struct ib_qp_attr qp_attr;
	struct ib_qp_init_attr query_init_attr;
	int ret;

	ret = ib_query_qp(qp, &qp_attr, IB_QP_STATE, &query_init_attr);
	if (ret) {
		ipoib_warn(priv, "%s: Failed to query QP\n", __func__);
		return;
	}

	if (new_state == IB_QPS_ERR && qp_attr.qp_state == IB_QPS_RESET)
		ipoib_dbg(priv, "Failed modify QP, IB_QPS_RESET to IB_QPS_ERR, acceptable\n");
	else
		ipoib_warn(priv, "Failed to modify QP to state: %d from state: %d\n",
			   new_state, qp_attr.qp_state);
}

static void ipoib_napi_enable(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	napi_enable(&priv->recv_napi);
	napi_enable(&priv->send_napi);
}

static void ipoib_napi_disable(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	napi_disable(&priv->recv_napi);
	napi_disable(&priv->send_napi);
}

int ipoib_ib_dev_stop_default(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ib_qp_attr qp_attr;
	unsigned long begin;
	struct ipoib_tx_buf *tx_req;
	int i;

	if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
		ipoib_napi_disable(dev);

	ipoib_cm_dev_stop(dev);

	/*
	 * Move the QP to the error state so that all posted work requests
	 * are flushed back to us with completions.
	 */
	qp_attr.qp_state = IB_QPS_ERR;
	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
		check_qp_movement_and_print(priv, priv->qp, IB_QPS_ERR);

	/* Wait for all sends and receives to complete. */
	begin = jiffies;

	while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
		if (time_after(jiffies, begin + 5 * HZ)) {
			ipoib_warn(priv,
				   "timing out; %d sends %d receives not completed\n",
				   priv->tx_head - priv->tx_tail,
				   recvs_pending(dev));

			/*
			 * Assume the hardware is wedged and just clean up our
			 * pending work requests by hand.
			 */
			while ((int)priv->tx_tail - (int)priv->tx_head < 0) {
				tx_req = &priv->tx_ring[priv->tx_tail &
							(ipoib_sendq_size - 1)];
				ipoib_dma_unmap_tx(priv, tx_req);
				dev_kfree_skb_any(tx_req->skb);
				++priv->tx_tail;
				++priv->global_tx_tail;
			}

			for (i = 0; i < ipoib_recvq_size; ++i) {
				struct ipoib_rx_buf *rx_req;

				rx_req = &priv->rx_ring[i];
				if (!rx_req->skb)
					continue;
				ipoib_ud_dma_unmap_rx(priv,
						      priv->rx_ring[i].mapping);
				dev_kfree_skb_any(rx_req->skb);
				rx_req->skb = NULL;
			}

			goto timeout;
		}

		ipoib_drain_cq(dev);

		usleep_range(1000, 2000);
	}

	ipoib_dbg(priv, "All sends and receives done.\n");

timeout:
	qp_attr.qp_state = IB_QPS_RESET;
	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
		ipoib_warn(priv, "Failed to modify QP to RESET state\n");

	ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);

	return 0;
}

int ipoib_ib_dev_open_default(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	int ret;

	ret = ipoib_init_qp(dev);
	if (ret) {
		ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
		return -1;
	}

	ret = ipoib_ib_post_receives(dev);
	if (ret) {
		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
		goto out;
	}

	ret = ipoib_cm_dev_open(dev);
	if (ret) {
		ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
		goto out;
	}

	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
		ipoib_napi_enable(dev);

	return 0;
out:
	return -1;
}

int ipoib_ib_dev_open(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	ipoib_pkey_dev_check_presence(dev);

	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
		ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
			   (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
		return -1;
	}

	ipoib_start_ah_reaper(priv);
	if (priv->rn_ops->ndo_open(dev)) {
		pr_warn("%s: Failed to open dev\n", dev->name);
		goto dev_stop;
	}

	set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);

	return 0;

dev_stop:
	ipoib_stop_ah_reaper(priv);
	return -1;
}

void ipoib_ib_dev_stop(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	priv->rn_ops->ndo_stop(dev);

	clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
	ipoib_stop_ah_reaper(priv);
}

void ipoib_pkey_dev_check_presence(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct rdma_netdev *rn = netdev_priv(dev);

	if (!(priv->pkey & 0x7fff) ||
	    ib_find_pkey(priv->ca, priv->port, priv->pkey,
			 &priv->pkey_index)) {
		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
	} else {
		if (rn->set_id)
			rn->set_id(dev, priv->pkey_index);
		set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
	}
}

void ipoib_ib_dev_up(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	ipoib_pkey_dev_check_presence(dev);

	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
		ipoib_dbg(priv, "PKEY is not assigned.\n");
		return;
	}

	set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);

	ipoib_mcast_start_thread(dev);
}

void ipoib_ib_dev_down(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	ipoib_dbg(priv, "downing ib_dev\n");

	clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
	netif_carrier_off(dev);

	ipoib_mcast_stop_thread(dev);
	ipoib_mcast_dev_flush(dev);

	ipoib_flush_paths(dev);
}

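/*
 * Drain any completions left on the CQs while the device is being
 * stopped, so that all posted buffers are reclaimed before the rings are
 * torn down.
 */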
void ipoib_drain_cq(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	int i, n;

	/*
	 * The completion handlers expect to run in the BH-disabled NAPI
	 * context, so disable BHs here too.
	 */
	local_bh_disable();

	do {
		n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
		for (i = 0; i < n; ++i) {
			/*
			 * Convert any successful completions to flush errors
			 * to avoid passing packets up the stack after the
			 * device has been stopped.
			 */
			if (priv->ibwc[i].status == IB_WC_SUCCESS)
				priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR;

			if (priv->ibwc[i].wr_id & IPOIB_OP_RECV) {
				if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
					ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
				else
					ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
			} else {
				pr_warn("%s: Got unexpected wqe id\n", __func__);
			}
		}
	} while (n == IPOIB_NUM_WC);

	while (poll_tx(priv))
		; /* nothing */

	local_bh_enable();
}

/*
 * Re-read the P_Key at index 0 for a parent interface and update
 * priv->pkey (with the full-membership bit set) and the broadcast
 * address.  Returns 0 if the P_Key value changed, nonzero if it is
 * unchanged or the query failed.
 */
static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
{
	int result;
	u16 prev_pkey;

	prev_pkey = priv->pkey;
	result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey);
	if (result) {
		ipoib_warn(priv, "ib_query_pkey port %d failed (ret = %d)\n",
			   priv->port, result);
		return result;
	}

	priv->pkey |= 0x8000;

	if (prev_pkey != priv->pkey) {
		ipoib_dbg(priv, "pkey changed from 0x%x to 0x%x\n",
			  prev_pkey, priv->pkey);
		/*
		 * Update the P_Key in the broadcast address so that we join
		 * the right broadcast group.
		 */
		priv->dev->broadcast[8] = priv->pkey >> 8;
		priv->dev->broadcast[9] = priv->pkey & 0xff;
		return 0;
	}

	return 1;
}

/*
 * Returns nonzero if the child interface's P_Key is still present at the
 * same index, 0 if the index changed or the P_Key is no longer present.
 */
static inline int update_child_pkey(struct ipoib_dev_priv *priv)
{
	u16 old_index = priv->pkey_index;

	priv->pkey_index = 0;
	ipoib_pkey_dev_check_presence(priv->dev);

	if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
	    (old_index == priv->pkey_index))
		return 1;
	return 0;
}

/*
 * Returns true if the device address of the IPoIB interface has changed
 * and the new address is valid (i.e. present in the GID table), false
 * otherwise.
 */
static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
{
	union ib_gid search_gid;
	union ib_gid gid0;
	int err;
	u16 index;
	u32 port;
	bool ret = false;

	if (rdma_query_gid(priv->ca, priv->port, 0, &gid0))
		return false;

	netif_addr_lock_bh(priv->dev);

	/*
	 * The subnet prefix may have changed, so update our copy and the
	 * device address now.
	 */
	priv->local_gid.global.subnet_prefix = gid0.global.subnet_prefix;
	dev_addr_mod(priv->dev, 4, (u8 *)&gid0.global.subnet_prefix,
		     sizeof(gid0.global.subnet_prefix));
	search_gid.global.subnet_prefix = gid0.global.subnet_prefix;

	search_gid.global.interface_id = priv->local_gid.global.interface_id;

	netif_addr_unlock_bh(priv->dev);

	err = ib_find_gid(priv->ca, &search_gid, &port, &index);

	netif_addr_lock_bh(priv->dev);

	if (search_gid.global.interface_id !=
	    priv->local_gid.global.interface_id)
		/*
		 * The GID changed while we were looking it up; bail out and
		 * let the next flush sort it out.
		 */
		goto out;

	/*
	 * If the device address has not been set yet
	 * (IPOIB_FLAG_DEV_ADDR_SET is clear), accept it once the GID is
	 * found on our port and remember whether it came from GID index 0.
	 * If it was set, keep it as long as the GID is still in the table;
	 * otherwise, unless the user asked to control the address
	 * (IPOIB_FLAG_DEV_ADDR_CTRL), fall back to the current GID at
	 * index 0.
	 */
	if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
		if (!err && port == priv->port) {
			set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
			if (index == 0)
				clear_bit(IPOIB_FLAG_DEV_ADDR_CTRL,
					  &priv->flags);
			else
				set_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags);
			ret = true;
		} else {
			ret = false;
		}
	} else {
		if (!err && port == priv->port) {
			ret = true;
		} else {
			if (!test_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags)) {
				memcpy(&priv->local_gid, &gid0,
				       sizeof(priv->local_gid));
				dev_addr_mod(priv->dev, 4, (u8 *)&gid0,
					     sizeof(priv->local_gid));
				ret = true;
			}
		}
	}

out:
	netif_addr_unlock_bh(priv->dev);

	return ret;
}

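/*
 * Core flush routine shared by the light, normal and heavy flush work
 * items.  A light flush revalidates paths and the device address, a
 * normal flush also takes the IB device down and back up, and a heavy
 * flush additionally restarts the QP after a P_Key change.
 */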
static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
				 enum ipoib_flush_level level,
				 int nesting)
{
	struct ipoib_dev_priv *cpriv;
	struct net_device *dev = priv->dev;
	int result;

	down_read_nested(&priv->vlan_rwsem, nesting);

	/*
	 * Flush any child interfaces first; they might be up even when the
	 * parent is not.
	 */
	list_for_each_entry(cpriv, &priv->child_intfs, list)
		__ipoib_ib_dev_flush(cpriv, level, nesting + 1);

	up_read(&priv->vlan_rwsem);

	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) &&
	    level != IPOIB_FLUSH_HEAVY) {
		/* Keep the device address valid even when not flushing. */
		if (level == IPOIB_FLUSH_LIGHT)
			ipoib_dev_addr_changed_valid(priv);
		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
		return;
	}

	if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
		/* The interface is down; update the P_Key and leave. */
		if (level == IPOIB_FLUSH_HEAVY) {
			if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
				update_parent_pkey(priv);
			else
				update_child_pkey(priv);
		} else if (level == IPOIB_FLUSH_LIGHT)
			ipoib_dev_addr_changed_valid(priv);
		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
		return;
	}

	if (level == IPOIB_FLUSH_HEAVY) {
		/*
		 * Child devices track their parent's P_Key value, while a
		 * parent device always takes whatever is at P_Key index 0.
		 * Restart the QP only if the P_Key actually changed.
		 */
		if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
			result = update_child_pkey(priv);
			if (result) {
				ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
				return;
			}

		} else {
			result = update_parent_pkey(priv);

			if (result) {
				ipoib_dbg(priv, "Not flushing - P_Key value not changed.\n");
				return;
			}
		}
	}

	if (level == IPOIB_FLUSH_LIGHT) {
		int oper_up;

		ipoib_mark_paths_invalid(dev);
		/*
		 * Mark the interface operationally down while the multicast
		 * groups are flushed, to avoid racing with joins that are in
		 * flight; the mcast restart task will pick up anything we
		 * miss.
		 */
		oper_up = test_and_clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
		ipoib_mcast_dev_flush(dev);
		if (oper_up)
			set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
		ipoib_reap_dead_ahs(priv);
	}

	if (level >= IPOIB_FLUSH_NORMAL)
		ipoib_ib_dev_down(dev);

	if (level == IPOIB_FLUSH_HEAVY) {
		if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
			ipoib_ib_dev_stop(dev);

		if (ipoib_ib_dev_open(dev))
			return;

		if (netif_queue_stopped(dev))
			netif_start_queue(dev);
	}

	/*
	 * The device could have been brought down between the start of the
	 * flush and here; don't bring it back up if it's not configured up.
	 */
	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
		if (level >= IPOIB_FLUSH_NORMAL)
			ipoib_ib_dev_up(dev);
		if (ipoib_dev_addr_changed_valid(priv))
			ipoib_mcast_restart_task(&priv->restart_task);
	}
}

void ipoib_ib_dev_flush_light(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, flush_light);

	__ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT, 0);
}

void ipoib_ib_dev_flush_normal(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, flush_normal);

	__ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL, 0);
}

void ipoib_ib_dev_flush_heavy(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, flush_heavy);

	rtnl_lock();
	__ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0);
	rtnl_unlock();
}

void ipoib_ib_dev_cleanup(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	ipoib_dbg(priv, "cleaning up ib_dev\n");

	/* Flush all cached paths; they are recreated on demand. */
	ipoib_flush_paths(dev);

	ipoib_mcast_stop_thread(dev);
	ipoib_mcast_dev_flush(dev);

	/*
	 * All of our AH references are released only after
	 * ipoib_mcast_dev_flush() and ipoib_flush_paths() have run, so do a
	 * final reap of the dead AH list here.
	 */
	ipoib_reap_dead_ahs(priv);

	clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);

	priv->rn_ops->ndo_uninit(dev);

	if (priv->pd) {
		ib_dealloc_pd(priv->pd);
		priv->pd = NULL;
	}
}