0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035 #include <linux/skbuff.h>
0036 #include <linux/rtnetlink.h>
0037 #include <linux/moduleparam.h>
0038 #include <linux/ip.h>
0039 #include <linux/in.h>
0040 #include <linux/igmp.h>
0041 #include <linux/inetdevice.h>
0042 #include <linux/delay.h>
0043 #include <linux/completion.h>
0044 #include <linux/slab.h>
0045
0046 #include <net/dst.h>
0047
0048 #include "ipoib.h"
0049
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/* Runtime-tunable (mode 0644) knob gating multicast debug tracing. */
static int mcast_debug_level;

module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level,
		 "Enable multicast debug tracing if > 0");
#endif
0057
/*
 * Snapshot of one multicast group taken while walking the device's
 * rb-tree; filled in by ipoib_mcast_iter_next() and read back via
 * ipoib_mcast_iter_read() (debug-only iterator at the bottom of this
 * file, under CONFIG_INFINIBAND_IPOIB_DEBUG).
 */
struct ipoib_mcast_iter {
	struct net_device *dev;		/* device whose tree is iterated */
	union ib_gid mgid;		/* MGID of the group last visited */
	unsigned long created;		/* jiffies when the group was allocated */
	unsigned int queuelen;		/* packets queued awaiting join */
	unsigned int complete;		/* non-zero once the group has an AH */
	unsigned int send_only;		/* set if it was a send-only join */
};
0066
/*
 * join_state value used for send-only groups (see ipoib_mcast_join()):
 * requests a send-only full-member join — presumably the IBTA 1.3
 * SendOnlyFullMember state; TODO confirm against the SA spec in use.
 */
#define SENDONLY_FULLMEMBER_JOIN	8
0072
/*
 * Schedule (or re-schedule) the multicast join task.
 *
 * Every caller in this file invokes this with priv->lock held.  If
 * @mcast is non-NULL and @delay is set, the group's exponential backoff
 * is doubled (capped at IPOIB_MAX_BACKOFF_SECONDS) and its delay_until
 * stamp advanced; the task itself is still queued immediately because
 * the join task re-checks delay_until per group, so other groups are
 * not held up behind this one.
 */
static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv,
					       struct ipoib_mcast *mcast,
					       bool delay)
{
	/* Nothing to schedule unless the interface is operationally up */
	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
		return;

	/*
	 * We are about to queue *something*, so cancel whatever run is
	 * currently pending first.
	 */
	cancel_delayed_work(&priv->mcast_task);
	if (mcast && delay) {
		/* Back this specific group off exponentially */
		mcast->backoff *= 2;
		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
		mcast->delay_until = jiffies + (mcast->backoff * HZ);
		/*
		 * Mark this mcast for its delay, but restart the join task
		 * right away so joins for the remaining groups proceed
		 * without waiting on this one.
		 */
		queue_delayed_work(priv->wq, &priv->mcast_task, 0);
	} else if (delay) {
		/* No specific group: delay the whole task one second */
		queue_delayed_work(priv->wq, &priv->mcast_task, HZ);
	} else
		queue_delayed_work(priv->wq, &priv->mcast_task, 0);
}
0111
0112 static void ipoib_mcast_free(struct ipoib_mcast *mcast)
0113 {
0114 struct net_device *dev = mcast->dev;
0115 int tx_dropped = 0;
0116
0117 ipoib_dbg_mcast(ipoib_priv(dev), "deleting multicast group %pI6\n",
0118 mcast->mcmember.mgid.raw);
0119
0120
0121 ipoib_del_neighs_by_gid(dev, mcast->mcmember.mgid.raw);
0122
0123 if (mcast->ah)
0124 ipoib_put_ah(mcast->ah);
0125
0126 while (!skb_queue_empty(&mcast->pkt_queue)) {
0127 ++tx_dropped;
0128 dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
0129 }
0130
0131 netif_tx_lock_bh(dev);
0132 dev->stats.tx_dropped += tx_dropped;
0133 netif_tx_unlock_bh(dev);
0134
0135 kfree(mcast);
0136 }
0137
0138 static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev)
0139 {
0140 struct ipoib_mcast *mcast;
0141
0142 mcast = kzalloc(sizeof(*mcast), GFP_ATOMIC);
0143 if (!mcast)
0144 return NULL;
0145
0146 mcast->dev = dev;
0147 mcast->created = jiffies;
0148 mcast->delay_until = jiffies;
0149 mcast->backoff = 1;
0150
0151 INIT_LIST_HEAD(&mcast->list);
0152 INIT_LIST_HEAD(&mcast->neigh_list);
0153 skb_queue_head_init(&mcast->pkt_queue);
0154
0155 return mcast;
0156 }
0157
0158 static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
0159 {
0160 struct ipoib_dev_priv *priv = ipoib_priv(dev);
0161 struct rb_node *n = priv->multicast_tree.rb_node;
0162
0163 while (n) {
0164 struct ipoib_mcast *mcast;
0165 int ret;
0166
0167 mcast = rb_entry(n, struct ipoib_mcast, rb_node);
0168
0169 ret = memcmp(mgid, mcast->mcmember.mgid.raw,
0170 sizeof (union ib_gid));
0171 if (ret < 0)
0172 n = n->rb_left;
0173 else if (ret > 0)
0174 n = n->rb_right;
0175 else
0176 return mcast;
0177 }
0178
0179 return NULL;
0180 }
0181
0182 static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast)
0183 {
0184 struct ipoib_dev_priv *priv = ipoib_priv(dev);
0185 struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
0186
0187 while (*n) {
0188 struct ipoib_mcast *tmcast;
0189 int ret;
0190
0191 pn = *n;
0192 tmcast = rb_entry(pn, struct ipoib_mcast, rb_node);
0193
0194 ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,
0195 sizeof (union ib_gid));
0196 if (ret < 0)
0197 n = &pn->rb_left;
0198 else if (ret > 0)
0199 n = &pn->rb_right;
0200 else
0201 return -EEXIST;
0202 }
0203
0204 rb_link_node(&mcast->rb_node, pn, n);
0205 rb_insert_color(&mcast->rb_node, &priv->multicast_tree);
0206
0207 return 0;
0208 }
0209
/*
 * Complete a successful SA join: record the returned mcmember
 * parameters, attach our QP for non-send-only groups, build the
 * address handle used to transmit to the group, and flush any packets
 * queued while the join was in flight.
 *
 * Returns 0 on success, -EAGAIN if the broadcast group disappeared
 * under us, or a negative errno from QP attach / AH creation.
 */
static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
				   struct ib_sa_mcmember_rec *mcmember)
{
	struct net_device *dev = mcast->dev;
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct rdma_netdev *rn = netdev_priv(dev);
	struct ipoib_ah *ah;
	struct rdma_ah_attr av;
	int ret;
	int set_qkey = 0;
	int mtu;

	mcast->mcmember = *mcmember;

	/*
	 * Is this the broadcast group?  (Its MGID lives at offset 4 of
	 * the device hardware broadcast address.)  If so, propagate the
	 * qkey, MTU and path parameters the SA handed back.
	 */
	if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
		    sizeof (union ib_gid))) {
		spin_lock_irq(&priv->lock);
		if (!priv->broadcast) {
			/* Broadcast group was torn down; retry later */
			spin_unlock_irq(&priv->lock);
			return -EAGAIN;
		}

		priv->broadcast->mcmember.qkey = mcmember->qkey;
		priv->broadcast->mcmember.mtu = mcmember->mtu;
		priv->broadcast->mcmember.traffic_class = mcmember->traffic_class;
		priv->broadcast->mcmember.rate = mcmember->rate;
		priv->broadcast->mcmember.sl = mcmember->sl;
		priv->broadcast->mcmember.flow_label = mcmember->flow_label;
		priv->broadcast->mcmember.hop_limit = mcmember->hop_limit;

		mtu = rdma_mtu_enum_to_int(priv->ca, priv->port,
					   priv->broadcast->mcmember.mtu);
		/*
		 * Follow the new mcast MTU with admin_mtu only if the
		 * admin had not set a value of their own.
		 */
		if (priv->mcast_mtu == priv->admin_mtu)
			priv->admin_mtu = IPOIB_UD_MTU(mtu);
		priv->mcast_mtu = IPOIB_UD_MTU(mtu);
		rn->mtu = priv->mcast_mtu;

		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
		spin_unlock_irq(&priv->lock);
		priv->tx_wr.remote_qkey = priv->qkey;
		set_qkey = 1;
	}

	if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
			ipoib_warn(priv, "multicast group %pI6 already attached\n",
				   mcast->mcmember.mgid.raw);

			return 0;
		}

		/* Attach our QP so we can receive this group's traffic */
		ret = rn->attach_mcast(dev, priv->ca, &mcast->mcmember.mgid,
				       be16_to_cpu(mcast->mcmember.mlid),
				       set_qkey, priv->qkey);
		if (ret < 0) {
			ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n",
				   mcast->mcmember.mgid.raw);

			clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
			return ret;
		}
	}

	/* Build the address vector used to send to this group */
	memset(&av, 0, sizeof(av));
	av.type = rdma_ah_find_type(priv->ca, priv->port);
	rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid));
	rdma_ah_set_port_num(&av, priv->port);
	rdma_ah_set_sl(&av, mcast->mcmember.sl);
	rdma_ah_set_static_rate(&av, mcast->mcmember.rate);

	rdma_ah_set_grh(&av, &mcast->mcmember.mgid,
			be32_to_cpu(mcast->mcmember.flow_label),
			0, mcast->mcmember.hop_limit,
			mcast->mcmember.traffic_class);

	ah = ipoib_create_ah(dev, priv->pd, &av);
	if (IS_ERR(ah)) {
		ipoib_warn(priv, "ib_address_create failed %ld\n",
			   -PTR_ERR(ah));
		/* use original error code */
		return PTR_ERR(ah);
	}
	spin_lock_irq(&priv->lock);
	mcast->ah = ah;
	spin_unlock_irq(&priv->lock);

	ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
			mcast->mcmember.mgid.raw,
			mcast->ah->ah,
			be16_to_cpu(mcast->mcmember.mlid),
			mcast->mcmember.sl);

	/* actually send any queued packets */
	netif_tx_lock_bh(dev);
	while (!skb_queue_empty(&mcast->pkt_queue)) {
		struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);

		/* drop the tx lock around the actual (re)transmit */
		netif_tx_unlock_bh(dev);

		skb->dev = dev;

		ret = dev_queue_xmit(skb);
		if (ret)
			ipoib_warn(priv, "%s:dev_queue_xmit failed to re-queue packet, ret:%d\n",
				   __func__, ret);
		netif_tx_lock_bh(dev);
	}
	netif_tx_unlock_bh(dev);

	return 0;
}
0324
/*
 * Deferred work: turn the net_device carrier on after the broadcast
 * join completes, but only while the underlying IB port is ACTIVE.
 */
void ipoib_mcast_carrier_on_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   carrier_on_task);
	struct ib_port_attr attr;

	if (ib_query_port(priv->ca, priv->port, &attr) ||
	    attr.state != IB_PORT_ACTIVE) {
		ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
		return;
	}

	/*
	 * Take the rtnl lock before touching the carrier/MTU, but never
	 * block on it: the shutdown path clears IPOIB_FLAG_OPER_UP and
	 * (presumably) flushes this workqueue while holding rtnl, so a
	 * plain rtnl_lock() here could deadlock.  Loop on trylock until
	 * we either get the lock or see OPER_UP go away, which signals
	 * we are bailing out and can skip the carrier-on entirely.
	 */
	while (!rtnl_trylock()) {
		if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
			return;
		else
			msleep(20);
	}
	if (!ipoib_cm_admin_enabled(priv->dev))
		dev_set_mtu(priv->dev, min(priv->mcast_mtu, priv->admin_mtu));
	netif_carrier_on(priv->dev);
	rtnl_unlock();
}
0356
/*
 * SA join completion callback (registered via ib_sa_join_multicast in
 * ipoib_mcast_join).  On success, finishes the join and — for the
 * broadcast group — kicks carrier-on and the join task for the
 * remaining groups.  On failure, logs (rate-limited by logcount) and
 * either drops queued send-only packets or reschedules the join with
 * backoff.  Both exit paths converge holding priv->lock so mcast->mc
 * and the BUSY flag are updated atomically before completing done.
 */
static int ipoib_mcast_join_complete(int status,
				     struct ib_sa_multicast *multicast)
{
	struct ipoib_mcast *mcast = multicast->context;
	struct net_device *dev = mcast->dev;
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
			test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
			"sendonly " : "",
			mcast->mcmember.mgid.raw, status);

	/* We trap for port events ourselves. */
	if (status == -ENETRESET) {
		status = 0;
		goto out;
	}

	if (!status)
		status = ipoib_mcast_join_finish(mcast, &multicast->rec);

	if (!status) {
		/* Join succeeded: reset this group's backoff state */
		mcast->backoff = 1;
		mcast->delay_until = jiffies;

		/*
		 * Broadcast group joined: defer carrier-on to the
		 * workqueue and restart the join task so the remaining
		 * groups get joined.  Take priv->lock here and exit via
		 * out_locked (see the mc/BUSY ordering note below).
		 */
		if (mcast == priv->broadcast) {
			spin_lock_irq(&priv->lock);
			queue_work(priv->wq, &priv->carrier_on_task);
			__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
			goto out_locked;
		}
	} else {
		/* Send-only joins failing with -EINVAL are logged quietly */
		bool silent_fail =
			test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
			status == -EINVAL;

		if (mcast->logcount < 20) {
			if (status == -ETIMEDOUT || status == -EAGAIN ||
			    silent_fail) {
				ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
						test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
						mcast->mcmember.mgid.raw, status);
			} else {
				ipoib_warn(priv, "%smulticast join failed for %pI6, status %d\n",
					   test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
					   mcast->mcmember.mgid.raw, status);
			}

			/* silent failures don't consume the log budget */
			if (!silent_fail)
				mcast->logcount++;
		}

		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
		    mcast->backoff >= 2) {
			/*
			 * We only retry sendonly joins once before dropping
			 * the queued packets and giving up on the group.
			 * The entry stays in the mcast list as an unjoined
			 * group; queueing a new packet and restarting the
			 * join thread retries it, and the empty pkt_queue
			 * is why the join task otherwise skips it.
			 */
			mcast->backoff = 1;
			netif_tx_lock_bh(dev);
			while (!skb_queue_empty(&mcast->pkt_queue)) {
				++dev->stats.tx_dropped;
				dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
			}
			netif_tx_unlock_bh(dev);
		} else {
			spin_lock_irq(&priv->lock);
			/* Requeue this join task with a backoff delay */
			__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
			goto out_locked;
		}
	}
out:
	spin_lock_irq(&priv->lock);
out_locked:
	/*
	 * Set mcast->mc before clearing the BUSY flag to avoid racing
	 * with code that checks BUSY before looking at mcast->mc.
	 */
	if (status)
		mcast->mc = NULL;
	else
		mcast->mc = multicast;
	clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
	spin_unlock_irq(&priv->lock);
	complete(&mcast->done);

	return status;
}
0458
0459
0460
0461
/*
 * Start an asynchronous SA join for @mcast.  Caller must hold
 * priv->lock; it is dropped around ib_sa_join_multicast() (which may
 * sleep — GFP_KERNEL) and re-acquired before returning.  Completion is
 * delivered to ipoib_mcast_join_complete().  Returns -EINVAL when the
 * device has no broadcast group yet or is not operationally up;
 * otherwise 0 (an ib_sa_join_multicast failure is handled internally
 * by rescheduling the join task with backoff).
 */
static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ib_sa_multicast *multicast;
	struct ib_sa_mcmember_rec rec = {
		.join_state = 1
	};
	ib_sa_comp_mask comp_mask;
	int ret = 0;

	if (!priv->broadcast ||
	    !test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
		return -EINVAL;

	/* Mark the join in flight before the SA call can complete */
	init_completion(&mcast->done);
	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);

	ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);

	rec.mgid     = mcast->mcmember.mgid;
	rec.port_gid = priv->local_gid;
	rec.pkey     = cpu_to_be16(priv->pkey);

	comp_mask =
		IB_SA_MCMEMBER_REC_MGID		|
		IB_SA_MCMEMBER_REC_PORT_GID	|
		IB_SA_MCMEMBER_REC_PKEY		|
		IB_SA_MCMEMBER_REC_JOIN_STATE;

	if (mcast != priv->broadcast) {
		/*
		 * For non-broadcast groups, require the same qkey, MTU,
		 * rate and path parameters as the broadcast group, so all
		 * groups share the broadcast group's characteristics.
		 */
		comp_mask |=
			IB_SA_MCMEMBER_REC_QKEY			|
			IB_SA_MCMEMBER_REC_MTU_SELECTOR		|
			IB_SA_MCMEMBER_REC_MTU			|
			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS	|
			IB_SA_MCMEMBER_REC_RATE_SELECTOR	|
			IB_SA_MCMEMBER_REC_RATE			|
			IB_SA_MCMEMBER_REC_SL			|
			IB_SA_MCMEMBER_REC_FLOW_LABEL		|
			IB_SA_MCMEMBER_REC_HOP_LIMIT;

		rec.qkey	  = priv->broadcast->mcmember.qkey;
		rec.mtu_selector  = IB_SA_EQ;
		rec.mtu		  = priv->broadcast->mcmember.mtu;
		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
		rec.rate_selector = IB_SA_EQ;
		rec.rate	  = priv->broadcast->mcmember.rate;
		rec.sl		  = priv->broadcast->mcmember.sl;
		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;

		/*
		 * Send-only groups request the send-only full-member join
		 * state (see SENDONLY_FULLMEMBER_JOIN) instead of the
		 * normal full-member join.
		 */
		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
			rec.join_state = SENDONLY_FULLMEMBER_JOIN;
	}
	spin_unlock_irq(&priv->lock);

	multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
					 &rec, comp_mask, GFP_KERNEL,
					 ipoib_mcast_join_complete, mcast);
	spin_lock_irq(&priv->lock);
	if (IS_ERR(multicast)) {
		ret = PTR_ERR(multicast);
		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
		/* Requeue this join task with a backoff delay */
		__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
		spin_unlock_irq(&priv->lock);
		/* complete() outside the lock; waiters may free mcast */
		complete(&mcast->done);
		spin_lock_irq(&priv->lock);
	}
	return 0;
}
0552
/*
 * The multicast join task: creates/joins the broadcast group first,
 * then walks priv->multicast_list starting joins for every group that
 * needs one and whose backoff delay has expired.  Re-queues itself for
 * the earliest pending delay_until when some groups are still backing
 * off.
 */
void ipoib_mcast_join_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, mcast_task.work);
	struct net_device *dev = priv->dev;
	struct ib_port_attr port_attr;
	unsigned long delay_until = 0;
	struct ipoib_mcast *mcast = NULL;

	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
		return;

	if (ib_query_port(priv->ca, priv->port, &port_attr)) {
		ipoib_dbg(priv, "ib_query_port() failed\n");
		return;
	}
	if (port_attr.state != IB_PORT_ACTIVE) {
		ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n",
			  port_attr.state);
		return;
	}
	priv->local_lid = port_attr.lid;
	netif_addr_lock_bh(dev);

	/* Bail if the device address (GID) isn't settled yet */
	if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
		netif_addr_unlock_bh(dev);
		return;
	}
	netif_addr_unlock_bh(dev);

	spin_lock_irq(&priv->lock);
	/* re-check under the lock: we may have been brought down */
	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
		goto out;

	if (!priv->broadcast) {
		struct ipoib_mcast *broadcast;

		broadcast = ipoib_mcast_alloc(dev);
		if (!broadcast) {
			ipoib_warn(priv, "failed to allocate broadcast group\n");
			/*
			 * Restart ourselves after a delay to retry creating
			 * the broadcast group; until that succeeds this
			 * device is completely stalled, multicast-wise.
			 */
			__ipoib_mcast_schedule_join_thread(priv, NULL, 1);
			goto out;
		}

		/* broadcast MGID lives at offset 4 of the hw address */
		memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
		       sizeof (union ib_gid));
		priv->broadcast = broadcast;

		__ipoib_mcast_add(dev, priv->broadcast);
	}

	/*
	 * The broadcast group must be attached before any other group is
	 * joined; if it isn't yet, either start its join (via the
	 * serialised-join path at "out") or note its pending delay.
	 */
	if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
		if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) {
			mcast = priv->broadcast;
			if (mcast->backoff > 1 &&
			    time_before(jiffies, mcast->delay_until)) {
				delay_until = mcast->delay_until;
				mcast = NULL;
			}
		}
		goto out;
	}

	/*
	 * Kick off joins for every group that has no SA handle, is not
	 * already busy joining, and (for send-only groups) actually has
	 * packets waiting.
	 */
	list_for_each_entry(mcast, &priv->multicast_list, list) {
		if (IS_ERR_OR_NULL(mcast->mc) &&
		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
		    (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ||
		     !skb_queue_empty(&mcast->pkt_queue))) {
			if (mcast->backoff == 1 ||
			    time_after_eq(jiffies, mcast->delay_until)) {
				/* Found one ready to join now */
				if (ipoib_mcast_join(dev, mcast)) {
					spin_unlock_irq(&priv->lock);
					return;
				}
			} else if (!delay_until ||
				   time_before(mcast->delay_until, delay_until))
				/* track the earliest pending retry time */
				delay_until = mcast->delay_until;
		}
	}

	mcast = NULL;
	ipoib_dbg_mcast(priv, "successfully started all multicast joins\n");

out:
	if (delay_until) {
		cancel_delayed_work(&priv->mcast_task);
		queue_delayed_work(priv->wq, &priv->mcast_task,
				   delay_until - jiffies);
	}
	/* non-NULL mcast here means "join the broadcast group now" */
	if (mcast)
		ipoib_mcast_join(dev, mcast);

	spin_unlock_irq(&priv->lock);
}
0659
0660 void ipoib_mcast_start_thread(struct net_device *dev)
0661 {
0662 struct ipoib_dev_priv *priv = ipoib_priv(dev);
0663 unsigned long flags;
0664
0665 ipoib_dbg_mcast(priv, "starting multicast thread\n");
0666
0667 spin_lock_irqsave(&priv->lock, flags);
0668 __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
0669 spin_unlock_irqrestore(&priv->lock, flags);
0670 }
0671
0672 void ipoib_mcast_stop_thread(struct net_device *dev)
0673 {
0674 struct ipoib_dev_priv *priv = ipoib_priv(dev);
0675
0676 ipoib_dbg_mcast(priv, "stopping multicast thread\n");
0677
0678 cancel_delayed_work_sync(&priv->mcast_task);
0679 }
0680
/*
 * Leave a multicast group: free the SA multicast handle and, if we had
 * attached our QP (non-send-only groups), detach it.  Always returns 0
 * — a detach failure is only warned about, since the group is being
 * torn down regardless.
 */
static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct rdma_netdev *rn = netdev_priv(dev);
	int ret = 0;

	/* Callers should have waited on mcast->done first (see
	 * ipoib_mcast_remove_list); BUSY here means they didn't. */
	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
		ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n");

	if (!IS_ERR_OR_NULL(mcast->mc))
		ib_sa_free_multicast(mcast->mc);

	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
		ipoib_dbg_mcast(priv, "leaving MGID %pI6\n",
				mcast->mcmember.mgid.raw);

		/* Remove ourselves from the multicast group */
		ret = rn->detach_mcast(dev, priv->ca, &mcast->mcmember.mgid,
				       be16_to_cpu(mcast->mcmember.mlid));
		if (ret)
			ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
	} else if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
		ipoib_dbg(priv, "leaving with no mcmember but not a "
			  "SENDONLY join\n");

	return 0;
}
0708
0709
0710
0711
0712
0713 void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
0714 struct list_head *remove_list)
0715 {
0716
0717 if (*mgid == 0xff) {
0718 struct ipoib_mcast *mcast = __ipoib_mcast_find(priv->dev, mgid);
0719
0720 if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
0721 list_del(&mcast->list);
0722 rb_erase(&mcast->rb_node, &priv->multicast_tree);
0723 list_add_tail(&mcast->list, remove_list);
0724 }
0725 }
0726 }
0727
/*
 * Leave and free every group queued on @remove_list.  Runs in two
 * passes so that all in-flight joins have completed before any leave
 * is attempted.
 */
void ipoib_mcast_remove_list(struct list_head *remove_list)
{
	struct ipoib_mcast *mcast, *tmcast;

	/*
	 * make sure the in-flight joins have finished before we attempt
	 * to leave (BUSY is cleared and done completed by
	 * ipoib_mcast_join_complete)
	 */
	list_for_each_entry_safe(mcast, tmcast, remove_list, list)
		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
			wait_for_completion(&mcast->done);

	list_for_each_entry_safe(mcast, tmcast, remove_list, list) {
		ipoib_mcast_leave(mcast->dev, mcast);
		ipoib_mcast_free(mcast);
	}
}
0745
/*
 * Transmit @skb to the multicast group addressed by @daddr (hardware
 * address; the MGID starts at offset 4).  If the group is not joined
 * yet, a send-only group is created on demand and the packet is queued
 * until the join finishes; otherwise the packet is handed straight to
 * the rdma_netdev send routine.  Takes ownership of @skb in all cases.
 */
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct rdma_netdev *rn = netdev_priv(dev);
	struct ipoib_mcast *mcast;
	unsigned long flags;
	void *mgid = daddr + 4;

	spin_lock_irqsave(&priv->lock, flags);

	/* Can't send before the broadcast group is up */
	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)		||
	    !priv->broadcast					||
	    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
		++dev->stats.tx_dropped;
		dev_kfree_skb_any(skb);
		goto unlock;
	}

	mcast = __ipoib_mcast_find(dev, mgid);
	if (!mcast || !mcast->ah) {
		if (!mcast) {
			/* Let's create a new send only group now */
			ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
					mgid);

			mcast = ipoib_mcast_alloc(dev);
			if (!mcast) {
				ipoib_warn(priv, "unable to allocate memory "
					   "for multicast structure\n");
				++dev->stats.tx_dropped;
				dev_kfree_skb_any(skb);
				goto unlock;
			}

			set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
			memcpy(mcast->mcmember.mgid.raw, mgid,
			       sizeof (union ib_gid));
			__ipoib_mcast_add(dev, mcast);
			list_add_tail(&mcast->list, &priv->multicast_list);
		}
		if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) {
			/* put pseudoheader back on for next time */
			skb_push(skb, sizeof(struct ipoib_pseudo_header));
			skb_queue_tail(&mcast->pkt_queue, skb);
		} else {
			/* queue full: drop rather than grow without bound */
			++dev->stats.tx_dropped;
			dev_kfree_skb_any(skb);
		}
		/* kick the join task unless a join is already in flight */
		if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
			__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
		}
	} else {
		struct ipoib_neigh *neigh;

		/* ipoib_neigh_get may sleep/lock; drop priv->lock around it */
		spin_unlock_irqrestore(&priv->lock, flags);
		neigh = ipoib_neigh_get(dev, daddr);
		spin_lock_irqsave(&priv->lock, flags);
		if (!neigh) {
			neigh = ipoib_neigh_alloc(daddr, dev);
			/*
			 * Make sure the neigh is added to the group's
			 * neigh_list only once (list_empty guards against
			 * a concurrent add).
			 */
			if (neigh && list_empty(&neigh->list)) {
				kref_get(&mcast->ah->ref);
				neigh->ah	= mcast->ah;
				neigh->ah->valid = 1;
				list_add_tail(&neigh->list, &mcast->neigh_list);
			}
		}
		spin_unlock_irqrestore(&priv->lock, flags);
		mcast->ah->last_send = rn->send(dev, skb, mcast->ah->ah,
						IB_MULTICAST_QPN);
		if (neigh)
			ipoib_neigh_put(neigh);
		return;
	}

unlock:
	spin_unlock_irqrestore(&priv->lock, flags);
}
0826
/*
 * Flush every multicast group (including broadcast) off the device:
 * collect them on a private list under priv->lock, then leave and free
 * them outside the spinlock via ipoib_mcast_remove_list().  The
 * mcast_mutex serialises whole flush operations against each other.
 */
void ipoib_mcast_dev_flush(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	LIST_HEAD(remove_list);
	struct ipoib_mcast *mcast, *tmcast;
	unsigned long flags;

	mutex_lock(&priv->mcast_mutex);
	ipoib_dbg_mcast(priv, "flushing multicast list\n");

	spin_lock_irqsave(&priv->lock, flags);

	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
		list_del(&mcast->list);
		rb_erase(&mcast->rb_node, &priv->multicast_tree);
		list_add_tail(&mcast->list, &remove_list);
	}

	if (priv->broadcast) {
		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
		list_add_tail(&priv->broadcast->list, &remove_list);
		priv->broadcast = NULL;
	}

	spin_unlock_irqrestore(&priv->lock, flags);

	/* leave/free may sleep, so do it outside the spinlock */
	ipoib_mcast_remove_list(&remove_list);
	mutex_unlock(&priv->mcast_mutex);
}
0856
/*
 * Return 1 if the hardware multicast address @addr matches the device
 * broadcast address @broadcast in bytes 0-5 and 7-9, else 0.  Byte 6
 * is deliberately skipped — presumably a flags/scope byte that may
 * legitimately differ; TODO confirm against the IPoIB address layout.
 */
static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast)
{
	return memcmp(addr, broadcast, 6) == 0 &&
	       memcmp(addr + 7, broadcast + 7, 3) == 0;
}
0867
/*
 * Reconcile our multicast group list with the net core's current
 * hardware multicast address list.  The core only hands us the full
 * list, so we diff: clear FOUND on every known group, mark (or create)
 * groups for each address the core reports, then remove every
 * non-send-only group left unmarked.
 */
void ipoib_mcast_restart_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, restart_task);
	struct net_device *dev = priv->dev;
	struct netdev_hw_addr *ha;
	struct ipoib_mcast *mcast, *tmcast;
	LIST_HEAD(remove_list);
	struct ib_sa_mcmember_rec rec;

	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
		/*
		 * Device is being brought down: nothing to restart;
		 * a fresh restart runs once it comes back up.
		 */
		return;

	ipoib_dbg_mcast(priv, "restarting multicast task\n");

	netif_addr_lock_bh(dev);
	spin_lock_irq(&priv->lock);

	/*
	 * First pass: clear the FOUND flag on everything we currently
	 * track, so whatever is still clear after walking the core's
	 * address list is a group that went away.
	 */
	list_for_each_entry(mcast, &priv->multicast_list, list)
		clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);

	/* Second pass: mark existing entries, create missing ones */
	netdev_for_each_mc_addr(ha, dev) {
		union ib_gid mgid;

		if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast))
			continue;

		memcpy(mgid.raw, ha->addr + 4, sizeof(mgid));

		mcast = __ipoib_mcast_find(dev, &mgid);
		if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
			struct ipoib_mcast *nmcast;

			/* ignore groups already joined directly by userspace */
			if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
			    !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
				ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %pI6\n",
						mgid.raw);
				continue;
			}

			/* Not found or send-only: create a fresh entry */
			ipoib_dbg_mcast(priv, "adding multicast entry for mgid %pI6\n",
					mgid.raw);

			nmcast = ipoib_mcast_alloc(dev);
			if (!nmcast) {
				ipoib_warn(priv, "unable to allocate memory for multicast structure\n");
				continue;
			}

			set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags);

			nmcast->mcmember.mgid = mgid;

			if (mcast) {
				/* Destroy the send only entry */
				list_move_tail(&mcast->list, &remove_list);

				rb_replace_node(&mcast->rb_node,
						&nmcast->rb_node,
						&priv->multicast_tree);
			} else
				__ipoib_mcast_add(dev, nmcast);

			list_add_tail(&nmcast->list, &priv->multicast_list);
		}

		if (mcast)
			set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
	}

	/* Third pass: remove any entries not found in the address list */
	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
		if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) &&
		    !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
			ipoib_dbg_mcast(priv, "deleting multicast group %pI6\n",
					mcast->mcmember.mgid.raw);

			rb_erase(&mcast->rb_node, &priv->multicast_tree);

			/* Move to the remove list */
			list_move_tail(&mcast->list, &remove_list);
		}
	}

	spin_unlock_irq(&priv->lock);
	netif_addr_unlock_bh(dev);

	ipoib_mcast_remove_list(&remove_list);

	/*
	 * Double check that we are still up; restart the join task only
	 * if so.
	 */
	if (test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
		spin_lock_irq(&priv->lock);
		__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
		spin_unlock_irq(&priv->lock);
	}
}
0980
0981 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
0982
0983 struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
0984 {
0985 struct ipoib_mcast_iter *iter;
0986
0987 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
0988 if (!iter)
0989 return NULL;
0990
0991 iter->dev = dev;
0992 memset(iter->mgid.raw, 0, 16);
0993
0994 if (ipoib_mcast_iter_next(iter)) {
0995 kfree(iter);
0996 return NULL;
0997 }
0998
0999 return iter;
1000 }
1001
1002 int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
1003 {
1004 struct ipoib_dev_priv *priv = ipoib_priv(iter->dev);
1005 struct rb_node *n;
1006 struct ipoib_mcast *mcast;
1007 int ret = 1;
1008
1009 spin_lock_irq(&priv->lock);
1010
1011 n = rb_first(&priv->multicast_tree);
1012
1013 while (n) {
1014 mcast = rb_entry(n, struct ipoib_mcast, rb_node);
1015
1016 if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw,
1017 sizeof (union ib_gid)) < 0) {
1018 iter->mgid = mcast->mcmember.mgid;
1019 iter->created = mcast->created;
1020 iter->queuelen = skb_queue_len(&mcast->pkt_queue);
1021 iter->complete = !!mcast->ah;
1022 iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY));
1023
1024 ret = 0;
1025
1026 break;
1027 }
1028
1029 n = rb_next(n);
1030 }
1031
1032 spin_unlock_irq(&priv->lock);
1033
1034 return ret;
1035 }
1036
/*
 * Copy the group snapshot captured by the last successful
 * ipoib_mcast_iter_next() into the caller's out-parameters.
 */
void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
			   union ib_gid *mgid,
			   unsigned long *created,
			   unsigned int *queuelen,
			   unsigned int *complete,
			   unsigned int *send_only)
{
	*mgid      = iter->mgid;
	*created   = iter->created;
	*queuelen  = iter->queuelen;
	*complete  = iter->complete;
	*send_only = iter->send_only;
}
1050
1051 #endif