#include <rdma/ib_mad.h>

#include <linux/mlx4/cmd.h>
#include <linux/rbtree.h>
#include <linux/idr.h>
#include <rdma/ib_cm.h>

#include "mlx4_ib.h"

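/* How long cached CM id entries live before their delayed cleanup work runs */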
#define CM_CLEANUP_CACHE_TIMEOUT (30 * HZ)

struct id_map_entry {
        struct rb_node node;

        u32 sl_cm_id;
        u32 pv_cm_id;
        int slave_id;
        int scheduled_delete;
        struct mlx4_ib_dev *dev;

        struct list_head list;
        struct delayed_work timeout;
};

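/*
 * Remembers which slave sent a (SIDR_)REQ with a given remote CM id, so that
 * a later REJ with reason "timeout" that has no cached id mapping can still
 * be routed back to that slave.
 */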
struct rej_tmout_entry {
        int slave;
        u32 rem_pv_cm_id;
        struct delayed_work timeout;
        struct xarray *xa_rej_tmout;
};

struct cm_generic_msg {
        struct ib_mad_hdr hdr;

        __be32 local_comm_id;
        __be32 remote_comm_id;
        unsigned char unused[2];
        __be16 rej_reason;
};

struct cm_sidr_generic_msg {
        struct ib_mad_hdr hdr;
        __be32 request_id;
};

struct cm_req_msg {
        unsigned char unused[0x60];
        union ib_gid primary_path_sgid;
};

static struct workqueue_struct *cm_wq;

static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
{
        if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
                struct cm_sidr_generic_msg *msg =
                        (struct cm_sidr_generic_msg *)mad;
                msg->request_id = cpu_to_be32(cm_id);
        } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
                pr_err("trying to set local_comm_id in SIDR_REP\n");
                return;
        } else {
                struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
                msg->local_comm_id = cpu_to_be32(cm_id);
        }
}

static u32 get_local_comm_id(struct ib_mad *mad)
{
        if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
                struct cm_sidr_generic_msg *msg =
                        (struct cm_sidr_generic_msg *)mad;
                return be32_to_cpu(msg->request_id);
        } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
                pr_err("trying to get local_comm_id in SIDR_REP\n");
                return -1;
        } else {
                struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
                return be32_to_cpu(msg->local_comm_id);
        }
}

static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
{
        if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
                struct cm_sidr_generic_msg *msg =
                        (struct cm_sidr_generic_msg *)mad;
                msg->request_id = cpu_to_be32(cm_id);
        } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
                pr_err("trying to set remote_comm_id in SIDR_REQ\n");
                return;
        } else {
                struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
                msg->remote_comm_id = cpu_to_be32(cm_id);
        }
}

static u32 get_remote_comm_id(struct ib_mad *mad)
{
        if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
                struct cm_sidr_generic_msg *msg =
                        (struct cm_sidr_generic_msg *)mad;
                return be32_to_cpu(msg->request_id);
        } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
                pr_err("trying to get remote_comm_id in SIDR_REQ\n");
                return -1;
        } else {
                struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
                return be32_to_cpu(msg->remote_comm_id);
        }
}

static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
{
        struct cm_req_msg *msg = (struct cm_req_msg *)mad;

        return msg->primary_path_sgid;
}

static struct id_map_entry *
id_map_find_by_sl_id(struct ib_device *ibdev, u32 slave_id, u32 sl_cm_id)
{
        struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
        struct rb_node *node = sl_id_map->rb_node;

        while (node) {
                struct id_map_entry *id_map_entry =
                        rb_entry(node, struct id_map_entry, node);

                if (id_map_entry->sl_cm_id > sl_cm_id)
                        node = node->rb_left;
                else if (id_map_entry->sl_cm_id < sl_cm_id)
                        node = node->rb_right;
                else if (id_map_entry->slave_id > slave_id)
                        node = node->rb_left;
                else if (id_map_entry->slave_id < slave_id)
                        node = node->rb_right;
                else
                        return id_map_entry;
        }
        return NULL;
}

static void id_map_ent_timeout(struct work_struct *work)
{
        struct delayed_work *delay = to_delayed_work(work);
        struct id_map_entry *ent = container_of(delay, struct id_map_entry, timeout);
        struct id_map_entry *found_ent;
        struct mlx4_ib_dev *dev = ent->dev;
        struct mlx4_ib_sriov *sriov = &dev->sriov;
        struct rb_root *sl_id_map = &sriov->sl_id_map;

        spin_lock(&sriov->id_map_lock);
        if (!xa_erase(&sriov->pv_id_table, ent->pv_cm_id))
                goto out;
        found_ent = id_map_find_by_sl_id(&dev->ib_dev, ent->slave_id, ent->sl_cm_id);
        if (found_ent && found_ent == ent)
                rb_erase(&found_ent->node, sl_id_map);

out:
        list_del(&ent->list);
        spin_unlock(&sriov->id_map_lock);
        kfree(ent);
}

static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
{
        struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
        struct rb_node **link = &sl_id_map->rb_node, *parent = NULL;
        struct id_map_entry *ent;
        int slave_id = new->slave_id;
        int sl_cm_id = new->sl_cm_id;

        ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id);
        if (ent) {
                pr_debug("overriding existing sl_id_map entry (cm_id = %x)\n",
                         sl_cm_id);

                rb_replace_node(&ent->node, &new->node, sl_id_map);
                return;
        }

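        /* walk to the bottom of the tree to find the insertion point */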
        while (*link) {
                parent = *link;
                ent = rb_entry(parent, struct id_map_entry, node);

                if (ent->sl_cm_id > sl_cm_id ||
                    (ent->sl_cm_id == sl_cm_id && ent->slave_id > slave_id))
                        link = &(*link)->rb_left;
                else
                        link = &(*link)->rb_right;
        }

        rb_link_node(&new->node, parent, link);
        rb_insert_color(&new->node, sl_id_map);
}

static struct id_map_entry *
id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
{
        int ret;
        struct id_map_entry *ent;
        struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;

        ent = kmalloc(sizeof(struct id_map_entry), GFP_KERNEL);
        if (!ent)
                return ERR_PTR(-ENOMEM);

        ent->sl_cm_id = sl_cm_id;
        ent->slave_id = slave_id;
        ent->scheduled_delete = 0;
        ent->dev = to_mdev(ibdev);
        INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout);

        ret = xa_alloc_cyclic(&sriov->pv_id_table, &ent->pv_cm_id, ent,
                              xa_limit_32b, &sriov->pv_id_next, GFP_KERNEL);
        if (ret >= 0) {
                spin_lock(&sriov->id_map_lock);
                sl_id_map_add(ibdev, ent);
                list_add_tail(&ent->list, &sriov->cm_list);
                spin_unlock(&sriov->id_map_lock);
                return ent;
        }

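        /* error flow: the xarray id allocation failed */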
        kfree(ent);
        mlx4_ib_warn(ibdev, "Allocation failed (err:0x%x)\n", ret);
        return ERR_PTR(-ENOMEM);
}

static struct id_map_entry *
id_map_get(struct ib_device *ibdev, int *pv_cm_id, int slave_id, int sl_cm_id)
{
        struct id_map_entry *ent;
        struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;

        spin_lock(&sriov->id_map_lock);
        if (*pv_cm_id == -1) {
                ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id);
                if (ent)
                        *pv_cm_id = (int)ent->pv_cm_id;
        } else {
                ent = xa_load(&sriov->pv_id_table, *pv_cm_id);
        }
        spin_unlock(&sriov->id_map_lock);

        return ent;
}

static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
{
        struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
        unsigned long flags;

        spin_lock(&sriov->id_map_lock);
        spin_lock_irqsave(&sriov->going_down_lock, flags);
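        /* make sure not to race with other concurrent clean-ups */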
        if (!sriov->is_going_down && !id->scheduled_delete) {
                id->scheduled_delete = 1;
                queue_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
        } else if (id->scheduled_delete) {
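                /* adjust the timeout if a cleanup is already scheduled */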
                mod_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
        }
        spin_unlock_irqrestore(&sriov->going_down_lock, flags);
        spin_unlock(&sriov->id_map_lock);
}

#define REJ_REASON(m) be16_to_cpu(((struct cm_generic_msg *)(m))->rej_reason)
int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
                                 struct ib_mad *mad)
{
        struct id_map_entry *id;
        u32 sl_cm_id;
        int pv_cm_id = -1;

        if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
            mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
            mad->mad_hdr.attr_id == CM_MRA_ATTR_ID ||
            mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID ||
            (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID &&
             REJ_REASON(mad) == IB_CM_REJ_TIMEOUT)) {
                sl_cm_id = get_local_comm_id(mad);
                id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
                if (id)
                        goto cont;
                id = id_map_alloc(ibdev, slave_id, sl_cm_id);
                if (IS_ERR(id)) {
                        mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
                                     __func__, slave_id, sl_cm_id);
                        return PTR_ERR(id);
                }
        } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
                   mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
                return 0;
        } else {
                sl_cm_id = get_local_comm_id(mad);
                id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
        }

        if (!id) {
                pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL! attr_id: 0x%x\n",
                         slave_id, sl_cm_id, be16_to_cpu(mad->mad_hdr.attr_id));
                return -EINVAL;
        }

cont:
        set_local_comm_id(mad, id->pv_cm_id);

        if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
                schedule_delayed(ibdev, id);
        return 0;
}

static void rej_tmout_timeout(struct work_struct *work)
{
        struct delayed_work *delay = to_delayed_work(work);
        struct rej_tmout_entry *item = container_of(delay, struct rej_tmout_entry, timeout);
        struct rej_tmout_entry *deleted;

        deleted = xa_cmpxchg(item->xa_rej_tmout, item->rem_pv_cm_id, item, NULL, 0);

        if (deleted != item)
                pr_debug("deleted(%p) != item(%p)\n", deleted, item);

        kfree(item);
}

static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int slave)
{
        struct rej_tmout_entry *item;
        struct rej_tmout_entry *old;
        int ret = 0;

        xa_lock(&sriov->xa_rej_tmout);
        item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);

        if (item) {
                if (xa_err(item))
                        ret = xa_err(item);
                else
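                        /* this is a retry: just extend the existing timeout */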
                        mod_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
                goto err_or_exists;
        }
        xa_unlock(&sriov->xa_rej_tmout);

        item = kmalloc(sizeof(*item), GFP_KERNEL);
        if (!item)
                return -ENOMEM;

        INIT_DELAYED_WORK(&item->timeout, rej_tmout_timeout);
        item->slave = slave;
        item->rem_pv_cm_id = rem_pv_cm_id;
        item->xa_rej_tmout = &sriov->xa_rej_tmout;

        old = xa_cmpxchg(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id,
                         NULL, item, GFP_KERNEL);
        if (old) {
                pr_debug("Non-null old entry (%p) or error (%d) when inserting\n",
                         old, xa_err(old));
                kfree(item);
                return xa_err(old);
        }

        queue_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);

        return 0;

err_or_exists:
        xa_unlock(&sriov->xa_rej_tmout);
        return ret;
}

static int lookup_rej_tmout_slave(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id)
{
        struct rej_tmout_entry *item;
        int slave;

        xa_lock(&sriov->xa_rej_tmout);
        item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);

        if (!item || xa_err(item)) {
                pr_debug("Could not find slave. rem_pv_cm_id 0x%x error: %d\n",
                         rem_pv_cm_id, xa_err(item));
                slave = !item ? -ENOENT : xa_err(item);
        } else {
                slave = item->slave;
        }
        xa_unlock(&sriov->xa_rej_tmout);

        return slave;
}

int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
                             struct ib_mad *mad)
{
        struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
        u32 rem_pv_cm_id = get_local_comm_id(mad);
        u32 pv_cm_id;
        struct id_map_entry *id;
        int sts;

        if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
            mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
                union ib_gid gid;

                if (!slave)
                        return 0;

                gid = gid_from_req_msg(ibdev, mad);
                *slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
                if (*slave < 0) {
                        mlx4_ib_warn(ibdev, "failed matching slave_id by gid (0x%llx)\n",
                                     be64_to_cpu(gid.global.interface_id));
                        return -ENOENT;
                }

                sts = alloc_rej_tmout(sriov, rem_pv_cm_id, *slave);
                if (sts)
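                        /* even if this fails, we still pass the REQ on to the slave */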
                        pr_debug("Could not allocate rej_tmout entry. rem_pv_cm_id 0x%x slave %d status %d\n",
                                 rem_pv_cm_id, *slave, sts);

                return 0;
        }

        pv_cm_id = get_remote_comm_id(mad);
        id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1);

        if (!id) {
                if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID &&
                    REJ_REASON(mad) == IB_CM_REJ_TIMEOUT && slave) {
                        *slave = lookup_rej_tmout_slave(sriov, rem_pv_cm_id);

                        return (*slave < 0) ? *slave : 0;
                }
                pr_debug("Couldn't find an entry for pv_cm_id 0x%x, attr_id 0x%x\n",
                         pv_cm_id, be16_to_cpu(mad->mad_hdr.attr_id));
                return -ENOENT;
        }

        if (slave)
                *slave = id->slave_id;
        set_remote_comm_id(mad, id->sl_cm_id);

        if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID ||
            mad->mad_hdr.attr_id == CM_REJ_ATTR_ID)
                schedule_delayed(ibdev, id);

        return 0;
}

void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev)
{
        spin_lock_init(&dev->sriov.id_map_lock);
        INIT_LIST_HEAD(&dev->sriov.cm_list);
        dev->sriov.sl_id_map = RB_ROOT;
        xa_init_flags(&dev->sriov.pv_id_table, XA_FLAGS_ALLOC);
        xa_init(&dev->sriov.xa_rej_tmout);
}

static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave)
{
        struct rej_tmout_entry *item;
        bool flush_needed = false;
        unsigned long id;
        int cnt = 0;

        xa_lock(&sriov->xa_rej_tmout);
        xa_for_each(&sriov->xa_rej_tmout, id, item) {
                if (slave < 0 || slave == item->slave) {
                        mod_delayed_work(cm_wq, &item->timeout, 0);
                        flush_needed = true;
                        ++cnt;
                }
        }
        xa_unlock(&sriov->xa_rej_tmout);

        if (flush_needed) {
                flush_workqueue(cm_wq);
                pr_debug("Deleted %d entries in xarray for slave %d during cleanup\n",
                         cnt, slave);
        }

        if (slave < 0)
                WARN_ON(!xa_empty(&sriov->xa_rej_tmout));
}

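/* slave < 0 ==> clean up the CM id mappings of all slaves */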
void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
{
        struct mlx4_ib_sriov *sriov = &dev->sriov;
        struct rb_root *sl_id_map = &sriov->sl_id_map;
        struct list_head lh;
        struct rb_node *nd;
        int need_flush = 0;
        struct id_map_entry *map, *tmp_map;

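        /* cancel any scheduled delayed cleanup for the affected entries */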
        INIT_LIST_HEAD(&lh);
        spin_lock(&sriov->id_map_lock);
        list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
                if (slave < 0 || slave == map->slave_id) {
                        if (map->scheduled_delete)
                                need_flush |= !cancel_delayed_work(&map->timeout);
                }
        }

        spin_unlock(&sriov->id_map_lock);

        if (need_flush)
                flush_workqueue(cm_wq);

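        /* now remove all leftover entries from the databases */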
        spin_lock(&sriov->id_map_lock);
        if (slave < 0) {
                while (rb_first(sl_id_map)) {
                        struct id_map_entry *ent =
                                rb_entry(rb_first(sl_id_map),
                                         struct id_map_entry, node);

                        rb_erase(&ent->node, sl_id_map);
                        xa_erase(&sriov->pv_id_table, ent->pv_cm_id);
                }
                list_splice_init(&dev->sriov.cm_list, &lh);
        } else {
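                /* first, move the nodes belonging to this slave to the removal list */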
                nd = rb_first(sl_id_map);
                while (nd) {
                        struct id_map_entry *ent =
                                rb_entry(nd, struct id_map_entry, node);
                        nd = rb_next(nd);
                        if (ent->slave_id == slave)
                                list_move_tail(&ent->list, &lh);
                }

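                /* remove those nodes from the databases */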
                list_for_each_entry_safe(map, tmp_map, &lh, list) {
                        rb_erase(&map->node, sl_id_map);
                        xa_erase(&sriov->pv_id_table, map->pv_cm_id);
                }

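                /* add the remaining matching nodes from cm_list */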
                list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
                        if (slave == map->slave_id)
                                list_move_tail(&map->list, &lh);
                }
        }

        spin_unlock(&sriov->id_map_lock);

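        /* free everything that was moved to the local removal list */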
        list_for_each_entry_safe(map, tmp_map, &lh, list) {
                list_del(&map->list);
                kfree(map);
        }

        rej_tmout_xa_cleanup(sriov, slave);
}

int mlx4_ib_cm_init(void)
{
        cm_wq = alloc_workqueue("mlx4_ib_cm", 0, 0);
        if (!cm_wq)
                return -ENOMEM;

        return 0;
}

void mlx4_ib_cm_destroy(void)
{
        destroy_workqueue(cm_wq);
}