#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_sa.h>

#include <linux/mlx4/cmd.h>
#include <linux/rbtree.h>
#include <linux/delay.h>

#include "mlx4_ib.h"

#define MAX_VFS			80
#define MAX_PEND_REQS_PER_FUNC	4
#define MAD_TIMEOUT_MS		2000
#define mcg_warn(fmt, arg...)	pr_warn("MCG WARNING: " fmt, ##arg)
#define mcg_error(fmt, arg...)	pr_err(fmt, ##arg)
#define mcg_warn_group(group, format, arg...) \
	pr_warn("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\
		(group)->name, (group)->demux->port, ## arg)

#define mcg_debug_group(group, format, arg...) \
	pr_debug("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\
		 (group)->name, (group)->demux->port, ## arg)

#define mcg_error_group(group, format, arg...) \
	pr_err(" %16s: " format, (group)->name, ## arg)

static union ib_gid mgid0;

static struct workqueue_struct *clean_wq;

enum mcast_state {
	MCAST_NOT_MEMBER = 0,
	MCAST_MEMBER,
};

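/*
 * Per-group request state, as driven by the work and timeout handlers below:
 *
 *   MCAST_IDLE       - no SA transaction outstanding; pending requests may
 *                      be processed.
 *   MCAST_JOIN_SENT  - a join MCMemberRecord was forwarded to the SA and a
 *                      response (or timeout) is awaited.
 *   MCAST_LEAVE_SENT - a delete (leave) request was forwarded to the SA.
 *   MCAST_RESP_READY - an SA response was stored in response_sa_mad and
 *                      will be handled by the group work.
 */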
enum mcast_group_state {
	MCAST_IDLE,
	MCAST_JOIN_SENT,
	MCAST_LEAVE_SENT,
	MCAST_RESP_READY
};

struct mcast_member {
	enum mcast_state state;
	uint8_t join_state;
	int num_pend_reqs;
	struct list_head pending;
};

struct ib_sa_mcmember_data {
	union ib_gid mgid;
	union ib_gid port_gid;
	__be32 qkey;
	__be16 mlid;
	u8 mtusel_mtu;
	u8 tclass;
	__be16 pkey;
	u8 ratesel_rate;
	u8 lifetmsel_lifetm;
	__be32 sl_flowlabel_hoplimit;
	u8 scope_join_state;
	u8 proxy_join;
	u8 reserved[2];
} __packed __aligned(4);

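/*
 * One mcast_group is kept per MGID per port.  It caches the SA record,
 * tracks per-VF membership (func[]), queues the VFs' pending join/leave
 * requests, and owns the delayed work used to time out SA transactions.
 * refcount holds one reference per queued request, per scheduled work
 * item and per live lookup via acquire_group(); the group is freed on
 * the last release (see release_group()).
 */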
struct mcast_group {
	struct ib_sa_mcmember_data rec;
	struct rb_node node;
	struct list_head mgid0_list;
	struct mlx4_ib_demux_ctx *demux;
	struct mcast_member func[MAX_VFS];
	struct mutex lock;
	struct work_struct work;
	struct list_head pending_list;
	int members[3];
	enum mcast_group_state state;
	enum mcast_group_state prev_state;
	struct ib_sa_mad response_sa_mad;
	__be64 last_req_tid;

	char name[33];
	struct device_attribute dentry;

	atomic_t refcount;

	struct delayed_work timeout_work;
	struct list_head cleanup_list;
};

struct mcast_req {
	int func;
	struct ib_sa_mad sa_mad;
	struct list_head group_list;
	struct list_head func_list;
	struct mcast_group *group;
	int clean;
};

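/*
 * Drop a reference that is never expected to be the last one.  Note that
 * this macro relies on a local variable named "group" being in scope at
 * the call site for the warning it prints.
 */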
#define safe_atomic_dec(ref) \
	do {\
		if (atomic_dec_and_test(ref)) \
			mcg_warn_group(group, "did not expect to reach zero\n"); \
	} while (0)

static const char *get_state_string(enum mcast_group_state state)
{
	switch (state) {
	case MCAST_IDLE:
		return "MCAST_IDLE";
	case MCAST_JOIN_SENT:
		return "MCAST_JOIN_SENT";
	case MCAST_LEAVE_SENT:
		return "MCAST_LEAVE_SENT";
	case MCAST_RESP_READY:
		return "MCAST_RESP_READY";
	}
	return "Invalid State";
}

static struct mcast_group *mcast_find(struct mlx4_ib_demux_ctx *ctx,
				      union ib_gid *mgid)
{
	struct rb_node *node = ctx->mcg_table.rb_node;
	struct mcast_group *group;
	int ret;

	while (node) {
		group = rb_entry(node, struct mcast_group, node);
		ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
		if (!ret)
			return group;

		if (ret < 0)
			node = node->rb_left;
		else
			node = node->rb_right;
	}
	return NULL;
}

static struct mcast_group *mcast_insert(struct mlx4_ib_demux_ctx *ctx,
					struct mcast_group *group)
{
	struct rb_node **link = &ctx->mcg_table.rb_node;
	struct rb_node *parent = NULL;
	struct mcast_group *cur_group;
	int ret;

	while (*link) {
		parent = *link;
		cur_group = rb_entry(parent, struct mcast_group, node);

		ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
			     sizeof group->rec.mgid);
		if (ret < 0)
			link = &(*link)->rb_left;
		else if (ret > 0)
			link = &(*link)->rb_right;
		else
			return cur_group;
	}
	rb_link_node(&group->node, parent, link);
	rb_insert_color(&group->node, &ctx->mcg_table);
	return NULL;
}

static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
{
	struct mlx4_ib_dev *dev = ctx->dev;
	struct rdma_ah_attr ah_attr;
	unsigned long flags;

	spin_lock_irqsave(&dev->sm_lock, flags);
	if (!dev->sm_ah[ctx->port - 1]) {
		/* port is not yet Active, sm_ah not ready */
		spin_unlock_irqrestore(&dev->sm_lock, flags);
		return -EAGAIN;
	}
	mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
	spin_unlock_irqrestore(&dev->sm_lock, flags);
	return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev),
				    ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY,
				    &ah_attr, NULL, 0xffff, mad);
}

static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
			     struct ib_mad *mad)
{
	struct mlx4_ib_dev *dev = ctx->dev;
	struct ib_mad_agent *agent = dev->send_agent[ctx->port - 1][1];
	struct ib_wc wc;
	struct rdma_ah_attr ah_attr;

	/* our agent might not yet be registered when mads start to arrive */
	if (!agent)
		return -EAGAIN;

	rdma_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);

	if (ib_find_cached_pkey(&dev->ib_dev, ctx->port, IB_DEFAULT_PKEY_FULL, &wc.pkey_index))
		return -EINVAL;
	wc.sl = 0;
	wc.dlid_path_bits = 0;
	wc.port_num = ctx->port;
	wc.slid = rdma_ah_get_dlid(&ah_attr);  /* opensm lid */
	wc.src_qp = 1;
	return mlx4_ib_send_to_slave(dev, slave, ctx->port, IB_QPT_GSI, &wc, NULL, mad);
}

static int send_join_to_wire(struct mcast_group *group, struct ib_sa_mad *sa_mad)
{
	struct ib_sa_mad mad;
	struct ib_sa_mcmember_data *sa_mad_data = (struct ib_sa_mcmember_data *)&mad.data;
	int ret;

	/* we rely on a mad request as arrived from a VF */
	memcpy(&mad, sa_mad, sizeof mad);

	/* fix port GID to be the real one (slave 0) */
	sa_mad_data->port_gid.global.interface_id = group->demux->guid_cache[0];

	/* assign our own TID */
	mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
	group->last_req_tid = mad.mad_hdr.tid; /* checked against the response TID */

	ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
	/* set timeout handler */
	if (!ret) {
		/* calls mlx4_ib_mcg_timeout_handler */
		queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
				msecs_to_jiffies(MAD_TIMEOUT_MS));
	}

	return ret;
}

static int send_leave_to_wire(struct mcast_group *group, u8 join_state)
{
	struct ib_sa_mad mad;
	struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
	int ret;

	memset(&mad, 0, sizeof mad);
	mad.mad_hdr.base_version = 1;
	mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
	mad.mad_hdr.class_version = 2;
	mad.mad_hdr.method = IB_SA_METHOD_DELETE;
	mad.mad_hdr.status = cpu_to_be16(0);
	mad.mad_hdr.class_specific = cpu_to_be16(0);
	mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
	group->last_req_tid = mad.mad_hdr.tid; /* checked against the response TID */
	mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
	mad.mad_hdr.attr_mod = cpu_to_be32(0);
	mad.sa_hdr.sm_key = 0x0;
	mad.sa_hdr.attr_offset = cpu_to_be16(7);
	mad.sa_hdr.comp_mask = IB_SA_MCMEMBER_REC_MGID |
		IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE;

	*sa_data = group->rec;
	sa_data->scope_join_state = join_state;

	ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
	if (ret)
		group->state = MCAST_IDLE;

	/* set timeout handler */
	if (!ret) {
		/* calls mlx4_ib_mcg_timeout_handler */
		queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
				msecs_to_jiffies(MAD_TIMEOUT_MS));
	}

	return ret;
}

static int send_reply_to_slave(int slave, struct mcast_group *group,
			       struct ib_sa_mad *req_sa_mad, u16 status)
{
	struct ib_sa_mad mad;
	struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
	struct ib_sa_mcmember_data *req_sa_data = (struct ib_sa_mcmember_data *)&req_sa_mad->data;
	int ret;

	memset(&mad, 0, sizeof mad);
	mad.mad_hdr.base_version = 1;
	mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
	mad.mad_hdr.class_version = 2;
	mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
	mad.mad_hdr.status = cpu_to_be16(status);
	mad.mad_hdr.class_specific = cpu_to_be16(0);
	mad.mad_hdr.tid = req_sa_mad->mad_hdr.tid;
	*(u8 *)&mad.mad_hdr.tid = 0; /* resetting tid to 0 */
	mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
	mad.mad_hdr.attr_mod = cpu_to_be32(0);
	mad.sa_hdr.sm_key = req_sa_mad->sa_hdr.sm_key;
	mad.sa_hdr.attr_offset = cpu_to_be16(7);
	mad.sa_hdr.comp_mask = 0;

	*sa_data = group->rec;

	/* reconstruct VF's requested join_state and port_gid */
	sa_data->scope_join_state &= 0xf0;
	sa_data->scope_join_state |= (group->func[slave].join_state & 0x0f);
	memcpy(&sa_data->port_gid, &req_sa_data->port_gid, sizeof req_sa_data->port_gid);

	ret = send_mad_to_slave(slave, group->demux, (struct ib_mad *)&mad);
	return ret;
}

static int check_selector(ib_sa_comp_mask comp_mask,
			  ib_sa_comp_mask selector_mask,
			  ib_sa_comp_mask value_mask,
			  u8 src_value, u8 dst_value)
{
	int err;
	u8 selector = dst_value >> 6;
	dst_value &= 0x3f;
	src_value &= 0x3f;

	if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
		return 0;

	switch (selector) {
	case IB_SA_GT:
		err = (src_value <= dst_value);
		break;
	case IB_SA_LT:
		err = (src_value >= dst_value);
		break;
	case IB_SA_EQ:
		err = (src_value != dst_value);
		break;
	default:
		err = 0;
		break;
	}

	return err;
}

static u16 cmp_rec(struct ib_sa_mcmember_data *src,
		   struct ib_sa_mcmember_data *dst, ib_sa_comp_mask comp_mask)
{
	/* src is group record, dst is request record */
	/* MGID must already match */
	/* Port_GID we always replace to our Port_GID, so it is a match */

#define MAD_STATUS_REQ_INVALID 0x0200
	if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
		return MAD_STATUS_REQ_INVALID;
	if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
		return MAD_STATUS_REQ_INVALID;
	if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
			   IB_SA_MCMEMBER_REC_MTU,
			   src->mtusel_mtu, dst->mtusel_mtu))
		return MAD_STATUS_REQ_INVALID;
	if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
	    src->tclass != dst->tclass)
		return MAD_STATUS_REQ_INVALID;
	if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
		return MAD_STATUS_REQ_INVALID;
	if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
			   IB_SA_MCMEMBER_REC_RATE,
			   src->ratesel_rate, dst->ratesel_rate))
		return MAD_STATUS_REQ_INVALID;
	if (check_selector(comp_mask,
			   IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
			   IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
			   src->lifetmsel_lifetm, dst->lifetmsel_lifetm))
		return MAD_STATUS_REQ_INVALID;
	if (comp_mask & IB_SA_MCMEMBER_REC_SL &&
	    (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0xf0000000) !=
	    (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0xf0000000))
		return MAD_STATUS_REQ_INVALID;
	if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
	    (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x0fffff00) !=
	    (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x0fffff00))
		return MAD_STATUS_REQ_INVALID;
	if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
	    (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x000000ff) !=
	    (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x000000ff))
		return MAD_STATUS_REQ_INVALID;
	if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE &&
	    (src->scope_join_state & 0xf0) !=
	    (dst->scope_join_state & 0xf0))
		return MAD_STATUS_REQ_INVALID;

	/* join_state is handled in handle_join_req(); proxy_join is not checked */

	return 0;
}

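/*
 * Release one reference on the group.  Returns 1 if this was the last
 * reference and the group was destroyed.  When not called from the timeout
 * handler itself, an outstanding SA transaction keeps the group alive until
 * its delayed timeout work can be canceled.
 */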
static int release_group(struct mcast_group *group, int from_timeout_handler)
{
	struct mlx4_ib_demux_ctx *ctx = group->demux;
	int nzgroup;

	mutex_lock(&ctx->mcg_table_lock);
	mutex_lock(&group->lock);
	if (atomic_dec_and_test(&group->refcount)) {
		if (!from_timeout_handler) {
			if (group->state != MCAST_IDLE &&
			    !cancel_delayed_work(&group->timeout_work)) {
				atomic_inc(&group->refcount);
				mutex_unlock(&group->lock);
				mutex_unlock(&ctx->mcg_table_lock);
				return 0;
			}
		}

		nzgroup = memcmp(&group->rec.mgid, &mgid0, sizeof mgid0);
		if (nzgroup)
			del_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
		if (!list_empty(&group->pending_list))
			mcg_warn_group(group, "releasing a group with non empty pending list\n");
		if (nzgroup)
			rb_erase(&group->node, &ctx->mcg_table);
		list_del_init(&group->mgid0_list);
		mutex_unlock(&group->lock);
		mutex_unlock(&ctx->mcg_table_lock);
		kfree(group);
		return 1;
	} else {
		mutex_unlock(&group->lock);
		mutex_unlock(&ctx->mcg_table_lock);
	}
	return 0;
}

static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
{
	int i;

	for (i = 0; i < 3; i++, join_state >>= 1)
		if (join_state & 0x1)
			group->members[i] += inc;
}

static u8 get_leave_state(struct mcast_group *group)
{
	u8 leave_state = 0;
	int i;

	for (i = 0; i < 3; i++)
		if (!group->members[i])
			leave_state |= (1 << i);

	return leave_state & (group->rec.scope_join_state & 0xf);
}

static int join_group(struct mcast_group *group, int slave, u8 join_mask)
{
	int ret = 0;
	u8 join_state;

	/* remove bits that slave is already member of, and adjust */
	join_state = join_mask & (~group->func[slave].join_state);
	adjust_membership(group, join_state, 1);
	group->func[slave].join_state |= join_state;
	if (group->func[slave].state != MCAST_MEMBER && join_state) {
		group->func[slave].state = MCAST_MEMBER;
		ret = 1;
	}
	return ret;
}

static int leave_group(struct mcast_group *group, int slave, u8 leave_state)
{
	int ret = 0;

	adjust_membership(group, leave_state, -1);
	group->func[slave].join_state &= ~leave_state;
	if (!group->func[slave].join_state) {
		group->func[slave].state = MCAST_NOT_MEMBER;
		ret = 1;
	}
	return ret;
}

static int check_leave(struct mcast_group *group, int slave, u8 leave_mask)
{
	if (group->func[slave].state != MCAST_MEMBER)
		return MAD_STATUS_REQ_INVALID;

	/* make sure we're not deleting unset bits */
	if (~group->func[slave].join_state & leave_mask)
		return MAD_STATUS_REQ_INVALID;

	if (!leave_mask)
		return MAD_STATUS_REQ_INVALID;

	return 0;
}

static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
{
	struct delayed_work *delay = to_delayed_work(work);
	struct mcast_group *group;
	struct mcast_req *req = NULL;

	group = container_of(delay, typeof(*group), timeout_work);

	mutex_lock(&group->lock);
	if (group->state == MCAST_JOIN_SENT) {
		if (!list_empty(&group->pending_list)) {
			req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
			list_del(&req->group_list);
			list_del(&req->func_list);
			--group->func[req->func].num_pend_reqs;
			mutex_unlock(&group->lock);
			kfree(req);
			if (memcmp(&group->rec.mgid, &mgid0, sizeof mgid0)) {
				if (release_group(group, 1))
					return;
			} else {
				kfree(group);
				return;
			}
			mutex_lock(&group->lock);
		} else
			mcg_warn_group(group, "DRIVER BUG\n");
	} else if (group->state == MCAST_LEAVE_SENT) {
		if (group->rec.scope_join_state & 0xf)
			group->rec.scope_join_state &= 0xf0;
		group->state = MCAST_IDLE;
		mutex_unlock(&group->lock);
		if (release_group(group, 1))
			return;
		mutex_lock(&group->lock);
	} else
		mcg_warn_group(group, "invalid state %s\n", get_state_string(group->state));
	group->state = MCAST_IDLE;
	atomic_inc(&group->refcount);
	if (!queue_work(group->demux->mcg_wq, &group->work))
		safe_atomic_dec(&group->refcount);

	mutex_unlock(&group->lock);
}

static int handle_leave_req(struct mcast_group *group, u8 leave_mask,
			    struct mcast_req *req)
{
	u16 status;

	if (req->clean)
		leave_mask = group->func[req->func].join_state;

	status = check_leave(group, req->func, leave_mask);
	if (!status)
		leave_group(group, req->func, leave_mask);

	if (!req->clean)
		send_reply_to_slave(req->func, group, &req->sa_mad, status);
	--group->func[req->func].num_pend_reqs;
	list_del(&req->group_list);
	list_del(&req->func_list);
	kfree(req);
	return 1;
}

static int handle_join_req(struct mcast_group *group, u8 join_mask,
			   struct mcast_req *req)
{
	u8 group_join_state = group->rec.scope_join_state & 0xf;
	int ref = 0;
	u16 status;
	struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;

	if (join_mask == (group_join_state & join_mask)) {
		/* port's membership need not change */
		status = cmp_rec(&group->rec, sa_data, req->sa_mad.sa_hdr.comp_mask);
		if (!status)
			join_group(group, req->func, join_mask);

		--group->func[req->func].num_pend_reqs;
		send_reply_to_slave(req->func, group, &req->sa_mad, status);
		list_del(&req->group_list);
		list_del(&req->func_list);
		kfree(req);
		++ref;
	} else {
		/* port's membership needs to be updated */
		group->prev_state = group->state;
		if (send_join_to_wire(group, &req->sa_mad)) {
			--group->func[req->func].num_pend_reqs;
			list_del(&req->group_list);
			list_del(&req->func_list);
			kfree(req);
			ref = 1;
			group->state = group->prev_state;
		} else
			group->state = MCAST_JOIN_SENT;
	}

	return ref;
}

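/*
 * Group work handler.  First consumes a pending SA response (if the group is
 * marked MCAST_RESP_READY), then services queued join/leave requests from
 * the VFs while the group is idle, and finally issues a leave to the SA for
 * any join states no VF is using anymore.  The count kept in "rc" balances
 * the group references taken by queue_req() and the demux handler.
 */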
static void mlx4_ib_mcg_work_handler(struct work_struct *work)
{
	struct mcast_group *group;
	struct mcast_req *req = NULL;
	struct ib_sa_mcmember_data *sa_data;
	u8 req_join_state;
	int rc = 1; /* release count - this one is for the scheduled work */
	u16 status;
	u8 method;

	group = container_of(work, typeof(*group), work);

	mutex_lock(&group->lock);

	/* First, see if a response from the SM is waiting for this group.
	 * If so, update the group's record.  For a bad response, a waiting VF
	 * may need to be answered with the error status; for a good response,
	 * the waiting VF is answered later in this function. */
	if (group->state == MCAST_RESP_READY) {
		/* cancels mlx4_ib_mcg_timeout_handler */
		cancel_delayed_work(&group->timeout_work);
		status = be16_to_cpu(group->response_sa_mad.mad_hdr.status);
		method = group->response_sa_mad.mad_hdr.method;
		if (group->last_req_tid != group->response_sa_mad.mad_hdr.tid) {
			mcg_warn_group(group, "Got MAD response to existing MGID but wrong TID, dropping. Resp TID=%llx, group TID=%llx\n",
				       be64_to_cpu(group->response_sa_mad.mad_hdr.tid),
				       be64_to_cpu(group->last_req_tid));
			group->state = group->prev_state;
			goto process_requests;
		}
		if (status) {
			if (!list_empty(&group->pending_list))
				req = list_first_entry(&group->pending_list,
						       struct mcast_req, group_list);
			if (method == IB_MGMT_METHOD_GET_RESP) {
				if (req) {
					send_reply_to_slave(req->func, group, &req->sa_mad, status);
					--group->func[req->func].num_pend_reqs;
					list_del(&req->group_list);
					list_del(&req->func_list);
					kfree(req);
					++rc;
				} else
					mcg_warn_group(group, "no request for failed join\n");
			} else if (method == IB_SA_METHOD_DELETE_RESP && group->demux->flushing)
				++rc;
		} else {
			u8 resp_join_state;
			u8 cur_join_state;

			resp_join_state = ((struct ib_sa_mcmember_data *)
					   group->response_sa_mad.data)->scope_join_state & 0xf;
			cur_join_state = group->rec.scope_join_state & 0xf;

			if (method == IB_MGMT_METHOD_GET_RESP) {
				/* successful join */
				if (!cur_join_state && resp_join_state)
					--rc;
			} else if (!resp_join_state)
				++rc;
			memcpy(&group->rec, group->response_sa_mad.data, sizeof group->rec);
		}
		group->state = MCAST_IDLE;
	}

process_requests:
	/* Go over pending join/leave requests, as long as we are idle. */
	while (!list_empty(&group->pending_list) && group->state == MCAST_IDLE) {
		req = list_first_entry(&group->pending_list, struct mcast_req,
				       group_list);
		sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
		req_join_state = sa_data->scope_join_state & 0xf;

		/* For a leave request, the VF is answered immediately and the
		 * internal counters are updated.  The actual leave request on
		 * the wire is sent in the 'handle leaves' phase below. */
		if (req->sa_mad.mad_hdr.method == IB_SA_METHOD_DELETE)
			rc += handle_leave_req(group, req_join_state, req);
		else
			rc += handle_join_req(group, req_join_state, req);
	}

	/* Handle leaves */
	if (group->state == MCAST_IDLE) {
		req_join_state = get_leave_state(group);
		if (req_join_state) {
			group->rec.scope_join_state &= ~req_join_state;
			group->prev_state = group->state;
			if (send_leave_to_wire(group, req_join_state)) {
				group->state = group->prev_state;
				++rc;
			} else
				group->state = MCAST_LEAVE_SENT;
		}
	}

	if (!list_empty(&group->pending_list) && group->state == MCAST_IDLE)
		goto process_requests;
	mutex_unlock(&group->lock);

	while (rc--)
		release_group(group, 0);
}

static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx *ctx,
						       __be64 tid,
						       union ib_gid *new_mgid)
{
	struct mcast_group *group = NULL, *cur_group, *n;
	struct mcast_req *req;

	mutex_lock(&ctx->mcg_table_lock);
	list_for_each_entry_safe(group, n, &ctx->mcg_mgid0_list, mgid0_list) {
		mutex_lock(&group->lock);
		if (group->last_req_tid == tid) {
			if (memcmp(new_mgid, &mgid0, sizeof mgid0)) {
				group->rec.mgid = *new_mgid;
				sprintf(group->name, "%016llx%016llx",
					be64_to_cpu(group->rec.mgid.global.subnet_prefix),
					be64_to_cpu(group->rec.mgid.global.interface_id));
				list_del_init(&group->mgid0_list);
				cur_group = mcast_insert(ctx, group);
				if (cur_group) {
					/* A race between our code and SM. Silently cleaning the new one */
					req = list_first_entry(&group->pending_list,
							       struct mcast_req, group_list);
					--group->func[req->func].num_pend_reqs;
					list_del(&req->group_list);
					list_del(&req->func_list);
					kfree(req);
					mutex_unlock(&group->lock);
					mutex_unlock(&ctx->mcg_table_lock);
					release_group(group, 0);
					return NULL;
				}

				atomic_inc(&group->refcount);
				add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
				mutex_unlock(&group->lock);
				mutex_unlock(&ctx->mcg_table_lock);
				return group;
			} else {
				struct mcast_req *tmp1, *tmp2;

				list_del(&group->mgid0_list);
				if (!list_empty(&group->pending_list) && group->state != MCAST_IDLE)
					cancel_delayed_work_sync(&group->timeout_work);

				list_for_each_entry_safe(tmp1, tmp2, &group->pending_list, group_list) {
					list_del(&tmp1->group_list);
					kfree(tmp1);
				}
				mutex_unlock(&group->lock);
				mutex_unlock(&ctx->mcg_table_lock);
				kfree(group);
				return NULL;
			}
		}
		mutex_unlock(&group->lock);
	}
	mutex_unlock(&ctx->mcg_table_lock);

	return NULL;
}

static ssize_t sysfs_show_group(struct device *dev,
				struct device_attribute *attr, char *buf);

static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx,
					 union ib_gid *mgid, int create)
{
	struct mcast_group *group, *cur_group;
	int is_mgid0;
	int i;

	is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
	if (!is_mgid0) {
		group = mcast_find(ctx, mgid);
		if (group)
			goto found;
	}

	if (!create)
		return ERR_PTR(-ENOENT);

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	group->demux = ctx;
	group->rec.mgid = *mgid;
	INIT_LIST_HEAD(&group->pending_list);
	INIT_LIST_HEAD(&group->mgid0_list);
	for (i = 0; i < MAX_VFS; ++i)
		INIT_LIST_HEAD(&group->func[i].pending);
	INIT_WORK(&group->work, mlx4_ib_mcg_work_handler);
	INIT_DELAYED_WORK(&group->timeout_work, mlx4_ib_mcg_timeout_handler);
	mutex_init(&group->lock);
	sprintf(group->name, "%016llx%016llx",
		be64_to_cpu(group->rec.mgid.global.subnet_prefix),
		be64_to_cpu(group->rec.mgid.global.interface_id));
	sysfs_attr_init(&group->dentry.attr);
	group->dentry.show = sysfs_show_group;
	group->dentry.store = NULL;
	group->dentry.attr.name = group->name;
	group->dentry.attr.mode = 0400;
	group->state = MCAST_IDLE;

	if (is_mgid0) {
		list_add(&group->mgid0_list, &ctx->mcg_mgid0_list);
		goto found;
	}

	cur_group = mcast_insert(ctx, group);
	if (cur_group) {
		mcg_warn("group just showed up %s - confused\n", cur_group->name);
		kfree(group);
		return ERR_PTR(-EINVAL);
	}

	add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);

found:
	atomic_inc(&group->refcount);
	return group;
}

static void queue_req(struct mcast_req *req)
{
	struct mcast_group *group = req->group;

	atomic_inc(&group->refcount); /* for the request */
	atomic_inc(&group->refcount); /* for scheduling the work */
	list_add_tail(&req->group_list, &group->pending_list);
	list_add_tail(&req->func_list, &group->func[req->func].pending);
	/* calls mlx4_ib_mcg_work_handler */
	if (!queue_work(group->demux->mcg_wq, &group->work))
		safe_atomic_dec(&group->refcount);
}

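/*
 * Demultiplex an MCMember MAD arriving from the wire (SA responses).
 * Returns 1 if the MAD was consumed here, 0 if it should be forwarded
 * to the guest unchanged.
 */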
int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
			      struct ib_sa_mad *mad)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)mad->data;
	struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
	struct mcast_group *group;

	switch (mad->mad_hdr.method) {
	case IB_MGMT_METHOD_GET_RESP:
	case IB_SA_METHOD_DELETE_RESP:
		mutex_lock(&ctx->mcg_table_lock);
		group = acquire_group(ctx, &rec->mgid, 0);
		mutex_unlock(&ctx->mcg_table_lock);
		if (IS_ERR(group)) {
			if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) {
				__be64 tid = mad->mad_hdr.tid;
				*(u8 *)(&tid) = (u8)slave; /* in group we kept the modified TID */
				group = search_relocate_mgid0_group(ctx, tid, &rec->mgid);
			} else
				group = NULL;
		}

		if (!group)
			return 1;

		mutex_lock(&group->lock);
		group->response_sa_mad = *mad;
		group->prev_state = group->state;
		group->state = MCAST_RESP_READY;
		/* calls mlx4_ib_mcg_work_handler */
		atomic_inc(&group->refcount);
		if (!queue_work(ctx->mcg_wq, &group->work))
			safe_atomic_dec(&group->refcount);
		mutex_unlock(&group->lock);
		release_group(group, 0);
		return 1; /* consumed */
	case IB_MGMT_METHOD_SET:
	case IB_SA_METHOD_GET_TABLE:
	case IB_SA_METHOD_GET_TABLE_RESP:
	case IB_SA_METHOD_DELETE:
		return 0; /* not consumed, pass-through to guest over tunnel */
	default:
		mcg_warn("In demux, port %d: unexpected MCMember method: 0x%x, dropping\n",
			 port, mad->mad_hdr.method);
		return 1; /* consumed */
	}
}

int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
				  int slave, struct ib_sa_mad *sa_mad)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)sa_mad->data;
	struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
	struct mcast_group *group;
	struct mcast_req *req;
	int may_create = 0;

	if (ctx->flushing)
		return -EAGAIN;

	switch (sa_mad->mad_hdr.method) {
	case IB_MGMT_METHOD_SET:
		may_create = 1;
		fallthrough;
	case IB_SA_METHOD_DELETE:
		req = kzalloc(sizeof *req, GFP_KERNEL);
		if (!req)
			return -ENOMEM;

		req->func = slave;
		req->sa_mad = *sa_mad;

		mutex_lock(&ctx->mcg_table_lock);
		group = acquire_group(ctx, &rec->mgid, may_create);
		mutex_unlock(&ctx->mcg_table_lock);
		if (IS_ERR(group)) {
			kfree(req);
			return PTR_ERR(group);
		}
		mutex_lock(&group->lock);
		if (group->func[slave].num_pend_reqs > MAX_PEND_REQS_PER_FUNC) {
			mutex_unlock(&group->lock);
			mcg_debug_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n",
					port, slave, MAX_PEND_REQS_PER_FUNC);
			release_group(group, 0);
			kfree(req);
			return -ENOMEM;
		}
		++group->func[slave].num_pend_reqs;
		req->group = group;
		queue_req(req);
		mutex_unlock(&group->lock);
		release_group(group, 0);
		return 1; /* consumed */
	case IB_SA_METHOD_GET_TABLE:
	case IB_MGMT_METHOD_GET_RESP:
	case IB_SA_METHOD_GET_TABLE_RESP:
	case IB_SA_METHOD_DELETE_RESP:
		return 0; /* not consumed, pass-through */
	default:
		mcg_warn("In multiplex, port %d, func %d: unexpected MCMember method: 0x%x, dropping\n",
			 port, slave, sa_mad->mad_hdr.method);
		return 1; /* consumed */
	}
}

static ssize_t sysfs_show_group(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct mcast_group *group =
		container_of(attr, struct mcast_group, dentry);
	struct mcast_req *req = NULL;
	char state_str[40];
	char pending_str[40];
	int len;
	int i;
	u32 hoplimit;

	if (group->state == MCAST_IDLE)
		scnprintf(state_str, sizeof(state_str), "%s",
			  get_state_string(group->state));
	else
		scnprintf(state_str, sizeof(state_str), "%s(TID=0x%llx)",
			  get_state_string(group->state),
			  be64_to_cpu(group->last_req_tid));

	if (list_empty(&group->pending_list)) {
		scnprintf(pending_str, sizeof(pending_str), "No");
	} else {
		req = list_first_entry(&group->pending_list, struct mcast_req,
				       group_list);
		scnprintf(pending_str, sizeof(pending_str), "Yes(TID=0x%llx)",
			  be64_to_cpu(req->sa_mad.mad_hdr.tid));
	}

	len = sysfs_emit(buf, "%1d [%02d,%02d,%02d] %4d %4s %5s ",
			 group->rec.scope_join_state & 0xf,
			 group->members[2],
			 group->members[1],
			 group->members[0],
			 atomic_read(&group->refcount),
			 pending_str,
			 state_str);

	for (i = 0; i < MAX_VFS; i++) {
		if (group->func[i].state == MCAST_MEMBER)
			len += sysfs_emit_at(buf, len, "%d[%1x] ", i,
					     group->func[i].join_state);
	}

	hoplimit = be32_to_cpu(group->rec.sl_flowlabel_hoplimit);
	len += sysfs_emit_at(buf, len,
			     "\t\t(%4hx %4x %2x %2x %2x %2x %2x %4x %4x %2x %2x)\n",
			     be16_to_cpu(group->rec.pkey),
			     be32_to_cpu(group->rec.qkey),
			     (group->rec.mtusel_mtu & 0xc0) >> 6,
			     (group->rec.mtusel_mtu & 0x3f),
			     group->rec.tclass,
			     (group->rec.ratesel_rate & 0xc0) >> 6,
			     (group->rec.ratesel_rate & 0x3f),
			     (hoplimit & 0xf0000000) >> 28,
			     (hoplimit & 0x0fffff00) >> 8,
			     (hoplimit & 0x000000ff),
			     group->rec.proxy_join);

	return len;
}

int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx)
{
	char name[20];

	atomic_set(&ctx->tid, 0);
	sprintf(name, "mlx4_ib_mcg%d", ctx->port);
	ctx->mcg_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
	if (!ctx->mcg_wq)
		return -ENOMEM;

	mutex_init(&ctx->mcg_table_lock);
	ctx->mcg_table = RB_ROOT;
	INIT_LIST_HEAD(&ctx->mcg_mgid0_list);
	ctx->flushing = 0;

	return 0;
}

static void force_clean_group(struct mcast_group *group)
{
	struct mcast_req *req, *tmp;

	list_for_each_entry_safe(req, tmp, &group->pending_list, group_list) {
		list_del(&req->group_list);
		kfree(req);
	}
	del_sysfs_port_mcg_attr(group->demux->dev, group->demux->port, &group->dentry.attr);
	rb_erase(&group->node, &group->demux->mcg_table);
	kfree(group);
}

static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
{
	int i;
	struct rb_node *p;
	struct mcast_group *group;
	unsigned long end;
	int count;

	for (i = 0; i < MAX_VFS; ++i)
		clean_vf_mcast(ctx, i);

	end = jiffies + msecs_to_jiffies(MAD_TIMEOUT_MS + 3000);
	do {
		count = 0;
		mutex_lock(&ctx->mcg_table_lock);
		for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p))
			++count;
		mutex_unlock(&ctx->mcg_table_lock);
		if (!count)
			break;

		usleep_range(1000, 2000);
	} while (time_after(end, jiffies));

	flush_workqueue(ctx->mcg_wq);
	if (destroy_wq)
		destroy_workqueue(ctx->mcg_wq);

	mutex_lock(&ctx->mcg_table_lock);
	while ((p = rb_first(&ctx->mcg_table)) != NULL) {
		group = rb_entry(p, struct mcast_group, node);
		if (atomic_read(&group->refcount))
			mcg_debug_group(group, "group refcount %d!!! (pointer %p)\n",
					atomic_read(&group->refcount), group);

		force_clean_group(group);
	}
	mutex_unlock(&ctx->mcg_table_lock);
}

struct clean_work {
	struct work_struct work;
	struct mlx4_ib_demux_ctx *ctx;
	int destroy_wq;
};

static void mcg_clean_task(struct work_struct *work)
{
	struct clean_work *cw = container_of(work, struct clean_work, work);

	_mlx4_ib_mcg_port_cleanup(cw->ctx, cw->destroy_wq);
	cw->ctx->flushing = 0;
	kfree(cw);
}

void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
{
	struct clean_work *work;

	if (ctx->flushing)
		return;

	ctx->flushing = 1;

	if (destroy_wq) {
		_mlx4_ib_mcg_port_cleanup(ctx, destroy_wq);
		ctx->flushing = 0;
		return;
	}

	work = kmalloc(sizeof *work, GFP_KERNEL);
	if (!work) {
		ctx->flushing = 0;
		return;
	}

	work->ctx = ctx;
	work->destroy_wq = destroy_wq;
	INIT_WORK(&work->work, mcg_clean_task);
	queue_work(clean_wq, &work->work);
}

static void build_leave_mad(struct mcast_req *req)
{
	struct ib_sa_mad *mad = &req->sa_mad;

	mad->mad_hdr.method = IB_SA_METHOD_DELETE;
}

static void clear_pending_reqs(struct mcast_group *group, int vf)
{
	struct mcast_req *req, *tmp, *group_first = NULL;
	int clear;
	int pend = 0;

	if (!list_empty(&group->pending_list))
		group_first = list_first_entry(&group->pending_list, struct mcast_req, group_list);

	list_for_each_entry_safe(req, tmp, &group->func[vf].pending, func_list) {
		clear = 1;
		if (group_first == req &&
		    (group->state == MCAST_JOIN_SENT ||
		     group->state == MCAST_LEAVE_SENT)) {
			clear = cancel_delayed_work(&group->timeout_work);
			pend = !clear;
			group->state = MCAST_IDLE;
		}
		if (clear) {
			--group->func[vf].num_pend_reqs;
			list_del(&req->group_list);
			list_del(&req->func_list);
			kfree(req);
			atomic_dec(&group->refcount);
		}
	}

	if (!pend && (!list_empty(&group->func[vf].pending) || group->func[vf].num_pend_reqs)) {
		mcg_warn_group(group, "DRIVER BUG: list_empty %d, num_pend_reqs %d\n",
			       list_empty(&group->func[vf].pending), group->func[vf].num_pend_reqs);
	}
}

static int push_deleting_req(struct mcast_group *group, int slave)
{
	struct mcast_req *req;
	struct mcast_req *pend_req;

	if (!group->func[slave].join_state)
		return 0;

	req = kzalloc(sizeof *req, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	if (!list_empty(&group->func[slave].pending)) {
		/* the per-function pending list is linked through func_list */
		pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, func_list);
		if (pend_req->clean) {
			kfree(req);
			return 0;
		}
	}

	req->clean = 1;
	req->func = slave;
	req->group = group;
	++group->func[slave].num_pend_reqs;
	build_leave_mad(req);
	queue_req(req);
	return 0;
}

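/*
 * Drop all multicast state owned by a single VF: discard its queued
 * requests and, if it is still joined to any group, queue a cleanup
 * leave request on its behalf.
 */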
void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave)
{
	struct mcast_group *group;
	struct rb_node *p;

	mutex_lock(&ctx->mcg_table_lock);
	for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p)) {
		group = rb_entry(p, struct mcast_group, node);
		mutex_lock(&group->lock);
		if (atomic_read(&group->refcount)) {
			/* clear pending requests of this VF */
			clear_pending_reqs(group, slave);
			push_deleting_req(group, slave);
		}
		mutex_unlock(&group->lock);
	}
	mutex_unlock(&ctx->mcg_table_lock);
}

int mlx4_ib_mcg_init(void)
{
	clean_wq = alloc_ordered_workqueue("mlx4_ib_mcg", WQ_MEM_RECLAIM);
	if (!clean_wq)
		return -ENOMEM;

	return 0;
}

void mlx4_ib_mcg_destroy(void)
{
	destroy_workqueue(clean_wq);
}