0033 #include <rdma/ib_mad.h>
0034 #include <rdma/ib_smi.h>
0035 #include <rdma/ib_sa.h>
0036 #include <rdma/ib_cache.h>
0037
0038 #include <linux/random.h>
0039 #include <linux/mlx4/cmd.h>
0040 #include <linux/gfp.h>
0041 #include <rdma/ib_pma.h>
0042 #include <linux/ip.h>
0043 #include <net/ipv6.h>
0044
0045 #include <linux/mlx4/driver.h>
0046 #include "mlx4_ib.h"
0047
0048 enum {
0049 MLX4_IB_VENDOR_CLASS1 = 0x9,
0050 MLX4_IB_VENDOR_CLASS2 = 0xa
0051 };
0052
0053 #define MLX4_TUN_SEND_WRID_SHIFT 34
0054 #define MLX4_TUN_QPN_SHIFT 32
0055 #define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT)
0056 #define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT)
0057
0058 #define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1)
0059 #define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3)
0060
0061 /* Port mgmt change event handling */
0062
0063 #define GET_BLK_PTR_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.block_ptr)
0064 #define GET_MASK_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.tbl_entries_mask)
0065 #define NUM_IDX_IN_PKEY_TBL_BLK 32
0066 #define GUID_TBL_ENTRY_SIZE 8
0067 #define GUID_TBL_BLK_NUM_ENTRIES 8
0068 #define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES)
0069
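/* Buffer layouts used on the SR-IOV MAD proxy path: MADs received from the
 * wire carry a GRH plus a 256-byte payload, and the copies exchanged with
 * slaves over the tunnel QPs are wrapped with the driver's tunnel headers. */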
0070 struct mlx4_mad_rcv_buf {
0071 struct ib_grh grh;
0072 u8 payload[256];
0073 } __packed;
0074
0075 struct mlx4_mad_snd_buf {
0076 u8 payload[256];
0077 } __packed;
0078
0079 struct mlx4_tunnel_mad {
0080 struct ib_grh grh;
0081 struct mlx4_ib_tunnel_header hdr;
0082 struct ib_mad mad;
0083 } __packed;
0084
0085 struct mlx4_rcv_tunnel_mad {
0086 struct mlx4_rcv_tunnel_hdr hdr;
0087 struct ib_grh grh;
0088 struct ib_mad mad;
0089 } __packed;
0090
0091 static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u32 port_num);
0092 static void handle_lid_change_event(struct mlx4_ib_dev *dev, u32 port_num);
0093 static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
0094 int block, u32 change_bitmap);
0095
0096 __be64 mlx4_ib_gen_node_guid(void)
0097 {
0098 #define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40))
0099 return cpu_to_be64(NODE_GUID_HI | prandom_u32());
0100 }
0101
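/* Allocate a transaction ID for MADs generated by the demux (master) context;
 * the 0xff in the top byte keeps these TIDs distinct from TIDs that get
 * stamped with a slave number when slave requests are forwarded. */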
0102 __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
0103 {
0104 return cpu_to_be64(atomic_inc_return(&ctx->tid)) |
0105 cpu_to_be64(0xff00000000000000LL);
0106 }
0107
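/* Issue the MAD_IFC firmware command: the request MAD is copied into a
 * command mailbox, optionally together with work-completion details (source
 * QP, SL, P_Key index, GRH) needed for key checks and trap generation, and
 * the response MAD is copied back out on success. */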
0108 int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
0109 int port, const struct ib_wc *in_wc,
0110 const struct ib_grh *in_grh,
0111 const void *in_mad, void *response_mad)
0112 {
0113 struct mlx4_cmd_mailbox *inmailbox, *outmailbox;
0114 void *inbox;
0115 int err;
0116 u32 in_modifier = port;
0117 u8 op_modifier = 0;
0118
0119 inmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
0120 if (IS_ERR(inmailbox))
0121 return PTR_ERR(inmailbox);
0122 inbox = inmailbox->buf;
0123
0124 outmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
0125 if (IS_ERR(outmailbox)) {
0126 mlx4_free_cmd_mailbox(dev->dev, inmailbox);
0127 return PTR_ERR(outmailbox);
0128 }
0129
0130 memcpy(inbox, in_mad, 256);
0131
0132 /*
0133  * Key check traps can't be generated unless we have in_wc to
0134  * tell us where to send the trap.
0135  */
0136 if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_MKEY) || !in_wc)
0137 op_modifier |= 0x1;
0138 if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_BKEY) || !in_wc)
0139 op_modifier |= 0x2;
0140 if (mlx4_is_mfunc(dev->dev) &&
0141 (mad_ifc_flags & MLX4_MAD_IFC_NET_VIEW || in_wc))
0142 op_modifier |= 0x8;
0143
0144 if (in_wc) {
0145 struct {
0146 __be32 my_qpn;
0147 u32 reserved1;
0148 __be32 rqpn;
0149 u8 sl;
0150 u8 g_path;
0151 u16 reserved2[2];
0152 __be16 pkey;
0153 u32 reserved3[11];
0154 u8 grh[40];
0155 } *ext_info;
0156
0157 memset(inbox + 256, 0, 256);
0158 ext_info = inbox + 256;
0159
0160 ext_info->my_qpn = cpu_to_be32(in_wc->qp->qp_num);
0161 ext_info->rqpn = cpu_to_be32(in_wc->src_qp);
0162 ext_info->sl = in_wc->sl << 4;
0163 ext_info->g_path = in_wc->dlid_path_bits |
0164 (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
0165 ext_info->pkey = cpu_to_be16(in_wc->pkey_index);
0166
0167 if (in_grh)
0168 memcpy(ext_info->grh, in_grh, 40);
0169
0170 op_modifier |= 0x4;
0171
0172 in_modifier |= ib_lid_cpu16(in_wc->slid) << 16;
0173 }
0174
0175 err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier,
0176 mlx4_is_master(dev->dev) ? (op_modifier & ~0x8) : op_modifier,
0177 MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
0178 (op_modifier & 0x8) ? MLX4_CMD_NATIVE : MLX4_CMD_WRAPPED);
0179
0180 if (!err)
0181 memcpy(response_mad, outmailbox->buf, 256);
0182
0183 mlx4_free_cmd_mailbox(dev->dev, inmailbox);
0184 mlx4_free_cmd_mailbox(dev->dev, outmailbox);
0185
0186 return err;
0187 }
0188
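/* (Re)build the cached address handle used for sending MADs (e.g. forwarded
 * traps) to the subnet manager whenever the SM LID or SL changes. */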
0189 static void update_sm_ah(struct mlx4_ib_dev *dev, u32 port_num, u16 lid, u8 sl)
0190 {
0191 struct ib_ah *new_ah;
0192 struct rdma_ah_attr ah_attr;
0193 unsigned long flags;
0194
0195 if (!dev->send_agent[port_num - 1][0])
0196 return;
0197
0198 memset(&ah_attr, 0, sizeof ah_attr);
0199 ah_attr.type = rdma_ah_find_type(&dev->ib_dev, port_num);
0200 rdma_ah_set_dlid(&ah_attr, lid);
0201 rdma_ah_set_sl(&ah_attr, sl);
0202 rdma_ah_set_port_num(&ah_attr, port_num);
0203
0204 new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
0205 &ah_attr, 0);
0206 if (IS_ERR(new_ah))
0207 return;
0208
0209 spin_lock_irqsave(&dev->sm_lock, flags);
0210 if (dev->sm_ah[port_num - 1])
0211 rdma_destroy_ah(dev->sm_ah[port_num - 1], 0);
0212 dev->sm_ah[port_num - 1] = new_ah;
0213 spin_unlock_irqrestore(&dev->sm_lock, flags);
0214 }
0215
0216 /*
0217  * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can
0218  * synthesize LID change, Client-Rereg, GID change, and P_Key change events.
0219  */
0220 static void smp_snoop(struct ib_device *ibdev, u32 port_num,
0221 const struct ib_mad *mad, u16 prev_lid)
0222 {
0223 struct ib_port_info *pinfo;
0224 u16 lid;
0225 __be16 *base;
0226 u32 bn, pkey_change_bitmap;
0227 int i;
0228
0229
0230 struct mlx4_ib_dev *dev = to_mdev(ibdev);
0231 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
0232 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
0233 mad->mad_hdr.method == IB_MGMT_METHOD_SET)
0234 switch (mad->mad_hdr.attr_id) {
0235 case IB_SMP_ATTR_PORT_INFO:
0236 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
0237 return;
0238 pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data;
0239 lid = be16_to_cpu(pinfo->lid);
0240
0241 update_sm_ah(dev, port_num,
0242 be16_to_cpu(pinfo->sm_lid),
0243 pinfo->neighbormtu_mastersmsl & 0xf);
0244
0245 if (pinfo->clientrereg_resv_subnetto & 0x80)
0246 handle_client_rereg_event(dev, port_num);
0247
0248 if (prev_lid != lid)
0249 handle_lid_change_event(dev, port_num);
0250 break;
0251
0252 case IB_SMP_ATTR_PKEY_TABLE:
0253 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
0254 return;
0255 if (!mlx4_is_mfunc(dev->dev)) {
0256 mlx4_ib_dispatch_event(dev, port_num,
0257 IB_EVENT_PKEY_CHANGE);
0258 break;
0259 }
0260
0261 /* at this point we are running on the mfunc master: track which entries
0262  * of this P_Key block changed so the event can be propagated to the
0263  * affected slaves */
0264 bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod) & 0xFFFF;
0265 base = (__be16 *) &(((struct ib_smp *)mad)->data[0]);
0266 pkey_change_bitmap = 0;
0267 for (i = 0; i < 32; i++) {
0268 pr_debug("PKEY[%d] = x%x\n",
0269 i + bn*32, be16_to_cpu(base[i]));
0270 if (be16_to_cpu(base[i]) !=
0271 dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32]) {
0272 pkey_change_bitmap |= (1 << i);
0273 dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32] =
0274 be16_to_cpu(base[i]);
0275 }
0276 }
0277 pr_debug("PKEY Change event: port=%u, "
0278 "block=0x%x, change_bitmap=0x%x\n",
0279 port_num, bn, pkey_change_bitmap);
0280
0281 if (pkey_change_bitmap) {
0282 mlx4_ib_dispatch_event(dev, port_num,
0283 IB_EVENT_PKEY_CHANGE);
0284 if (!dev->sriov.is_going_down)
0285 __propagate_pkey_ev(dev, port_num, bn,
0286 pkey_change_bitmap);
0287 }
0288 break;
0289
0290 case IB_SMP_ATTR_GUID_INFO:
0291 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
0292 return;
0293
0294 if (!mlx4_is_master(dev->dev))
0295 mlx4_ib_dispatch_event(dev, port_num,
0296 IB_EVENT_GID_CHANGE);
0297
0298 if (mlx4_is_master(dev->dev) &&
0299 !dev->sriov.is_going_down) {
0300 bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod);
0301 mlx4_ib_update_cache_on_guid_change(dev, bn, port_num,
0302 (u8 *)(&((struct ib_smp *)mad)->data));
0303 mlx4_ib_notify_slaves_on_guid_change(dev, bn, port_num,
0304 (u8 *)(&((struct ib_smp *)mad)->data));
0305 }
0306 break;
0307
0308 case IB_SMP_ATTR_SL_TO_VL_TABLE:
0309 /* cache sl to vl mapping changes for use in
0310  * filling QP1 LRH VL field when sending packets
0311  */
0312 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV &&
0313 dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT)
0314 return;
0315 if (!mlx4_is_slave(dev->dev)) {
0316 union sl2vl_tbl_to_u64 sl2vl64;
0317 int jj;
0318
0319 for (jj = 0; jj < 8; jj++) {
0320 sl2vl64.sl8[jj] = ((struct ib_smp *)mad)->data[jj];
0321 pr_debug("port %u, sl2vl[%d] = %02x\n",
0322 port_num, jj, sl2vl64.sl8[jj]);
0323 }
0324 atomic64_set(&dev->sl2vl[port_num - 1], sl2vl64.sl64);
0325 }
0326 break;
0327
0328 default:
0329 break;
0330 }
0331 }
0332
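/* For every active slave whose virtual P_Key table references an entry in the
 * changed block, generate a P_Key change event toward that slave. */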
0333 static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
0334 int block, u32 change_bitmap)
0335 {
0336 int i, ix, slave, err;
0337 int have_event = 0;
0338
0339 for (slave = 0; slave < dev->dev->caps.sqp_demux; slave++) {
0340 if (slave == mlx4_master_func_num(dev->dev))
0341 continue;
0342 if (!mlx4_is_slave_active(dev->dev, slave))
0343 continue;
0344
0345 have_event = 0;
0346 for (i = 0; i < 32; i++) {
0347 if (!(change_bitmap & (1 << i)))
0348 continue;
0349 for (ix = 0;
0350 ix < dev->dev->caps.pkey_table_len[port_num]; ix++) {
0351 if (dev->pkeys.virt2phys_pkey[slave][port_num - 1]
0352 [ix] == i + 32 * block) {
0353 err = mlx4_gen_pkey_eqe(dev->dev, slave, port_num);
0354 pr_debug("propagate_pkey_ev: slave %d,"
0355 " port %d, ix %d (%d)\n",
0356 slave, port_num, ix, err);
0357 have_event = 1;
0358 break;
0359 }
0360 }
0361 if (have_event)
0362 break;
0363 }
0364 }
0365 }
0366
0367 static void node_desc_override(struct ib_device *dev,
0368 struct ib_mad *mad)
0369 {
0370 unsigned long flags;
0371
0372 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
0373 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
0374 mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
0375 mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
0376 spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags);
0377 memcpy(((struct ib_smp *) mad)->data, dev->node_desc,
0378 IB_DEVICE_NODE_DESC_MAX);
0379 spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags);
0380 }
0381 }
0382
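/* Forward a locally generated trap (received with the permissive LID) to the
 * SM, using the cached SM address handle. */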
0383 static void forward_trap(struct mlx4_ib_dev *dev, u32 port_num,
0384 const struct ib_mad *mad)
0385 {
0386 int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
0387 struct ib_mad_send_buf *send_buf;
0388 struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
0389 int ret;
0390 unsigned long flags;
0391
0392 if (agent) {
0393 send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
0394 IB_MGMT_MAD_DATA, GFP_ATOMIC,
0395 IB_MGMT_BASE_VERSION);
0396 if (IS_ERR(send_buf))
0397 return;
0398 /*
0399  * We rely here on the fact that MLX QPs don't use the
0400  * address handle after the send is posted (this is
0401  * wrong following the IB spec strictly, but we know
0402  * it's OK for our devices).
0403  */
0404 spin_lock_irqsave(&dev->sm_lock, flags);
0405 memcpy(send_buf->mad, mad, sizeof *mad);
0406 if ((send_buf->ah = dev->sm_ah[port_num - 1]))
0407 ret = ib_post_send_mad(send_buf, NULL);
0408 else
0409 ret = -EINVAL;
0410 spin_unlock_irqrestore(&dev->sm_lock, flags);
0411
0412 if (ret)
0413 ib_free_send_mad(send_buf);
0414 }
0415 }
0416
0417 static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave,
0418 struct ib_sa_mad *sa_mad)
0419 {
0420 int ret = 0;
0421
0422 /* dispatch to different sa handlers */
0423 switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
0424 case IB_SA_ATTR_MC_MEMBER_REC:
0425 ret = mlx4_ib_mcg_demux_handler(ibdev, port, slave, sa_mad);
0426 break;
0427 default:
0428 break;
0429 }
0430 return ret;
0431 }
0432
0433 int mlx4_ib_find_real_gid(struct ib_device *ibdev, u32 port, __be64 guid)
0434 {
0435 struct mlx4_ib_dev *dev = to_mdev(ibdev);
0436 int i;
0437
0438 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
0439 if (dev->sriov.demux[port - 1].guid_cache[i] == guid)
0440 return i;
0441 }
0442 return -1;
0443 }
0444
0445
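/* Translate a P_Key value into the slave's physical P_Key index, preferring a
 * full-membership entry and falling back to the first partial-membership
 * match. */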
0446 static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave,
0447 u32 port, u16 pkey, u16 *ix)
0448 {
0449 int i, ret;
0450 u8 unassigned_pkey_ix, pkey_ix, partial_ix = 0xFF;
0451 u16 slot_pkey;
0452
0453 if (slave == mlx4_master_func_num(dev->dev))
0454 return ib_find_cached_pkey(&dev->ib_dev, port, pkey, ix);
0455
0456 unassigned_pkey_ix = dev->dev->phys_caps.pkey_phys_table_len[port] - 1;
0457
0458 for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) {
0459 if (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == unassigned_pkey_ix)
0460 continue;
0461
0462 pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][i];
0463
0464 ret = ib_get_cached_pkey(&dev->ib_dev, port, pkey_ix, &slot_pkey);
0465 if (ret)
0466 continue;
0467 if ((slot_pkey & 0x7FFF) == (pkey & 0x7FFF)) {
0468 if (slot_pkey & 0x8000) {
0469 *ix = (u16) pkey_ix;
0470 return 0;
0471 } else {
0472 /* take first partial pkey index found */
0473 if (partial_ix == 0xFF)
0474 partial_ix = pkey_ix;
0475 }
0476 }
0477 }
0478
0479 if (partial_ix < 0xFF) {
0480 *ix = (u16) partial_ix;
0481 return 0;
0482 }
0483
0484 return -EINVAL;
0485 }
0486
0487 static int get_gids_from_l3_hdr(struct ib_grh *grh, union ib_gid *sgid,
0488 union ib_gid *dgid)
0489 {
0490 int version = ib_get_rdma_header_version((const union rdma_network_hdr *)grh);
0491 enum rdma_network_type net_type;
0492
0493 if (version == 4)
0494 net_type = RDMA_NETWORK_IPV4;
0495 else if (version == 6)
0496 net_type = RDMA_NETWORK_IPV6;
0497 else
0498 return -EINVAL;
0499
0500 return ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
0501 sgid, dgid);
0502 }
0503
0504 static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
0505 {
0506 int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
0507
0508 return (qpn >= proxy_start && qpn <= proxy_start + 1);
0509 }
0510
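/* Tunnel a MAD that arrived from the wire to the given slave: the MAD, its
 * GRH and a tunnel header are posted as a send on the slave's tunnel QP. */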
0511 int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u32 port,
0512 enum ib_qp_type dest_qpt, struct ib_wc *wc,
0513 struct ib_grh *grh, struct ib_mad *mad)
0514 {
0515 struct ib_sge list;
0516 struct ib_ud_wr wr;
0517 const struct ib_send_wr *bad_wr;
0518 struct mlx4_ib_demux_pv_ctx *tun_ctx;
0519 struct mlx4_ib_demux_pv_qp *tun_qp;
0520 struct mlx4_rcv_tunnel_mad *tun_mad;
0521 struct rdma_ah_attr attr;
0522 struct ib_ah *ah;
0523 struct ib_qp *src_qp = NULL;
0524 unsigned tun_tx_ix = 0;
0525 int dqpn;
0526 int ret = 0;
0527 u16 tun_pkey_ix;
0528 u16 cached_pkey;
0529 u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
0530
0531 if (dest_qpt > IB_QPT_GSI) {
0532 pr_debug("dest_qpt (%d) > IB_QPT_GSI\n", dest_qpt);
0533 return -EINVAL;
0534 }
0535
0536 tun_ctx = dev->sriov.demux[port-1].tun[slave];
0537
0538 /* check if proxy qp created */
0539 if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
0540 return -EAGAIN;
0541
0542 if (!dest_qpt)
0543 tun_qp = &tun_ctx->qp[0];
0544 else
0545 tun_qp = &tun_ctx->qp[1];
0546
0547 /* compute P_Key index to put in tunnel header for slave */
0548 if (dest_qpt) {
0549 u16 pkey_ix;
0550 ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey);
0551 if (ret) {
0552 pr_debug("unable to get %s cached pkey for index %d, ret %d\n",
0553 is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
0554 wc->pkey_index, ret);
0555 return -EINVAL;
0556 }
0557
0558 ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix);
0559 if (ret) {
0560 pr_debug("unable to get %s pkey ix for pkey 0x%x, ret %d\n",
0561 is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
0562 cached_pkey, ret);
0563 return -EINVAL;
0564 }
0565 tun_pkey_ix = pkey_ix;
0566 } else
0567 tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
0568
0569 dqpn = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave + port + (dest_qpt * 2) - 1;
0570
0571 /* the send is posted on the slave's tunnel QP */
0572 src_qp = tun_qp->qp;
0573
0574 /* create an ah; only the port number (and, for RoCE, the destination gid)
0575  * is needed here -- the driver forces loopback when posting the send */
0576 memset(&attr, 0, sizeof attr);
0577 attr.type = rdma_ah_find_type(&dev->ib_dev, port);
0578
0579 rdma_ah_set_port_num(&attr, port);
0580 if (is_eth) {
0581 union ib_gid sgid;
0582 union ib_gid dgid;
0583
0584 if (get_gids_from_l3_hdr(grh, &sgid, &dgid))
0585 return -EINVAL;
0586 rdma_ah_set_grh(&attr, &dgid, 0, 0, 0, 0);
0587 }
0588 ah = rdma_create_ah(tun_ctx->pd, &attr, 0);
0589 if (IS_ERR(ah))
0590 return -ENOMEM;
0591
0592 /* reserve a slot in the tunnel tx ring */
0593 spin_lock(&tun_qp->tx_lock);
0594 if (tun_qp->tx_ix_head - tun_qp->tx_ix_tail >=
0595 (MLX4_NUM_TUNNEL_BUFS - 1))
0596 ret = -EAGAIN;
0597 else
0598 tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
0599 spin_unlock(&tun_qp->tx_lock);
0600 if (ret)
0601 goto end;
0602
0603 tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
0604 if (tun_qp->tx_ring[tun_tx_ix].ah)
0605 rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah, 0);
0606 tun_qp->tx_ring[tun_tx_ix].ah = ah;
0607 ib_dma_sync_single_for_cpu(&dev->ib_dev,
0608 tun_qp->tx_ring[tun_tx_ix].buf.map,
0609 sizeof (struct mlx4_rcv_tunnel_mad),
0610 DMA_TO_DEVICE);
0611
0612 /* copy over to tunnel buffer */
0613 if (grh)
0614 memcpy(&tun_mad->grh, grh, sizeof *grh);
0615 memcpy(&tun_mad->mad, mad, sizeof *mad);
0616
0617 /* fill in the tunnel header */
0618 tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
0619 tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
0620 tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
0621
0622 if (is_eth) {
0623 u16 vlan = 0;
0624 if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan,
0625 NULL)) {
0626 /* VST mode */
0627 if (vlan != wc->vlan_id)
0628 /* Packet vlan is not the VST-assigned vlan.
0629  * Drop the packet.
0630  */
0631 goto out;
0632 else
0633 /* Remove the vlan tag before forwarding
0634  * the packet to the VF.
0635  */
0636 vlan = 0xffff;
0637 } else {
0638 vlan = wc->vlan_id;
0639 }
0640
0641 tun_mad->hdr.sl_vid = cpu_to_be16(vlan);
0642 memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4);
0643 memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2);
0644 } else {
0645 tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
0646 tun_mad->hdr.slid_mac_47_32 = ib_lid_be16(wc->slid);
0647 }
0648
0649 ib_dma_sync_single_for_device(&dev->ib_dev,
0650 tun_qp->tx_ring[tun_tx_ix].buf.map,
0651 sizeof (struct mlx4_rcv_tunnel_mad),
0652 DMA_TO_DEVICE);
0653
0654 list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map;
0655 list.length = sizeof (struct mlx4_rcv_tunnel_mad);
0656 list.lkey = tun_ctx->pd->local_dma_lkey;
0657
0658 wr.ah = ah;
0659 wr.port_num = port;
0660 wr.remote_qkey = IB_QP_SET_QKEY;
0661 wr.remote_qpn = dqpn;
0662 wr.wr.next = NULL;
0663 wr.wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
0664 wr.wr.sg_list = &list;
0665 wr.wr.num_sge = 1;
0666 wr.wr.opcode = IB_WR_SEND;
0667 wr.wr.send_flags = IB_SEND_SIGNALED;
0668
0669 ret = ib_post_send(src_qp, &wr.wr, &bad_wr);
0670 if (!ret)
0671 return 0;
0672 out:
0673 spin_lock(&tun_qp->tx_lock);
0674 tun_qp->tx_ix_tail++;
0675 spin_unlock(&tun_qp->tx_lock);
0676 tun_qp->tx_ring[tun_tx_ix].ah = NULL;
0677 end:
0678 rdma_destroy_ah(ah, 0);
0679 return ret;
0680 }
0681
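/* Demultiplex a MAD arriving from the wire: work out which slave it is
 * destined for (by RoCE GID, by the slave id encoded in a response TID, or by
 * the destination GID) and forward it over that slave's tunnel QP. */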
0682 static int mlx4_ib_demux_mad(struct ib_device *ibdev, u32 port,
0683 struct ib_wc *wc, struct ib_grh *grh,
0684 struct ib_mad *mad)
0685 {
0686 struct mlx4_ib_dev *dev = to_mdev(ibdev);
0687 int err, other_port;
0688 int slave = -1;
0689 u8 *slave_id;
0690 int is_eth = 0;
0691
0692 if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
0693 is_eth = 0;
0694 else
0695 is_eth = 1;
0696
0697 if (is_eth) {
0698 union ib_gid dgid;
0699 union ib_gid sgid;
0700
0701 if (get_gids_from_l3_hdr(grh, &sgid, &dgid))
0702 return -EINVAL;
0703 if (!(wc->wc_flags & IB_WC_GRH)) {
0704 mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
0705 return -EINVAL;
0706 }
0707 if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) {
0708 mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
0709 return -EINVAL;
0710 }
0711 err = mlx4_get_slave_from_roce_gid(dev->dev, port, dgid.raw, &slave);
0712 if (err && mlx4_is_mf_bonded(dev->dev)) {
0713 other_port = (port == 1) ? 2 : 1;
0714 err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, dgid.raw, &slave);
0715 if (!err) {
0716 port = other_port;
0717 pr_debug("resolved slave %d from gid %pI6 wire port %d other %d\n",
0718 slave, grh->dgid.raw, port, other_port);
0719 }
0720 }
0721 if (err) {
0722 mlx4_ib_warn(ibdev, "failed matching grh\n");
0723 return -ENOENT;
0724 }
0725 if (slave >= dev->dev->caps.sqp_demux) {
0726 mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
0727 slave, dev->dev->caps.sqp_demux);
0728 return -ENOENT;
0729 }
0730
0731 if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad))
0732 return 0;
0733
0734 err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
0735 if (err)
0736 pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n",
0737 is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
0738 slave, err);
0739 return 0;
0740 }
0741
0742 /* Initially assume that this mad is for us */
0743 slave = mlx4_master_func_num(dev->dev);
0744
0745 /* See if the slave id is encoded in a response mad */
0746 if (mad->mad_hdr.method & 0x80) {
0747 slave_id = (u8 *) &mad->mad_hdr.tid;
0748 slave = *slave_id;
0749 if (slave != 255)
0750 *slave_id = 0;
0751 }
0752
0753 /* If a grh is present, we demux according to it */
0754 if (wc->wc_flags & IB_WC_GRH) {
0755 if (grh->dgid.global.interface_id ==
0756 cpu_to_be64(IB_SA_WELL_KNOWN_GUID) &&
0757 grh->dgid.global.subnet_prefix == cpu_to_be64(
0758 atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix))) {
0759 slave = 0;
0760 } else {
0761 slave = mlx4_ib_find_real_gid(ibdev, port,
0762 grh->dgid.global.interface_id);
0763 if (slave < 0) {
0764 mlx4_ib_warn(ibdev, "failed matching grh\n");
0765 return -ENOENT;
0766 }
0767 }
0768 }
0769
0770 switch (mad->mad_hdr.mgmt_class) {
0771 case IB_MGMT_CLASS_SUBN_LID_ROUTED:
0772 case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
0773 /* a slave id of 255 denotes the host (PF) itself */
0774 if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) {
0775 if (!mlx4_vf_smi_enabled(dev->dev, slave, port))
0776 return -EPERM;
0777
0778 if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) {
0779 mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n",
0780 slave, mad->mad_hdr.mgmt_class,
0781 mad->mad_hdr.method);
0782 return -EINVAL;
0783 }
0784 }
0785 break;
0786 case IB_MGMT_CLASS_SUBN_ADM:
0787 if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
0788 (struct ib_sa_mad *) mad))
0789 return 0;
0790 break;
0791 case IB_MGMT_CLASS_CM:
0792 if (mlx4_ib_demux_cm_handler(ibdev, port, &slave, mad))
0793 return 0;
0794 break;
0795 case IB_MGMT_CLASS_DEVICE_MGMT:
0796 if (mad->mad_hdr.method != IB_MGMT_METHOD_GET_RESP)
0797 return 0;
0798 break;
0799 default:
0800 /* Drop unsupported classes for slaves in tunnel mode */
0801 if (slave != mlx4_master_func_num(dev->dev)) {
0802 pr_debug("dropping unsupported ingress mad from class:%d "
0803 "for slave:%d\n", mad->mad_hdr.mgmt_class, slave);
0804 return 0;
0805 }
0806 }
0807
0808 if (slave >= dev->dev->caps.sqp_demux) {
0809 mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
0810 slave, dev->dev->caps.sqp_demux);
0811 return -ENOENT;
0812 }
0813
0814 err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
0815 if (err)
0816 pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n",
0817 is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
0818 slave, err);
0819 return 0;
0820 }
0821
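/* Native IB MAD processing: filter out unsupported methods, hand the MAD to
 * firmware via MAD_IFC, snoop SM sets to synthesize port events, and fix up
 * the response (node description override, directed-route status bit). */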
0822 static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
0823 const struct ib_wc *in_wc, const struct ib_grh *in_grh,
0824 const struct ib_mad *in_mad, struct ib_mad *out_mad)
0825 {
0826 u16 slid, prev_lid = 0;
0827 int err;
0828 struct ib_port_attr pattr;
0829
0830 slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE);
0831
0832 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
0833 forward_trap(to_mdev(ibdev), port_num, in_mad);
0834 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
0835 }
0836
0837 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
0838 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
0839 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
0840 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
0841 in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
0842 return IB_MAD_RESULT_SUCCESS;
0843
0844 /*
0845  * Don't process SMInfo queries -- the SMA can't handle them.
0846  */
0847 if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
0848 return IB_MAD_RESULT_SUCCESS;
0849 } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
0850 in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 ||
0851 in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2 ||
0852 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
0853 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
0854 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
0855 return IB_MAD_RESULT_SUCCESS;
0856 } else
0857 return IB_MAD_RESULT_SUCCESS;
0858
0859 if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
0860 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
0861 in_mad->mad_hdr.method == IB_MGMT_METHOD_SET &&
0862 in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
0863 !ib_query_port(ibdev, port_num, &pattr))
0864 prev_lid = ib_lid_cpu16(pattr.lid);
0865
0866 err = mlx4_MAD_IFC(to_mdev(ibdev),
0867 (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) |
0868 (mad_flags & IB_MAD_IGNORE_BKEY ? MLX4_MAD_IFC_IGNORE_BKEY : 0) |
0869 MLX4_MAD_IFC_NET_VIEW,
0870 port_num, in_wc, in_grh, in_mad, out_mad);
0871 if (err)
0872 return IB_MAD_RESULT_FAILURE;
0873
0874 if (!out_mad->mad_hdr.status) {
0875 smp_snoop(ibdev, port_num, in_mad, prev_lid);
0876 /* slaves get node desc from FW */
0877 if (!mlx4_is_slave(to_mdev(ibdev)->dev))
0878 node_desc_override(ibdev, out_mad);
0879 }
0880
0881 /* set return bit in status of directed route responses */
0882 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
0883 out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
0884
0885 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
0886 /* no response for trap repress */
0887 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
0888
0889 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
0890 }
0891
0892 static void edit_counter(struct mlx4_counter *cnt, void *counters,
0893 __be16 attr_id)
0894 {
0895 switch (attr_id) {
0896 case IB_PMA_PORT_COUNTERS:
0897 {
0898 struct ib_pma_portcounters *pma_cnt =
0899 (struct ib_pma_portcounters *)counters;
0900
0901 ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data,
0902 (be64_to_cpu(cnt->tx_bytes) >> 2));
0903 ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data,
0904 (be64_to_cpu(cnt->rx_bytes) >> 2));
0905 ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets,
0906 be64_to_cpu(cnt->tx_frames));
0907 ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets,
0908 be64_to_cpu(cnt->rx_frames));
0909 break;
0910 }
0911 case IB_PMA_PORT_COUNTERS_EXT:
0912 {
0913 struct ib_pma_portcounters_ext *pma_cnt_ext =
0914 (struct ib_pma_portcounters_ext *)counters;
0915
0916 pma_cnt_ext->port_xmit_data =
0917 cpu_to_be64(be64_to_cpu(cnt->tx_bytes) >> 2);
0918 pma_cnt_ext->port_rcv_data =
0919 cpu_to_be64(be64_to_cpu(cnt->rx_bytes) >> 2);
0920 pma_cnt_ext->port_xmit_packets = cnt->tx_frames;
0921 pma_cnt_ext->port_rcv_packets = cnt->rx_frames;
0922 break;
0923 }
0924 }
0925 }
0926
0927 static int iboe_process_mad_port_info(void *out_mad)
0928 {
0929 struct ib_class_port_info cpi = {};
0930
0931 cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
0932 memcpy(out_mad, &cpi, sizeof(cpi));
0933 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
0934 }
0935
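/* Build PMA PortCounters / PortCountersExt responses from the device's flow
 * counters; used for RoCE ports and for IB virtual functions. */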
0936 static int iboe_process_mad(struct ib_device *ibdev, int mad_flags,
0937 u32 port_num, const struct ib_wc *in_wc,
0938 const struct ib_grh *in_grh,
0939 const struct ib_mad *in_mad, struct ib_mad *out_mad)
0940 {
0941 struct mlx4_counter counter_stats;
0942 struct mlx4_ib_dev *dev = to_mdev(ibdev);
0943 struct counter_index *tmp_counter;
0944 int err = IB_MAD_RESULT_FAILURE, stats_avail = 0;
0945
0946 if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
0947 return -EINVAL;
0948
0949 if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)
0950 return iboe_process_mad_port_info((void *)(out_mad->data + 40));
0951
0952 memset(&counter_stats, 0, sizeof(counter_stats));
0953 mutex_lock(&dev->counters_table[port_num - 1].mutex);
0954 list_for_each_entry(tmp_counter,
0955 &dev->counters_table[port_num - 1].counters_list,
0956 list) {
0957 err = mlx4_get_counter_stats(dev->dev,
0958 tmp_counter->index,
0959 &counter_stats, 0);
0960 if (err) {
0961 err = IB_MAD_RESULT_FAILURE;
0962 stats_avail = 0;
0963 break;
0964 }
0965 stats_avail = 1;
0966 }
0967 mutex_unlock(&dev->counters_table[port_num - 1].mutex);
0968 if (stats_avail) {
0969 switch (counter_stats.counter_mode & 0xf) {
0970 case 0:
0971 edit_counter(&counter_stats,
0972 (void *)(out_mad->data + 40),
0973 in_mad->mad_hdr.attr_id);
0974 err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
0975 break;
0976 default:
0977 err = IB_MAD_RESULT_FAILURE;
0978 }
0979 }
0980
0981 return err;
0982 }
0983
0984 int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
0985 const struct ib_wc *in_wc, const struct ib_grh *in_grh,
0986 const struct ib_mad *in, struct ib_mad *out,
0987 size_t *out_mad_size, u16 *out_mad_pkey_index)
0988 {
0989 struct mlx4_ib_dev *dev = to_mdev(ibdev);
0990 enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num);
0991
0992 /* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA
0993  * queries, should be called only by VFs and for that specific purpose
0994  */
0995 if (link == IB_LINK_LAYER_INFINIBAND) {
0996 if (mlx4_is_slave(dev->dev) &&
0997 (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
0998 (in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS ||
0999 in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT ||
1000 in->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)))
1001 return iboe_process_mad(ibdev, mad_flags, port_num,
1002 in_wc, in_grh, in, out);
1003
1004 return ib_process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
1005 in, out);
1006 }
1007
1008 if (link == IB_LINK_LAYER_ETHERNET)
1009 return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
1010 in_grh, in, out);
1011
1012 return -EINVAL;
1013 }
1014
1015 static void send_handler(struct ib_mad_agent *agent,
1016 struct ib_mad_send_wc *mad_send_wc)
1017 {
1018 if (mad_send_wc->send_buf->context[0])
1019 rdma_destroy_ah(mad_send_wc->send_buf->context[0], 0);
1020 ib_free_send_mad(mad_send_wc->send_buf);
1021 }
1022
1023 int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
1024 {
1025 struct ib_mad_agent *agent;
1026 int p, q;
1027 int ret;
1028 enum rdma_link_layer ll;
1029
1030 for (p = 0; p < dev->num_ports; ++p) {
1031 ll = rdma_port_get_link_layer(&dev->ib_dev, p + 1);
1032 for (q = 0; q <= 1; ++q) {
1033 if (ll == IB_LINK_LAYER_INFINIBAND) {
1034 agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
1035 q ? IB_QPT_GSI : IB_QPT_SMI,
1036 NULL, 0, send_handler,
1037 NULL, NULL, 0);
1038 if (IS_ERR(agent)) {
1039 ret = PTR_ERR(agent);
1040 goto err;
1041 }
1042 dev->send_agent[p][q] = agent;
1043 } else
1044 dev->send_agent[p][q] = NULL;
1045 }
1046 }
1047
1048 return 0;
1049
1050 err:
1051 for (p = 0; p < dev->num_ports; ++p)
1052 for (q = 0; q <= 1; ++q)
1053 if (dev->send_agent[p][q])
1054 ib_unregister_mad_agent(dev->send_agent[p][q]);
1055
1056 return ret;
1057 }
1058
1059 void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
1060 {
1061 struct ib_mad_agent *agent;
1062 int p, q;
1063
1064 for (p = 0; p < dev->num_ports; ++p) {
1065 for (q = 0; q <= 1; ++q) {
1066 agent = dev->send_agent[p][q];
1067 if (agent) {
1068 dev->send_agent[p][q] = NULL;
1069 ib_unregister_mad_agent(agent);
1070 }
1071 }
1072
1073 if (dev->sm_ah[p])
1074 rdma_destroy_ah(dev->sm_ah[p], 0);
1075 }
1076 }
1077
1078 static void handle_lid_change_event(struct mlx4_ib_dev *dev, u32 port_num)
1079 {
1080 mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_LID_CHANGE);
1081
1082 if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
1083 mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
1084 MLX4_EQ_PORT_INFO_LID_CHANGE_MASK);
1085 }
1086
1087 static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u32 port_num)
1088 {
1089 /* re-configure the alias-guid records and multicast groups */
1090 if (mlx4_is_master(dev->dev)) {
1091 mlx4_ib_invalidate_all_guid_record(dev, port_num);
1092
1093 if (!dev->sriov.is_going_down) {
1094 mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0);
1095 mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
1096 MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK);
1097 }
1098 }
1099
1100 /* Update the sl to vl table from inside client rereg
1101  * only if in secure-host mode (snooping is not possible)
1102  * and the sl-to-vl change event is not generated by FW.
1103  */
1104 if (!mlx4_is_slave(dev->dev) &&
1105 dev->dev->flags & MLX4_FLAG_SECURE_HOST &&
1106 !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT)) {
1107 if (mlx4_is_master(dev->dev))
1108 /* already in a work queue (queued for the port management
1109  * change event), so mlx4_ib_sl2vl_update() can be called
1110  * directly rather than scheduled
1111  */
1112 mlx4_ib_sl2vl_update(dev, port_num);
1113 else
1114 mlx4_sched_ib_sl2vl_update_work(dev, port_num);
1115 }
1116 mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER);
1117 }
1118
1119 static void propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
1120 struct mlx4_eqe *eqe)
1121 {
1122 __propagate_pkey_ev(dev, port_num, GET_BLK_PTR_FROM_EQE(eqe),
1123 GET_MASK_FROM_EQE(eqe));
1124 }
1125
1126 static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u32 port_num,
1127 u32 guid_tbl_blk_num, u32 change_bitmap)
1128 {
1129 struct ib_smp *in_mad = NULL;
1130 struct ib_smp *out_mad = NULL;
1131 u16 i;
1132
1133 if (!mlx4_is_mfunc(dev->dev) || !mlx4_is_master(dev->dev))
1134 return;
1135
1136 in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
1137 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
1138 if (!in_mad || !out_mad)
1139 goto out;
1140
1141 guid_tbl_blk_num *= 4;
1142
1143 for (i = 0; i < 4; i++) {
1144 if (change_bitmap && (!((change_bitmap >> (8 * i)) & 0xff)))
1145 continue;
1146 memset(in_mad, 0, sizeof *in_mad);
1147 memset(out_mad, 0, sizeof *out_mad);
1148
1149 in_mad->base_version = 1;
1150 in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
1151 in_mad->class_version = 1;
1152 in_mad->method = IB_MGMT_METHOD_GET;
1153 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
1154 in_mad->attr_mod = cpu_to_be32(guid_tbl_blk_num + i);
1155
1156 if (mlx4_MAD_IFC(dev,
1157 MLX4_MAD_IFC_IGNORE_KEYS | MLX4_MAD_IFC_NET_VIEW,
1158 port_num, NULL, NULL, in_mad, out_mad)) {
1159 mlx4_ib_warn(&dev->ib_dev, "Failed in get GUID INFO MAD_IFC\n");
1160 goto out;
1161 }
1162
1163 mlx4_ib_update_cache_on_guid_change(dev, guid_tbl_blk_num + i,
1164 port_num,
1165 (u8 *)(&((struct ib_smp *)out_mad)->data));
1166 mlx4_ib_notify_slaves_on_guid_change(dev, guid_tbl_blk_num + i,
1167 port_num,
1168 (u8 *)(&((struct ib_smp *)out_mad)->data));
1169 }
1170
1171 out:
1172 kfree(in_mad);
1173 kfree(out_mad);
1174 return;
1175 }
1176
1177 void handle_port_mgmt_change_event(struct work_struct *work)
1178 {
1179 struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
1180 struct mlx4_ib_dev *dev = ew->ib_dev;
1181 struct mlx4_eqe *eqe = &(ew->ib_eqe);
1182 u32 port = eqe->event.port_mgmt_change.port;
1183 u32 changed_attr;
1184 u32 tbl_block;
1185 u32 change_bitmap;
1186
1187 switch (eqe->subtype) {
1188 case MLX4_DEV_PMC_SUBTYPE_PORT_INFO:
1189 changed_attr = be32_to_cpu(eqe->event.port_mgmt_change.params.port_info.changed_attr);
1190
1191 /* Update the SM ah - this should be done before handling
1192  * the other changed attributes so that MADs can be sent to the SM */
1193 if (changed_attr & MSTR_SM_CHANGE_MASK) {
1194 u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid);
1195 u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf;
1196 update_sm_ah(dev, port, lid, sl);
1197 }
1198
1199 /* Check if it is a lid change event */
1200 if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK)
1201 handle_lid_change_event(dev, port);
1202
1203 /* subnet prefix / GID change */
1204 if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
1205 if (mlx4_is_master(dev->dev)) {
1206 union ib_gid gid;
1207 int err = 0;
1208
1209 if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix)
1210 err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1);
1211 else
1212 gid.global.subnet_prefix =
1213 eqe->event.port_mgmt_change.params.port_info.gid_prefix;
1214 if (err) {
1215 pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n",
1216 port, err);
1217 } else {
1218 pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n",
1219 port,
1220 (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix),
1221 be64_to_cpu(gid.global.subnet_prefix));
1222 atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix,
1223 be64_to_cpu(gid.global.subnet_prefix));
1224 }
1225 }
1226 mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
1227 /* if master, notify all slaves */
1228 if (mlx4_is_master(dev->dev))
1229 mlx4_gen_slaves_port_mgt_ev(dev->dev, port,
1230 MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK);
1231 }
1232
1233 if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK)
1234 handle_client_rereg_event(dev, port);
1235 break;
1236
1237 case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE:
1238 mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE);
1239 if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
1240 propagate_pkey_ev(dev, port, eqe);
1241 break;
1242 case MLX4_DEV_PMC_SUBTYPE_GUID_INFO:
1243 /* non-master: just report the GID change */
1244 if (!mlx4_is_master(dev->dev))
1245 mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
1246 /* master: refresh the GUID cache and notify the affected slaves */
1247 else if (!dev->sriov.is_going_down) {
1248 tbl_block = GET_BLK_PTR_FROM_EQE(eqe);
1249 change_bitmap = GET_MASK_FROM_EQE(eqe);
1250 handle_slaves_guid_change(dev, port, tbl_block, change_bitmap);
1251 }
1252 break;
1253
1254 case MLX4_DEV_PMC_SUBTYPE_SL_TO_VL_MAP:
1255 /* cache sl to vl mapping changes for use in
1256  * filling QP1 LRH VL field when sending packets
1257  */
1258 if (!mlx4_is_slave(dev->dev)) {
1259 union sl2vl_tbl_to_u64 sl2vl64;
1260 int jj;
1261
1262 for (jj = 0; jj < 8; jj++) {
1263 sl2vl64.sl8[jj] =
1264 eqe->event.port_mgmt_change.params.sl2vl_tbl_change_info.sl2vl_table[jj];
1265 pr_debug("port %u, sl2vl[%d] = %02x\n",
1266 port, jj, sl2vl64.sl8[jj]);
1267 }
1268 atomic64_set(&dev->sl2vl[port - 1], sl2vl64.sl64);
1269 }
1270 break;
1271 default:
1272 pr_warn("Unsupported subtype 0x%x for "
1273 "Port Management Change event\n", eqe->subtype);
1274 }
1275
1276 kfree(ew);
1277 }
1278
1279 void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u32 port_num,
1280 enum ib_event_type type)
1281 {
1282 struct ib_event event;
1283
1284 event.device = &dev->ib_dev;
1285 event.element.port_num = port_num;
1286 event.event = type;
1287
1288 ib_dispatch_event(&event);
1289 }
1290
1291 static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg)
1292 {
1293 unsigned long flags;
1294 struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
1295 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1296 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
1297 if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
1298 queue_work(ctx->wq, &ctx->work);
1299 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
1300 }
1301
1302 static void mlx4_ib_wire_comp_handler(struct ib_cq *cq, void *arg)
1303 {
1304 unsigned long flags;
1305 struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
1306 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1307
1308 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
1309 if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
1310 queue_work(ctx->wi_wq, &ctx->work);
1311 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
1312 }
1313
1314 static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
1315 struct mlx4_ib_demux_pv_qp *tun_qp,
1316 int index)
1317 {
1318 struct ib_sge sg_list;
1319 struct ib_recv_wr recv_wr;
1320 const struct ib_recv_wr *bad_recv_wr;
1321 int size;
1322
1323 size = (tun_qp->qp->qp_type == IB_QPT_UD) ?
1324 sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf);
1325
1326 sg_list.addr = tun_qp->ring[index].map;
1327 sg_list.length = size;
1328 sg_list.lkey = ctx->pd->local_dma_lkey;
1329
1330 recv_wr.next = NULL;
1331 recv_wr.sg_list = &sg_list;
1332 recv_wr.num_sge = 1;
1333 recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV |
1334 MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt);
1335 ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map,
1336 size, DMA_FROM_DEVICE);
1337 return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr);
1338 }
1339
1340 static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
1341 int slave, struct ib_sa_mad *sa_mad)
1342 {
1343 int ret = 0;
1344
1345 /* dispatch to different sa handlers */
1346 switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
1347 case IB_SA_ATTR_MC_MEMBER_REC:
1348 ret = mlx4_ib_mcg_multiplex_handler(ibdev, port, slave, sa_mad);
1349 break;
1350 default:
1351 break;
1352 }
1353 return ret;
1354 }
1355
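/* Send a MAD on behalf of a slave out to the wire, posting it on the
 * appropriate special QP (SMI or GSI) with the address, MAC and VLAN
 * information recovered from the tunnel header. */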
1356 int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u32 port,
1357 enum ib_qp_type dest_qpt, u16 pkey_index,
1358 u32 remote_qpn, u32 qkey, struct rdma_ah_attr *attr,
1359 u8 *s_mac, u16 vlan_id, struct ib_mad *mad)
1360 {
1361 struct ib_sge list;
1362 struct ib_ud_wr wr;
1363 const struct ib_send_wr *bad_wr;
1364 struct mlx4_ib_demux_pv_ctx *sqp_ctx;
1365 struct mlx4_ib_demux_pv_qp *sqp;
1366 struct mlx4_mad_snd_buf *sqp_mad;
1367 struct ib_ah *ah;
1368 struct ib_qp *send_qp = NULL;
1369 unsigned wire_tx_ix = 0;
1370 u16 wire_pkey_ix;
1371 int src_qpnum;
1372 int ret;
1373
1374 sqp_ctx = dev->sriov.sqps[port-1];
1375
1376 /* check if proxy qp created */
1377 if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
1378 return -EAGAIN;
1379
1380 if (dest_qpt == IB_QPT_SMI) {
1381 src_qpnum = 0;
1382 sqp = &sqp_ctx->qp[0];
1383 wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
1384 } else {
1385 src_qpnum = 1;
1386 sqp = &sqp_ctx->qp[1];
1387 wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][pkey_index];
1388 }
1389
1390 send_qp = sqp->qp;
1391
1392 ah = rdma_zalloc_drv_obj(sqp_ctx->pd->device, ib_ah);
1393 if (!ah)
1394 return -ENOMEM;
1395
1396 ah->device = sqp_ctx->pd->device;
1397 ah->pd = sqp_ctx->pd;
1398
1399 /* build the slave's address handle from the tunnelled attributes */
1400 ret = mlx4_ib_create_ah_slave(ah, attr,
1401 rdma_ah_retrieve_grh(attr)->sgid_index,
1402 s_mac, vlan_id);
1403 if (ret)
1404 goto out;
1405
1406 spin_lock(&sqp->tx_lock);
1407 if (sqp->tx_ix_head - sqp->tx_ix_tail >=
1408 (MLX4_NUM_WIRE_BUFS - 1))
1409 ret = -EAGAIN;
1410 else
1411 wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_WIRE_BUFS - 1);
1412 spin_unlock(&sqp->tx_lock);
1413 if (ret)
1414 goto out;
1415
1416 sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
1417 kfree(sqp->tx_ring[wire_tx_ix].ah);
1418 sqp->tx_ring[wire_tx_ix].ah = ah;
1419 ib_dma_sync_single_for_cpu(&dev->ib_dev,
1420 sqp->tx_ring[wire_tx_ix].buf.map,
1421 sizeof (struct mlx4_mad_snd_buf),
1422 DMA_TO_DEVICE);
1423
1424 memcpy(&sqp_mad->payload, mad, sizeof *mad);
1425
1426 ib_dma_sync_single_for_device(&dev->ib_dev,
1427 sqp->tx_ring[wire_tx_ix].buf.map,
1428 sizeof (struct mlx4_mad_snd_buf),
1429 DMA_TO_DEVICE);
1430
1431 list.addr = sqp->tx_ring[wire_tx_ix].buf.map;
1432 list.length = sizeof (struct mlx4_mad_snd_buf);
1433 list.lkey = sqp_ctx->pd->local_dma_lkey;
1434
1435 wr.ah = ah;
1436 wr.port_num = port;
1437 wr.pkey_index = wire_pkey_ix;
1438 wr.remote_qkey = qkey;
1439 wr.remote_qpn = remote_qpn;
1440 wr.wr.next = NULL;
1441 wr.wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
1442 wr.wr.sg_list = &list;
1443 wr.wr.num_sge = 1;
1444 wr.wr.opcode = IB_WR_SEND;
1445 wr.wr.send_flags = IB_SEND_SIGNALED;
1446
1447 ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
1448 if (!ret)
1449 return 0;
1450
1451 spin_lock(&sqp->tx_lock);
1452 sqp->tx_ix_tail++;
1453 spin_unlock(&sqp->tx_lock);
1454 sqp->tx_ring[wire_tx_ix].ah = NULL;
1455 out:
1456 kfree(ah);
1457 return ret;
1458 }
1459
1460 static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port)
1461 {
1462 if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
1463 return slave;
1464 return mlx4_get_base_gid_ix(dev->dev, slave, port);
1465 }
1466
1467 static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port,
1468 struct rdma_ah_attr *ah_attr)
1469 {
1470 struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr);
1471 if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
1472 grh->sgid_index = slave;
1473 else
1474 grh->sgid_index += get_slave_base_gid_ix(dev, slave, port);
1475 }
1476
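/* Handle a MAD tunnelled up from a slave: validate the source proxy QP, stamp
 * request TIDs with the slave id, apply class-specific paravirtualization,
 * rebuild the address handle and send the MAD to the wire. */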
1477 static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
1478 {
1479 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1480 struct mlx4_ib_demux_pv_qp *tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc->wr_id)];
1481 int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1);
1482 struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr;
1483 struct mlx4_ib_ah ah;
1484 struct rdma_ah_attr ah_attr;
1485 u8 *slave_id;
1486 int slave;
1487 int port;
1488 u16 vlan_id;
1489 u8 qos;
1490 u8 *dmac;
1491 int sts;
1492
1493 /* Get slave that sent this packet */
1494 if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
1495 wc->src_qp >= dev->dev->phys_caps.base_proxy_sqpn + 8 * MLX4_MFUNC_MAX ||
1496 (wc->src_qp & 0x1) != ctx->port - 1 ||
1497 wc->src_qp & 0x4) {
1498 mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp);
1499 return;
1500 }
1501 slave = ((wc->src_qp & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8;
1502 if (slave != ctx->slave) {
1503 mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
1504 "belongs to another slave\n", wc->src_qp);
1505 return;
1506 }
1507
1508 /* sync the tunnelled buffer for the CPU, then stamp request TIDs with the slave number */
1509 ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
1510 sizeof (struct mlx4_tunnel_mad),
1511 DMA_FROM_DEVICE);
1512 switch (tunnel->mad.mad_hdr.method) {
1513 case IB_MGMT_METHOD_SET:
1514 case IB_MGMT_METHOD_GET:
1515 case IB_MGMT_METHOD_REPORT:
1516 case IB_SA_METHOD_GET_TABLE:
1517 case IB_SA_METHOD_DELETE:
1518 case IB_SA_METHOD_GET_MULTI:
1519 case IB_SA_METHOD_GET_TRACE_TBL:
1520 slave_id = (u8 *) &tunnel->mad.mad_hdr.tid;
1521 if (*slave_id) {
1522 mlx4_ib_warn(ctx->ib_dev, "egress mad has non-null tid msb:%d "
1523 "class:%d slave:%d\n", *slave_id,
1524 tunnel->mad.mad_hdr.mgmt_class, slave);
1525 return;
1526 } else
1527 *slave_id = slave;
1528 break;
1529 default:
1530 ;
1531 }
1532
1533 /* Class-specific handling */
1534 switch (tunnel->mad.mad_hdr.mgmt_class) {
1535 case IB_MGMT_CLASS_SUBN_LID_ROUTED:
1536 case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
1537 if (slave != mlx4_master_func_num(dev->dev) &&
1538 !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port))
1539 return;
1540 break;
1541 case IB_MGMT_CLASS_SUBN_ADM:
1542 if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
1543 (struct ib_sa_mad *) &tunnel->mad))
1544 return;
1545 break;
1546 case IB_MGMT_CLASS_CM:
1547 if (mlx4_ib_multiplex_cm_handler(ctx->ib_dev, ctx->port, slave,
1548 (struct ib_mad *) &tunnel->mad))
1549 return;
1550 break;
1551 case IB_MGMT_CLASS_DEVICE_MGMT:
1552 if (tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_GET &&
1553 tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_SET)
1554 return;
1555 break;
1556 default:
1557 /* Drop unsupported classes for slaves in tunnel mode */
1558 if (slave != mlx4_master_func_num(dev->dev)) {
1559 mlx4_ib_warn(ctx->ib_dev, "dropping unsupported egress mad from class:%d "
1560 "for slave:%d\n", tunnel->mad.mad_hdr.mgmt_class, slave);
1561 return;
1562 }
1563 }
1564
1565 /* We are using standard ib_core services to send the mad, so generate a
1566  * standard address handle by decoding the tunnelled mlx4_ah fields */
1567 memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
1568 ah.ibah.device = ctx->ib_dev;
1569
1570 port = be32_to_cpu(ah.av.ib.port_pd) >> 24;
1571 port = mlx4_slave_convert_port(dev->dev, slave, port);
1572 if (port < 0)
1573 return;
1574 ah.av.ib.port_pd = cpu_to_be32(port << 24 | (be32_to_cpu(ah.av.ib.port_pd) & 0xffffff));
1575 ah.ibah.type = rdma_ah_find_type(&dev->ib_dev, port);
1576
1577 mlx4_ib_query_ah(&ah.ibah, &ah_attr);
1578 if (rdma_ah_get_ah_flags(&ah_attr) & IB_AH_GRH)
1579 fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);
1580 dmac = rdma_ah_retrieve_dmac(&ah_attr);
1581 if (dmac)
1582 memcpy(dmac, tunnel->hdr.mac, ETH_ALEN);
1583 vlan_id = be16_to_cpu(tunnel->hdr.vlan);
1584
1585 if (mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
1586 &vlan_id, &qos))
1587 rdma_ah_set_sl(&ah_attr, qos);
1588
1589 sts = mlx4_ib_send_to_wire(dev, slave, ctx->port,
1590 is_proxy_qp0(dev, wc->src_qp, slave) ?
1591 IB_QPT_SMI : IB_QPT_GSI,
1592 be16_to_cpu(tunnel->hdr.pkey_index),
1593 be32_to_cpu(tunnel->hdr.remote_qpn),
1594 be32_to_cpu(tunnel->hdr.qkey),
1595 &ah_attr, wc->smac, vlan_id, &tunnel->mad);
1596 if (sts)
1597 pr_debug("failed sending %s to wire on behalf of slave %d (%d)\n",
1598 is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
1599 slave, sts);
1600 }
1601
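/* Allocate and DMA-map the receive and transmit rings for one proxy QP
 * (tunnel or wire), unwinding all mappings on failure. */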
1602 static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
1603 enum ib_qp_type qp_type, int is_tun)
1604 {
1605 int i;
1606 struct mlx4_ib_demux_pv_qp *tun_qp;
1607 int rx_buf_size, tx_buf_size;
1608 const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
1609
1610 if (qp_type > IB_QPT_GSI)
1611 return -EINVAL;
1612
1613 tun_qp = &ctx->qp[qp_type];
1614
1615 tun_qp->ring = kcalloc(nmbr_bufs,
1616 sizeof(struct mlx4_ib_buf),
1617 GFP_KERNEL);
1618 if (!tun_qp->ring)
1619 return -ENOMEM;
1620
1621 tun_qp->tx_ring = kcalloc(nmbr_bufs,
1622 sizeof (struct mlx4_ib_tun_tx_buf),
1623 GFP_KERNEL);
1624 if (!tun_qp->tx_ring) {
1625 kfree(tun_qp->ring);
1626 tun_qp->ring = NULL;
1627 return -ENOMEM;
1628 }
1629
1630 if (is_tun) {
1631 rx_buf_size = sizeof (struct mlx4_tunnel_mad);
1632 tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
1633 } else {
1634 rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
1635 tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
1636 }
1637
1638 for (i = 0; i < nmbr_bufs; i++) {
1639 tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
1640 if (!tun_qp->ring[i].addr)
1641 goto err;
1642 tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev,
1643 tun_qp->ring[i].addr,
1644 rx_buf_size,
1645 DMA_FROM_DEVICE);
1646 if (ib_dma_mapping_error(ctx->ib_dev, tun_qp->ring[i].map)) {
1647 kfree(tun_qp->ring[i].addr);
1648 goto err;
1649 }
1650 }
1651
1652 for (i = 0; i < nmbr_bufs; i++) {
1653 tun_qp->tx_ring[i].buf.addr =
1654 kmalloc(tx_buf_size, GFP_KERNEL);
1655 if (!tun_qp->tx_ring[i].buf.addr)
1656 goto tx_err;
1657 tun_qp->tx_ring[i].buf.map =
1658 ib_dma_map_single(ctx->ib_dev,
1659 tun_qp->tx_ring[i].buf.addr,
1660 tx_buf_size,
1661 DMA_TO_DEVICE);
1662 if (ib_dma_mapping_error(ctx->ib_dev,
1663 tun_qp->tx_ring[i].buf.map)) {
1664 kfree(tun_qp->tx_ring[i].buf.addr);
1665 goto tx_err;
1666 }
1667 tun_qp->tx_ring[i].ah = NULL;
1668 }
1669 spin_lock_init(&tun_qp->tx_lock);
1670 tun_qp->tx_ix_head = 0;
1671 tun_qp->tx_ix_tail = 0;
1672 tun_qp->proxy_qpt = qp_type;
1673
1674 return 0;
1675
1676 tx_err:
1677 while (i > 0) {
1678 --i;
1679 ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
1680 tx_buf_size, DMA_TO_DEVICE);
1681 kfree(tun_qp->tx_ring[i].buf.addr);
1682 }
1683 i = nmbr_bufs;
1684 err:
1685 while (i > 0) {
1686 --i;
1687 ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
1688 rx_buf_size, DMA_FROM_DEVICE);
1689 kfree(tun_qp->ring[i].addr);
1690 }
1691 kfree(tun_qp->tx_ring);
1692 tun_qp->tx_ring = NULL;
1693 kfree(tun_qp->ring);
1694 tun_qp->ring = NULL;
1695 return -ENOMEM;
1696 }
1697
1698 static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
1699 enum ib_qp_type qp_type, int is_tun)
1700 {
1701 int i;
1702 struct mlx4_ib_demux_pv_qp *tun_qp;
1703 int rx_buf_size, tx_buf_size;
1704 const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
1705
1706 if (qp_type > IB_QPT_GSI)
1707 return;
1708
1709 tun_qp = &ctx->qp[qp_type];
1710 if (is_tun) {
1711 rx_buf_size = sizeof (struct mlx4_tunnel_mad);
1712 tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
1713 } else {
1714 rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
1715 tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
1716 }
1717
1718
1719 for (i = 0; i < nmbr_bufs; i++) {
1720 ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
1721 rx_buf_size, DMA_FROM_DEVICE);
1722 kfree(tun_qp->ring[i].addr);
1723 }
1724
1725 for (i = 0; i < nmbr_bufs; i++) {
1726 ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
1727 tx_buf_size, DMA_TO_DEVICE);
1728 kfree(tun_qp->tx_ring[i].buf.addr);
1729 if (tun_qp->tx_ring[i].ah)
1730 rdma_destroy_ah(tun_qp->tx_ring[i].ah, 0);
1731 }
1732 kfree(tun_qp->tx_ring);
1733 kfree(tun_qp->ring);
1734 }
1735
1736 static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
1737 {
1738 struct mlx4_ib_demux_pv_ctx *ctx;
1739 struct mlx4_ib_demux_pv_qp *tun_qp;
1740 struct ib_wc wc;
1741 int ret;
1742 ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
1743 ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
1744
1745 while (ib_poll_cq(ctx->cq, 1, &wc) == 1) {
1746 tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
1747 if (wc.status == IB_WC_SUCCESS) {
1748 switch (wc.opcode) {
1749 case IB_WC_RECV:
1750 mlx4_ib_multiplex_mad(ctx, &wc);
1751 ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp,
1752 wc.wr_id &
1753 (MLX4_NUM_TUNNEL_BUFS - 1));
1754 if (ret)
1755 pr_err("Failed reposting tunnel "
1756 "buf:%lld\n", wc.wr_id);
1757 break;
1758 case IB_WC_SEND:
1759 rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
1760 (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
1761 tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1762 = NULL;
1763 spin_lock(&tun_qp->tx_lock);
1764 tun_qp->tx_ix_tail++;
1765 spin_unlock(&tun_qp->tx_lock);
1766
1767 break;
1768 default:
1769 break;
1770 }
1771 } else {
1772 pr_debug("mlx4_ib: completion error in tunnel: %d."
1773 " status = %d, wrid = 0x%llx\n",
1774 ctx->slave, wc.status, wc.wr_id);
1775 if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
1776 rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
1777 (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
1778 tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1779 = NULL;
1780 spin_lock(&tun_qp->tx_lock);
1781 tun_qp->tx_ix_tail++;
1782 spin_unlock(&tun_qp->tx_lock);
1783 }
1784 }
1785 }
1786 }
1787
1788 static void pv_qp_event_handler(struct ib_event *event, void *qp_context)
1789 {
1790 struct mlx4_ib_demux_pv_ctx *sqp = qp_context;
1791
1792 /* async events on the proxy/special MAD QPs are unexpected; just log them */
1793 pr_err("Fatal error (%d) on a MAD QP on port %d\n",
1794 event->event, sqp->port);
1795 }
1796
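/* Create one proxy QP (a tunnel QP for a slave, or a special QP for the
 * wire), move it through INIT/RTR/RTS and pre-post its receive buffers. */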
1797 static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
1798 enum ib_qp_type qp_type, int create_tun)
1799 {
1800 int i, ret;
1801 struct mlx4_ib_demux_pv_qp *tun_qp;
1802 struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
1803 struct ib_qp_attr attr;
1804 int qp_attr_mask_INIT;
1805 const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
1806
1807 if (qp_type > IB_QPT_GSI)
1808 return -EINVAL;
1809
1810 tun_qp = &ctx->qp[qp_type];
1811
1812 memset(&qp_init_attr, 0, sizeof qp_init_attr);
1813 qp_init_attr.init_attr.send_cq = ctx->cq;
1814 qp_init_attr.init_attr.recv_cq = ctx->cq;
1815 qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
1816 qp_init_attr.init_attr.cap.max_send_wr = nmbr_bufs;
1817 qp_init_attr.init_attr.cap.max_recv_wr = nmbr_bufs;
1818 qp_init_attr.init_attr.cap.max_send_sge = 1;
1819 qp_init_attr.init_attr.cap.max_recv_sge = 1;
1820 if (create_tun) {
1821 qp_init_attr.init_attr.qp_type = IB_QPT_UD;
1822 qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_TUNNEL_QP;
1823 qp_init_attr.port = ctx->port;
1824 qp_init_attr.slave = ctx->slave;
1825 qp_init_attr.proxy_qp_type = qp_type;
1826 qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX |
1827 IB_QP_QKEY | IB_QP_PORT;
1828 } else {
1829 qp_init_attr.init_attr.qp_type = qp_type;
1830 qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_SQP;
1831 qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY;
1832 }
1833 qp_init_attr.init_attr.port_num = ctx->port;
1834 qp_init_attr.init_attr.qp_context = ctx;
1835 qp_init_attr.init_attr.event_handler = pv_qp_event_handler;
1836 tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr);
1837 if (IS_ERR(tun_qp->qp)) {
1838 ret = PTR_ERR(tun_qp->qp);
1839 tun_qp->qp = NULL;
1840 pr_err("Couldn't create %s QP (%d)\n",
1841 create_tun ? "tunnel" : "special", ret);
1842 return ret;
1843 }
1844
1845 memset(&attr, 0, sizeof attr);
1846 attr.qp_state = IB_QPS_INIT;
1847 ret = 0;
1848 if (create_tun)
1849 ret = find_slave_port_pkey_ix(to_mdev(ctx->ib_dev), ctx->slave,
1850 ctx->port, IB_DEFAULT_PKEY_FULL,
1851 &attr.pkey_index);
1852 if (ret || !create_tun)
1853 attr.pkey_index =
1854 to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
1855 attr.qkey = IB_QP1_QKEY;
1856 attr.port_num = ctx->port;
1857 ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT);
1858 if (ret) {
1859 pr_err("Couldn't change %s qp state to INIT (%d)\n",
1860 create_tun ? "tunnel" : "special", ret);
1861 goto err_qp;
1862 }
1863 attr.qp_state = IB_QPS_RTR;
1864 ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE);
1865 if (ret) {
1866 pr_err("Couldn't change %s qp state to RTR (%d)\n",
1867 create_tun ? "tunnel" : "special", ret);
1868 goto err_qp;
1869 }
1870 attr.qp_state = IB_QPS_RTS;
1871 attr.sq_psn = 0;
1872 ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
1873 if (ret) {
1874 pr_err("Couldn't change %s qp state to RTS (%d)\n",
1875 create_tun ? "tunnel" : "special", ret);
1876 goto err_qp;
1877 }
1878
1879 for (i = 0; i < nmbr_bufs; i++) {
1880 ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
1881 if (ret) {
1882 pr_err(" mlx4_ib_post_pv_buf error"
1883 " (err = %d, i = %d)\n", ret, i);
1884 goto err_qp;
1885 }
1886 }
1887 return 0;
1888
1889 err_qp:
1890 ib_destroy_qp(tun_qp->qp);
1891 tun_qp->qp = NULL;
1892 return ret;
1893 }
1894
1895 /*
1896  * Completion worker for the wire (special) QPs.
1897  */
1898 static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
1899 {
1900 struct mlx4_ib_demux_pv_ctx *ctx;
1901 struct mlx4_ib_demux_pv_qp *sqp;
1902 struct ib_wc wc;
1903 struct ib_grh *grh;
1904 struct ib_mad *mad;
1905
1906 ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
1907 ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
1908
1909 while (mlx4_ib_poll_cq(ctx->cq, 1, &wc) == 1) {
1910 sqp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
1911 if (wc.status == IB_WC_SUCCESS) {
1912 switch (wc.opcode) {
1913 case IB_WC_SEND:
1914 kfree(sqp->tx_ring[wc.wr_id &
1915 (MLX4_NUM_WIRE_BUFS - 1)].ah);
1916 sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
1917 = NULL;
1918 spin_lock(&sqp->tx_lock);
1919 sqp->tx_ix_tail++;
1920 spin_unlock(&sqp->tx_lock);
1921 break;
1922 case IB_WC_RECV:
1923 mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
1924 (sqp->ring[wc.wr_id &
1925 (MLX4_NUM_WIRE_BUFS - 1)].addr))->payload);
1926 grh = &(((struct mlx4_mad_rcv_buf *)
1927 (sqp->ring[wc.wr_id &
1928 (MLX4_NUM_WIRE_BUFS - 1)].addr))->grh);
1929 mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
1930 if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
1931 (MLX4_NUM_WIRE_BUFS - 1)))
1932 pr_err("Failed reposting SQP buf:%lld\n",
1933 wc.wr_id);
1934 break;
1935 default:
1936 break;
1937 }
1938 } else {
1939 pr_debug("mlx4_ib: completion error on SQP: slave %d, status = %d, wrid = 0x%llx\n",
1940 ctx->slave, wc.status,
1941 wc.wr_id);
1942 if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
1943 kfree(sqp->tx_ring[wc.wr_id &
1944 (MLX4_NUM_WIRE_BUFS - 1)].ah);
1945 sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
1946 = NULL;
1947 spin_lock(&sqp->tx_lock);
1948 sqp->tx_ix_tail++;
1949 spin_unlock(&sqp->tx_lock);
1950 }
1951 }
1952 }
1953 }
1954
1955 static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port,
1956 struct mlx4_ib_demux_pv_ctx **ret_ctx)
1957 {
1958 struct mlx4_ib_demux_pv_ctx *ctx;
1959
1960 *ret_ctx = NULL;
1961 ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL);
1962 if (!ctx)
1963 return -ENOMEM;
1964
1965 ctx->ib_dev = &dev->ib_dev;
1966 ctx->port = port;
1967 ctx->slave = slave;
1968 *ret_ctx = ctx;
1969 return 0;
1970 }
1971
1972 static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port)
1973 {
1974 if (dev->sriov.demux[port - 1].tun[slave]) {
1975 kfree(dev->sriov.demux[port - 1].tun[slave]);
1976 dev->sriov.demux[port - 1].tun[slave] = NULL;
1977 }
1978 }
1979
1980 static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
1981 int create_tun, struct mlx4_ib_demux_pv_ctx *ctx)
1982 {
1983 int ret, cq_size;
1984 struct ib_cq_init_attr cq_attr = {};
1985 const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
1986
1987 if (ctx->state != DEMUX_PV_STATE_DOWN)
1988 return -EEXIST;
1989
1990 ctx->state = DEMUX_PV_STATE_STARTING;
1991
1992 if (rdma_port_get_link_layer(ibdev, ctx->port) ==
1993 IB_LINK_LAYER_INFINIBAND)
1994 ctx->has_smi = 1;
1995
1996 if (ctx->has_smi) {
1997 ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun);
1998 if (ret) {
1999 pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret);
2000 goto err_out;
2001 }
2002 }
2003
2004 ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun);
2005 if (ret) {
2006 pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret);
2007 goto err_out_qp0;
2008 }
2009
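/*
 * A single CQ covers both the send and receive queues of the GSI QP
 * (2 * nmbr_bufs entries), doubled again when an SMI QP is also used.
 */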
2010 cq_size = 2 * nmbr_bufs;
2011 if (ctx->has_smi)
2012 cq_size *= 2;
2013
2014 cq_attr.cqe = cq_size;
2015 ctx->cq = ib_create_cq(ctx->ib_dev,
2016 create_tun ? mlx4_ib_tunnel_comp_handler : mlx4_ib_wire_comp_handler,
2017 NULL, ctx, &cq_attr);
2018 if (IS_ERR(ctx->cq)) {
2019 ret = PTR_ERR(ctx->cq);
2020 pr_err("Couldn't create tunnel CQ (%d)\n", ret);
2021 goto err_buf;
2022 }
2023
2024 ctx->pd = ib_alloc_pd(ctx->ib_dev, 0);
2025 if (IS_ERR(ctx->pd)) {
2026 ret = PTR_ERR(ctx->pd);
2027 pr_err("Couldn't create tunnel PD (%d)\n", ret);
2028 goto err_cq;
2029 }
2030
2031 if (ctx->has_smi) {
2032 ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun);
2033 if (ret) {
2034 pr_err("Couldn't create %s QP0 (%d)\n",
2035 create_tun ? "tunnel" : "wire", ret);
2036 goto err_pd;
2037 }
2038 }
2039
2040 ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun);
2041 if (ret) {
2042 pr_err("Couldn't create %s QP1 (%d)\n",
2043 create_tun ? "tunnel" : "wire", ret);
2044 goto err_qp0;
2045 }
2046
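/*
 * Tunnel contexts relay MADs to and from a slave's proxy QPs, while
 * wire contexts drive the port's real QP0/QP1; each flavour gets its
 * own completion worker.
 */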
2047 if (create_tun)
2048 INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker);
2049 else
2050 INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);
2051
2052 ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;
2053 ctx->wi_wq = to_mdev(ibdev)->sriov.demux[port - 1].wi_wq;
2054
2055 ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
2056 if (ret) {
2057 pr_err("Couldn't arm tunnel cq (%d)\n", ret);
2058 goto err_wq;
2059 }
2060 ctx->state = DEMUX_PV_STATE_ACTIVE;
2061 return 0;
2062
2063 err_wq:
2064 ctx->wq = NULL;
2065 ib_destroy_qp(ctx->qp[1].qp);
2066 ctx->qp[1].qp = NULL;
2067
2068
2069 err_qp0:
2070 if (ctx->has_smi)
2071 ib_destroy_qp(ctx->qp[0].qp);
2072 ctx->qp[0].qp = NULL;
2073
2074 err_pd:
2075 ib_dealloc_pd(ctx->pd);
2076 ctx->pd = NULL;
2077
2078 err_cq:
2079 ib_destroy_cq(ctx->cq);
2080 ctx->cq = NULL;
2081
2082 err_buf:
2083 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun);
2084
2085 err_out_qp0:
2086 if (ctx->has_smi)
2087 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun);
2088 err_out:
2089 ctx->state = DEMUX_PV_STATE_DOWN;
2090 return ret;
2091 }
2092
2093 static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port,
2094 struct mlx4_ib_demux_pv_ctx *ctx, int flush)
2095 {
2096 if (!ctx)
2097 return;
2098 if (ctx->state > DEMUX_PV_STATE_DOWN) {
2099 ctx->state = DEMUX_PV_STATE_DOWNING;
2100 if (flush)
2101 flush_workqueue(ctx->wq);
2102 if (ctx->has_smi) {
2103 ib_destroy_qp(ctx->qp[0].qp);
2104 ctx->qp[0].qp = NULL;
2105 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1);
2106 }
2107 ib_destroy_qp(ctx->qp[1].qp);
2108 ctx->qp[1].qp = NULL;
2109 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1);
2110 ib_dealloc_pd(ctx->pd);
2111 ctx->pd = NULL;
2112 ib_destroy_cq(ctx->cq);
2113 ctx->cq = NULL;
2114 ctx->state = DEMUX_PV_STATE_DOWN;
2115 }
2116 }
2117
2118 static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave,
2119 int port, int do_init)
2120 {
2121 int ret = 0;
2122
2123 if (!do_init) {
2124 clean_vf_mcast(&dev->sriov.demux[port - 1], slave);
2125 /* for master, destroy the real SQP resources */
2126 if (slave == mlx4_master_func_num(dev->dev))
2127 destroy_pv_resources(dev, slave, port,
2128 dev->sriov.sqps[port - 1], 1);
2129 /* destroy the tunnel QP resources */
2130 destroy_pv_resources(dev, slave, port,
2131 dev->sriov.demux[port - 1].tun[slave], 1);
2132 return 0;
2133 }
2134
2135 /* create the tunnel QP resources */
2136 ret = create_pv_resources(&dev->ib_dev, slave, port, 1,
2137 dev->sriov.demux[port - 1].tun[slave]);
2138
2139 /* for master, create the real SQP resources */
2140 if (!ret && slave == mlx4_master_func_num(dev->dev))
2141 ret = create_pv_resources(&dev->ib_dev, slave, port, 0,
2142 dev->sriov.sqps[port - 1]);
2143 return ret;
2144 }
2145
2146 void mlx4_ib_tunnels_update_work(struct work_struct *work)
2147 {
2148 struct mlx4_ib_demux_work *dmxw;
2149
2150 dmxw = container_of(work, struct mlx4_ib_demux_work, work);
2151 mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port,
2152 dmxw->do_init);
2153 kfree(dmxw);
2154 return;
2155 }
2156
2157 static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
2158 struct mlx4_ib_demux_ctx *ctx,
2159 int port)
2160 {
2161 char name[12];
2162 int ret = 0;
2163 int i;
2164
2165 ctx->tun = kcalloc(dev->dev->caps.sqp_demux,
2166 sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL);
2167 if (!ctx->tun)
2168 return -ENOMEM;
2169
2170 ctx->dev = dev;
2171 ctx->port = port;
2172 ctx->ib_dev = &dev->ib_dev;
2173
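/*
 * Reserve one paravirt context slot per function (PF plus VFs) that
 * has this port active; functions without the port are skipped.
 */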
2174 for (i = 0;
2175 i < min(dev->dev->caps.sqp_demux,
2176 (u16)(dev->dev->persist->num_vfs + 1));
2177 i++) {
2178 struct mlx4_active_ports actv_ports =
2179 mlx4_get_active_ports(dev->dev, i);
2180
2181 if (!test_bit(port - 1, actv_ports.ports))
2182 continue;
2183
2184 ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
2185 if (ret) {
2186 ret = -ENOMEM;
2187 goto err_mcg;
2188 }
2189 }
2190
2191 ret = mlx4_ib_mcg_port_init(ctx);
2192 if (ret) {
2193 pr_err("Failed initializing mcg para-virt (%d)\n", ret);
2194 goto err_mcg;
2195 }
2196
2197 snprintf(name, sizeof(name), "mlx4_ibt%d", port);
2198 ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
2199 if (!ctx->wq) {
2200 pr_err("Failed to create tunnelling WQ for port %d\n", port);
2201 ret = -ENOMEM;
2202 goto err_wq;
2203 }
2204
2205 snprintf(name, sizeof(name), "mlx4_ibwi%d", port);
2206 ctx->wi_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
2207 if (!ctx->wi_wq) {
2208 pr_err("Failed to create wire WQ for port %d\n", port);
2209 ret = -ENOMEM;
2210 goto err_wiwq;
2211 }
2212
2213 snprintf(name, sizeof(name), "mlx4_ibud%d", port);
2214 ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
2215 if (!ctx->ud_wq) {
2216 pr_err("Failed to create up/down WQ for port %d\n", port);
2217 ret = -ENOMEM;
2218 goto err_udwq;
2219 }
2220
2221 return 0;
2222
2223 err_udwq:
2224 destroy_workqueue(ctx->wi_wq);
2225 ctx->wi_wq = NULL;
2226
2227 err_wiwq:
2228 destroy_workqueue(ctx->wq);
2229 ctx->wq = NULL;
2230
2231 err_wq:
2232 mlx4_ib_mcg_port_cleanup(ctx, 1);
2233 err_mcg:
2234 for (i = 0; i < dev->dev->caps.sqp_demux; i++)
2235 free_pv_object(dev, i, port);
2236 kfree(ctx->tun);
2237 ctx->tun = NULL;
2238 return ret;
2239 }
2240
2241 static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx)
2242 {
2243 if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) {
2244 sqp_ctx->state = DEMUX_PV_STATE_DOWNING;
2245 flush_workqueue(sqp_ctx->wq);
2246 if (sqp_ctx->has_smi) {
2247 ib_destroy_qp(sqp_ctx->qp[0].qp);
2248 sqp_ctx->qp[0].qp = NULL;
2249 mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0);
2250 }
2251 ib_destroy_qp(sqp_ctx->qp[1].qp);
2252 sqp_ctx->qp[1].qp = NULL;
2253 mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0);
2254 ib_dealloc_pd(sqp_ctx->pd);
2255 sqp_ctx->pd = NULL;
2256 ib_destroy_cq(sqp_ctx->cq);
2257 sqp_ctx->cq = NULL;
2258 sqp_ctx->state = DEMUX_PV_STATE_DOWN;
2259 }
2260 }
2261
2262 static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
2263 {
2264 int i;
2265 if (ctx) {
2266 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
2267 mlx4_ib_mcg_port_cleanup(ctx, 1);
2268 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
2269 if (!ctx->tun[i])
2270 continue;
2271 if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN)
2272 ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
2273 }
2274 flush_workqueue(ctx->wq);
2275 flush_workqueue(ctx->wi_wq);
2276 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
2277 destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
2278 free_pv_object(dev, i, ctx->port);
2279 }
2280 kfree(ctx->tun);
2281 destroy_workqueue(ctx->ud_wq);
2282 destroy_workqueue(ctx->wi_wq);
2283 destroy_workqueue(ctx->wq);
2284 }
2285 }
2286
2287 static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init)
2288 {
2289 int i;
2290
2291 if (!mlx4_is_master(dev->dev))
2292 return;
2293
2294 for (i = 0; i < dev->dev->caps.num_ports; i++)
2295 mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init);
2296 return;
2297 }
2298
2299 int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
2300 {
2301 int i = 0;
2302 int err;
2303
2304 if (!mlx4_is_mfunc(dev->dev))
2305 return 0;
2306
2307 dev->sriov.is_going_down = 0;
2308 spin_lock_init(&dev->sriov.going_down_lock);
2309 mlx4_ib_cm_paravirt_init(dev);
2310
2311 mlx4_ib_warn(&dev->ib_dev, "multi-function enabled\n");
2312
2313 if (mlx4_is_slave(dev->dev)) {
2314 mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n");
2315 return 0;
2316 }
2317
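/*
 * Hand out node GUIDs: the master keeps the device's own GUID, every
 * other function gets a freshly generated one with the OpenIB OUI
 * prefix.
 */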
2318 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
2319 if (i == mlx4_master_func_num(dev->dev))
2320 mlx4_put_slave_node_guid(dev->dev, i, dev->ib_dev.node_guid);
2321 else
2322 mlx4_put_slave_node_guid(dev->dev, i, mlx4_ib_gen_node_guid());
2323 }
2324
2325 err = mlx4_ib_init_alias_guid_service(dev);
2326 if (err) {
2327 mlx4_ib_warn(&dev->ib_dev, "Failed init alias guid process.\n");
2328 goto paravirt_err;
2329 }
2330 err = mlx4_ib_device_register_sysfs(dev);
2331 if (err) {
2332 mlx4_ib_warn(&dev->ib_dev, "Failed to register sysfs\n");
2333 goto sysfs_err;
2334 }
2335
2336 mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n",
2337 dev->dev->caps.sqp_demux);
2338 for (i = 0; i < dev->num_ports; i++) {
2339 union ib_gid gid;
2340 err = __mlx4_ib_query_gid(&dev->ib_dev, i + 1, 0, &gid, 1);
2341 if (err)
2342 goto demux_err;
2343 dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
2344 atomic64_set(&dev->sriov.demux[i].subnet_prefix,
2345 be64_to_cpu(gid.global.subnet_prefix));
2346 err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
2347 &dev->sriov.sqps[i]);
2348 if (err)
2349 goto demux_err;
2350 err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1);
2351 if (err)
2352 goto free_pv;
2353 }
2354 mlx4_ib_master_tunnels(dev, 1);
2355 return 0;
2356
2357 free_pv:
2358 free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
2359 demux_err:
2360 while (--i >= 0) {
2361 free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
2362 mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
2363 }
2364 mlx4_ib_device_unregister_sysfs(dev);
2365
2366 sysfs_err:
2367 mlx4_ib_destroy_alias_guid_service(dev);
2368
2369 paravirt_err:
2370 mlx4_ib_cm_paravirt_clean(dev, -1);
2371
2372 return err;
2373 }
2374
2375 void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev)
2376 {
2377 int i;
2378 unsigned long flags;
2379
2380 if (!mlx4_is_mfunc(dev->dev))
2381 return;
2382
2383 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
2384 dev->sriov.is_going_down = 1;
2385 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
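/*
 * The rest is master-only teardown: per-port SQP and demux contexts
 * first, then the paravirt CM state, alias GUID service and sysfs
 * entries.
 */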
2386 if (mlx4_is_master(dev->dev)) {
2387 for (i = 0; i < dev->num_ports; i++) {
2388 flush_workqueue(dev->sriov.demux[i].ud_wq);
2389 mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]);
2390 kfree(dev->sriov.sqps[i]);
2391 dev->sriov.sqps[i] = NULL;
2392 mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
2393 }
2394
2395 mlx4_ib_cm_paravirt_clean(dev, -1);
2396 mlx4_ib_destroy_alias_guid_service(dev);
2397 mlx4_ib_device_unregister_sysfs(dev);
2398 }
2399 }