0034 #include <linux/module.h>
0035 #include <linux/init.h>
0036 #include <linux/slab.h>
0037 #include <linux/errno.h>
0038 #include <linux/netdevice.h>
0039 #include <linux/inetdevice.h>
0040 #include <linux/rtnetlink.h>
0041 #include <linux/if_vlan.h>
0042 #include <linux/sched/mm.h>
0043 #include <linux/sched/task.h>
0044
0045 #include <net/ipv6.h>
0046 #include <net/addrconf.h>
0047 #include <net/devlink.h>
0048
0049 #include <rdma/ib_smi.h>
0050 #include <rdma/ib_user_verbs.h>
0051 #include <rdma/ib_addr.h>
0052 #include <rdma/ib_cache.h>
0053
0054 #include <net/bonding.h>
0055
0056 #include <linux/mlx4/driver.h>
0057 #include <linux/mlx4/cmd.h>
0058 #include <linux/mlx4/qp.h>
0059
0060 #include "mlx4_ib.h"
0061 #include <rdma/mlx4-abi.h>
0062
0063 #define DRV_NAME MLX4_IB_DRV_NAME
0064 #define DRV_VERSION "4.0-0"
0065
0066 #define MLX4_IB_FLOW_MAX_PRIO 0xFFF
0067 #define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
0068 #define MLX4_IB_CARD_REV_A0 0xA0
0069
0070 MODULE_AUTHOR("Roland Dreier");
0071 MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
0072 MODULE_LICENSE("Dual BSD/GPL");
0073
0074 int mlx4_ib_sm_guid_assign = 0;
0075 module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
0076 MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
0077
0078 static const char mlx4_ib_version[] =
0079 DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
0080 DRV_VERSION "\n";
0081
0082 static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
0083 static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device,
0084 u32 port_num);
0085
0086 static struct workqueue_struct *wq;
0087
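/*
 * Device-managed flow steering (DMFS) is usable only when every active
 * port type has the matching capability bit (FS_EN for Ethernet,
 * DMFS_IPOIB for IB) and there are no IB ports in multi-function mode.
 */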
0088 static int check_flow_steering_support(struct mlx4_dev *dev)
0089 {
0090 int eth_num_ports = 0;
0091 int ib_num_ports = 0;
0092
0093 int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
0094
0095 if (dmfs) {
0096 int i;
0097 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
0098 eth_num_ports++;
0099 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
0100 ib_num_ports++;
0101 dmfs &= (!ib_num_ports ||
0102 (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
0103 (!eth_num_ports ||
0104 (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
0105 if (ib_num_ports && mlx4_is_mfunc(dev)) {
0106 pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
0107 dmfs = 0;
0108 }
0109 }
0110 return dmfs;
0111 }
0112
0113 static int num_ib_ports(struct mlx4_dev *dev)
0114 {
0115 int ib_ports = 0;
0116 int i;
0117
0118 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
0119 ib_ports++;
0120
0121 return ib_ports;
0122 }
0123
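/*
 * Return the net_device backing an Ethernet port, following the bond
 * master to its active slave when the ports are bonded.  A reference is
 * taken on the returned device; the caller must dev_put() it.
 */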
0124 static struct net_device *mlx4_ib_get_netdev(struct ib_device *device,
0125 u32 port_num)
0126 {
0127 struct mlx4_ib_dev *ibdev = to_mdev(device);
0128 struct net_device *dev;
0129
0130 rcu_read_lock();
0131 dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
0132
0133 if (dev) {
0134 if (mlx4_is_bonded(ibdev->dev)) {
0135 struct net_device *upper = NULL;
0136
0137 upper = netdev_master_upper_dev_get_rcu(dev);
0138 if (upper) {
0139 struct net_device *active;
0140
0141 active = bond_option_active_slave_get_rcu(netdev_priv(upper));
0142 if (active)
0143 dev = active;
0144 }
0145 }
0146 }
0147 if (dev)
0148 dev_hold(dev);
0149
0150 rcu_read_unlock();
0151 return dev;
0152 }
0153
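/*
 * Write the cached GID table to the device with SET_PORT using the RoCE
 * v1 table layout; when the ports are bonded the same table is also
 * written to port 2.
 */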
0154 static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
0155 struct mlx4_ib_dev *ibdev,
0156 u32 port_num)
0157 {
0158 struct mlx4_cmd_mailbox *mailbox;
0159 int err;
0160 struct mlx4_dev *dev = ibdev->dev;
0161 int i;
0162 union ib_gid *gid_tbl;
0163
0164 mailbox = mlx4_alloc_cmd_mailbox(dev);
0165 if (IS_ERR(mailbox))
0166 return -ENOMEM;
0167
0168 gid_tbl = mailbox->buf;
0169
0170 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
0171 memcpy(&gid_tbl[i], &gids[i].gid, sizeof(union ib_gid));
0172
0173 err = mlx4_cmd(dev, mailbox->dma,
0174 MLX4_SET_PORT_GID_TABLE << 8 | port_num,
0175 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
0176 MLX4_CMD_WRAPPED);
0177 if (mlx4_is_bonded(dev))
0178 err += mlx4_cmd(dev, mailbox->dma,
0179 MLX4_SET_PORT_GID_TABLE << 8 | 2,
0180 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
0181 MLX4_CMD_WRAPPED);
0182
0183 mlx4_free_cmd_mailbox(dev, mailbox);
0184 return err;
0185 }
0186
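/*
 * As above, but using the RoCE v1/v2 table layout: entries whose gid_type
 * is IB_GID_TYPE_ROCE_UDP_ENCAP are written as version 2, with type 1 for
 * GIDs that are not IPv4-mapped.
 */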
0187 static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
0188 struct mlx4_ib_dev *ibdev,
0189 u32 port_num)
0190 {
0191 struct mlx4_cmd_mailbox *mailbox;
0192 int err;
0193 struct mlx4_dev *dev = ibdev->dev;
0194 int i;
0195 struct {
0196 union ib_gid gid;
0197 __be32 rsrvd1[2];
0198 __be16 rsrvd2;
0199 u8 type;
0200 u8 version;
0201 __be32 rsrvd3;
0202 } *gid_tbl;
0203
0204 mailbox = mlx4_alloc_cmd_mailbox(dev);
0205 if (IS_ERR(mailbox))
0206 return -ENOMEM;
0207
0208 gid_tbl = mailbox->buf;
0209 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
0210 memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
0211 if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
0212 gid_tbl[i].version = 2;
0213 if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
0214 gid_tbl[i].type = 1;
0215 }
0216 }
0217
0218 err = mlx4_cmd(dev, mailbox->dma,
0219 MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
0220 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
0221 MLX4_CMD_WRAPPED);
0222 if (mlx4_is_bonded(dev))
0223 err += mlx4_cmd(dev, mailbox->dma,
0224 MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
0225 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
0226 MLX4_CMD_WRAPPED);
0227
0228 mlx4_free_cmd_mailbox(dev, mailbox);
0229 return err;
0230 }
0231
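/* Pick the GID table format based on the device's RoCE v1/v2 capability. */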
0232 static int mlx4_ib_update_gids(struct gid_entry *gids,
0233 struct mlx4_ib_dev *ibdev,
0234 u32 port_num)
0235 {
0236 if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
0237 return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
0238
0239 return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
0240 }
0241
0242 static void free_gid_entry(struct gid_entry *entry)
0243 {
0244 memset(&entry->gid, 0, sizeof(entry->gid));
0245 kfree(entry->ctx);
0246 entry->ctx = NULL;
0247 }
0248
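/*
 * Add a GID to the per-port software cache.  If the entry is new, the
 * whole table is copied under the iboe lock and written to hardware
 * outside of it; on failure the cache entry is rolled back.
 */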
0249 static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
0250 {
0251 struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
0252 struct mlx4_ib_iboe *iboe = &ibdev->iboe;
0253 struct mlx4_port_gid_table *port_gid_table;
0254 int free = -1, found = -1;
0255 int ret = 0;
0256 int hw_update = 0;
0257 int i;
0258 struct gid_entry *gids = NULL;
0259 u16 vlan_id = 0xffff;
0260 u8 mac[ETH_ALEN];
0261
0262 if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
0263 return -EINVAL;
0264
0265 if (attr->port_num > MLX4_MAX_PORTS)
0266 return -EINVAL;
0267
0268 if (!context)
0269 return -EINVAL;
0270
0271 ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]);
0272 if (ret)
0273 return ret;
0274 port_gid_table = &iboe->gids[attr->port_num - 1];
0275 spin_lock_bh(&iboe->lock);
0276 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
0277 if (!memcmp(&port_gid_table->gids[i].gid,
0278 &attr->gid, sizeof(attr->gid)) &&
0279 port_gid_table->gids[i].gid_type == attr->gid_type &&
0280 port_gid_table->gids[i].vlan_id == vlan_id) {
0281 found = i;
0282 break;
0283 }
0284 if (free < 0 && rdma_is_zero_gid(&port_gid_table->gids[i].gid))
0285 free = i;
0286 }
0287
0288 if (found < 0) {
0289 if (free < 0) {
0290 ret = -ENOSPC;
0291 } else {
0292 port_gid_table->gids[free].ctx = kmalloc(sizeof(*port_gid_table->gids[free].ctx), GFP_ATOMIC);
0293 if (!port_gid_table->gids[free].ctx) {
0294 ret = -ENOMEM;
0295 } else {
0296 *context = port_gid_table->gids[free].ctx;
0297 memcpy(&port_gid_table->gids[free].gid,
0298 &attr->gid, sizeof(attr->gid));
0299 port_gid_table->gids[free].gid_type = attr->gid_type;
0300 port_gid_table->gids[free].vlan_id = vlan_id;
0301 port_gid_table->gids[free].ctx->real_index = free;
0302 port_gid_table->gids[free].ctx->refcount = 1;
0303 hw_update = 1;
0304 }
0305 }
0306 } else {
0307 struct gid_cache_context *ctx = port_gid_table->gids[found].ctx;
0308 *context = ctx;
0309 ctx->refcount++;
0310 }
0311 if (!ret && hw_update) {
0312 gids = kmalloc_array(MLX4_MAX_PORT_GIDS, sizeof(*gids),
0313 GFP_ATOMIC);
0314 if (!gids) {
0315 ret = -ENOMEM;
0316 *context = NULL;
0317 free_gid_entry(&port_gid_table->gids[free]);
0318 } else {
0319 for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
0320 memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
0321 gids[i].gid_type = port_gid_table->gids[i].gid_type;
0322 }
0323 }
0324 }
0325 spin_unlock_bh(&iboe->lock);
0326
0327 if (!ret && hw_update) {
0328 ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
0329 if (ret) {
0330 spin_lock_bh(&iboe->lock);
0331 *context = NULL;
0332 free_gid_entry(&port_gid_table->gids[free]);
0333 spin_unlock_bh(&iboe->lock);
0334 }
0335 kfree(gids);
0336 }
0337
0338 return ret;
0339 }
0340
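/*
 * Drop a reference on a cached GID entry; when the last reference goes
 * away the slot is cleared and the updated table is written to hardware.
 */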
0341 static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
0342 {
0343 struct gid_cache_context *ctx = *context;
0344 struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
0345 struct mlx4_ib_iboe *iboe = &ibdev->iboe;
0346 struct mlx4_port_gid_table *port_gid_table;
0347 int ret = 0;
0348 int hw_update = 0;
0349 struct gid_entry *gids = NULL;
0350
0351 if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
0352 return -EINVAL;
0353
0354 if (attr->port_num > MLX4_MAX_PORTS)
0355 return -EINVAL;
0356
0357 port_gid_table = &iboe->gids[attr->port_num - 1];
0358 spin_lock_bh(&iboe->lock);
0359 if (ctx) {
0360 ctx->refcount--;
0361 if (!ctx->refcount) {
0362 unsigned int real_index = ctx->real_index;
0363
0364 free_gid_entry(&port_gid_table->gids[real_index]);
0365 hw_update = 1;
0366 }
0367 }
0368 if (!ret && hw_update) {
0369 int i;
0370
0371 gids = kmalloc_array(MLX4_MAX_PORT_GIDS, sizeof(*gids),
0372 GFP_ATOMIC);
0373 if (!gids) {
0374 ret = -ENOMEM;
0375 } else {
0376 for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
0377 memcpy(&gids[i].gid,
0378 &port_gid_table->gids[i].gid,
0379 sizeof(union ib_gid));
0380 gids[i].gid_type =
0381 port_gid_table->gids[i].gid_type;
0382 }
0383 }
0384 }
0385 spin_unlock_bh(&iboe->lock);
0386
0387 if (!ret && hw_update) {
0388 ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
0389 kfree(gids);
0390 }
0391 return ret;
0392 }
0393
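/*
 * Map an entry of the core GID cache onto the index used in the device's
 * own GID table for that port (always port 1 when bonded).  For IB ports
 * the core index is returned unchanged.
 */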
0394 int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
0395 const struct ib_gid_attr *attr)
0396 {
0397 struct mlx4_ib_iboe *iboe = &ibdev->iboe;
0398 struct gid_cache_context *ctx = NULL;
0399 struct mlx4_port_gid_table *port_gid_table;
0400 int real_index = -EINVAL;
0401 int i;
0402 unsigned long flags;
0403 u32 port_num = attr->port_num;
0404
0405 if (port_num > MLX4_MAX_PORTS)
0406 return -EINVAL;
0407
0408 if (mlx4_is_bonded(ibdev->dev))
0409 port_num = 1;
0410
0411 if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
0412 return attr->index;
0413
0414 spin_lock_irqsave(&iboe->lock, flags);
0415 port_gid_table = &iboe->gids[port_num - 1];
0416
0417 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
0418 if (!memcmp(&port_gid_table->gids[i].gid,
0419 &attr->gid, sizeof(attr->gid)) &&
0420 attr->gid_type == port_gid_table->gids[i].gid_type) {
0421 ctx = port_gid_table->gids[i].ctx;
0422 break;
0423 }
0424 if (ctx)
0425 real_index = ctx->real_index;
0426 spin_unlock_irqrestore(&iboe->lock, flags);
0427 return real_index;
0428 }
0429
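/*
 * Fill in ib_device_attr from firmware capabilities plus a NODE_INFO MAD
 * query; optional extended attributes (clock offset, inline receive, RSS
 * and TSO caps) are copied to user space only if the caller's response
 * buffer is large enough for them.
 */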
0430 static int mlx4_ib_query_device(struct ib_device *ibdev,
0431 struct ib_device_attr *props,
0432 struct ib_udata *uhw)
0433 {
0434 struct mlx4_ib_dev *dev = to_mdev(ibdev);
0435 struct ib_smp *in_mad = NULL;
0436 struct ib_smp *out_mad = NULL;
0437 int err;
0438 int have_ib_ports;
0439 struct mlx4_uverbs_ex_query_device cmd;
0440 struct mlx4_uverbs_ex_query_device_resp resp = {};
0441 struct mlx4_clock_params clock_params;
0442
0443 if (uhw->inlen) {
0444 if (uhw->inlen < sizeof(cmd))
0445 return -EINVAL;
0446
0447 err = ib_copy_from_udata(&cmd, uhw, sizeof(cmd));
0448 if (err)
0449 return err;
0450
0451 if (cmd.comp_mask)
0452 return -EINVAL;
0453
0454 if (cmd.reserved)
0455 return -EINVAL;
0456 }
0457
0458 resp.response_length = offsetof(typeof(resp), response_length) +
0459 sizeof(resp.response_length);
0460 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
0461 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
0462 err = -ENOMEM;
0463 if (!in_mad || !out_mad)
0464 goto out;
0465
0466 ib_init_query_mad(in_mad);
0467 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
0468
0469 err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
0470 1, NULL, NULL, in_mad, out_mad);
0471 if (err)
0472 goto out;
0473
0474 memset(props, 0, sizeof *props);
0475
0476 have_ib_ports = num_ib_ports(dev->dev);
0477
0478 props->fw_ver = dev->dev->caps.fw_ver;
0479 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
0480 IB_DEVICE_PORT_ACTIVE_EVENT |
0481 IB_DEVICE_SYS_IMAGE_GUID |
0482 IB_DEVICE_RC_RNR_NAK_GEN;
0483 props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK;
0484 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
0485 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
0486 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
0487 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
0488 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM && have_ib_ports)
0489 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
0490 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
0491 props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
0492 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
0493 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
0494 if (dev->dev->caps.max_gso_sz &&
0495 (dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
0496 (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
0497 props->kernel_cap_flags |= IBK_UD_TSO;
0498 if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
0499 props->kernel_cap_flags |= IBK_LOCAL_DMA_LKEY;
0500 if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
0501 (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
0502 (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
0503 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
0504 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
0505 props->device_cap_flags |= IB_DEVICE_XRC;
0506 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)
0507 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW;
0508 if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
0509 if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B)
0510 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
0511 else
0512 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
0513 }
0514 if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
0515 props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
0516
0517 props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
0518
0519 props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0520 0xffffff;
0521 props->vendor_part_id = dev->dev->persist->pdev->device;
0522 props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
0523 memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
0524
0525 props->max_mr_size = ~0ull;
0526 props->page_size_cap = dev->dev->caps.page_size_cap;
0527 props->max_qp = dev->dev->quotas.qp;
0528 props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
0529 props->max_send_sge =
0530 min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg);
0531 props->max_recv_sge =
0532 min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg);
0533 props->max_sge_rd = MLX4_MAX_SGE_RD;
0534 props->max_cq = dev->dev->quotas.cq;
0535 props->max_cqe = dev->dev->caps.max_cqes;
0536 props->max_mr = dev->dev->quotas.mpt;
0537 props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
0538 props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma;
0539 props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
0540 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
0541 props->max_srq = dev->dev->quotas.srq;
0542 props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1;
0543 props->max_srq_sge = dev->dev->caps.max_srq_sge;
0544 props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
0545 props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
0546 props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
0547 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
0548 props->masked_atomic_cap = props->atomic_cap;
0549 props->max_pkeys = dev->dev->caps.pkey_table_len[1];
0550 props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
0551 props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
0552 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
0553 props->max_mcast_grp;
0554 props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
0555 props->timestamp_mask = 0xFFFFFFFFFFFFULL;
0556 props->max_ah = INT_MAX;
0557
0558 if (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET ||
0559 mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET) {
0560 if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) {
0561 props->rss_caps.max_rwq_indirection_tables =
0562 props->max_qp;
0563 props->rss_caps.max_rwq_indirection_table_size =
0564 dev->dev->caps.max_rss_tbl_sz;
0565 props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
0566 props->max_wq_type_rq = props->max_qp;
0567 }
0568
0569 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)
0570 props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
0571 }
0572
0573 props->cq_caps.max_cq_moderation_count = MLX4_MAX_CQ_COUNT;
0574 props->cq_caps.max_cq_moderation_period = MLX4_MAX_CQ_PERIOD;
0575
0576 if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
0577 resp.response_length += sizeof(resp.hca_core_clock_offset);
0578 if (!mlx4_get_internal_clock_params(dev->dev, &clock_params)) {
0579 resp.comp_mask |= MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET;
0580 resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
0581 }
0582 }
0583
0584 if (uhw->outlen >= resp.response_length +
0585 sizeof(resp.max_inl_recv_sz)) {
0586 resp.response_length += sizeof(resp.max_inl_recv_sz);
0587 resp.max_inl_recv_sz = dev->dev->caps.max_rq_sg *
0588 sizeof(struct mlx4_wqe_data_seg);
0589 }
0590
0591 if (offsetofend(typeof(resp), rss_caps) <= uhw->outlen) {
0592 if (props->rss_caps.supported_qpts) {
0593 resp.rss_caps.rx_hash_function =
0594 MLX4_IB_RX_HASH_FUNC_TOEPLITZ;
0595
0596 resp.rss_caps.rx_hash_fields_mask =
0597 MLX4_IB_RX_HASH_SRC_IPV4 |
0598 MLX4_IB_RX_HASH_DST_IPV4 |
0599 MLX4_IB_RX_HASH_SRC_IPV6 |
0600 MLX4_IB_RX_HASH_DST_IPV6 |
0601 MLX4_IB_RX_HASH_SRC_PORT_TCP |
0602 MLX4_IB_RX_HASH_DST_PORT_TCP |
0603 MLX4_IB_RX_HASH_SRC_PORT_UDP |
0604 MLX4_IB_RX_HASH_DST_PORT_UDP;
0605
0606 if (dev->dev->caps.tunnel_offload_mode ==
0607 MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
0608 resp.rss_caps.rx_hash_fields_mask |=
0609 MLX4_IB_RX_HASH_INNER;
0610 }
0611 resp.response_length = offsetof(typeof(resp), rss_caps) +
0612 sizeof(resp.rss_caps);
0613 }
0614
0615 if (offsetofend(typeof(resp), tso_caps) <= uhw->outlen) {
0616 if (dev->dev->caps.max_gso_sz &&
0617 ((mlx4_ib_port_link_layer(ibdev, 1) ==
0618 IB_LINK_LAYER_ETHERNET) ||
0619 (mlx4_ib_port_link_layer(ibdev, 2) ==
0620 IB_LINK_LAYER_ETHERNET))) {
0621 resp.tso_caps.max_tso = dev->dev->caps.max_gso_sz;
0622 resp.tso_caps.supported_qpts |=
0623 1 << IB_QPT_RAW_PACKET;
0624 }
0625 resp.response_length = offsetof(typeof(resp), tso_caps) +
0626 sizeof(resp.tso_caps);
0627 }
0628
0629 if (uhw->outlen) {
0630 err = ib_copy_to_udata(uhw, &resp, resp.response_length);
0631 if (err)
0632 goto out;
0633 }
0634 out:
0635 kfree(in_mad);
0636 kfree(out_mad);
0637
0638 return err;
0639 }
0640
0641 static enum rdma_link_layer
0642 mlx4_ib_port_link_layer(struct ib_device *device, u32 port_num)
0643 {
0644 struct mlx4_dev *dev = to_mdev(device)->dev;
0645
0646 return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
0647 IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
0648 }
0649
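/*
 * Query an InfiniBand link's port attributes through PORT_INFO MADs,
 * including the extended-speed handling needed to report FDR/EDR and
 * FDR-10 correctly.
 */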
0650 static int ib_link_query_port(struct ib_device *ibdev, u32 port,
0651 struct ib_port_attr *props, int netw_view)
0652 {
0653 struct ib_smp *in_mad = NULL;
0654 struct ib_smp *out_mad = NULL;
0655 int ext_active_speed;
0656 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
0657 int err = -ENOMEM;
0658
0659 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
0660 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
0661 if (!in_mad || !out_mad)
0662 goto out;
0663
0664 ib_init_query_mad(in_mad);
0665 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
0666 in_mad->attr_mod = cpu_to_be32(port);
0667
0668 if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
0669 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
0670
0671 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
0672 in_mad, out_mad);
0673 if (err)
0674 goto out;
0675
0676
0677 props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
0678 props->lmc = out_mad->data[34] & 0x7;
0679 props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
0680 props->sm_sl = out_mad->data[36] & 0xf;
0681 props->state = out_mad->data[32] & 0xf;
0682 props->phys_state = out_mad->data[33] >> 4;
0683 props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
0684 if (netw_view)
0685 props->gid_tbl_len = out_mad->data[50];
0686 else
0687 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
0688 props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
0689 props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
0690 props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
0691 props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
0692 props->active_width = out_mad->data[31] & 0xf;
0693 props->active_speed = out_mad->data[35] >> 4;
0694 props->max_mtu = out_mad->data[41] & 0xf;
0695 props->active_mtu = out_mad->data[36] >> 4;
0696 props->subnet_timeout = out_mad->data[51] & 0x1f;
0697 props->max_vl_num = out_mad->data[37] >> 4;
0698 props->init_type_reply = out_mad->data[41] >> 4;
0699
0700
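/* Extended link speeds (FDR, EDR) are reported in a separate field. */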
0701 if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
0702 ext_active_speed = out_mad->data[62] >> 4;
0703
0704 switch (ext_active_speed) {
0705 case 1:
0706 props->active_speed = IB_SPEED_FDR;
0707 break;
0708 case 2:
0709 props->active_speed = IB_SPEED_EDR;
0710 break;
0711 }
0712 }
0713
0714
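/* A speed reported as QDR may really be FDR-10; check ExtendedPortInfo. */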
0715 if (props->active_speed == IB_SPEED_QDR) {
0716 ib_init_query_mad(in_mad);
0717 in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
0718 in_mad->attr_mod = cpu_to_be32(port);
0719
0720 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
0721 NULL, NULL, in_mad, out_mad);
0722 if (err)
0723 goto out;
0724
0725
0726 if (out_mad->data[15] & 0x1)
0727 props->active_speed = IB_SPEED_FDR10;
0728 }
0729
0730
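/* Avoid reporting a stale speed while the link is down. */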
0731 if (props->state == IB_PORT_DOWN)
0732 props->active_speed = IB_SPEED_SDR;
0733
0734 out:
0735 kfree(in_mad);
0736 kfree(out_mad);
0737 return err;
0738 }
0739
0740 static u8 state_to_phys_state(enum ib_port_state state)
0741 {
0742 return state == IB_PORT_ACTIVE ?
0743 IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
0744 }
0745
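/*
 * Query port attributes for an Ethernet (RoCE) link: width and speed come
 * from QUERY_PORT, while state and MTU are derived from the underlying
 * net_device (or the bond master when the ports are bonded).
 */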
0746 static int eth_link_query_port(struct ib_device *ibdev, u32 port,
0747 struct ib_port_attr *props)
0748 {
0749
0750 struct mlx4_ib_dev *mdev = to_mdev(ibdev);
0751 struct mlx4_ib_iboe *iboe = &mdev->iboe;
0752 struct net_device *ndev;
0753 enum ib_mtu tmp;
0754 struct mlx4_cmd_mailbox *mailbox;
0755 int err = 0;
0756 int is_bonded = mlx4_is_bonded(mdev->dev);
0757
0758 mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
0759 if (IS_ERR(mailbox))
0760 return PTR_ERR(mailbox);
0761
0762 err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
0763 MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
0764 MLX4_CMD_WRAPPED);
0765 if (err)
0766 goto out;
0767
0768 props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ||
0769 (((u8 *)mailbox->buf)[5] == 0x20 ) ?
0770 IB_WIDTH_4X : IB_WIDTH_1X;
0771 props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 ) ?
0772 IB_SPEED_FDR : IB_SPEED_QDR;
0773 props->port_cap_flags = IB_PORT_CM_SUP;
0774 props->ip_gids = true;
0775 props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
0776 props->max_msg_sz = mdev->dev->caps.max_msg_sz;
0777 if (mdev->dev->caps.pkey_table_len[port])
0778 props->pkey_tbl_len = 1;
0779 props->max_mtu = IB_MTU_4096;
0780 props->max_vl_num = 2;
0781 props->state = IB_PORT_DOWN;
0782 props->phys_state = state_to_phys_state(props->state);
0783 props->active_mtu = IB_MTU_256;
0784 spin_lock_bh(&iboe->lock);
0785 ndev = iboe->netdevs[port - 1];
0786 if (ndev && is_bonded) {
0787 rcu_read_lock();
0788 ndev = netdev_master_upper_dev_get_rcu(ndev);
0789 rcu_read_unlock();
0790 }
0791 if (!ndev)
0792 goto out_unlock;
0793
0794 tmp = iboe_get_mtu(ndev->mtu);
0795 props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
0796
0797 props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
0798 IB_PORT_ACTIVE : IB_PORT_DOWN;
0799 props->phys_state = state_to_phys_state(props->state);
0800 out_unlock:
0801 spin_unlock_bh(&iboe->lock);
0802 out:
0803 mlx4_free_cmd_mailbox(mdev->dev, mailbox);
0804 return err;
0805 }
0806
0807 int __mlx4_ib_query_port(struct ib_device *ibdev, u32 port,
0808 struct ib_port_attr *props, int netw_view)
0809 {
0810 int err;
0811
0812
0813
0814 err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
0815 ib_link_query_port(ibdev, port, props, netw_view) :
0816 eth_link_query_port(ibdev, port, props);
0817
0818 return err;
0819 }
0820
0821 static int mlx4_ib_query_port(struct ib_device *ibdev, u32 port,
0822 struct ib_port_attr *props)
0823 {
0824
0825 return __mlx4_ib_query_port(ibdev, port, props, 0);
0826 }
0827
0828 int __mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
0829 union ib_gid *gid, int netw_view)
0830 {
0831 struct ib_smp *in_mad = NULL;
0832 struct ib_smp *out_mad = NULL;
0833 int err = -ENOMEM;
0834 struct mlx4_ib_dev *dev = to_mdev(ibdev);
0835 int clear = 0;
0836 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
0837
0838 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
0839 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
0840 if (!in_mad || !out_mad)
0841 goto out;
0842
0843 ib_init_query_mad(in_mad);
0844 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
0845 in_mad->attr_mod = cpu_to_be32(port);
0846
0847 if (mlx4_is_mfunc(dev->dev) && netw_view)
0848 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
0849
0850 err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
0851 if (err)
0852 goto out;
0853
0854 memcpy(gid->raw, out_mad->data + 8, 8);
0855
0856 if (mlx4_is_mfunc(dev->dev) && !netw_view) {
0857 if (index) {
0858
0859 err = 0;
0860 clear = 1;
0861 goto out;
0862 }
0863 }
0864
0865 ib_init_query_mad(in_mad);
0866 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
0867 in_mad->attr_mod = cpu_to_be32(index / 8);
0868
0869 err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
0870 NULL, NULL, in_mad, out_mad);
0871 if (err)
0872 goto out;
0873
0874 memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
0875
0876 out:
0877 if (clear)
0878 memset(gid->raw + 8, 0, 8);
0879 kfree(in_mad);
0880 kfree(out_mad);
0881 return err;
0882 }
0883
0884 static int mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
0885 union ib_gid *gid)
0886 {
0887 if (rdma_protocol_ib(ibdev, port))
0888 return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
0889 return 0;
0890 }
0891
0892 static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u32 port,
0893 u64 *sl2vl_tbl)
0894 {
0895 union sl2vl_tbl_to_u64 sl2vl64;
0896 struct ib_smp *in_mad = NULL;
0897 struct ib_smp *out_mad = NULL;
0898 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
0899 int err = -ENOMEM;
0900 int jj;
0901
0902 if (mlx4_is_slave(to_mdev(ibdev)->dev)) {
0903 *sl2vl_tbl = 0;
0904 return 0;
0905 }
0906
0907 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
0908 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
0909 if (!in_mad || !out_mad)
0910 goto out;
0911
0912 ib_init_query_mad(in_mad);
0913 in_mad->attr_id = IB_SMP_ATTR_SL_TO_VL_TABLE;
0914 in_mad->attr_mod = 0;
0915
0916 if (mlx4_is_mfunc(to_mdev(ibdev)->dev))
0917 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
0918
0919 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
0920 in_mad, out_mad);
0921 if (err)
0922 goto out;
0923
0924 for (jj = 0; jj < 8; jj++)
0925 sl2vl64.sl8[jj] = ((struct ib_smp *)out_mad)->data[jj];
0926 *sl2vl_tbl = sl2vl64.sl64;
0927
0928 out:
0929 kfree(in_mad);
0930 kfree(out_mad);
0931 return err;
0932 }
0933
0934 static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev)
0935 {
0936 u64 sl2vl;
0937 int i;
0938 int err;
0939
0940 for (i = 1; i <= mdev->dev->caps.num_ports; i++) {
0941 if (mdev->dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
0942 continue;
0943 err = mlx4_ib_query_sl2vl(&mdev->ib_dev, i, &sl2vl);
0944 if (err) {
0945 pr_err("Unable to get default sl to vl mapping for port %d. Using all zeroes (%d)\n",
0946 i, err);
0947 sl2vl = 0;
0948 }
0949 atomic64_set(&mdev->sl2vl[i - 1], sl2vl);
0950 }
0951 }
0952
0953 int __mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
0954 u16 *pkey, int netw_view)
0955 {
0956 struct ib_smp *in_mad = NULL;
0957 struct ib_smp *out_mad = NULL;
0958 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
0959 int err = -ENOMEM;
0960
0961 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
0962 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
0963 if (!in_mad || !out_mad)
0964 goto out;
0965
0966 ib_init_query_mad(in_mad);
0967 in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
0968 in_mad->attr_mod = cpu_to_be32(index / 32);
0969
0970 if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
0971 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
0972
0973 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
0974 in_mad, out_mad);
0975 if (err)
0976 goto out;
0977
0978 *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
0979
0980 out:
0981 kfree(in_mad);
0982 kfree(out_mad);
0983 return err;
0984 }
0985
0986 static int mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
0987 u16 *pkey)
0988 {
0989 return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
0990 }
0991
0992 static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
0993 struct ib_device_modify *props)
0994 {
0995 struct mlx4_cmd_mailbox *mailbox;
0996 unsigned long flags;
0997
0998 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
0999 return -EOPNOTSUPP;
1000
1001 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
1002 return 0;
1003
1004 if (mlx4_is_slave(to_mdev(ibdev)->dev))
1005 return -EOPNOTSUPP;
1006
1007 spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
1008 memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
1009 spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
1010
1011
1012
1013
1014
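/*
 * Also pass the new node description to firmware; failure on this path is
 * not fatal, so errors from the mailbox command are ignored.
 */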
1015 mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
1016 if (IS_ERR(mailbox))
1017 return 0;
1018
1019 memcpy(mailbox->buf, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
1020 mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
1021 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
1022
1023 mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
1024
1025 return 0;
1026 }
1027
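/*
 * Program the IB port capability mask (and optionally reset the QKEY
 * violation counter) with a SET_PORT command, using the old or new
 * mailbox layout depending on MLX4_FLAG_OLD_PORT_CMDS.
 */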
1028 static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u32 port,
1029 int reset_qkey_viols, u32 cap_mask)
1030 {
1031 struct mlx4_cmd_mailbox *mailbox;
1032 int err;
1033
1034 mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
1035 if (IS_ERR(mailbox))
1036 return PTR_ERR(mailbox);
1037
1038 if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
1039 *(u8 *) mailbox->buf = !!reset_qkey_viols << 6;
1040 ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
1041 } else {
1042 ((u8 *) mailbox->buf)[3] = !!reset_qkey_viols;
1043 ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
1044 }
1045
1046 err = mlx4_cmd(dev->dev, mailbox->dma, port, MLX4_SET_PORT_IB_OPCODE,
1047 MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
1048 MLX4_CMD_WRAPPED);
1049
1050 mlx4_free_cmd_mailbox(dev->dev, mailbox);
1051 return err;
1052 }
1053
1054 static int mlx4_ib_modify_port(struct ib_device *ibdev, u32 port, int mask,
1055 struct ib_port_modify *props)
1056 {
1057 struct mlx4_ib_dev *mdev = to_mdev(ibdev);
1058 u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
1059 struct ib_port_attr attr;
1060 u32 cap_mask;
1061 int err;
1062
1063
1064
1065
1066
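/*
 * ib_modify_port() is also called for RoCE ports, where the qkey
 * violation counter and capability mask do not apply; just return 0.
 */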
1067 if (is_eth)
1068 return 0;
1069
1070 mutex_lock(&mdev->cap_mask_mutex);
1071
1072 err = ib_query_port(ibdev, port, &attr);
1073 if (err)
1074 goto out;
1075
1076 cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
1077 ~props->clr_port_cap_mask;
1078
1079 err = mlx4_ib_SET_PORT(mdev, port,
1080 !!(mask & IB_PORT_RESET_QKEY_CNTR),
1081 cap_mask);
1082
1083 out:
1084 mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
1085 return err;
1086 }
1087
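/*
 * Allocate a user context: reserve a UAR for the process and return the
 * device limits user space needs, in the v3 or current ABI layout
 * depending on the negotiated uverbs ABI version.
 */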
1088 static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx,
1089 struct ib_udata *udata)
1090 {
1091 struct ib_device *ibdev = uctx->device;
1092 struct mlx4_ib_dev *dev = to_mdev(ibdev);
1093 struct mlx4_ib_ucontext *context = to_mucontext(uctx);
1094 struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
1095 struct mlx4_ib_alloc_ucontext_resp resp;
1096 int err;
1097
1098 if (!dev->ib_active)
1099 return -EAGAIN;
1100
1101 if (ibdev->ops.uverbs_abi_ver ==
1102 MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
1103 resp_v3.qp_tab_size = dev->dev->caps.num_qps;
1104 resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size;
1105 resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
1106 } else {
1107 resp.dev_caps = dev->dev->caps.userspace_caps;
1108 resp.qp_tab_size = dev->dev->caps.num_qps;
1109 resp.bf_reg_size = dev->dev->caps.bf_reg_size;
1110 resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
1111 resp.cqe_size = dev->dev->caps.cqe_size;
1112 }
1113
1114 err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
1115 if (err)
1116 return err;
1117
1118 INIT_LIST_HEAD(&context->db_page_list);
1119 mutex_init(&context->db_page_mutex);
1120
1121 INIT_LIST_HEAD(&context->wqn_ranges_list);
1122 mutex_init(&context->wqn_ranges_mutex);
1123
1124 if (ibdev->ops.uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
1125 err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
1126 else
1127 err = ib_copy_to_udata(udata, &resp, sizeof(resp));
1128
1129 if (err) {
1130 mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
1131 return -EFAULT;
1132 }
1133
1134 return err;
1135 }
1136
1137 static void mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
1138 {
1139 struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
1140
1141 mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
1142 }
1143
1144 static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
1145 {
1146 }
1147
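/*
 * mmap offsets understood by this driver: 0 maps the context's UAR page,
 * 1 the blue-flame page that follows the UAR region, and 3 the internal
 * hardware clock page (when the firmware exposes one).
 */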
1148 static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
1149 {
1150 struct mlx4_ib_dev *dev = to_mdev(context->device);
1151
1152 switch (vma->vm_pgoff) {
1153 case 0:
1154 return rdma_user_mmap_io(context, vma,
1155 to_mucontext(context)->uar.pfn,
1156 PAGE_SIZE,
1157 pgprot_noncached(vma->vm_page_prot),
1158 NULL);
1159
1160 case 1:
1161 if (dev->dev->caps.bf_reg_size == 0)
1162 return -EINVAL;
1163 return rdma_user_mmap_io(
1164 context, vma,
1165 to_mucontext(context)->uar.pfn +
1166 dev->dev->caps.num_uars,
1167 PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot),
1168 NULL);
1169
1170 case 3: {
1171 struct mlx4_clock_params params;
1172 int ret;
1173
1174 ret = mlx4_get_internal_clock_params(dev->dev, &params);
1175 if (ret)
1176 return ret;
1177
1178 return rdma_user_mmap_io(
1179 context, vma,
1180 (pci_resource_start(dev->dev->persist->pdev,
1181 params.bar) +
1182 params.offset) >>
1183 PAGE_SHIFT,
1184 PAGE_SIZE, pgprot_noncached(vma->vm_page_prot),
1185 NULL);
1186 }
1187
1188 default:
1189 return -EINVAL;
1190 }
1191 }
1192
1193 static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
1194 {
1195 struct mlx4_ib_pd *pd = to_mpd(ibpd);
1196 struct ib_device *ibdev = ibpd->device;
1197 int err;
1198
1199 err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
1200 if (err)
1201 return err;
1202
1203 if (udata && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
1204 mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
1205 return -EFAULT;
1206 }
1207 return 0;
1208 }
1209
1210 static int mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
1211 {
1212 mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
1213 return 0;
1214 }
1215
1216 static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
1217 {
1218 struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
1219 struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
1220 struct ib_cq_init_attr cq_attr = {};
1221 int err;
1222
1223 if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
1224 return -EOPNOTSUPP;
1225
1226 err = mlx4_xrcd_alloc(dev->dev, &xrcd->xrcdn);
1227 if (err)
1228 return err;
1229
1230 xrcd->pd = ib_alloc_pd(ibxrcd->device, 0);
1231 if (IS_ERR(xrcd->pd)) {
1232 err = PTR_ERR(xrcd->pd);
1233 goto err2;
1234 }
1235
1236 cq_attr.cqe = 1;
1237 xrcd->cq = ib_create_cq(ibxrcd->device, NULL, NULL, xrcd, &cq_attr);
1238 if (IS_ERR(xrcd->cq)) {
1239 err = PTR_ERR(xrcd->cq);
1240 goto err3;
1241 }
1242
1243 return 0;
1244
1245 err3:
1246 ib_dealloc_pd(xrcd->pd);
1247 err2:
1248 mlx4_xrcd_free(dev->dev, xrcd->xrcdn);
1249 return err;
1250 }
1251
1252 static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
1253 {
1254 ib_destroy_cq(to_mxrcd(xrcd)->cq);
1255 ib_dealloc_pd(to_mxrcd(xrcd)->pd);
1256 mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
1257 return 0;
1258 }
1259
1260 static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
1261 {
1262 struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1263 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1264 struct mlx4_ib_gid_entry *ge;
1265
1266 ge = kzalloc(sizeof *ge, GFP_KERNEL);
1267 if (!ge)
1268 return -ENOMEM;
1269
1270 ge->gid = *gid;
1271 if (mlx4_ib_add_mc(mdev, mqp, gid)) {
1272 ge->port = mqp->port;
1273 ge->added = 1;
1274 }
1275
1276 mutex_lock(&mqp->mutex);
1277 list_add_tail(&ge->list, &mqp->gid_list);
1278 mutex_unlock(&mqp->mutex);
1279
1280 return 0;
1281 }
1282
1283 static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev,
1284 struct mlx4_ib_counters *ctr_table)
1285 {
1286 struct counter_index *counter, *tmp_count;
1287
1288 mutex_lock(&ctr_table->mutex);
1289 list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list,
1290 list) {
1291 if (counter->allocated)
1292 mlx4_counter_free(ibdev->dev, counter->index);
1293 list_del(&counter->list);
1294 kfree(counter);
1295 }
1296 mutex_unlock(&ctr_table->mutex);
1297 }
1298
1299 int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
1300 union ib_gid *gid)
1301 {
1302 struct net_device *ndev;
1303 int ret = 0;
1304
1305 if (!mqp->port)
1306 return 0;
1307
1308 spin_lock_bh(&mdev->iboe.lock);
1309 ndev = mdev->iboe.netdevs[mqp->port - 1];
1310 if (ndev)
1311 dev_hold(ndev);
1312 spin_unlock_bh(&mdev->iboe.lock);
1313
1314 if (ndev) {
1315 ret = 1;
1316 dev_put(ndev);
1317 }
1318
1319 return ret;
1320 }
1321
1322 struct mlx4_ib_steering {
1323 struct list_head list;
1324 struct mlx4_flow_reg_id reg_id;
1325 union ib_gid gid;
1326 };
1327
1328 #define LAST_ETH_FIELD vlan_tag
1329 #define LAST_IB_FIELD sl
1330 #define LAST_IPV4_FIELD dst_ip
1331 #define LAST_TCP_UDP_FIELD src_port
1332
1333
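/*
 * True when the user supplied a mask bit in any field beyond the last one
 * this driver knows how to program for the given spec type.
 */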
1334 #define FIELDS_NOT_SUPPORTED(filter, field)\
1335 memchr_inv((void *)&filter.field +\
1336 sizeof(filter.field), 0,\
1337 sizeof(filter) -\
1338 offsetof(typeof(filter), field) -\
1339 sizeof(filter.field))
1340
1341 static int parse_flow_attr(struct mlx4_dev *dev,
1342 u32 qp_num,
1343 union ib_flow_spec *ib_spec,
1344 struct _rule_hw *mlx4_spec)
1345 {
1346 enum mlx4_net_trans_rule_id type;
1347
1348 switch (ib_spec->type) {
1349 case IB_FLOW_SPEC_ETH:
1350 if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
1351 return -ENOTSUPP;
1352
1353 type = MLX4_NET_TRANS_RULE_ID_ETH;
1354 memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
1355 ETH_ALEN);
1356 memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
1357 ETH_ALEN);
1358 mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
1359 mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
1360 break;
1361 case IB_FLOW_SPEC_IB:
1362 if (FIELDS_NOT_SUPPORTED(ib_spec->ib.mask, LAST_IB_FIELD))
1363 return -ENOTSUPP;
1364
1365 type = MLX4_NET_TRANS_RULE_ID_IB;
1366 mlx4_spec->ib.l3_qpn =
1367 cpu_to_be32(qp_num);
1368 mlx4_spec->ib.qpn_mask =
1369 cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
1370 break;
1371
1372
1373 case IB_FLOW_SPEC_IPV4:
1374 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
1375 return -ENOTSUPP;
1376
1377 type = MLX4_NET_TRANS_RULE_ID_IPV4;
1378 mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
1379 mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
1380 mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
1381 mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
1382 break;
1383
1384 case IB_FLOW_SPEC_TCP:
1385 case IB_FLOW_SPEC_UDP:
1386 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD))
1387 return -ENOTSUPP;
1388
1389 type = ib_spec->type == IB_FLOW_SPEC_TCP ?
1390 MLX4_NET_TRANS_RULE_ID_TCP :
1391 MLX4_NET_TRANS_RULE_ID_UDP;
1392 mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
1393 mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port;
1394 mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
1395 mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port;
1396 break;
1397
1398 default:
1399 return -EINVAL;
1400 }
1401 if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 ||
1402 mlx4_hw_rule_sz(dev, type) < 0)
1403 return -EINVAL;
1404 mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type));
1405 mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2;
1406 return mlx4_hw_rule_sz(dev, type);
1407 }
1408
1409 struct default_rules {
1410 __u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
1411 __u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
1412 __u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
1413 __u8 link_layer;
1414 };
1415 static const struct default_rules default_table[] = {
1416 {
1417 .mandatory_fields = {IB_FLOW_SPEC_IPV4},
1418 .mandatory_not_fields = {IB_FLOW_SPEC_ETH},
1419 .rules_create_list = {IB_FLOW_SPEC_IB},
1420 .link_layer = IB_LINK_LAYER_INFINIBAND
1421 }
1422 };
1423
1424 static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
1425 struct ib_flow_attr *flow_attr)
1426 {
1427 int i, j, k;
1428 void *ib_flow;
1429 const struct default_rules *pdefault_rules = default_table;
1430 u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
1431
1432 for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) {
1433 __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
1434 memset(&field_types, 0, sizeof(field_types));
1435
1436 if (link_layer != pdefault_rules->link_layer)
1437 continue;
1438
1439 ib_flow = flow_attr + 1;
1440
1441 for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
1442 j < flow_attr->num_of_specs; k++) {
1443 union ib_flow_spec *current_flow =
1444 (union ib_flow_spec *)ib_flow;
1445
1446
1447 if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
1448 (pdefault_rules->mandatory_fields[k] &
1449 IB_FLOW_SPEC_LAYER_MASK)) &&
1450 (current_flow->type !=
1451 pdefault_rules->mandatory_fields[k]))
1452 goto out;
1453
1454
1455 if (current_flow->type ==
1456 pdefault_rules->mandatory_fields[k]) {
1457 j++;
1458 ib_flow +=
1459 ((union ib_flow_spec *)ib_flow)->size;
1460 }
1461 }
1462
1463 ib_flow = flow_attr + 1;
1464 for (j = 0; j < flow_attr->num_of_specs;
1465 j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
1466 for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
1467
1468 if (((union ib_flow_spec *)ib_flow)->type ==
1469 pdefault_rules->mandatory_not_fields[k])
1470 goto out;
1471
1472 return i;
1473 }
1474 out:
1475 return -1;
1476 }
1477
1478 static int __mlx4_ib_create_default_rules(
1479 struct mlx4_ib_dev *mdev,
1480 struct ib_qp *qp,
1481 const struct default_rules *pdefault_rules,
1482 struct _rule_hw *mlx4_spec) {
1483 int size = 0;
1484 int i;
1485
1486 for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
1487 union ib_flow_spec ib_spec = {};
1488 int ret;
1489
1490 switch (pdefault_rules->rules_create_list[i]) {
1491 case 0:
1492
1493 continue;
1494 case IB_FLOW_SPEC_IB:
1495 ib_spec.type = IB_FLOW_SPEC_IB;
1496 ib_spec.size = sizeof(struct ib_flow_spec_ib);
1497
1498 break;
1499 default:
1500
1501 return -EINVAL;
1502 }
1503
1504 ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
1505 mlx4_spec);
1506 if (ret < 0) {
1507 pr_info("invalid parsing\n");
1508 return -EINVAL;
1509 }
1510
1511 mlx4_spec = (void *)mlx4_spec + ret;
1512 size += ret;
1513 }
1514 return size;
1515 }
1516
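/*
 * Translate an ib_flow_attr into a firmware steering rule: build the
 * control segment and one hardware spec per user spec (plus any default
 * rules) in a command mailbox, then attach it with
 * QP_FLOW_STEERING_ATTACH and return the registration id.
 */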
1517 static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
1518 int domain,
1519 enum mlx4_net_trans_promisc_mode flow_type,
1520 u64 *reg_id)
1521 {
1522 int ret, i;
1523 int size = 0;
1524 void *ib_flow;
1525 struct mlx4_ib_dev *mdev = to_mdev(qp->device);
1526 struct mlx4_cmd_mailbox *mailbox;
1527 struct mlx4_net_trans_rule_hw_ctrl *ctrl;
1528 int default_flow;
1529
1530 if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
1531 pr_err("Invalid priority value %d\n", flow_attr->priority);
1532 return -EINVAL;
1533 }
1534
1535 if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
1536 return -EINVAL;
1537
1538 mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
1539 if (IS_ERR(mailbox))
1540 return PTR_ERR(mailbox);
1541 ctrl = mailbox->buf;
1542
1543 ctrl->prio = cpu_to_be16(domain | flow_attr->priority);
1544 ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
1545 ctrl->port = flow_attr->port;
1546 ctrl->qpn = cpu_to_be32(qp->qp_num);
1547
1548 ib_flow = flow_attr + 1;
1549 size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
1550
1551 default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
1552 if (default_flow >= 0) {
1553 ret = __mlx4_ib_create_default_rules(
1554 mdev, qp, default_table + default_flow,
1555 mailbox->buf + size);
1556 if (ret < 0) {
1557 mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1558 return -EINVAL;
1559 }
1560 size += ret;
1561 }
1562 for (i = 0; i < flow_attr->num_of_specs; i++) {
1563 ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
1564 mailbox->buf + size);
1565 if (ret < 0) {
1566 mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1567 return -EINVAL;
1568 }
1569 ib_flow += ((union ib_flow_spec *) ib_flow)->size;
1570 size += ret;
1571 }
1572
1573 if (mlx4_is_master(mdev->dev) && flow_type == MLX4_FS_REGULAR &&
1574 flow_attr->num_of_specs == 1) {
1575 struct _rule_hw *rule_header = (struct _rule_hw *)(ctrl + 1);
1576 enum ib_flow_spec_type header_spec =
1577 ((union ib_flow_spec *)(flow_attr + 1))->type;
1578
1579 if (header_spec == IB_FLOW_SPEC_ETH)
1580 mlx4_handle_eth_header_mcast_prio(ctrl, rule_header);
1581 }
1582
1583 ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
1584 MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
1585 MLX4_CMD_NATIVE);
1586 if (ret == -ENOMEM)
1587 pr_err("mcg table is full. Fail to register network rule.\n");
1588 else if (ret == -ENXIO)
1589 pr_err("Device managed flow steering is disabled. Fail to register network rule.\n");
1590 else if (ret)
1591 pr_err("Invalid argument. Fail to register network rule.\n");
1592
1593 mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1594 return ret;
1595 }
1596
1597 static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
1598 {
1599 int err;
1600 err = mlx4_cmd(dev, reg_id, 0, 0,
1601 MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
1602 MLX4_CMD_NATIVE);
1603 if (err)
1604 pr_err("Fail to detach network rule. registration id = 0x%llx\n",
1605 reg_id);
1606 return err;
1607 }
1608
1609 static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
1610 u64 *reg_id)
1611 {
1612 void *ib_flow;
1613 union ib_flow_spec *ib_spec;
1614 struct mlx4_dev *dev = to_mdev(qp->device)->dev;
1615 int err = 0;
1616
1617 if (dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN ||
1618 dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC)
1619 return 0;
1620
1621 ib_flow = flow_attr + 1;
1622 ib_spec = (union ib_flow_spec *)ib_flow;
1623
1624 if (ib_spec->type != IB_FLOW_SPEC_ETH || flow_attr->num_of_specs != 1)
1625 return 0;
1626
1627 err = mlx4_tunnel_steer_add(to_mdev(qp->device)->dev, ib_spec->eth.val.dst_mac,
1628 flow_attr->port, qp->qp_num,
1629 MLX4_DOMAIN_UVERBS | (flow_attr->priority & 0xff),
1630 reg_id);
1631 return err;
1632 }
1633
1634 static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
1635 struct ib_flow_attr *flow_attr,
1636 enum mlx4_net_trans_promisc_mode *type)
1637 {
1638 int err = 0;
1639
1640 if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) ||
1641 (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) ||
1642 (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) {
1643 return -EOPNOTSUPP;
1644 }
1645
1646 if (flow_attr->num_of_specs == 0) {
1647 type[0] = MLX4_FS_MC_SNIFFER;
1648 type[1] = MLX4_FS_UC_SNIFFER;
1649 } else {
1650 union ib_flow_spec *ib_spec;
1651
1652 ib_spec = (union ib_flow_spec *)(flow_attr + 1);
1653 if (ib_spec->type != IB_FLOW_SPEC_ETH)
1654 return -EINVAL;
1655
1656
1657 if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) {
1658 type[0] = MLX4_FS_MC_SNIFFER;
1659 type[1] = MLX4_FS_UC_SNIFFER;
1660 } else {
1661 u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01,
1662 ib_spec->eth.mask.dst_mac[1],
1663 ib_spec->eth.mask.dst_mac[2],
1664 ib_spec->eth.mask.dst_mac[3],
1665 ib_spec->eth.mask.dst_mac[4],
1666 ib_spec->eth.mask.dst_mac[5]};
1667
1668
1669
1670
1671 if (!is_zero_ether_addr(&mac[0]))
1672 return -EINVAL;
1673
1674 if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac))
1675 type[0] = MLX4_FS_MC_SNIFFER;
1676 else
1677 type[0] = MLX4_FS_UC_SNIFFER;
1678 }
1679 }
1680
1681 return err;
1682 }
1683
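/*
 * uverbs entry point for flow creation.  Normal rules map to
 * MLX4_FS_REGULAR (or the sniffer types for don't-trap rules); default
 * and sniffer attributes map to the corresponding promiscuous modes.
 * When the ports are bonded a mirror rule is installed on port 2 as well.
 */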
1684 static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
1685 struct ib_flow_attr *flow_attr,
1686 struct ib_udata *udata)
1687 {
1688 int err = 0, i = 0, j = 0;
1689 struct mlx4_ib_flow *mflow;
1690 enum mlx4_net_trans_promisc_mode type[2];
1691 struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
1692 int is_bonded = mlx4_is_bonded(dev);
1693
1694 if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)
1695 return ERR_PTR(-EOPNOTSUPP);
1696
1697 if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
1698 (flow_attr->type != IB_FLOW_ATTR_NORMAL))
1699 return ERR_PTR(-EOPNOTSUPP);
1700
1701 if (udata &&
1702 udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
1703 return ERR_PTR(-EOPNOTSUPP);
1704
1705 memset(type, 0, sizeof(type));
1706
1707 mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
1708 if (!mflow) {
1709 err = -ENOMEM;
1710 goto err_free;
1711 }
1712
1713 switch (flow_attr->type) {
1714 case IB_FLOW_ATTR_NORMAL:
1715
1716
1717
1718
1719 if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) {
1720 err = mlx4_ib_add_dont_trap_rule(dev,
1721 flow_attr,
1722 type);
1723 if (err)
1724 goto err_free;
1725 } else {
1726 type[0] = MLX4_FS_REGULAR;
1727 }
1728 break;
1729
1730 case IB_FLOW_ATTR_ALL_DEFAULT:
1731 type[0] = MLX4_FS_ALL_DEFAULT;
1732 break;
1733
1734 case IB_FLOW_ATTR_MC_DEFAULT:
1735 type[0] = MLX4_FS_MC_DEFAULT;
1736 break;
1737
1738 case IB_FLOW_ATTR_SNIFFER:
1739 type[0] = MLX4_FS_MIRROR_RX_PORT;
1740 type[1] = MLX4_FS_MIRROR_SX_PORT;
1741 break;
1742
1743 default:
1744 err = -EINVAL;
1745 goto err_free;
1746 }
1747
1748 while (i < ARRAY_SIZE(type) && type[i]) {
1749 err = __mlx4_ib_create_flow(qp, flow_attr, MLX4_DOMAIN_UVERBS,
1750 type[i], &mflow->reg_id[i].id);
1751 if (err)
1752 goto err_create_flow;
1753 if (is_bonded) {
1754
1755
1756
1757 flow_attr->port = 2;
1758 err = __mlx4_ib_create_flow(qp, flow_attr,
1759 MLX4_DOMAIN_UVERBS, type[j],
1760 &mflow->reg_id[j].mirror);
1761 flow_attr->port = 1;
1762 if (err)
1763 goto err_create_flow;
1764 j++;
1765 }
1766
1767 i++;
1768 }
1769
1770 if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
1771 err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
1772 &mflow->reg_id[i].id);
1773 if (err)
1774 goto err_create_flow;
1775
1776 if (is_bonded) {
1777 flow_attr->port = 2;
1778 err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
1779 &mflow->reg_id[j].mirror);
1780 flow_attr->port = 1;
1781 if (err)
1782 goto err_create_flow;
1783 j++;
1784 }
1785
1786 i++;
1787 }
1788
1789 return &mflow->ibflow;
1790
1791 err_create_flow:
1792 while (i) {
1793 (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
1794 mflow->reg_id[i].id);
1795 i--;
1796 }
1797
1798 while (j) {
1799 (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
1800 mflow->reg_id[j].mirror);
1801 j--;
1802 }
1803 err_free:
1804 kfree(mflow);
1805 return ERR_PTR(err);
1806 }
1807
1808 static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
1809 {
1810 int err, ret = 0;
1811 int i = 0;
1812 struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
1813 struct mlx4_ib_flow *mflow = to_mflow(flow_id);
1814
1815 while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id) {
1816 err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id);
1817 if (err)
1818 ret = err;
1819 if (mflow->reg_id[i].mirror) {
1820 err = __mlx4_ib_destroy_flow(mdev->dev,
1821 mflow->reg_id[i].mirror);
1822 if (err)
1823 ret = err;
1824 }
1825 i++;
1826 }
1827
1828 kfree(mflow);
1829 return ret;
1830 }
1831
1832 static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1833 {
1834 int err;
1835 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1836 struct mlx4_dev *dev = mdev->dev;
1837 struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1838 struct mlx4_ib_steering *ib_steering = NULL;
1839 enum mlx4_protocol prot = MLX4_PROT_IB_IPV6;
1840 struct mlx4_flow_reg_id reg_id;
1841
1842 if (mdev->dev->caps.steering_mode ==
1843 MLX4_STEERING_MODE_DEVICE_MANAGED) {
1844 ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
1845 if (!ib_steering)
1846 return -ENOMEM;
1847 }
1848
1849 err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
1850 !!(mqp->flags &
1851 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
1852 prot, &reg_id.id);
1853 if (err) {
1854 pr_err("multicast attach op failed, err %d\n", err);
1855 goto err_malloc;
1856 }
1857
1858 reg_id.mirror = 0;
1859 if (mlx4_is_bonded(dev)) {
1860 err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
1861 (mqp->port == 1) ? 2 : 1,
1862 !!(mqp->flags &
1863 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
1864 prot, &reg_id.mirror);
1865 if (err)
1866 goto err_add;
1867 }
1868
1869 err = add_gid_entry(ibqp, gid);
1870 if (err)
1871 goto err_add;
1872
1873 if (ib_steering) {
1874 memcpy(ib_steering->gid.raw, gid->raw, 16);
1875 ib_steering->reg_id = reg_id;
1876 mutex_lock(&mqp->mutex);
1877 list_add(&ib_steering->list, &mqp->steering_rules);
1878 mutex_unlock(&mqp->mutex);
1879 }
1880 return 0;
1881
1882 err_add:
1883 mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1884 prot, reg_id.id);
1885 if (reg_id.mirror)
1886 mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1887 prot, reg_id.mirror);
1888 err_malloc:
1889 kfree(ib_steering);
1890
1891 return err;
1892 }
1893
1894 static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
1895 {
1896 struct mlx4_ib_gid_entry *ge;
1897 struct mlx4_ib_gid_entry *tmp;
1898 struct mlx4_ib_gid_entry *ret = NULL;
1899
1900 list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
1901 if (!memcmp(raw, ge->gid.raw, 16)) {
1902 ret = ge;
1903 break;
1904 }
1905 }
1906
1907 return ret;
1908 }
1909
1910 static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1911 {
1912 int err;
1913 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1914 struct mlx4_dev *dev = mdev->dev;
1915 struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1916 struct net_device *ndev;
1917 struct mlx4_ib_gid_entry *ge;
1918 struct mlx4_flow_reg_id reg_id = {0, 0};
1919 enum mlx4_protocol prot = MLX4_PROT_IB_IPV6;
1920
1921 if (mdev->dev->caps.steering_mode ==
1922 MLX4_STEERING_MODE_DEVICE_MANAGED) {
1923 struct mlx4_ib_steering *ib_steering;
1924
1925 mutex_lock(&mqp->mutex);
1926 list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
1927 if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
1928 list_del(&ib_steering->list);
1929 break;
1930 }
1931 }
1932 mutex_unlock(&mqp->mutex);
1933 if (&ib_steering->list == &mqp->steering_rules) {
1934 pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
1935 return -EINVAL;
1936 }
1937 reg_id = ib_steering->reg_id;
1938 kfree(ib_steering);
1939 }
1940
1941 err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1942 prot, reg_id.id);
1943 if (err)
1944 return err;
1945
1946 if (mlx4_is_bonded(dev)) {
1947 err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1948 prot, reg_id.mirror);
1949 if (err)
1950 return err;
1951 }
1952
1953 mutex_lock(&mqp->mutex);
1954 ge = find_gid_entry(mqp, gid->raw);
1955 if (ge) {
1956 spin_lock_bh(&mdev->iboe.lock);
1957 ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
1958 if (ndev)
1959 dev_hold(ndev);
1960 spin_unlock_bh(&mdev->iboe.lock);
1961 if (ndev)
1962 dev_put(ndev);
1963 list_del(&ge->list);
1964 kfree(ge);
1965 } else
1966 pr_warn("could not find mgid entry\n");
1967
1968 mutex_unlock(&mqp->mutex);
1969
1970 return 0;
1971 }
1972
1973 static int init_node_data(struct mlx4_ib_dev *dev)
1974 {
1975 struct ib_smp *in_mad = NULL;
1976 struct ib_smp *out_mad = NULL;
1977 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
1978 int err = -ENOMEM;
1979
1980 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
1981 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
1982 if (!in_mad || !out_mad)
1983 goto out;
1984
1985 ib_init_query_mad(in_mad);
1986 in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
1987 if (mlx4_is_master(dev->dev))
1988 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
1989
1990 err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
1991 if (err)
1992 goto out;
1993
1994 memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX);
1995
1996 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
1997
1998 err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
1999 if (err)
2000 goto out;
2001
2002 dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
2003 memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
2004
2005 out:
2006 kfree(in_mad);
2007 kfree(out_mad);
2008 return err;
2009 }
2010
2011 static ssize_t hca_type_show(struct device *device,
2012 struct device_attribute *attr, char *buf)
2013 {
2014 struct mlx4_ib_dev *dev =
2015 rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
2016
2017 return sysfs_emit(buf, "MT%d\n", dev->dev->persist->pdev->device);
2018 }
2019 static DEVICE_ATTR_RO(hca_type);
2020
2021 static ssize_t hw_rev_show(struct device *device,
2022 struct device_attribute *attr, char *buf)
2023 {
2024 struct mlx4_ib_dev *dev =
2025 rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
2026
2027 return sysfs_emit(buf, "%x\n", dev->dev->rev_id);
2028 }
2029 static DEVICE_ATTR_RO(hw_rev);
2030
2031 static ssize_t board_id_show(struct device *device,
2032 struct device_attribute *attr, char *buf)
2033 {
2034 struct mlx4_ib_dev *dev =
2035 rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
2036
2037 return sysfs_emit(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id);
2038 }
2039 static DEVICE_ATTR_RO(board_id);
2040
2041 static struct attribute *mlx4_class_attributes[] = {
2042 &dev_attr_hw_rev.attr,
2043 &dev_attr_hca_type.attr,
2044 &dev_attr_board_id.attr,
2045 NULL
2046 };
2047
2048 static const struct attribute_group mlx4_attr_group = {
2049 .attrs = mlx4_class_attributes,
2050 };
2051
2052 struct diag_counter {
2053 const char *name;
2054 u32 offset;
2055 };
2056
2057 #define DIAG_COUNTER(_name, _offset) \
2058 { .name = #_name, .offset = _offset }
2059
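/*
 * Diagnostic counter tables: each entry pairs a counter name with the
 * offset handed to mlx4_query_diag_counters().  diag_basic is always
 * reported, diag_ext only when the device supports per-port
 * diagnostics, and diag_device_only is included in the device-wide
 * (port == 0) set only.
 */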
2060 static const struct diag_counter diag_basic[] = {
2061 DIAG_COUNTER(rq_num_lle, 0x00),
2062 DIAG_COUNTER(sq_num_lle, 0x04),
2063 DIAG_COUNTER(rq_num_lqpoe, 0x08),
2064 DIAG_COUNTER(sq_num_lqpoe, 0x0C),
2065 DIAG_COUNTER(rq_num_lpe, 0x18),
2066 DIAG_COUNTER(sq_num_lpe, 0x1C),
2067 DIAG_COUNTER(rq_num_wrfe, 0x20),
2068 DIAG_COUNTER(sq_num_wrfe, 0x24),
2069 DIAG_COUNTER(sq_num_mwbe, 0x2C),
2070 DIAG_COUNTER(sq_num_bre, 0x34),
2071 DIAG_COUNTER(sq_num_rire, 0x44),
2072 DIAG_COUNTER(rq_num_rire, 0x48),
2073 DIAG_COUNTER(sq_num_rae, 0x4C),
2074 DIAG_COUNTER(rq_num_rae, 0x50),
2075 DIAG_COUNTER(sq_num_roe, 0x54),
2076 DIAG_COUNTER(sq_num_tree, 0x5C),
2077 DIAG_COUNTER(sq_num_rree, 0x64),
2078 DIAG_COUNTER(rq_num_rnr, 0x68),
2079 DIAG_COUNTER(sq_num_rnr, 0x6C),
2080 DIAG_COUNTER(rq_num_oos, 0x100),
2081 DIAG_COUNTER(sq_num_oos, 0x104),
2082 };
2083
2084 static const struct diag_counter diag_ext[] = {
2085 DIAG_COUNTER(rq_num_dup, 0x130),
2086 DIAG_COUNTER(sq_num_to, 0x134),
2087 };
2088
2089 static const struct diag_counter diag_device_only[] = {
2090 DIAG_COUNTER(num_cqovf, 0x1A0),
2091 DIAG_COUNTER(rq_num_udsdprd, 0x118),
2092 };
2093
2094 static struct rdma_hw_stats *
2095 mlx4_ib_alloc_hw_device_stats(struct ib_device *ibdev)
2096 {
2097 struct mlx4_ib_dev *dev = to_mdev(ibdev);
2098 struct mlx4_ib_diag_counters *diag = dev->diag_counters;
2099
2100 if (!diag[0].descs)
2101 return NULL;
2102
2103 return rdma_alloc_hw_stats_struct(diag[0].descs, diag[0].num_counters,
2104 RDMA_HW_STATS_DEFAULT_LIFESPAN);
2105 }
2106
2107 static struct rdma_hw_stats *
2108 mlx4_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
2109 {
2110 struct mlx4_ib_dev *dev = to_mdev(ibdev);
2111 struct mlx4_ib_diag_counters *diag = dev->diag_counters;
2112
2113 if (!diag[1].descs)
2114 return NULL;
2115
2116 return rdma_alloc_hw_stats_struct(diag[1].descs, diag[1].num_counters,
2117 RDMA_HW_STATS_DEFAULT_LIFESPAN);
2118 }
2119
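/*
 * .get_hw_stats callback: port == 0 selects the device-wide counter
 * set (diag_counters[0]), any non-zero port the per-port set
 * (diag_counters[1]); the !!port index below picks between the two.
 */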
2120 static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
2121 struct rdma_hw_stats *stats,
2122 u32 port, int index)
2123 {
2124 struct mlx4_ib_dev *dev = to_mdev(ibdev);
2125 struct mlx4_ib_diag_counters *diag = dev->diag_counters;
2126 u32 hw_value[ARRAY_SIZE(diag_device_only) +
2127 ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {};
2128 int ret;
2129 int i;
2130
2131 ret = mlx4_query_diag_counters(dev->dev,
2132 MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS,
2133 diag[!!port].offset, hw_value,
2134 diag[!!port].num_counters, port);
2135
2136 if (ret)
2137 return ret;
2138
2139 for (i = 0; i < diag[!!port].num_counters; i++)
2140 stats->value[i] = hw_value[i];
2141
2142 return diag[!!port].num_counters;
2143 }
2144
2145 static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
2146 struct rdma_stat_desc **pdescs,
2147 u32 **offset, u32 *num, bool port)
2148 {
2149 u32 num_counters;
2150
2151 num_counters = ARRAY_SIZE(diag_basic);
2152
2153 if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT)
2154 num_counters += ARRAY_SIZE(diag_ext);
2155
2156 if (!port)
2157 num_counters += ARRAY_SIZE(diag_device_only);
2158
2159 *pdescs = kcalloc(num_counters, sizeof(struct rdma_stat_desc),
2160 GFP_KERNEL);
2161 if (!*pdescs)
2162 return -ENOMEM;
2163
2164 *offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL);
2165 if (!*offset)
2166 goto err;
2167
2168 *num = num_counters;
2169
2170 return 0;
2171
2172 err:
2173 kfree(*pdescs);
2174 return -ENOMEM;
2175 }
2176
2177 static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
2178 struct rdma_stat_desc *descs,
2179 u32 *offset, bool port)
2180 {
2181 int i;
2182 int j;
2183
2184 for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) {
2185 descs[i].name = diag_basic[i].name;
2186 offset[i] = diag_basic[i].offset;
2187 }
2188
2189 if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {
2190 for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) {
2191 descs[j].name = diag_ext[i].name;
2192 offset[j] = diag_ext[i].offset;
2193 }
2194 }
2195
2196 if (!port) {
2197 for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) {
2198 descs[j].name = diag_device_only[i].name;
2199 offset[j] = diag_device_only[i].offset;
2200 }
2201 }
2202 }
2203
2204 static const struct ib_device_ops mlx4_ib_hw_stats_ops = {
2205 .alloc_hw_device_stats = mlx4_ib_alloc_hw_device_stats,
2206 .alloc_hw_port_stats = mlx4_ib_alloc_hw_port_stats,
2207 .get_hw_stats = mlx4_ib_get_hw_stats,
2208 };
2209
2210 static const struct ib_device_ops mlx4_ib_hw_stats_ops1 = {
2211 .alloc_hw_device_stats = mlx4_ib_alloc_hw_device_stats,
2212 .get_hw_stats = mlx4_ib_get_hw_stats,
2213 };
2214
2215 static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
2216 {
2217 struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
2218 int i;
2219 int ret;
2220 bool per_port = !!(ibdev->dev->caps.flags2 &
2221 MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
2222
2223 if (mlx4_is_slave(ibdev->dev))
2224 return 0;
2225
2226 for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
2227 /* i == 0 builds the device-wide counter set, i == 1 the per-port
2228  * set.  Without per-port diagnostic support, register the stats
2229  * ops that lack the per-port callback and stop once the
2230  * device-wide set has been allocated. */
2231 if (i && !per_port) {
2232 ib_set_device_ops(&ibdev->ib_dev,
2233 &mlx4_ib_hw_stats_ops1);
2234
2235 return 0;
2236 }
2237
2238 ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].descs,
2239 &diag[i].offset,
2240 &diag[i].num_counters, i);
2241 if (ret)
2242 goto err_alloc;
2243
2244 mlx4_ib_fill_diag_counters(ibdev, diag[i].descs,
2245 diag[i].offset, i);
2246 }
2247
2248 ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_hw_stats_ops);
2249
2250 return 0;
2251
2252 err_alloc:
2253 if (i) {
2254 kfree(diag[i - 1].descs);
2255 kfree(diag[i - 1].offset);
2256 }
2257
2258 return ret;
2259 }
2260
2261 static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)
2262 {
2263 int i;
2264
2265 for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
2266 kfree(ibdev->diag_counters[i].offset);
2267 kfree(ibdev->diag_counters[i].descs);
2268 }
2269 }
2270
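/*
 * Keep the cached per-port MAC in sync with the netdev and, under
 * SR-IOV, update the proxy QP1 of that port as well: register the new
 * source MAC, switch the QP to the new index with MLX4_UPDATE_QP_SMAC,
 * and only then unregister whichever MAC is no longer referenced (the
 * old one on success, the new one if the QP update failed).
 */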
2271 #define MLX4_IB_INVALID_MAC ((u64)-1)
2272 static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
2273 struct net_device *dev,
2274 int port)
2275 {
2276 u64 new_smac = 0;
2277 u64 release_mac = MLX4_IB_INVALID_MAC;
2278 struct mlx4_ib_qp *qp;
2279
2280 new_smac = ether_addr_to_u64(dev->dev_addr);
2281 atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
2282
2283 /* updating the proxy QP1 MAC is only needed in multi-function (SR-IOV) mode */
2284 if (!mlx4_is_mfunc(ibdev->dev))
2285 return;
2286
2287 mutex_lock(&ibdev->qp1_proxy_lock[port - 1]);
2288 qp = ibdev->qp1_proxy[port - 1];
2289 if (qp) {
2290 int new_smac_index;
2291 u64 old_smac;
2292 struct mlx4_update_qp_params update_params;
2293
2294 mutex_lock(&qp->mutex);
2295 old_smac = qp->pri.smac;
2296 if (new_smac == old_smac)
2297 goto unlock;
2298
2299 new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac);
2300
2301 if (new_smac_index < 0)
2302 goto unlock;
2303
2304 update_params.smac_index = new_smac_index;
2305 if (mlx4_update_qp(ibdev->dev, qp->mqp.qpn, MLX4_UPDATE_QP_SMAC,
2306 &update_params)) {
2307 release_mac = new_smac;
2308 goto unlock;
2309 }
2310
2311 if (qp->pri.smac_port)
2312 release_mac = old_smac;
2313 qp->pri.smac = new_smac;
2314 qp->pri.smac_port = port;
2315 qp->pri.smac_index = new_smac_index;
2316 }
2317
2318 unlock:
2319 if (release_mac != MLX4_IB_INVALID_MAC)
2320 mlx4_unregister_mac(ibdev->dev, port, release_mac);
2321 if (qp)
2322 mutex_unlock(&qp->mutex);
2323 mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
2324 }
2325
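/*
 * Runs from the netdev notifier with RTNL held: refresh the cached
 * netdev of every IBoE port, turn netdev UP/DOWN into
 * IB_EVENT_PORT_ACTIVE/IB_EVENT_PORT_ERR only when the cached port
 * state really changed, and defer the QP1 MAC update until the iboe
 * lock has been dropped.
 */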
2326 static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
2327 struct net_device *dev,
2328 unsigned long event)
2329
2330 {
2331 struct mlx4_ib_iboe *iboe;
2332 int update_qps_port = -1;
2333 int port;
2334
2335 ASSERT_RTNL();
2336
2337 iboe = &ibdev->iboe;
2338
2339 spin_lock_bh(&iboe->lock);
2340 mlx4_foreach_ib_transport_port(port, ibdev->dev) {
2341
2342 iboe->netdevs[port - 1] =
2343 mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
2344
2345 if (dev == iboe->netdevs[port - 1] &&
2346 (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
2347 event == NETDEV_UP || event == NETDEV_CHANGE))
2348 update_qps_port = port;
2349
2350 if (dev == iboe->netdevs[port - 1] &&
2351 (event == NETDEV_UP || event == NETDEV_DOWN)) {
2352 enum ib_port_state port_state;
2353 struct ib_event ibev = { };
2354
2355 if (ib_get_cached_port_state(&ibdev->ib_dev, port,
2356 &port_state))
2357 continue;
2358
2359 if (event == NETDEV_UP &&
2360 (port_state != IB_PORT_ACTIVE ||
2361 iboe->last_port_state[port - 1] != IB_PORT_DOWN))
2362 continue;
2363 if (event == NETDEV_DOWN &&
2364 (port_state != IB_PORT_DOWN ||
2365 iboe->last_port_state[port - 1] != IB_PORT_ACTIVE))
2366 continue;
2367 iboe->last_port_state[port - 1] = port_state;
2368
2369 ibev.device = &ibdev->ib_dev;
2370 ibev.element.port_num = port;
2371 ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE :
2372 IB_EVENT_PORT_ERR;
2373 ib_dispatch_event(&ibev);
2374 }
2375
2376 }
2377 spin_unlock_bh(&iboe->lock);
2378
2379 if (update_qps_port > 0)
2380 mlx4_ib_update_qps(ibdev, dev, update_qps_port);
2381 }
2382
2383 static int mlx4_ib_netdev_event(struct notifier_block *this,
2384 unsigned long event, void *ptr)
2385 {
2386 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2387 struct mlx4_ib_dev *ibdev;
2388
2389 if (!net_eq(dev_net(dev), &init_net))
2390 return NOTIFY_DONE;
2391
2392 ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
2393 mlx4_ib_scan_netdevs(ibdev, dev, event);
2394
2395 return NOTIFY_DONE;
2396 }
2397
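/*
 * SR-IOV P_Key virtualization: the master keeps an identity
 * virt-to-phys P_Key mapping, other slaves get index 0 mapped to the
 * default entry and every other slot pointed at the last physical
 * entry, and the physical P_Key cache starts out with the
 * full-membership key (0xFFFF) at index 0 and zeroes elsewhere.
 */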
2398 static void init_pkeys(struct mlx4_ib_dev *ibdev)
2399 {
2400 int port;
2401 int slave;
2402 int i;
2403
2404 if (mlx4_is_master(ibdev->dev)) {
2405 for (slave = 0; slave <= ibdev->dev->persist->num_vfs;
2406 ++slave) {
2407 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
2408 for (i = 0;
2409 i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
2410 ++i) {
2411 ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
2412 /* the master keeps the identity virt2phys pkey mapping */
2413 (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
2414 ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
2415 mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
2416 ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
2417 }
2418 }
2419 }
2420
2421 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
2422 for (i = 0;
2423 i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
2424 ++i)
2425 ibdev->pkeys.phys_pkey_cache[port-1][i] =
2426 (i) ? 0 : 0xFFFF;
2427 }
2428 }
2429 }
2430
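/*
 * Ask the mlx4 core for dedicated completion EQs, port by port.
 * Vectors that cannot be assigned are recorded as -1 and
 * num_comp_vectors is trimmed to the count actually obtained, so the
 * IB core only ever advertises usable vectors.
 */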
2431 static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
2432 {
2433 int i, j, eq = 0, total_eqs = 0;
2434
2435 ibdev->eq_table = kcalloc(dev->caps.num_comp_vectors,
2436 sizeof(ibdev->eq_table[0]), GFP_KERNEL);
2437 if (!ibdev->eq_table)
2438 return;
2439
2440 for (i = 1; i <= dev->caps.num_ports; i++) {
2441 for (j = 0; j < mlx4_get_eqs_per_port(dev, i);
2442 j++, total_eqs++) {
2443 if (i > 1 && mlx4_is_eq_shared(dev, total_eqs))
2444 continue;
2445 ibdev->eq_table[eq] = total_eqs;
2446 if (!mlx4_assign_eq(dev, i,
2447 &ibdev->eq_table[eq]))
2448 eq++;
2449 else
2450 ibdev->eq_table[eq] = -1;
2451 }
2452 }
2453
2454 for (i = eq; i < dev->caps.num_comp_vectors;
2455 ibdev->eq_table[i++] = -1)
2456 ;
2457
2458 /* advertise only the number of EQs actually obtained */
2459 ibdev->ib_dev.num_comp_vectors = eq;
2460 }
2461
2462 static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
2463 {
2464 int i;
2465 int total_eqs = ibdev->ib_dev.num_comp_vectors;
2466
2467 /* no dedicated EQs were allocated */
2468 if (!ibdev->eq_table)
2469 return;
2470
2471 /* reset the advertised EQ number */
2472 ibdev->ib_dev.num_comp_vectors = 0;
2473
2474 for (i = 0; i < total_eqs; i++)
2475 mlx4_release_eq(dev, ibdev->eq_table[i]);
2476
2477 kfree(ibdev->eq_table);
2478 ibdev->eq_table = NULL;
2479 }
2480
2481 static int mlx4_port_immutable(struct ib_device *ibdev, u32 port_num,
2482 struct ib_port_immutable *immutable)
2483 {
2484 struct ib_port_attr attr;
2485 struct mlx4_ib_dev *mdev = to_mdev(ibdev);
2486 int err;
2487
2488 if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
2489 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
2490 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2491 } else {
2492 if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
2493 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
2494 if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
2495 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
2496 RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
2497 immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
2498 if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
2499 RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
2500 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2501 }
2502
2503 err = ib_query_port(ibdev, port_num, &attr);
2504 if (err)
2505 return err;
2506
2507 immutable->pkey_tbl_len = attr.pkey_tbl_len;
2508 immutable->gid_tbl_len = attr.gid_tbl_len;
2509
2510 return 0;
2511 }
2512
2513 static void get_fw_ver_str(struct ib_device *device, char *str)
2514 {
2515 struct mlx4_ib_dev *dev =
2516 container_of(device, struct mlx4_ib_dev, ib_dev);
2517 snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d",
2518 (int) (dev->dev->caps.fw_ver >> 32),
2519 (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
2520 (int) dev->dev->caps.fw_ver & 0xffff);
2521 }
2522
2523 static const struct ib_device_ops mlx4_ib_dev_ops = {
2524 .owner = THIS_MODULE,
2525 .driver_id = RDMA_DRIVER_MLX4,
2526 .uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION,
2527
2528 .add_gid = mlx4_ib_add_gid,
2529 .alloc_mr = mlx4_ib_alloc_mr,
2530 .alloc_pd = mlx4_ib_alloc_pd,
2531 .alloc_ucontext = mlx4_ib_alloc_ucontext,
2532 .attach_mcast = mlx4_ib_mcg_attach,
2533 .create_ah = mlx4_ib_create_ah,
2534 .create_cq = mlx4_ib_create_cq,
2535 .create_qp = mlx4_ib_create_qp,
2536 .create_srq = mlx4_ib_create_srq,
2537 .dealloc_pd = mlx4_ib_dealloc_pd,
2538 .dealloc_ucontext = mlx4_ib_dealloc_ucontext,
2539 .del_gid = mlx4_ib_del_gid,
2540 .dereg_mr = mlx4_ib_dereg_mr,
2541 .destroy_ah = mlx4_ib_destroy_ah,
2542 .destroy_cq = mlx4_ib_destroy_cq,
2543 .destroy_qp = mlx4_ib_destroy_qp,
2544 .destroy_srq = mlx4_ib_destroy_srq,
2545 .detach_mcast = mlx4_ib_mcg_detach,
2546 .device_group = &mlx4_attr_group,
2547 .disassociate_ucontext = mlx4_ib_disassociate_ucontext,
2548 .drain_rq = mlx4_ib_drain_rq,
2549 .drain_sq = mlx4_ib_drain_sq,
2550 .get_dev_fw_str = get_fw_ver_str,
2551 .get_dma_mr = mlx4_ib_get_dma_mr,
2552 .get_link_layer = mlx4_ib_port_link_layer,
2553 .get_netdev = mlx4_ib_get_netdev,
2554 .get_port_immutable = mlx4_port_immutable,
2555 .map_mr_sg = mlx4_ib_map_mr_sg,
2556 .mmap = mlx4_ib_mmap,
2557 .modify_cq = mlx4_ib_modify_cq,
2558 .modify_device = mlx4_ib_modify_device,
2559 .modify_port = mlx4_ib_modify_port,
2560 .modify_qp = mlx4_ib_modify_qp,
2561 .modify_srq = mlx4_ib_modify_srq,
2562 .poll_cq = mlx4_ib_poll_cq,
2563 .post_recv = mlx4_ib_post_recv,
2564 .post_send = mlx4_ib_post_send,
2565 .post_srq_recv = mlx4_ib_post_srq_recv,
2566 .process_mad = mlx4_ib_process_mad,
2567 .query_ah = mlx4_ib_query_ah,
2568 .query_device = mlx4_ib_query_device,
2569 .query_gid = mlx4_ib_query_gid,
2570 .query_pkey = mlx4_ib_query_pkey,
2571 .query_port = mlx4_ib_query_port,
2572 .query_qp = mlx4_ib_query_qp,
2573 .query_srq = mlx4_ib_query_srq,
2574 .reg_user_mr = mlx4_ib_reg_user_mr,
2575 .req_notify_cq = mlx4_ib_arm_cq,
2576 .rereg_user_mr = mlx4_ib_rereg_user_mr,
2577 .resize_cq = mlx4_ib_resize_cq,
2578
2579 INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah),
2580 INIT_RDMA_OBJ_SIZE(ib_cq, mlx4_ib_cq, ibcq),
2581 INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd),
2582 INIT_RDMA_OBJ_SIZE(ib_qp, mlx4_ib_qp, ibqp),
2583 INIT_RDMA_OBJ_SIZE(ib_srq, mlx4_ib_srq, ibsrq),
2584 INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx4_ib_ucontext, ibucontext),
2585 };
2586
2587 static const struct ib_device_ops mlx4_ib_dev_wq_ops = {
2588 .create_rwq_ind_table = mlx4_ib_create_rwq_ind_table,
2589 .create_wq = mlx4_ib_create_wq,
2590 .destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table,
2591 .destroy_wq = mlx4_ib_destroy_wq,
2592 .modify_wq = mlx4_ib_modify_wq,
2593
2594 INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx4_ib_rwq_ind_table,
2595 ib_rwq_ind_tbl),
2596 };
2597
2598 static const struct ib_device_ops mlx4_ib_dev_mw_ops = {
2599 .alloc_mw = mlx4_ib_alloc_mw,
2600 .dealloc_mw = mlx4_ib_dealloc_mw,
2601
2602 INIT_RDMA_OBJ_SIZE(ib_mw, mlx4_ib_mw, ibmw),
2603 };
2604
2605 static const struct ib_device_ops mlx4_ib_dev_xrc_ops = {
2606 .alloc_xrcd = mlx4_ib_alloc_xrcd,
2607 .dealloc_xrcd = mlx4_ib_dealloc_xrcd,
2608
2609 INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx4_ib_xrcd, ibxrcd),
2610 };
2611
2612 static const struct ib_device_ops mlx4_ib_dev_fs_ops = {
2613 .create_flow = mlx4_ib_create_flow,
2614 .destroy_flow = mlx4_ib_destroy_flow,
2615 };
2616
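/*
 * .add callback of the mlx4 core interface; probes one HCA.  Roughly:
 * PD/UAR/doorbell mapping, ib_device ops (plus optional WQ, MW, XRC
 * and flow-steering ops), EQs, node data, per-port counters, the UC
 * steering QPN range, diagnostic counters, device registration, MAD
 * and SR-IOV init, and finally the netdev notifier.  The error labels
 * unwind these steps in reverse order.
 */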
2617 static void *mlx4_ib_add(struct mlx4_dev *dev)
2618 {
2619 struct mlx4_ib_dev *ibdev;
2620 int num_ports = 0;
2621 int i, j;
2622 int err;
2623 struct mlx4_ib_iboe *iboe;
2624 int ib_num_ports = 0;
2625 int num_req_counters;
2626 int allocated;
2627 u32 counter_index;
2628 struct counter_index *new_counter_index = NULL;
2629
2630 pr_info_once("%s", mlx4_ib_version);
2631
2632 num_ports = 0;
2633 mlx4_foreach_ib_transport_port(i, dev)
2634 num_ports++;
2635
2636 /* no point in registering a device with no ports */
2637 if (num_ports == 0)
2638 return NULL;
2639
2640 ibdev = ib_alloc_device(mlx4_ib_dev, ib_dev);
2641 if (!ibdev) {
2642 dev_err(&dev->persist->pdev->dev,
2643 "Device struct alloc failed\n");
2644 return NULL;
2645 }
2646
2647 iboe = &ibdev->iboe;
2648
2649 if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
2650 goto err_dealloc;
2651
2652 if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
2653 goto err_pd;
2654
2655 ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
2656 PAGE_SIZE);
2657 if (!ibdev->uar_map)
2658 goto err_uar;
2659 MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
2660
2661 ibdev->dev = dev;
2662 ibdev->bond_next_port = 0;
2663
2664 ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
2665 ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
2666 ibdev->num_ports = num_ports;
2667 ibdev->ib_dev.phys_port_cnt = mlx4_is_bonded(dev) ?
2668 1 : ibdev->num_ports;
2669 ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
2670 ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev;
2671
2672 ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops);
2673
2674 if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
2675 ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) ==
2676 IB_LINK_LAYER_ETHERNET) ||
2677 (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) ==
2678 IB_LINK_LAYER_ETHERNET)))
2679 ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops);
2680
2681 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
2682 dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
2683 ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops);
2684
2685 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
2686 ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops);
2687 }
2688
2689 if (check_flow_steering_support(dev)) {
2690 ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
2691 ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops);
2692 }
2693
2694 if (!dev->caps.userspace_caps)
2695 ibdev->ib_dev.ops.uverbs_abi_ver =
2696 MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
2697
2698 mlx4_ib_alloc_eqs(dev, ibdev);
2699
2700 spin_lock_init(&iboe->lock);
2701
2702 if (init_node_data(ibdev))
2703 goto err_map;
2704 mlx4_init_sl2vl_tbl(ibdev);
2705
2706 for (i = 0; i < ibdev->num_ports; ++i) {
2707 mutex_init(&ibdev->counters_table[i].mutex);
2708 INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
2709 iboe->last_port_state[i] = IB_PORT_DOWN;
2710 }
2711
2712 num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
2713 for (i = 0; i < num_req_counters; ++i) {
2714 mutex_init(&ibdev->qp1_proxy_lock[i]);
2715 allocated = 0;
2716 if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
2717 IB_LINK_LAYER_ETHERNET) {
2718 err = mlx4_counter_alloc(ibdev->dev, &counter_index,
2719 MLX4_RES_USAGE_DRIVER);
2720 /* fall back to the port's default counter if allocation failed */
2721 if (err)
2722 counter_index =
2723 mlx4_get_default_counter_index(dev,
2724 i + 1);
2725 else
2726 allocated = 1;
2727 } else {
2728 counter_index = mlx4_get_default_counter_index(dev,
2729 i + 1);
2730 }
2731 new_counter_index = kmalloc(sizeof(*new_counter_index),
2732 GFP_KERNEL);
2733 if (!new_counter_index) {
2734 if (allocated)
2735 mlx4_counter_free(ibdev->dev, counter_index);
2736 goto err_counter;
2737 }
2738 new_counter_index->index = counter_index;
2739 new_counter_index->allocated = allocated;
2740 list_add_tail(&new_counter_index->list,
2741 &ibdev->counters_table[i].counters_list);
2742 ibdev->counters_table[i].default_counter = counter_index;
2743 pr_info("counter index %d for port %d allocated %d\n",
2744 counter_index, i + 1, allocated);
2745 }
2746 if (mlx4_is_bonded(dev))
2747 for (i = 1; i < ibdev->num_ports ; ++i) {
2748 new_counter_index =
2749 kmalloc(sizeof(struct counter_index),
2750 GFP_KERNEL);
2751 if (!new_counter_index)
2752 goto err_counter;
2753 new_counter_index->index = counter_index;
2754 new_counter_index->allocated = 0;
2755 list_add_tail(&new_counter_index->list,
2756 &ibdev->counters_table[i].counters_list);
2757 ibdev->counters_table[i].default_counter =
2758 counter_index;
2759 }
2760
2761 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2762 ib_num_ports++;
2763
2764 spin_lock_init(&ibdev->sm_lock);
2765 mutex_init(&ibdev->cap_mask_mutex);
2766 INIT_LIST_HEAD(&ibdev->qp_list);
2767 spin_lock_init(&ibdev->reset_flow_resource_lock);
2768
2769 if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
2770 ib_num_ports) {
2771 ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
2772 err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
2773 MLX4_IB_UC_STEER_QPN_ALIGN,
2774 &ibdev->steer_qpn_base, 0,
2775 MLX4_RES_USAGE_DRIVER);
2776 if (err)
2777 goto err_counter;
2778
2779 ibdev->ib_uc_qpns_bitmap = bitmap_alloc(ibdev->steer_qpn_count,
2780 GFP_KERNEL);
2781 if (!ibdev->ib_uc_qpns_bitmap)
2782 goto err_steer_qp_release;
2783
2784 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) {
2785 bitmap_zero(ibdev->ib_uc_qpns_bitmap,
2786 ibdev->steer_qpn_count);
2787 err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
2788 dev, ibdev->steer_qpn_base,
2789 ibdev->steer_qpn_base +
2790 ibdev->steer_qpn_count - 1);
2791 if (err)
2792 goto err_steer_free_bitmap;
2793 } else {
2794 bitmap_fill(ibdev->ib_uc_qpns_bitmap,
2795 ibdev->steer_qpn_count);
2796 }
2797 }
2798
2799 for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
2800 atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
2801
2802 if (mlx4_ib_alloc_diag_counters(ibdev))
2803 goto err_steer_free_bitmap;
2804
2805 if (ib_register_device(&ibdev->ib_dev, "mlx4_%d",
2806 &dev->persist->pdev->dev))
2807 goto err_diag_counters;
2808
2809 if (mlx4_ib_mad_init(ibdev))
2810 goto err_reg;
2811
2812 if (mlx4_ib_init_sriov(ibdev))
2813 goto err_mad;
2814
2815 if (!iboe->nb.notifier_call) {
2816 iboe->nb.notifier_call = mlx4_ib_netdev_event;
2817 err = register_netdevice_notifier(&iboe->nb);
2818 if (err) {
2819 iboe->nb.notifier_call = NULL;
2820 goto err_notif;
2821 }
2822 }
2823 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
2824 err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
2825 if (err)
2826 goto err_notif;
2827 }
2828
2829 ibdev->ib_active = true;
2830 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2831 devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i),
2832 &ibdev->ib_dev);
2833
2834 if (mlx4_is_mfunc(ibdev->dev))
2835 init_pkeys(ibdev);
2836
2837 /* create paravirt contexts for any VFs that are already active */
2838 if (mlx4_is_master(ibdev->dev)) {
2839 for (j = 0; j < MLX4_MFUNC_MAX; j++) {
2840 if (j == mlx4_master_func_num(ibdev->dev))
2841 continue;
2842 if (mlx4_is_slave_active(ibdev->dev, j))
2843 do_slave_init(ibdev, j, 1);
2844 }
2845 }
2846 return ibdev;
2847
2848 err_notif:
2849 if (ibdev->iboe.nb.notifier_call) {
2850 if (unregister_netdevice_notifier(&ibdev->iboe.nb))
2851 pr_warn("failure unregistering notifier\n");
2852 ibdev->iboe.nb.notifier_call = NULL;
2853 }
2854 flush_workqueue(wq);
2855
2856 mlx4_ib_close_sriov(ibdev);
2857
2858 err_mad:
2859 mlx4_ib_mad_cleanup(ibdev);
2860
2861 err_reg:
2862 ib_unregister_device(&ibdev->ib_dev);
2863
2864 err_diag_counters:
2865 mlx4_ib_diag_cleanup(ibdev);
2866
2867 err_steer_free_bitmap:
2868 bitmap_free(ibdev->ib_uc_qpns_bitmap);
2869
2870 err_steer_qp_release:
2871 mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
2872 ibdev->steer_qpn_count);
2873 err_counter:
2874 for (i = 0; i < ibdev->num_ports; ++i)
2875 mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
2876
2877 err_map:
2878 mlx4_ib_free_eqs(dev, ibdev);
2879 iounmap(ibdev->uar_map);
2880
2881 err_uar:
2882 mlx4_uar_free(dev, &ibdev->priv_uar);
2883
2884 err_pd:
2885 mlx4_pd_free(dev, ibdev->priv_pdn);
2886
2887 err_dealloc:
2888 ib_dealloc_device(&ibdev->ib_dev);
2889
2890 return NULL;
2891 }
2892
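/*
 * Allocate QP numbers from the range reserved above for device-managed
 * flow steering.  Callers pair mlx4_ib_steer_qp_alloc() with
 * mlx4_ib_steer_qp_free() for the same count; an illustrative usage
 * sketch (not taken from a real caller):
 *
 *	int qpn;
 *	if (!mlx4_ib_steer_qp_alloc(dev, 1, &qpn)) {
 *		... use qpn ...
 *		mlx4_ib_steer_qp_free(dev, qpn, 1);
 *	}
 */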
2893 int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
2894 {
2895 int offset;
2896
2897 WARN_ON(!dev->ib_uc_qpns_bitmap);
2898
2899 offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
2900 dev->steer_qpn_count,
2901 get_count_order(count));
2902 if (offset < 0)
2903 return offset;
2904
2905 *qpn = dev->steer_qpn_base + offset;
2906 return 0;
2907 }
2908
2909 void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
2910 {
2911 if (!qpn ||
2912 dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED)
2913 return;
2914
2915 if (WARN(qpn < dev->steer_qpn_base, "qpn = %u, steer_qpn_base = %u\n",
2916 qpn, dev->steer_qpn_base))
2917 /* not supposed to happen; bail out instead of corrupting the bitmap */
2918 return;
2919
2920 bitmap_release_region(dev->ib_uc_qpns_bitmap,
2921 qpn - dev->steer_qpn_base,
2922 get_count_order(count));
2923 }
2924
2925 int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
2926 int is_attach)
2927 {
2928 int err;
2929 size_t flow_size;
2930 struct ib_flow_attr *flow = NULL;
2931 struct ib_flow_spec_ib *ib_spec;
2932
2933 if (is_attach) {
2934 flow_size = sizeof(struct ib_flow_attr) +
2935 sizeof(struct ib_flow_spec_ib);
2936 flow = kzalloc(flow_size, GFP_KERNEL);
2937 if (!flow)
2938 return -ENOMEM;
2939 flow->port = mqp->port;
2940 flow->num_of_specs = 1;
2941 flow->size = flow_size;
2942 ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
2943 ib_spec->type = IB_FLOW_SPEC_IB;
2944 ib_spec->size = sizeof(struct ib_flow_spec_ib);
2945
2946 memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
2947
2948 err = __mlx4_ib_create_flow(&mqp->ibqp, flow, MLX4_DOMAIN_NIC,
2949 MLX4_FS_REGULAR, &mqp->reg_id);
2950 } else {
2951 err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
2952 }
2953 kfree(flow);
2954 return err;
2955 }
2956
2957 static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
2958 {
2959 struct mlx4_ib_dev *ibdev = ibdev_ptr;
2960 int p;
2961 int i;
2962
2963 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2964 devlink_port_type_clear(mlx4_get_devlink_port(dev, i));
2965 ibdev->ib_active = false;
2966 flush_workqueue(wq);
2967
2968 if (ibdev->iboe.nb.notifier_call) {
2969 if (unregister_netdevice_notifier(&ibdev->iboe.nb))
2970 pr_warn("failure unregistering notifier\n");
2971 ibdev->iboe.nb.notifier_call = NULL;
2972 }
2973
2974 mlx4_ib_close_sriov(ibdev);
2975 mlx4_ib_mad_cleanup(ibdev);
2976 ib_unregister_device(&ibdev->ib_dev);
2977 mlx4_ib_diag_cleanup(ibdev);
2978
2979 mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
2980 ibdev->steer_qpn_count);
2981 bitmap_free(ibdev->ib_uc_qpns_bitmap);
2982
2983 iounmap(ibdev->uar_map);
2984 for (p = 0; p < ibdev->num_ports; ++p)
2985 mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]);
2986
2987 mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
2988 mlx4_CLOSE_PORT(dev, p);
2989
2990 mlx4_ib_free_eqs(dev, ibdev);
2991
2992 mlx4_uar_free(dev, &ibdev->priv_uar);
2993 mlx4_pd_free(dev, ibdev->priv_pdn);
2994 ib_dealloc_device(&ibdev->ib_dev);
2995 }
2996
2997 static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
2998 {
2999 struct mlx4_ib_demux_work **dm = NULL;
3000 struct mlx4_dev *dev = ibdev->dev;
3001 int i;
3002 unsigned long flags;
3003 struct mlx4_active_ports actv_ports;
3004 unsigned int ports;
3005 unsigned int first_port;
3006
3007 if (!mlx4_is_master(dev))
3008 return;
3009
3010 actv_ports = mlx4_get_active_ports(dev, slave);
3011 ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
3012 first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
3013
3014 dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
3015 if (!dm)
3016 return;
3017
3018 for (i = 0; i < ports; i++) {
3019 dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
3020 if (!dm[i]) {
3021 while (--i >= 0)
3022 kfree(dm[i]);
3023 goto out;
3024 }
3025 INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
3026 dm[i]->port = first_port + i + 1;
3027 dm[i]->slave = slave;
3028 dm[i]->do_init = do_init;
3029 dm[i]->dev = ibdev;
3030 }
3031
3032 spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
3033 if (!ibdev->sriov.is_going_down) {
3034 for (i = 0; i < ports; i++)
3035 queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
3036 spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
3037 } else {
3038 spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
3039 for (i = 0; i < ports; i++)
3040 kfree(dm[i]);
3041 }
3042 out:
3043 kfree(dm);
3044 return;
3045 }
3046
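/*
 * Catastrophic error handling: under reset_flow_resource_lock, walk
 * every QP on the device, collect each send/receive CQ that still has
 * outstanding work and a completion handler, and invoke those
 * completion callbacks once so that consumers get a chance to poll
 * them.
 */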
3047 static void mlx4_ib_handle_catas_error(struct mlx4_ib_dev *ibdev)
3048 {
3049 struct mlx4_ib_qp *mqp;
3050 unsigned long flags_qp;
3051 unsigned long flags_cq;
3052 struct mlx4_ib_cq *send_mcq, *recv_mcq;
3053 struct list_head cq_notify_list;
3054 struct mlx4_cq *mcq;
3055 unsigned long flags;
3056
3057 pr_warn("mlx4_ib_handle_catas_error was started\n");
3058 INIT_LIST_HEAD(&cq_notify_list);
3059
3060 /* go over the device's QP list, synchronized against QP create/destroy */
3061 spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
3062
3063 list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
3064 spin_lock_irqsave(&mqp->sq.lock, flags_qp);
3065 if (mqp->sq.tail != mqp->sq.head) {
3066 send_mcq = to_mcq(mqp->ibqp.send_cq);
3067 spin_lock_irqsave(&send_mcq->lock, flags_cq);
3068 if (send_mcq->mcq.comp &&
3069 mqp->ibqp.send_cq->comp_handler) {
3070 if (!send_mcq->mcq.reset_notify_added) {
3071 send_mcq->mcq.reset_notify_added = 1;
3072 list_add_tail(&send_mcq->mcq.reset_notify,
3073 &cq_notify_list);
3074 }
3075 }
3076 spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
3077 }
3078 spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
3079
3080 spin_lock_irqsave(&mqp->rq.lock, flags_qp);
3081
3082 if (!mqp->ibqp.srq) {
3083 if (mqp->rq.tail != mqp->rq.head) {
3084 recv_mcq = to_mcq(mqp->ibqp.recv_cq);
3085 spin_lock_irqsave(&recv_mcq->lock, flags_cq);
3086 if (recv_mcq->mcq.comp &&
3087 mqp->ibqp.recv_cq->comp_handler) {
3088 if (!recv_mcq->mcq.reset_notify_added) {
3089 recv_mcq->mcq.reset_notify_added = 1;
3090 list_add_tail(&recv_mcq->mcq.reset_notify,
3091 &cq_notify_list);
3092 }
3093 }
3094 spin_unlock_irqrestore(&recv_mcq->lock,
3095 flags_cq);
3096 }
3097 }
3098 spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
3099 }
3100
3101 list_for_each_entry(mcq, &cq_notify_list, reset_notify) {
3102 mcq->comp(mcq);
3103 }
3104 spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
3105 pr_warn("mlx4_ib_handle_catas_error ended\n");
3106 }
3107
3108 static void handle_bonded_port_state_event(struct work_struct *work)
3109 {
3110 struct ib_event_work *ew =
3111 container_of(work, struct ib_event_work, work);
3112 struct mlx4_ib_dev *ibdev = ew->ib_dev;
3113 enum ib_port_state bonded_port_state = IB_PORT_NOP;
3114 int i;
3115 struct ib_event ibev;
3116
3117 kfree(ew);
3118 spin_lock_bh(&ibdev->iboe.lock);
3119 for (i = 0; i < MLX4_MAX_PORTS; ++i) {
3120 struct net_device *curr_netdev = ibdev->iboe.netdevs[i];
3121 enum ib_port_state curr_port_state;
3122
3123 if (!curr_netdev)
3124 continue;
3125
3126 curr_port_state =
3127 (netif_running(curr_netdev) &&
3128 netif_carrier_ok(curr_netdev)) ?
3129 IB_PORT_ACTIVE : IB_PORT_DOWN;
3130
3131 bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
3132 curr_port_state : IB_PORT_ACTIVE;
3133 }
3134 spin_unlock_bh(&ibdev->iboe.lock);
3135
3136 ibev.device = &ibdev->ib_dev;
3137 ibev.element.port_num = 1;
3138 ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ?
3139 IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
3140
3141 ib_dispatch_event(&ibev);
3142 }
3143
3144 void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port)
3145 {
3146 u64 sl2vl;
3147 int err;
3148
3149 err = mlx4_ib_query_sl2vl(&mdev->ib_dev, port, &sl2vl);
3150 if (err) {
3151 pr_err("Unable to get current sl to vl mapping for port %d. Using all zeroes (%d)\n",
3152 port, err);
3153 sl2vl = 0;
3154 }
3155 atomic64_set(&mdev->sl2vl[port - 1], sl2vl);
3156 }
3157
3158 static void ib_sl2vl_update_work(struct work_struct *work)
3159 {
3160 struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
3161 struct mlx4_ib_dev *mdev = ew->ib_dev;
3162 int port = ew->port;
3163
3164 mlx4_ib_sl2vl_update(mdev, port);
3165
3166 kfree(ew);
3167 }
3168
3169 void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
3170 int port)
3171 {
3172 struct ib_event_work *ew;
3173
3174 ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
3175 if (ew) {
3176 INIT_WORK(&ew->work, ib_sl2vl_update_work);
3177 ew->port = port;
3178 ew->ib_dev = ibdev;
3179 queue_work(wq, &ew->work);
3180 }
3181 }
3182
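/*
 * .event callback of the mlx4 core interface.  On bonded devices port
 * up/down is re-evaluated in a work item; otherwise 'param' carries a
 * port number (or a slave id for SLAVE_INIT/SLAVE_SHUTDOWN, or an EQE
 * pointer for PORT_MGMT_CHANGE), and the core event is translated into
 * the matching ib_event and dispatched.
 */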
3183 static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
3184 enum mlx4_dev_event event, unsigned long param)
3185 {
3186 struct ib_event ibev;
3187 struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
3188 struct mlx4_eqe *eqe = NULL;
3189 struct ib_event_work *ew;
3190 int p = 0;
3191
3192 if (mlx4_is_bonded(dev) &&
3193 ((event == MLX4_DEV_EVENT_PORT_UP) ||
3194 (event == MLX4_DEV_EVENT_PORT_DOWN))) {
3195 ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
3196 if (!ew)
3197 return;
3198 INIT_WORK(&ew->work, handle_bonded_port_state_event);
3199 ew->ib_dev = ibdev;
3200 queue_work(wq, &ew->work);
3201 return;
3202 }
3203
3204 if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
3205 eqe = (struct mlx4_eqe *)param;
3206 else
3207 p = (int) param;
3208
3209 switch (event) {
3210 case MLX4_DEV_EVENT_PORT_UP:
3211 if (p > ibdev->num_ports)
3212 return;
3213 if (!mlx4_is_slave(dev) &&
3214 rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
3215 IB_LINK_LAYER_INFINIBAND) {
3216 if (mlx4_is_master(dev))
3217 mlx4_ib_invalidate_all_guid_record(ibdev, p);
3218 if (ibdev->dev->flags & MLX4_FLAG_SECURE_HOST &&
3219 !(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT))
3220 mlx4_sched_ib_sl2vl_update_work(ibdev, p);
3221 }
3222 ibev.event = IB_EVENT_PORT_ACTIVE;
3223 break;
3224
3225 case MLX4_DEV_EVENT_PORT_DOWN:
3226 if (p > ibdev->num_ports)
3227 return;
3228 ibev.event = IB_EVENT_PORT_ERR;
3229 break;
3230
3231 case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
3232 ibdev->ib_active = false;
3233 ibev.event = IB_EVENT_DEVICE_FATAL;
3234 mlx4_ib_handle_catas_error(ibdev);
3235 break;
3236
3237 case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
3238 ew = kmalloc(sizeof *ew, GFP_ATOMIC);
3239 if (!ew)
3240 return;
3241
3242 INIT_WORK(&ew->work, handle_port_mgmt_change_event);
3243 memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
3244 ew->ib_dev = ibdev;
3245
3246 if (mlx4_is_master(dev))
3247 queue_work(wq, &ew->work);
3248 else
3249 handle_port_mgmt_change_event(&ew->work);
3250 return;
3251
3252 case MLX4_DEV_EVENT_SLAVE_INIT:
3253 /* here, p is the slave id */
3254 do_slave_init(ibdev, p, 1);
3255 if (mlx4_is_master(dev)) {
3256 int i;
3257
3258 for (i = 1; i <= ibdev->num_ports; i++) {
3259 if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
3260 == IB_LINK_LAYER_INFINIBAND)
3261 mlx4_ib_slave_alias_guid_event(ibdev,
3262 p, i,
3263 1);
3264 }
3265 }
3266 return;
3267
3268 case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
3269 if (mlx4_is_master(dev)) {
3270 int i;
3271
3272 for (i = 1; i <= ibdev->num_ports; i++) {
3273 if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
3274 == IB_LINK_LAYER_INFINIBAND)
3275 mlx4_ib_slave_alias_guid_event(ibdev,
3276 p, i,
3277 0);
3278 }
3279 }
3280 /* here, p is the slave id */
3281 do_slave_init(ibdev, p, 0);
3282 return;
3283
3284 default:
3285 return;
3286 }
3287
3288 ibev.device = ibdev_ptr;
3289 ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
3290
3291 ib_dispatch_event(&ibev);
3292 }
3293
3294 static struct mlx4_interface mlx4_ib_interface = {
3295 .add = mlx4_ib_add,
3296 .remove = mlx4_ib_remove,
3297 .event = mlx4_ib_event,
3298 .protocol = MLX4_PROT_IB_IPV6,
3299 .flags = MLX4_INTFF_BONDING
3300 };
3301
3302 static int __init mlx4_ib_init(void)
3303 {
3304 int err;
3305
3306 wq = alloc_ordered_workqueue("mlx4_ib", WQ_MEM_RECLAIM);
3307 if (!wq)
3308 return -ENOMEM;
3309
3310 err = mlx4_ib_cm_init();
3311 if (err)
3312 goto clean_wq;
3313
3314 err = mlx4_ib_mcg_init();
3315 if (err)
3316 goto clean_cm;
3317
3318 err = mlx4_register_interface(&mlx4_ib_interface);
3319 if (err)
3320 goto clean_mcg;
3321
3322 return 0;
3323
3324 clean_mcg:
3325 mlx4_ib_mcg_destroy();
3326
3327 clean_cm:
3328 mlx4_ib_cm_destroy();
3329
3330 clean_wq:
3331 destroy_workqueue(wq);
3332 return err;
3333 }
3334
3335 static void __exit mlx4_ib_cleanup(void)
3336 {
3337 mlx4_unregister_interface(&mlx4_ib_interface);
3338 mlx4_ib_mcg_destroy();
3339 mlx4_ib_cm_destroy();
3340 destroy_workqueue(wq);
3341 }
3342
3343 module_init(mlx4_ib_init);
3344 module_exit(mlx4_ib_cleanup);