0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033 #include <linux/netdevice.h>
0034 #include <net/bonding.h>
0035 #include <linux/mlx5/driver.h>
0036 #include <linux/mlx5/eswitch.h>
0037 #include <linux/mlx5/vport.h>
0038 #include "lib/devcom.h"
0039 #include "mlx5_core.h"
0040 #include "eswitch.h"
0041 #include "esw/acl/ofld.h"
0042 #include "lag.h"
0043 #include "mp.h"
0044 #include "mpesw.h"
0045
/* 1-based HW egress port numbers used in the TX port affinity (v2p) map. */
enum {
	MLX5_LAG_EGRESS_PORT_1 = 1,
	MLX5_LAG_EGRESS_PORT_2,
};
0050
0051
0052
0053
0054
0055 static DEFINE_SPINLOCK(lag_lock);
0056
0057 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
0058 {
0059 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
0060 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
0061
0062 if (mode == MLX5_LAG_MODE_MPESW)
0063 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
0064
0065 return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
0066 }
0067
/* Issue the FW CREATE_LAG command. In queue-affinity selection mode the
 * initial TX remap affinity for both ports is programmed from ports[];
 * other selection modes ignore it.
 */
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
			       unsigned long flags)
{
	bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
				     &flags);
	int port_sel_mode = get_port_sel_mode(mode, flags);
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx;

	lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
	/* tx_remap_affinity fields are only meaningful for queue affinity */
	if (port_sel_mode == MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY) {
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
	}
	MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}
0088
/* Issue the FW MODIFY_LAG command to update the TX remap affinity.
 * field_select bit 0 selects the tx_remap_affinity fields.
 * NOTE(review): num_ports is currently unused here; only the first two
 * affinity entries are programmed.
 */
static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
			       u8 *ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}
0103
/* Issue the FW CREATE_VPORT_LAG command on @dev. */
int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
0113
/* Issue the FW DESTROY_VPORT_LAG command on @dev. */
int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
0123
0124 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
0125 u8 *ports, int *num_disabled)
0126 {
0127 int i;
0128
0129 *num_disabled = 0;
0130 for (i = 0; i < num_ports; i++) {
0131 if (!tracker->netdev_state[i].tx_enabled ||
0132 !tracker->netdev_state[i].link_up)
0133 ports[(*num_disabled)++] = i;
0134 }
0135 }
0136
/* Collect the indices of ports that can transmit (TX enabled and link up)
 * into @ports. If no port qualifies, fall back to reporting the disabled
 * ports instead so the caller always receives a non-empty set (assuming
 * num_ports > 0).
 */
void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
			   u8 *ports, int *num_enabled)
{
	int i;

	*num_enabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			ports[(*num_enabled)++] = i;
	}

	/* no active port: report the inactive ones instead */
	if (*num_enabled == 0)
		mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
}
0152
/* Log the current LAG port mapping. In hash (port selection FT) mode the
 * 1-based active ports are printed; otherwise the per-bucket v2p affinity
 * map is printed. The scnprintf length checks guard against truncation:
 * on any unexpected width the function bails out silently rather than
 * logging a mangled line.
 */
static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
				   struct mlx5_lag *ldev,
				   struct lag_tracker *tracker,
				   unsigned long flags)
{
	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
	u8 enabled_ports[MLX5_MAX_PORTS] = {};
	int written = 0;
	int num_enabled;
	int idx;
	int err;
	int i;
	int j;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
		mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
				      &num_enabled);
		for (i = 0; i < num_enabled; i++) {
			/* "%d, " is exactly 3 chars for single-digit ports */
			err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
			if (err != 3)
				return;
			written += err;
		}
		/* strip the trailing ", " */
		buf[written - 2] = 0;
		mlx5_core_info(dev, "lag map active ports: %s\n", buf);
	} else {
		for (i = 0; i < ldev->ports; i++) {
			for (j = 0; j < ldev->buckets; j++) {
				idx = i * ldev->buckets + j;
				/* " port %d:%d" is expected to be 9 chars */
				err = scnprintf(buf + written, 10,
						" port %d:%d", i + 1, ldev->v2p_map[idx]);
				if (err != 9)
					return;
				written += err;
			}
		}
		mlx5_core_info(dev, "lag map:%s\n", buf);
	}
}
0192
0193 static int mlx5_lag_netdev_event(struct notifier_block *this,
0194 unsigned long event, void *ptr);
0195 static void mlx5_do_bond_work(struct work_struct *work);
0196
/* kref release callback: unregister the netdev notifier (if it was
 * successfully registered at alloc time), tear down MP/MPESW state,
 * flush pending MPESW work, and free the workqueue and object.
 */
static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	mlx5_lag_mpesw_cleanup(ldev);
	cancel_work_sync(&ldev->mpesw_work);
	destroy_workqueue(ldev->wq);
	mutex_destroy(&ldev->lock);
	kfree(ldev);
}
0210
/* Drop a reference; frees the ldev via mlx5_ldev_free on last put. */
static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}
0215
/* Take an additional reference on the shared ldev. */
static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}
0220
/* Allocate and initialize a mlx5_lag object for @dev: workqueue, kref,
 * lock, bond work, netdev notifier and MP/MPESW sub-state. Notifier
 * registration and MP init failures are non-fatal; a failed notifier is
 * recorded by clearing notifier_call so teardown skips it.
 * Returns NULL on allocation failure.
 */
static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	mutex_init(&ldev->lock);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		/* mark as unregistered so mlx5_ldev_free won't unregister */
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}
	ldev->mode = MLX5_LAG_MODE_NONE;

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	mlx5_lag_mpesw_init(ldev);
	ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
	ldev->buckets = 1;

	return ldev;
}
0258
0259 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
0260 struct net_device *ndev)
0261 {
0262 int i;
0263
0264 for (i = 0; i < ldev->ports; i++)
0265 if (ldev->pf[i].netdev == ndev)
0266 return i;
0267
0268 return -ENOENT;
0269 }
0270
/* True when the LAG is currently in RoCE mode (caller holds needed locks). */
static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_ROCE;
}
0275
/* True when the LAG is currently in SR-IOV (VF LAG) mode. */
static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_SRIOV;
}
0280
0281
0282
0283
0284
0285
0286
/* Build the TX port affinity (v2p) map in @ports, using 1-based HW egress
 * port numbers. Start from the identity mapping; when some (but not all)
 * ports are inactive, remap every bucket of each inactive port to a
 * randomly chosen active port so its traffic is spread across the
 * remaining ports.
 */
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 num_ports,
					   u8 buckets,
					   u8 *ports)
{
	int disabled[MLX5_MAX_PORTS] = {};
	int enabled[MLX5_MAX_PORTS] = {};
	int disabled_ports_num = 0;
	int enabled_ports_num = 0;
	int idx;
	u32 rand;
	int i;
	int j;

	/* partition ports into active (tx enabled + link up) and inactive */
	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			enabled[enabled_ports_num++] = i;
		else
			disabled[disabled_ports_num++] = i;
	}

	/* identity map: every bucket of port i egresses on port i */
	for (i = 0; i < num_ports; i++)
		for (j = 0; j < buckets; j++) {
			idx = i * buckets + j;
			ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
		}

	/* all active or all inactive: identity map is the best we can do */
	if (enabled_ports_num == num_ports ||
	    disabled_ports_num == num_ports)
		return;

	/* steer each inactive port's buckets to random active ports */
	for (i = 0; i < disabled_ports_num; i++) {
		for (j = 0; j < buckets; j++) {
			get_random_bytes(&rand, 4);
			ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
		}
	}
}
0331
0332 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
0333 {
0334 int i;
0335
0336 for (i = 0; i < ldev->ports; i++)
0337 if (ldev->pf[i].has_drop)
0338 return true;
0339 return false;
0340 }
0341
/* Destroy the uplink ingress drop rule on every PF that has one and
 * clear its has_drop marker.
 */
static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].has_drop)
			continue;

		mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
							     MLX5_VPORT_UPLINK);
		ldev->pf[i].has_drop = false;
	}
}
0355
/* Install uplink ingress drop rules on all ports that cannot transmit,
 * after first removing any stale rules. A rule-create failure is logged
 * but does not abort the remaining ports.
 * NOTE(review): the inactive check reads ldev->tracker.has_inactive while
 * the disabled-port scan uses the @tracker argument — confirm callers
 * always pass ldev's own tracker (or a fresh copy of it).
 */
static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
				     struct lag_tracker *tracker)
{
	u8 disabled_ports[MLX5_MAX_PORTS] = {};
	struct mlx5_core_dev *dev;
	int disabled_index;
	int num_disabled;
	int err;
	int i;

	/* drop existing rules before re-evaluating which ports need one */
	mlx5_lag_drop_rule_cleanup(ldev);

	if (!ldev->tracker.has_inactive)
		return;

	mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);

	for (i = 0; i < num_disabled; i++) {
		disabled_index = disabled_ports[i];
		dev = ldev->pf[disabled_index].dev;
		err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
								  MLX5_VPORT_UPLINK);
		if (!err)
			ldev->pf[disabled_index].has_drop = true;
		else
			mlx5_core_err(dev,
				      "Failed to create lag drop rule, error: %d", err);
	}
}
0388
/* Push a new TX affinity map: via the port selection flow table in hash
 * mode, or via the FW MODIFY_LAG command otherwise.
 */
static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags))
		return mlx5_lag_port_sel_modify(ldev, ports);
	return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
}
0397
/* Recompute the TX affinity map from the tracker state and, if it differs
 * from the cached v2p_map, push the whole new map once. Because v2p_map is
 * updated immediately after a successful modify, later indices compare
 * equal and at most one modify is issued per call. For active-backup
 * non-RoCE bonds the drop rules are refreshed afterwards.
 */
void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int idx;
	int err;
	int i;
	int j;

	mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);

	for (i = 0; i < ldev->ports; i++) {
		for (j = 0; j < ldev->buckets; j++) {
			idx = i * ldev->buckets + j;
			if (ports[idx] == ldev->v2p_map[idx])
				continue;
			/* first difference: push the complete new map */
			err = _mlx5_modify_lag(ldev, ports);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to modify LAG (%d)\n",
					      err);
				return;
			}
			memcpy(ldev->v2p_map, ports, sizeof(ports));

			mlx5_lag_print_mapping(dev0, ldev, tracker,
					       ldev->mode_flags);
			break;
		}
	}

	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !(ldev->mode == MLX5_LAG_MODE_ROCE))
		mlx5_lag_drop_rule_setup(ldev, tracker);
}
0434
0435 #define MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED 4
/* Select the port selection mode for a RoCE LAG. A 4-port LAG is only
 * supported via the hash-based port selection flow table and requires the
 * port_select_flow_table capability; it also uses the maximum number of
 * hash buckets. Returns -EINVAL when 4 ports are requested without the
 * capability.
 */
static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
					   unsigned long *flags)
{
	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];

	if (ldev->ports == MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED) {
		/* 4 ports mandate hash-based selection */
		if (!MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table))
			return -EINVAL;
		set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
		if (ldev->ports > 2)
			ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
	}

	return 0;
}
0452
/* Select the port selection mode for an offloads (switchdev) LAG: use the
 * hash-based port selection FT when the device supports it and the bond is
 * a hash bond. MPESW keeps the default (queue affinity resolved later).
 */
static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
						struct lag_tracker *tracker,
						enum mlx5_lag_mode mode,
						unsigned long *flags)
{
	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];

	if (mode == MLX5_LAG_MODE_MPESW)
		return;

	if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
	    tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
		set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
}
0467
/* Compute the mode flags for activating a LAG: shared-FDB and MPESW both
 * force the native FDB selection mode; RoCE LAGs delegate to the RoCE
 * helper (which can fail), other modes to the offloads helper.
 */
static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
			      struct lag_tracker *tracker, bool shared_fdb,
			      unsigned long *flags)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;

	*flags = 0;
	if (shared_fdb) {
		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
	}

	if (mode == MLX5_LAG_MODE_MPESW)
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);

	if (roce_lag)
		return mlx5_lag_set_port_sel_mode_roce(ldev, flags);

	mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
	return 0;
}
0489
0490 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
0491 {
0492 int port_sel_mode = get_port_sel_mode(mode, flags);
0493
0494 switch (port_sel_mode) {
0495 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
0496 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
0497 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
0498 default: return "invalid";
0499 }
0500 }
0501
/* Create the LAG in FW and, when requested, switch both eswitches to a
 * single shared FDB. @tracker may be NULL (MPESW activation path). If the
 * shared-FDB setup fails the freshly created LAG is destroyed again; a
 * failure of that rollback leaves HW in a state requiring driver restart.
 * Returns 0 on success or a negative errno.
 */
static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   enum mlx5_lag_mode mode,
			   unsigned long flags)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	if (tracker)
		mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
		       shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
							      dev1->priv.eswitch);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	/* roll back the LAG if single-FDB configuration failed */
	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
	}

	return err;
}
0544
/* Activate the LAG in @mode: compute mode flags, build the TX affinity
 * map and the port selection FT (non-MPESW modes only), then create the
 * LAG in FW. On failure the port selection FT is destroyed again. For
 * active-backup non-RoCE bonds, ingress drop rules are installed on
 * inactive ports. On success ldev->mode/mode_flags are committed.
 * @tracker may be NULL in the MPESW path (it is only dereferenced in the
 * non-MPESW branch and behind a NULL check below).
 */
int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      enum mlx5_lag_mode mode,
		      bool shared_fdb)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	unsigned long flags = 0;
	int err;

	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
	if (err)
		return err;

	if (mode != MLX5_LAG_MODE_MPESW) {
		mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
			err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
						       ldev->v2p_map);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to create LAG port selection(%d)\n",
					      err);
				return err;
			}
		}
	}

	err = mlx5_create_lag(ldev, tracker, mode, flags);
	if (err) {
		/* undo the port selection FT created above */
		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
			mlx5_lag_port_sel_destroy(ldev);
		if (roce_lag)
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		else
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		return err;
	}

	if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !roce_lag)
		mlx5_lag_drop_rule_setup(ldev, tracker);

	ldev->mode = mode;
	ldev->mode_flags = flags;
	return 0;
}
0595
/* Deactivate the LAG: reset the in-memory mode first, dissolve the shared
 * FDB if one was configured, destroy the LAG in FW, then tear down the
 * port selection FT and any remaining drop rules. A FW destroy failure
 * leaves HW in a state requiring driver restart.
 */
static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	unsigned long flags = ldev->mode_flags;
	int err;

	ldev->mode = MLX5_LAG_MODE_NONE;
	ldev->mode_flags = 0;
	mlx5_lag_mp_reset(ldev);

	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
		mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
							 dev1->priv.eswitch);
		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	}

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
		mlx5_lag_port_sel_destroy(ldev);
	if (mlx5_lag_has_drop_rule(ldev))
		mlx5_lag_drop_rule_cleanup(ldev);

	return 0;
}
0636
0637 #define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2
/* Check whether the LAG can be activated: all PF slots must be populated;
 * with eswitch support, SR-IOV requires switchdev mode, all ports must be
 * in the same eswitch mode, and offloads mode only supports exactly
 * MLX5_LAG_OFFLOADS_SUPPORTED_PORTS ports; without eswitch support,
 * SR-IOV must be disabled everywhere.
 */
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
#ifdef CONFIG_MLX5_ESWITCH
	struct mlx5_core_dev *dev;
	u8 mode;
#endif
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (!ldev->pf[i].dev)
			return false;

#ifdef CONFIG_MLX5_ESWITCH
	dev = ldev->pf[MLX5_LAG_P1].dev;
	if ((mlx5_sriov_is_enabled(dev)) && !is_mdev_switchdev_mode(dev))
		return false;

	mode = mlx5_eswitch_mode(dev);
	for (i = 0; i < ldev->ports; i++)
		if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
			return false;

	if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
		return false;
#else
	for (i = 0; i < ldev->ports; i++)
		if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
			return false;
#endif
	return true;
}
0669
0670 static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
0671 {
0672 int i;
0673
0674 for (i = 0; i < ldev->ports; i++) {
0675 if (!ldev->pf[i].dev)
0676 continue;
0677
0678 if (ldev->pf[i].dev->priv.flags &
0679 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
0680 continue;
0681
0682 ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
0683 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
0684 }
0685 }
0686
/* Disable the IB auxiliary device on every populated PF and rescan,
 * unless all auxiliary devices are already administratively disabled.
 */
static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}
0703
/* Tear the LAG down. Shared-FDB: remove the IB devices first so they see
 * a consistent state. RoCE LAG: disable IB on port 1 and RoCE on the
 * secondary ports. After deactivation the IB devices are re-added and,
 * for shared FDB, the eswitch representors reloaded per device.
 */
void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	bool roce_lag;
	int err;
	int i;

	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		for (i = 1; i < ldev->ports; i++)
			mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
	}
}
0740
/* A shared FDB is possible only when both devices are in switchdev mode
 * with vport match metadata enabled, the eswitch-offloads devcom pairing
 * is established, and the required FW/eswitch capabilities (native FDB
 * selection, cross-eswitch root FT, shared ingress ACL) are present.
 */
bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;

	if (is_mdev_switchdev_mode(dev0) &&
	    is_mdev_switchdev_mode(dev1) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
	    mlx5_devcom_is_paired(dev0->priv.devcom,
				  MLX5_DEVCOM_ESW_OFFLOADS) &&
	    MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
	    MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
	    MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
		return true;

	return false;
}
0759
0760 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
0761 {
0762 bool roce_lag = true;
0763 int i;
0764
0765 for (i = 0; i < ldev->ports; i++)
0766 roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
0767
0768 #ifdef CONFIG_MLX5_ESWITCH
0769 for (i = 0; i < ldev->ports; i++)
0770 roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
0771 #endif
0772
0773 return roce_lag;
0774 }
0775
/* Modify (rebalance) an already-active, non-MPESW LAG while bonding holds. */
static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}
0781
/* Disable an active, non-MPESW LAG once bonding conditions no longer hold. */
static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return !do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}
0787
/* Reconcile LAG state with the tracked bonding state: activate a RoCE or
 * SR-IOV LAG (optionally with shared FDB) when a supported bond formed,
 * rebalance an active LAG on tracker changes, or disable it when the bond
 * dissolved. Multipath-managed devices are left alone. The shared-FDB
 * representor reload failure path fully unwinds the activation.
 */
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker = { };
	bool do_bond, roce_lag;
	int err;
	int i;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* multipath LAG is managed elsewhere */
		if (mlx5_lag_is_multipath(dev0))
			return;

		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		roce_lag = mlx5_lag_is_roce_lag(ldev);

		/* pull IB devices before reconfiguring the FDB/RoCE state */
		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_MODE_ROCE :
						   MLX5_LAG_MODE_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			for (i = 1; i < ldev->ports; i++)
				mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
		} else if (shared_fdb) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
			if (!err)
				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

			/* unwind the whole activation on reload failure */
			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
		mlx5_disable_lag(ldev);
	}
}
0856
/* Schedule the bond reconciliation work on the LAG workqueue after @delay. */
static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}
0861
/* Delayed-work handler driving mlx5_do_bond. Takes the global device-list
 * lock first, then the ldev lock; if either the trylock fails or a mode
 * change is in flight, back off and retry in one second instead of
 * blocking the workqueue.
 */
static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_do_bond(ldev);
	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}
0887
/* Handle NETDEV_CHANGEUPPER on a bond master: walk its slaves, record
 * which of our PF netdevs are enslaved (bond_status bitmap) and whether
 * any slave is inactive, then update the tracker. The bond counts as
 * "ours" only when every slave is one of our ports; it is offloadable
 * only in active-backup or hash TX mode. Returns nonzero when
 * tracker->is_bonded changed so the caller reschedules the bond work.
 */
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	bool has_inactive = 0;
	struct slave *slave;
	u8 bond_status = 0;
	int num_slaves = 0;
	int changed = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* scan the bond's slaves for our PF netdevs */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0) {
			slave = bond_slave_get_rcu(ndev_tmp);
			if (slave)
				has_inactive |= bond_is_slave_inactive(slave);
			bond_status |= (1 << idx);
		}

		num_slaves++;
	}
	rcu_read_unlock();

	/* none of our ports is part of this bond */
	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
		return 0;

	if (lag_upper_info) {
		tracker->tx_type = lag_upper_info->tx_type;
		tracker->hash_type = lag_upper_info->hash_type;
	}

	tracker->has_inactive = has_inactive;

	/* bond must consist of exactly our ports and nothing else */
	is_in_lag = num_slaves == ldev->ports &&
		    bond_status == GENMASK(ldev->ports - 1, 0);

	/* only active-backup and hash bonds can be offloaded */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		changed = 1;
	}

	if (!is_in_lag)
		return changed;

	if (!mlx5_lag_is_ready(ldev))
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
	else if (!mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	return changed;
}
0966
/* Handle NETDEV_CHANGELOWERSTATE on one of our slave netdevs: copy the
 * new lower-state (link up / tx enabled) into the tracker slot for that
 * port. Returns 1 when the tracker was updated, 0 when the event is not
 * relevant to this LAG.
 */
static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* the event may carry no state payload */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}
0993
/* Handle NETDEV_CHANGEINFODATA on a bond master: recompute whether any of
 * our enslaved ports is inactive and update tracker->has_inactive.
 * Returns 1 only when that flag actually changed.
 */
static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
					    struct lag_tracker *tracker,
					    struct net_device *ndev)
{
	struct net_device *ndev_tmp;
	struct slave *slave;
	bool has_inactive = 0;
	int idx;

	if (!netif_is_lag_master(ndev))
		return 0;

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx < 0)
			continue;

		slave = bond_slave_get_rcu(ndev_tmp);
		if (slave)
			has_inactive |= bond_is_slave_inactive(slave);
	}
	rcu_read_unlock();

	if (tracker->has_inactive == has_inactive)
		return 0;

	tracker->has_inactive = has_inactive;

	return 1;
}
1025
1026
/* Netdev notifier entry point: dispatch CHANGEUPPER / CHANGELOWERSTATE /
 * CHANGEINFODATA to their handlers against a local tracker copy, commit
 * the copy back, and kick the bond work when anything changed.
 */
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if (event != NETDEV_CHANGEUPPER &&
	    event != NETDEV_CHANGELOWERSTATE &&
	    event != NETDEV_CHANGEINFODATA)
		return NOTIFY_DONE;

	ldev = container_of(this, struct mlx5_lag, nb);

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	case NETDEV_CHANGEINFODATA:
		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}
1064
/* Cache @netdev in the PF slot matching @dev's index and reset that
 * slot's tracked link state, under lag_lock. Out-of-range indices are
 * ignored.
 */
static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
				 struct mlx5_core_dev *dev,
				 struct net_device *netdev)
{
	unsigned int fn = mlx5_get_dev_index(dev);
	unsigned long flags;

	if (fn >= ldev->ports)
		return;

	spin_lock_irqsave(&lag_lock, flags);
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;
	spin_unlock_irqrestore(&lag_lock, flags);
}
1081
1082 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
1083 struct net_device *netdev)
1084 {
1085 unsigned long flags;
1086 int i;
1087
1088 spin_lock_irqsave(&lag_lock, flags);
1089 for (i = 0; i < ldev->ports; i++) {
1090 if (ldev->pf[i].netdev == netdev) {
1091 ldev->pf[i].netdev = NULL;
1092 break;
1093 }
1094 }
1095 spin_unlock_irqrestore(&lag_lock, flags);
1096 }
1097
/* Register @dev in its PF slot and point it back at the shared ldev.
 * Out-of-range indices are ignored.
 */
static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev)
{
	unsigned int fn = mlx5_get_dev_index(dev);

	if (fn >= ldev->ports)
		return;

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;
}
1109
/* Unregister @dev from its PF slot and break the back-pointer; a no-op
 * when @dev is not part of this LAG.
 */
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == ldev->ports)
		return;

	ldev->pf[i].dev = NULL;
	dev->priv.lag = NULL;
}
1125
1126
/* Attach @dev to the ldev shared with its physical-function siblings,
 * allocating a fresh one (initial kref) if no sibling has one yet.
 * Allocation failure is swallowed (returns 0) — the device simply runs
 * without LAG. Returns -EAGAIN while a mode change is in progress so the
 * caller retries.
 */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

	tmp_dev = mlx5_get_next_phys_dev_lag(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
		/* first device holds the initial kref from kref_init */
		mlx5_ldev_add_mdev(ldev, dev);
		return 0;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		return -EAGAIN;
	}
	mlx5_ldev_get(ldev);
	mlx5_ldev_add_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);

	return 0;
}
1157
/* Detach @dev from its ldev and drop its reference. Spins (with sleeps)
 * until any in-flight mode change completes before removing the device.
 */
void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
recheck:
	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);
	mlx5_ldev_put(ldev);
}
1181
/* Attach @dev to LAG management if it is a multi-port LAG master with
 * vport-group-manager capability. Retries (with sleeps) while a mode
 * change blocks the attach, then creates the debugfs entries.
 */
void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS ||
	     MLX5_CAP_GEN(dev, num_lag_ports) <= 1))
		return;

recheck:
	mlx5_dev_list_lock();
	err = __mlx5_lag_dev_add_mdev(dev);
	mlx5_dev_list_unlock();

	if (err) {
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_add_debugfs(dev);
}
1203
1204 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
1205 struct net_device *netdev)
1206 {
1207 struct mlx5_lag *ldev;
1208 bool lag_is_active;
1209
1210 ldev = mlx5_lag_dev(dev);
1211 if (!ldev)
1212 return;
1213
1214 mutex_lock(&ldev->lock);
1215 mlx5_ldev_remove_netdev(ldev, netdev);
1216 clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1217
1218 lag_is_active = __mlx5_lag_is_active(ldev);
1219 mutex_unlock(&ldev->lock);
1220
1221 if (lag_is_active)
1222 mlx5_queue_bond_work(ldev, 0);
1223 }
1224
1225 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
1226 struct net_device *netdev)
1227 {
1228 struct mlx5_lag *ldev;
1229 int i;
1230
1231 ldev = mlx5_lag_dev(dev);
1232 if (!ldev)
1233 return;
1234
1235 mutex_lock(&ldev->lock);
1236 mlx5_ldev_add_netdev(ldev, dev, netdev);
1237
1238 for (i = 0; i < ldev->ports; i++)
1239 if (!ldev->pf[i].netdev)
1240 break;
1241
1242 if (i >= ldev->ports)
1243 set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1244 mutex_unlock(&ldev->lock);
1245 mlx5_queue_bond_work(ldev, 0);
1246 }
1247
1248 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
1249 {
1250 struct mlx5_lag *ldev;
1251 unsigned long flags;
1252 bool res;
1253
1254 spin_lock_irqsave(&lag_lock, flags);
1255 ldev = mlx5_lag_dev(dev);
1256 res = ldev && __mlx5_lag_is_roce(ldev);
1257 spin_unlock_irqrestore(&lag_lock, flags);
1258
1259 return res;
1260 }
1261 EXPORT_SYMBOL(mlx5_lag_is_roce);
1262
1263 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
1264 {
1265 struct mlx5_lag *ldev;
1266 unsigned long flags;
1267 bool res;
1268
1269 spin_lock_irqsave(&lag_lock, flags);
1270 ldev = mlx5_lag_dev(dev);
1271 res = ldev && __mlx5_lag_is_active(ldev);
1272 spin_unlock_irqrestore(&lag_lock, flags);
1273
1274 return res;
1275 }
1276 EXPORT_SYMBOL(mlx5_lag_is_active);
1277
1278 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
1279 {
1280 struct mlx5_lag *ldev;
1281 unsigned long flags;
1282 bool res;
1283
1284 spin_lock_irqsave(&lag_lock, flags);
1285 ldev = mlx5_lag_dev(dev);
1286 res = ldev && __mlx5_lag_is_active(ldev) &&
1287 dev == ldev->pf[MLX5_LAG_P1].dev;
1288 spin_unlock_irqrestore(&lag_lock, flags);
1289
1290 return res;
1291 }
1292 EXPORT_SYMBOL(mlx5_lag_is_master);
1293
1294 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
1295 {
1296 struct mlx5_lag *ldev;
1297 unsigned long flags;
1298 bool res;
1299
1300 spin_lock_irqsave(&lag_lock, flags);
1301 ldev = mlx5_lag_dev(dev);
1302 res = ldev && __mlx5_lag_is_sriov(ldev);
1303 spin_unlock_irqrestore(&lag_lock, flags);
1304
1305 return res;
1306 }
1307 EXPORT_SYMBOL(mlx5_lag_is_sriov);
1308
1309 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
1310 {
1311 struct mlx5_lag *ldev;
1312 unsigned long flags;
1313 bool res;
1314
1315 spin_lock_irqsave(&lag_lock, flags);
1316 ldev = mlx5_lag_dev(dev);
1317 res = ldev && __mlx5_lag_is_sriov(ldev) &&
1318 test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1319 spin_unlock_irqrestore(&lag_lock, flags);
1320
1321 return res;
1322 }
1323 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
1324
/* Block lag mode changes for @dev and tear down an active lag.
 * Each call bumps mode_changes_in_progress; callers must pair it with
 * mlx5_lag_enable_change(). Lock order is dev-list lock first, then the
 * per-ldev mutex -- consistent with __mlx5_lag_dev_add_mdev()'s caller.
 */
void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();
	mutex_lock(&ldev->lock);

	ldev->mode_changes_in_progress++;
	/* An active lag must be dismantled while changes are blocked. */
	if (__mlx5_lag_is_active(ldev))
		mlx5_disable_lag(ldev);

	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}
1343
/* Re-allow lag mode changes for @dev (pairs with
 * mlx5_lag_disable_change()) and queue bond re-evaluation so the lag
 * can be re-established if conditions still hold.
 */
void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	ldev->mode_changes_in_progress--;
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}
1357
/* Return the netdevice currently carrying traffic for @dev's RoCE lag,
 * with a reference held (caller must dev_put()), or NULL if @dev is not
 * in a RoCE lag.
 *
 * For active-backup bonds the last tx-enabled port wins, falling back
 * to the highest port if none is tx-enabled; otherwise (load-balancing
 * modes) the first port's netdev is returned.
 */
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		/* Pick the last port whose tx is enabled; with a single
		 * active slave this is simply the active one.
		 */
		for (i = 0; i < ldev->ports; i++)
			if (ldev->tracker.netdev_state[i].tx_enabled)
				ndev = ldev->pf[i].netdev;
		if (!ndev)
			ndev = ldev->pf[ldev->ports - 1].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	/* Take the reference while still under the lock, so the netdev
	 * cannot be released between selection and dev_hold().
	 */
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
1389
1390 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
1391 struct net_device *slave)
1392 {
1393 struct mlx5_lag *ldev;
1394 unsigned long flags;
1395 u8 port = 0;
1396 int i;
1397
1398 spin_lock_irqsave(&lag_lock, flags);
1399 ldev = mlx5_lag_dev(dev);
1400 if (!(ldev && __mlx5_lag_is_roce(ldev)))
1401 goto unlock;
1402
1403 for (i = 0; i < ldev->ports; i++) {
1404 if (ldev->pf[MLX5_LAG_P1].netdev == slave) {
1405 port = i;
1406 break;
1407 }
1408 }
1409
1410 port = ldev->v2p_map[port * ldev->buckets];
1411
1412 unlock:
1413 spin_unlock_irqrestore(&lag_lock, flags);
1414 return port;
1415 }
1416 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
1417
1418 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
1419 {
1420 struct mlx5_lag *ldev;
1421
1422 ldev = mlx5_lag_dev(dev);
1423 if (!ldev)
1424 return 0;
1425
1426 return ldev->ports;
1427 }
1428 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
1429
1430 struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
1431 {
1432 struct mlx5_core_dev *peer_dev = NULL;
1433 struct mlx5_lag *ldev;
1434 unsigned long flags;
1435
1436 spin_lock_irqsave(&lag_lock, flags);
1437 ldev = mlx5_lag_dev(dev);
1438 if (!ldev)
1439 goto unlock;
1440
1441 peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
1442 ldev->pf[MLX5_LAG_P2].dev :
1443 ldev->pf[MLX5_LAG_P1].dev;
1444
1445 unlock:
1446 spin_unlock_irqrestore(&lag_lock, flags);
1447 return peer_dev;
1448 }
1449 EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
1450
/* Query congestion statistics, summed over all devices in @dev's lag
 * (or just @dev if no lag is active).
 *
 * @values:       output array of @num_counters accumulators, zeroed here.
 * @num_counters: number of counters to extract per device.
 * @offsets:      byte offsets of each counter within the FW response.
 *
 * Returns 0 on success or a negative errno (allocation or command
 * failure). On command failure, @values holds partial sums.
 */
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev **mdev;
	struct mlx5_lag *ldev;
	unsigned long flags;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
	if (!mdev) {
		ret = -ENOMEM;
		goto free_out;
	}

	memset(values, 0, sizeof(*values) * num_counters);

	/* Snapshot the member devices under the spinlock, then issue the
	 * (sleeping) FW commands outside it.
	 */
	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = ldev->ports;
		for (i = 0; i < ldev->ports; i++)
			mdev[i] = ldev->pf[i].dev;
	} else {
		/* No active lag: query only the caller's device. */
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock_irqrestore(&lag_lock, flags);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free_mdev;

		/* Counters arrive big-endian; accumulate per offset. */
		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free_mdev:
	kvfree(mdev);
free_out:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);