0001 /*
0002  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
0003  *
0004  * This software is available to you under a choice of one of two
0005  * licenses.  You may choose to be licensed under the terms of the GNU
0006  * General Public License (GPL) Version 2, available from the file
0007  * COPYING in the main directory of this source tree, or the
0008  * OpenIB.org BSD license below:
0009  *
0010  *     Redistribution and use in source and binary forms, with or
0011  *     without modification, are permitted provided that the following
0012  *     conditions are met:
0013  *
0014  *      - Redistributions of source code must retain the above
0015  *        copyright notice, this list of conditions and the following
0016  *        disclaimer.
0017  *
0018  *      - Redistributions in binary form must reproduce the above
0019  *        copyright notice, this list of conditions and the following
0020  *        disclaimer in the documentation and/or other materials
0021  *        provided with the distribution.
0022  *
0023  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0024  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0025  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0026  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
0027  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
0028  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
0029  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0030  * SOFTWARE.
0031  */
0032 
0033 #include <linux/netdevice.h>
0034 #include <net/bonding.h>
0035 #include <linux/mlx5/driver.h>
0036 #include <linux/mlx5/eswitch.h>
0037 #include <linux/mlx5/vport.h>
0038 #include "lib/devcom.h"
0039 #include "mlx5_core.h"
0040 #include "eswitch.h"
0041 #include "esw/acl/ofld.h"
0042 #include "lag.h"
0043 #include "mp.h"
0044 #include "mpesw.h"
0045 
0046 enum {
0047     MLX5_LAG_EGRESS_PORT_1 = 1,
0048     MLX5_LAG_EGRESS_PORT_2,
0049 };
0050 
0051 /* General purpose, use for short periods of time.
0052  * Beware of lock dependencies (preferably, no locks should be acquired
0053  * under it).
0054  */
0055 static DEFINE_SPINLOCK(lag_lock);
0056 
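/* Resolve the port selection mode programmed into the LAG context:
 * hash-based LAG uses the port selection flow table, MPESW has its own
 * mode, and everything else falls back to queue affinity.
 */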
0057 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
0058 {
0059     if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
0060         return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
0061 
0062     if (mode == MLX5_LAG_MODE_MPESW)
0063         return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
0064 
0065     return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
0066 }
0067 
0068 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
0069                    unsigned long flags)
0070 {
0071     bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
0072                      &flags);
0073     int port_sel_mode = get_port_sel_mode(mode, flags);
0074     u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
0075     void *lag_ctx;
0076 
0077     lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
0078     MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
0079     MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
0080     if (port_sel_mode == MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY) {
0081         MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
0082         MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
0083     }
0084     MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
0085 
0086     return mlx5_cmd_exec_in(dev, create_lag, in);
0087 }
0088 
0089 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
0090                    u8 *ports)
0091 {
0092     u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
0093     void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
0094 
0095     MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
0096     MLX5_SET(modify_lag_in, in, field_select, 0x1);
0097 
0098     MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
0099     MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
0100 
0101     return mlx5_cmd_exec_in(dev, modify_lag, in);
0102 }
0103 
0104 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
0105 {
0106     u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
0107 
0108     MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
0109 
0110     return mlx5_cmd_exec_in(dev, create_vport_lag, in);
0111 }
0112 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
0113 
0114 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
0115 {
0116     u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
0117 
0118     MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
0119 
0120     return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
0121 }
0122 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
0123 
0124 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
0125                    u8 *ports, int *num_disabled)
0126 {
0127     int i;
0128 
0129     *num_disabled = 0;
0130     for (i = 0; i < num_ports; i++) {
0131         if (!tracker->netdev_state[i].tx_enabled ||
0132             !tracker->netdev_state[i].link_up)
0133             ports[(*num_disabled)++] = i;
0134     }
0135 }
0136 
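/* Collect the indices of ports that are link-up and have TX enabled.
 * If no port qualifies, fall back to the disabled set so the caller is
 * never handed an empty list.
 */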
0137 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
0138                u8 *ports, int *num_enabled)
0139 {
0140     int i;
0141 
0142     *num_enabled = 0;
0143     for (i = 0; i < num_ports; i++) {
0144         if (tracker->netdev_state[i].tx_enabled &&
0145             tracker->netdev_state[i].link_up)
0146             ports[(*num_enabled)++] = i;
0147     }
0148 
0149     if (*num_enabled == 0)
0150         mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
0151 }
0152 
0153 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
0154                    struct mlx5_lag *ldev,
0155                    struct lag_tracker *tracker,
0156                    unsigned long flags)
0157 {
0158     char buf[MLX5_MAX_PORTS * 10 + 1] = {};
0159     u8 enabled_ports[MLX5_MAX_PORTS] = {};
0160     int written = 0;
0161     int num_enabled;
0162     int idx;
0163     int err;
0164     int i;
0165     int j;
0166 
0167     if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
0168         mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
0169                       &num_enabled);
0170         for (i = 0; i < num_enabled; i++) {
0171             err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
0172             if (err != 3)
0173                 return;
0174             written += err;
0175         }
0176         buf[written - 2] = 0;
0177         mlx5_core_info(dev, "lag map active ports: %s\n", buf);
0178     } else {
0179         for (i = 0; i < ldev->ports; i++) {
0180             for (j = 0; j < ldev->buckets; j++) {
0181                 idx = i * ldev->buckets + j;
0182                 err = scnprintf(buf + written, 10,
0183                         " port %d:%d", i + 1, ldev->v2p_map[idx]);
0184                 if (err != 9)
0185                     return;
0186                 written += err;
0187             }
0188         }
0189         mlx5_core_info(dev, "lag map:%s\n", buf);
0190     }
0191 }
0192 
0193 static int mlx5_lag_netdev_event(struct notifier_block *this,
0194                  unsigned long event, void *ptr);
0195 static void mlx5_do_bond_work(struct work_struct *work);
0196 
0197 static void mlx5_ldev_free(struct kref *ref)
0198 {
0199     struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
0200 
0201     if (ldev->nb.notifier_call)
0202         unregister_netdevice_notifier_net(&init_net, &ldev->nb);
0203     mlx5_lag_mp_cleanup(ldev);
0204     mlx5_lag_mpesw_cleanup(ldev);
0205     cancel_work_sync(&ldev->mpesw_work);
0206     destroy_workqueue(ldev->wq);
0207     mutex_destroy(&ldev->lock);
0208     kfree(ldev);
0209 }
0210 
0211 static void mlx5_ldev_put(struct mlx5_lag *ldev)
0212 {
0213     kref_put(&ldev->ref, mlx5_ldev_free);
0214 }
0215 
0216 static void mlx5_ldev_get(struct mlx5_lag *ldev)
0217 {
0218     kref_get(&ldev->ref);
0219 }
0220 
0221 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
0222 {
0223     struct mlx5_lag *ldev;
0224     int err;
0225 
0226     ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
0227     if (!ldev)
0228         return NULL;
0229 
0230     ldev->wq = create_singlethread_workqueue("mlx5_lag");
0231     if (!ldev->wq) {
0232         kfree(ldev);
0233         return NULL;
0234     }
0235 
0236     kref_init(&ldev->ref);
0237     mutex_init(&ldev->lock);
0238     INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
0239 
0240     ldev->nb.notifier_call = mlx5_lag_netdev_event;
0241     if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
0242         ldev->nb.notifier_call = NULL;
0243         mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
0244     }
0245     ldev->mode = MLX5_LAG_MODE_NONE;
0246 
0247     err = mlx5_lag_mp_init(ldev);
0248     if (err)
0249         mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
0250                   err);
0251 
0252     mlx5_lag_mpesw_init(ldev);
0253     ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
0254     ldev->buckets = 1;
0255 
0256     return ldev;
0257 }
0258 
0259 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
0260                 struct net_device *ndev)
0261 {
0262     int i;
0263 
0264     for (i = 0; i < ldev->ports; i++)
0265         if (ldev->pf[i].netdev == ndev)
0266             return i;
0267 
0268     return -ENOENT;
0269 }
0270 
0271 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
0272 {
0273     return ldev->mode == MLX5_LAG_MODE_ROCE;
0274 }
0275 
0276 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
0277 {
0278     return ldev->mode == MLX5_LAG_MODE_SRIOV;
0279 }
0280 
0281 /* Create a mapping between steering slots and active ports.
0282  * Since there are ldev->buckets slots per port, first assume the
0283  * native mapping should be used.
0284  * If any ports are disabled, fill their slots with mappings that
0285  * point to active ports.
0286  */
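/* Worked example (illustrative): with 2 ports and 2 buckets and only
 * port 1 active, the native map 1 1 2 2 becomes 1 1 1 1. When more than
 * one port is active, each bucket of a disabled port is instead pointed
 * at a randomly chosen active port.
 */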
0287 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
0288                        u8 num_ports,
0289                        u8 buckets,
0290                        u8 *ports)
0291 {
0292     int disabled[MLX5_MAX_PORTS] = {};
0293     int enabled[MLX5_MAX_PORTS] = {};
0294     int disabled_ports_num = 0;
0295     int enabled_ports_num = 0;
0296     int idx;
0297     u32 rand;
0298     int i;
0299     int j;
0300 
0301     for (i = 0; i < num_ports; i++) {
0302         if (tracker->netdev_state[i].tx_enabled &&
0303             tracker->netdev_state[i].link_up)
0304             enabled[enabled_ports_num++] = i;
0305         else
0306             disabled[disabled_ports_num++] = i;
0307     }
0308 
0309     /* Use the native mapping by default, where each port's buckets
0310      * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc.
0311      */
0312     for (i = 0; i < num_ports; i++)
0313         for (j = 0; j < buckets; j++) {
0314             idx = i * buckets + j;
0315             ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
0316         }
0317 
0318     /* If all ports are disabled/enabled keep native mapping */
0319     if (enabled_ports_num == num_ports ||
0320         disabled_ports_num == num_ports)
0321         return;
0322 
0323     /* Go over the disabled ports and for each assign a random active port */
0324     for (i = 0; i < disabled_ports_num; i++) {
0325         for (j = 0; j < buckets; j++) {
0326             get_random_bytes(&rand, 4);
0327             ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
0328         }
0329     }
0330 }
0331 
0332 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
0333 {
0334     int i;
0335 
0336     for (i = 0; i < ldev->ports; i++)
0337         if (ldev->pf[i].has_drop)
0338             return true;
0339     return false;
0340 }
0341 
0342 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
0343 {
0344     int i;
0345 
0346     for (i = 0; i < ldev->ports; i++) {
0347         if (!ldev->pf[i].has_drop)
0348             continue;
0349 
0350         mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
0351                                  MLX5_VPORT_UPLINK);
0352         ldev->pf[i].has_drop = false;
0353     }
0354 }
0355 
0356 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
0357                      struct lag_tracker *tracker)
0358 {
0359     u8 disabled_ports[MLX5_MAX_PORTS] = {};
0360     struct mlx5_core_dev *dev;
0361     int disabled_index;
0362     int num_disabled;
0363     int err;
0364     int i;
0365 
0366     /* First delete the current drop rule so there won't be any dropped
0367      * packets
0368      */
0369     mlx5_lag_drop_rule_cleanup(ldev);
0370 
0371     if (!ldev->tracker.has_inactive)
0372         return;
0373 
0374     mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);
0375 
0376     for (i = 0; i < num_disabled; i++) {
0377         disabled_index = disabled_ports[i];
0378         dev = ldev->pf[disabled_index].dev;
0379         err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
0380                                   MLX5_VPORT_UPLINK);
0381         if (!err)
0382             ldev->pf[disabled_index].has_drop = true;
0383         else
0384             mlx5_core_err(dev,
0385                       "Failed to create lag drop rule, error: %d", err);
0386     }
0387 }
0388 
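/* Apply an updated port map: hash-based LAG rewrites the port selection
 * flow table, while queue-affinity LAG issues a MODIFY_LAG command.
 */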
0389 static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
0390 {
0391     struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
0392 
0393     if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags))
0394         return mlx5_lag_port_sel_modify(ldev, ports);
0395     return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
0396 }
0397 
0398 void mlx5_modify_lag(struct mlx5_lag *ldev,
0399              struct lag_tracker *tracker)
0400 {
0401     u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
0402     struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
0403     int idx;
0404     int err;
0405     int i;
0406     int j;
0407 
0408     mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);
0409 
0410     for (i = 0; i < ldev->ports; i++) {
0411         for (j = 0; j < ldev->buckets; j++) {
0412             idx = i * ldev->buckets + j;
0413             if (ports[idx] == ldev->v2p_map[idx])
0414                 continue;
0415             err = _mlx5_modify_lag(ldev, ports);
0416             if (err) {
0417                 mlx5_core_err(dev0,
0418                           "Failed to modify LAG (%d)\n",
0419                           err);
0420                 return;
0421             }
0422             memcpy(ldev->v2p_map, ports, sizeof(ports));
0423 
0424             mlx5_lag_print_mapping(dev0, ldev, tracker,
0425                            ldev->mode_flags);
0426             break;
0427         }
0428     }
0429 
0430     if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
0431         !(ldev->mode == MLX5_LAG_MODE_ROCE))
0432         mlx5_lag_drop_rule_setup(ldev, tracker);
0433 }
0434 
0435 #define MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED 4
0436 static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
0437                        unsigned long *flags)
0438 {
0439     struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];
0440 
0441     if (ldev->ports == MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED) {
0442         /* Four ports are supported only in hash mode */
0443         if (!MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table))
0444             return -EINVAL;
0445         set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
0446         if (ldev->ports > 2)
0447             ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
0448     }
0449 
0450     return 0;
0451 }
0452 
0453 static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
0454                         struct lag_tracker *tracker,
0455                         enum mlx5_lag_mode mode,
0456                         unsigned long *flags)
0457 {
0458     struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];
0459 
0460     if (mode == MLX5_LAG_MODE_MPESW)
0461         return;
0462 
0463     if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
0464         tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
0465         set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
0466 }
0467 
0468 static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
0469                   struct lag_tracker *tracker, bool shared_fdb,
0470                   unsigned long *flags)
0471 {
0472     bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
0473 
0474     *flags = 0;
0475     if (shared_fdb) {
0476         set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
0477         set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
0478     }
0479 
0480     if (mode == MLX5_LAG_MODE_MPESW)
0481         set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
0482 
0483     if (roce_lag)
0484         return mlx5_lag_set_port_sel_mode_roce(ldev, flags);
0485 
0486     mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
0487     return 0;
0488 }
0489 
0490 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
0491 {
0492     int port_sel_mode = get_port_sel_mode(mode, flags);
0493 
0494     switch (port_sel_mode) {
0495     case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
0496     case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
0497     case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
0498     default: return "invalid";
0499     }
0500 }
0501 
0502 static int mlx5_create_lag(struct mlx5_lag *ldev,
0503                struct lag_tracker *tracker,
0504                enum mlx5_lag_mode mode,
0505                unsigned long flags)
0506 {
0507     bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
0508     struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
0509     struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
0510     u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
0511     int err;
0512 
0513     if (tracker)
0514         mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
0515     mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
0516                shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
0517 
0518     err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
0519     if (err) {
0520         mlx5_core_err(dev0,
0521                   "Failed to create LAG (%d)\n",
0522                   err);
0523         return err;
0524     }
0525 
0526     if (shared_fdb) {
0527         err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
0528                                   dev1->priv.eswitch);
0529         if (err)
0530             mlx5_core_err(dev0, "Can't enable single FDB mode\n");
0531         else
0532             mlx5_core_info(dev0, "Operation mode is single FDB\n");
0533     }
0534 
0535     if (err) {
0536         MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
0537         if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
0538             mlx5_core_err(dev0,
0539                       "Failed to deactivate RoCE LAG; driver restart required\n");
0540     }
0541 
0542     return err;
0543 }
0544 
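/* Build the TX affinity map (except in MPESW mode), create the port
 * selection flow table when hash-based steering was chosen, and then
 * create the firmware LAG object; anything created so far is rolled back
 * on failure.
 */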
0545 int mlx5_activate_lag(struct mlx5_lag *ldev,
0546               struct lag_tracker *tracker,
0547               enum mlx5_lag_mode mode,
0548               bool shared_fdb)
0549 {
0550     bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
0551     struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
0552     unsigned long flags = 0;
0553     int err;
0554 
0555     err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
0556     if (err)
0557         return err;
0558 
0559     if (mode != MLX5_LAG_MODE_MPESW) {
0560         mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
0561         if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
0562             err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
0563                                ldev->v2p_map);
0564             if (err) {
0565                 mlx5_core_err(dev0,
0566                           "Failed to create LAG port selection(%d)\n",
0567                           err);
0568                 return err;
0569             }
0570         }
0571     }
0572 
0573     err = mlx5_create_lag(ldev, tracker, mode, flags);
0574     if (err) {
0575         if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
0576             mlx5_lag_port_sel_destroy(ldev);
0577         if (roce_lag)
0578             mlx5_core_err(dev0,
0579                       "Failed to activate RoCE LAG\n");
0580         else
0581             mlx5_core_err(dev0,
0582                       "Failed to activate VF LAG\n"
0583                       "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
0584         return err;
0585     }
0586 
0587     if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
0588         !roce_lag)
0589         mlx5_lag_drop_rule_setup(ldev, tracker);
0590 
0591     ldev->mode = mode;
0592     ldev->mode_flags = flags;
0593     return 0;
0594 }
0595 
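/* Destroy the firmware LAG object and undo the shared-FDB, port-selection
 * and drop-rule state that was set up for it.
 */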
0596 static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
0597 {
0598     struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
0599     struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
0600     u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
0601     bool roce_lag = __mlx5_lag_is_roce(ldev);
0602     unsigned long flags = ldev->mode_flags;
0603     int err;
0604 
0605     ldev->mode = MLX5_LAG_MODE_NONE;
0606     ldev->mode_flags = 0;
0607     mlx5_lag_mp_reset(ldev);
0608 
0609     if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
0610         mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
0611                              dev1->priv.eswitch);
0612         clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
0613     }
0614 
0615     MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
0616     err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
0617     if (err) {
0618         if (roce_lag) {
0619             mlx5_core_err(dev0,
0620                       "Failed to deactivate RoCE LAG; driver restart required\n");
0621         } else {
0622             mlx5_core_err(dev0,
0623                       "Failed to deactivate VF LAG; driver restart required\n"
0624                       "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
0625         }
0626         return err;
0627     }
0628 
0629     if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
0630         mlx5_lag_port_sel_destroy(ldev);
0631     if (mlx5_lag_has_drop_rule(ldev))
0632         mlx5_lag_drop_rule_cleanup(ldev);
0633 
0634     return 0;
0635 }
0636 
0637 #define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2
0638 static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
0639 {
0640 #ifdef CONFIG_MLX5_ESWITCH
0641     struct mlx5_core_dev *dev;
0642     u8 mode;
0643 #endif
0644     int i;
0645 
0646     for (i = 0; i < ldev->ports; i++)
0647         if (!ldev->pf[i].dev)
0648             return false;
0649 
0650 #ifdef CONFIG_MLX5_ESWITCH
0651     dev = ldev->pf[MLX5_LAG_P1].dev;
0652     if ((mlx5_sriov_is_enabled(dev)) && !is_mdev_switchdev_mode(dev))
0653         return false;
0654 
0655     mode = mlx5_eswitch_mode(dev);
0656     for (i = 0; i < ldev->ports; i++)
0657         if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
0658             return false;
0659 
0660     if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
0661         return false;
0662 #else
0663     for (i = 0; i < ldev->ports; i++)
0664         if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
0665             return false;
0666 #endif
0667     return true;
0668 }
0669 
0670 static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
0671 {
0672     int i;
0673 
0674     for (i = 0; i < ldev->ports; i++) {
0675         if (!ldev->pf[i].dev)
0676             continue;
0677 
0678         if (ldev->pf[i].dev->priv.flags &
0679             MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
0680             continue;
0681 
0682         ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
0683         mlx5_rescan_drivers_locked(ldev->pf[i].dev);
0684     }
0685 }
0686 
0687 static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
0688 {
0689     int i;
0690 
0691     for (i = 0; i < ldev->ports; i++) {
0692         if (!ldev->pf[i].dev)
0693             continue;
0694 
0695         if (ldev->pf[i].dev->priv.flags &
0696             MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
0697             continue;
0698 
0699         ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
0700         mlx5_rescan_drivers_locked(ldev->pf[i].dev);
0701     }
0702 }
0703 
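/* Tear down an active LAG: detach the IB devices (or disable RoCE on the
 * secondary ports), destroy the firmware LAG, and re-add the devices and
 * reload the eswitch representors as needed.
 */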
0704 void mlx5_disable_lag(struct mlx5_lag *ldev)
0705 {
0706     bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
0707     struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
0708     struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
0709     bool roce_lag;
0710     int err;
0711     int i;
0712 
0713     roce_lag = __mlx5_lag_is_roce(ldev);
0714 
0715     if (shared_fdb) {
0716         mlx5_lag_remove_devices(ldev);
0717     } else if (roce_lag) {
0718         if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
0719             dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
0720             mlx5_rescan_drivers_locked(dev0);
0721         }
0722         for (i = 1; i < ldev->ports; i++)
0723             mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
0724     }
0725 
0726     err = mlx5_deactivate_lag(ldev);
0727     if (err)
0728         return;
0729 
0730     if (shared_fdb || roce_lag)
0731         mlx5_lag_add_devices(ldev);
0732 
0733     if (shared_fdb) {
0734         if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
0735             mlx5_eswitch_reload_reps(dev0->priv.eswitch);
0736         if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
0737             mlx5_eswitch_reload_reps(dev1->priv.eswitch);
0738     }
0739 }
0740 
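/* Shared FDB is possible only when both PFs are in switchdev mode with
 * vport match metadata enabled, the eswitch devcom pair is established,
 * and the native FDB selection / shared ingress ACL capabilities are set.
 */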
0741 bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
0742 {
0743     struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
0744     struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
0745 
0746     if (is_mdev_switchdev_mode(dev0) &&
0747         is_mdev_switchdev_mode(dev1) &&
0748         mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
0749         mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
0750         mlx5_devcom_is_paired(dev0->priv.devcom,
0751                   MLX5_DEVCOM_ESW_OFFLOADS) &&
0752         MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
0753         MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
0754         MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
0755         return true;
0756 
0757     return false;
0758 }
0759 
0760 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
0761 {
0762     bool roce_lag = true;
0763     int i;
0764 
0765     for (i = 0; i < ldev->ports; i++)
0766         roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
0767 
0768 #ifdef CONFIG_MLX5_ESWITCH
0769     for (i = 0; i < ldev->ports; i++)
0770         roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
0771 #endif
0772 
0773     return roce_lag;
0774 }
0775 
0776 static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
0777 {
0778     return do_bond && __mlx5_lag_is_active(ldev) &&
0779            ldev->mode != MLX5_LAG_MODE_MPESW;
0780 }
0781 
0782 static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
0783 {
0784     return !do_bond && __mlx5_lag_is_active(ldev) &&
0785            ldev->mode != MLX5_LAG_MODE_MPESW;
0786 }
0787 
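/* Re-evaluate the tracker state and activate, modify or disable the
 * hardware LAG accordingly. Runs from the delayed bond work.
 */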
0788 static void mlx5_do_bond(struct mlx5_lag *ldev)
0789 {
0790     struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
0791     struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
0792     struct lag_tracker tracker = { };
0793     bool do_bond, roce_lag;
0794     int err;
0795     int i;
0796 
0797     if (!mlx5_lag_is_ready(ldev)) {
0798         do_bond = false;
0799     } else {
0800         /* VF LAG is in multipath mode, ignore bond change requests */
0801         if (mlx5_lag_is_multipath(dev0))
0802             return;
0803 
0804         tracker = ldev->tracker;
0805 
0806         do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
0807     }
0808 
0809     if (do_bond && !__mlx5_lag_is_active(ldev)) {
0810         bool shared_fdb = mlx5_shared_fdb_supported(ldev);
0811 
0812         roce_lag = mlx5_lag_is_roce_lag(ldev);
0813 
0814         if (shared_fdb || roce_lag)
0815             mlx5_lag_remove_devices(ldev);
0816 
0817         err = mlx5_activate_lag(ldev, &tracker,
0818                     roce_lag ? MLX5_LAG_MODE_ROCE :
0819                            MLX5_LAG_MODE_SRIOV,
0820                     shared_fdb);
0821         if (err) {
0822             if (shared_fdb || roce_lag)
0823                 mlx5_lag_add_devices(ldev);
0824 
0825             return;
0826         } else if (roce_lag) {
0827             dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
0828             mlx5_rescan_drivers_locked(dev0);
0829             for (i = 1; i < ldev->ports; i++)
0830                 mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
0831         } else if (shared_fdb) {
0832             dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
0833             mlx5_rescan_drivers_locked(dev0);
0834 
0835             err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
0836             if (!err)
0837                 err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);
0838 
0839             if (err) {
0840                 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
0841                 mlx5_rescan_drivers_locked(dev0);
0842                 mlx5_deactivate_lag(ldev);
0843                 mlx5_lag_add_devices(ldev);
0844                 mlx5_eswitch_reload_reps(dev0->priv.eswitch);
0845                 mlx5_eswitch_reload_reps(dev1->priv.eswitch);
0846                 mlx5_core_err(dev0, "Failed to enable lag\n");
0847                 return;
0848             }
0849         }
0850     } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
0851         mlx5_modify_lag(ldev, &tracker);
0852     } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
0853         mlx5_disable_lag(ldev);
0854     }
0855 }
0856 
0857 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
0858 {
0859     queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
0860 }
0861 
0862 static void mlx5_do_bond_work(struct work_struct *work)
0863 {
0864     struct delayed_work *delayed_work = to_delayed_work(work);
0865     struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
0866                          bond_work);
0867     int status;
0868 
0869     status = mlx5_dev_list_trylock();
0870     if (!status) {
0871         mlx5_queue_bond_work(ldev, HZ);
0872         return;
0873     }
0874 
0875     mutex_lock(&ldev->lock);
0876     if (ldev->mode_changes_in_progress) {
0877         mutex_unlock(&ldev->lock);
0878         mlx5_dev_list_unlock();
0879         mlx5_queue_bond_work(ldev, HZ);
0880         return;
0881     }
0882 
0883     mlx5_do_bond(ldev);
0884     mutex_unlock(&ldev->lock);
0885     mlx5_dev_list_unlock();
0886 }
0887 
0888 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
0889                      struct lag_tracker *tracker,
0890                      struct netdev_notifier_changeupper_info *info)
0891 {
0892     struct net_device *upper = info->upper_dev, *ndev_tmp;
0893     struct netdev_lag_upper_info *lag_upper_info = NULL;
0894     bool is_bonded, is_in_lag, mode_supported;
0895     bool has_inactive = 0;
0896     struct slave *slave;
0897     u8 bond_status = 0;
0898     int num_slaves = 0;
0899     int changed = 0;
0900     int idx;
0901 
0902     if (!netif_is_lag_master(upper))
0903         return 0;
0904 
0905     if (info->linking)
0906         lag_upper_info = info->upper_info;
0907 
0908     /* The event may still be of interest if the slave does not belong to
0909      * us, but is enslaved to a master which has one or more of our netdevs
0910      * as slaves (e.g., if a new slave is added to a master that bonds two
0911      * of our netdevs, we should unbond).
0912      */
0913     rcu_read_lock();
0914     for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
0915         idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
0916         if (idx >= 0) {
0917             slave = bond_slave_get_rcu(ndev_tmp);
0918             if (slave)
0919                 has_inactive |= bond_is_slave_inactive(slave);
0920             bond_status |= (1 << idx);
0921         }
0922 
0923         num_slaves++;
0924     }
0925     rcu_read_unlock();
0926 
0927     /* None of this lagdev's netdevs are slaves of this master. */
0928     if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
0929         return 0;
0930 
0931     if (lag_upper_info) {
0932         tracker->tx_type = lag_upper_info->tx_type;
0933         tracker->hash_type = lag_upper_info->hash_type;
0934     }
0935 
0936     tracker->has_inactive = has_inactive;
0937     /* Determine bonding status:
0938      * A device is considered bonded if all of its physical ports are
0939      * slaves of the same lag master, and the master has no other slaves.
0940      */
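    /* e.g. with 2 ports: num_slaves must be 2 and bond_status must be 0x3 */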
0941     is_in_lag = num_slaves == ldev->ports &&
0942         bond_status == GENMASK(ldev->ports - 1, 0);
0943 
0944     /* Lag mode must be activebackup or hash. */
0945     mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
0946              tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
0947 
0948     is_bonded = is_in_lag && mode_supported;
0949     if (tracker->is_bonded != is_bonded) {
0950         tracker->is_bonded = is_bonded;
0951         changed = 1;
0952     }
0953 
0954     if (!is_in_lag)
0955         return changed;
0956 
0957     if (!mlx5_lag_is_ready(ldev))
0958         NL_SET_ERR_MSG_MOD(info->info.extack,
0959                    "Can't activate LAG offload, PF is configured with more than 64 VFs");
0960     else if (!mode_supported)
0961         NL_SET_ERR_MSG_MOD(info->info.extack,
0962                    "Can't activate LAG offload, TX type isn't supported");
0963 
0964     return changed;
0965 }
0966 
0967 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
0968                           struct lag_tracker *tracker,
0969                           struct net_device *ndev,
0970                           struct netdev_notifier_changelowerstate_info *info)
0971 {
0972     struct netdev_lag_lower_state_info *lag_lower_info;
0973     int idx;
0974 
0975     if (!netif_is_lag_port(ndev))
0976         return 0;
0977 
0978     idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
0979     if (idx < 0)
0980         return 0;
0981 
0982     /* This information is used to determine virtual to physical
0983      * port mapping.
0984      */
0985     lag_lower_info = info->lower_state_info;
0986     if (!lag_lower_info)
0987         return 0;
0988 
0989     tracker->netdev_state[idx] = *lag_lower_info;
0990 
0991     return 1;
0992 }
0993 
0994 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
0995                         struct lag_tracker *tracker,
0996                         struct net_device *ndev)
0997 {
0998     struct net_device *ndev_tmp;
0999     struct slave *slave;
1000     bool has_inactive = 0;
1001     int idx;
1002 
1003     if (!netif_is_lag_master(ndev))
1004         return 0;
1005 
1006     rcu_read_lock();
1007     for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
1008         idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
1009         if (idx < 0)
1010             continue;
1011 
1012         slave = bond_slave_get_rcu(ndev_tmp);
1013         if (slave)
1014             has_inactive |= bond_is_slave_inactive(slave);
1015     }
1016     rcu_read_unlock();
1017 
1018     if (tracker->has_inactive == has_inactive)
1019         return 0;
1020 
1021     tracker->has_inactive = has_inactive;
1022 
1023     return 1;
1024 }
1025 
1026 /* This handler is always registered to netdev events. */
1027 static int mlx5_lag_netdev_event(struct notifier_block *this,
1028                  unsigned long event, void *ptr)
1029 {
1030     struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
1031     struct lag_tracker tracker;
1032     struct mlx5_lag *ldev;
1033     int changed = 0;
1034 
1035     if (event != NETDEV_CHANGEUPPER &&
1036         event != NETDEV_CHANGELOWERSTATE &&
1037         event != NETDEV_CHANGEINFODATA)
1038         return NOTIFY_DONE;
1039 
1040     ldev    = container_of(this, struct mlx5_lag, nb);
1041 
1042     tracker = ldev->tracker;
1043 
1044     switch (event) {
1045     case NETDEV_CHANGEUPPER:
1046         changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
1047         break;
1048     case NETDEV_CHANGELOWERSTATE:
1049         changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
1050                                  ndev, ptr);
1051         break;
1052     case NETDEV_CHANGEINFODATA:
1053         changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
1054         break;
1055     }
1056 
1057     ldev->tracker = tracker;
1058 
1059     if (changed)
1060         mlx5_queue_bond_work(ldev, 0);
1061 
1062     return NOTIFY_DONE;
1063 }
1064 
1065 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
1066                  struct mlx5_core_dev *dev,
1067                  struct net_device *netdev)
1068 {
1069     unsigned int fn = mlx5_get_dev_index(dev);
1070     unsigned long flags;
1071 
1072     if (fn >= ldev->ports)
1073         return;
1074 
1075     spin_lock_irqsave(&lag_lock, flags);
1076     ldev->pf[fn].netdev = netdev;
1077     ldev->tracker.netdev_state[fn].link_up = 0;
1078     ldev->tracker.netdev_state[fn].tx_enabled = 0;
1079     spin_unlock_irqrestore(&lag_lock, flags);
1080 }
1081 
1082 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
1083                     struct net_device *netdev)
1084 {
1085     unsigned long flags;
1086     int i;
1087 
1088     spin_lock_irqsave(&lag_lock, flags);
1089     for (i = 0; i < ldev->ports; i++) {
1090         if (ldev->pf[i].netdev == netdev) {
1091             ldev->pf[i].netdev = NULL;
1092             break;
1093         }
1094     }
1095     spin_unlock_irqrestore(&lag_lock, flags);
1096 }
1097 
1098 static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
1099                    struct mlx5_core_dev *dev)
1100 {
1101     unsigned int fn = mlx5_get_dev_index(dev);
1102 
1103     if (fn >= ldev->ports)
1104         return;
1105 
1106     ldev->pf[fn].dev = dev;
1107     dev->priv.lag = ldev;
1108 }
1109 
1110 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
1111                   struct mlx5_core_dev *dev)
1112 {
1113     int i;
1114 
1115     for (i = 0; i < ldev->ports; i++)
1116         if (ldev->pf[i].dev == dev)
1117             break;
1118 
1119     if (i == ldev->ports)
1120         return;
1121 
1122     ldev->pf[i].dev = NULL;
1123     dev->priv.lag = NULL;
1124 }
1125 
1126 /* Must be called with intf_mutex held */
1127 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
1128 {
1129     struct mlx5_lag *ldev = NULL;
1130     struct mlx5_core_dev *tmp_dev;
1131 
1132     tmp_dev = mlx5_get_next_phys_dev_lag(dev);
1133     if (tmp_dev)
1134         ldev = tmp_dev->priv.lag;
1135 
1136     if (!ldev) {
1137         ldev = mlx5_lag_dev_alloc(dev);
1138         if (!ldev) {
1139             mlx5_core_err(dev, "Failed to alloc lag dev\n");
1140             return 0;
1141         }
1142         mlx5_ldev_add_mdev(ldev, dev);
1143         return 0;
1144     }
1145 
1146     mutex_lock(&ldev->lock);
1147     if (ldev->mode_changes_in_progress) {
1148         mutex_unlock(&ldev->lock);
1149         return -EAGAIN;
1150     }
1151     mlx5_ldev_get(ldev);
1152     mlx5_ldev_add_mdev(ldev, dev);
1153     mutex_unlock(&ldev->lock);
1154 
1155     return 0;
1156 }
1157 
1158 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
1159 {
1160     struct mlx5_lag *ldev;
1161 
1162     ldev = mlx5_lag_dev(dev);
1163     if (!ldev)
1164         return;
1165 
1166     /* mdev is being removed, might as well remove debugfs
1167      * as early as possible.
1168      */
1169     mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
1170 recheck:
1171     mutex_lock(&ldev->lock);
1172     if (ldev->mode_changes_in_progress) {
1173         mutex_unlock(&ldev->lock);
1174         msleep(100);
1175         goto recheck;
1176     }
1177     mlx5_ldev_remove_mdev(ldev, dev);
1178     mutex_unlock(&ldev->lock);
1179     mlx5_ldev_put(ldev);
1180 }
1181 
1182 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
1183 {
1184     int err;
1185 
1186     if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
1187         !MLX5_CAP_GEN(dev, lag_master) ||
1188         (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS ||
1189          MLX5_CAP_GEN(dev, num_lag_ports) <= 1))
1190         return;
1191 
1192 recheck:
1193     mlx5_dev_list_lock();
1194     err = __mlx5_lag_dev_add_mdev(dev);
1195     mlx5_dev_list_unlock();
1196 
1197     if (err) {
1198         msleep(100);
1199         goto recheck;
1200     }
1201     mlx5_ldev_add_debugfs(dev);
1202 }
1203 
1204 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
1205                 struct net_device *netdev)
1206 {
1207     struct mlx5_lag *ldev;
1208     bool lag_is_active;
1209 
1210     ldev = mlx5_lag_dev(dev);
1211     if (!ldev)
1212         return;
1213 
1214     mutex_lock(&ldev->lock);
1215     mlx5_ldev_remove_netdev(ldev, netdev);
1216     clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1217 
1218     lag_is_active = __mlx5_lag_is_active(ldev);
1219     mutex_unlock(&ldev->lock);
1220 
1221     if (lag_is_active)
1222         mlx5_queue_bond_work(ldev, 0);
1223 }
1224 
1225 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
1226              struct net_device *netdev)
1227 {
1228     struct mlx5_lag *ldev;
1229     int i;
1230 
1231     ldev = mlx5_lag_dev(dev);
1232     if (!ldev)
1233         return;
1234 
1235     mutex_lock(&ldev->lock);
1236     mlx5_ldev_add_netdev(ldev, dev, netdev);
1237 
1238     for (i = 0; i < ldev->ports; i++)
1239         if (!ldev->pf[i].netdev)
1240             break;
1241 
1242     if (i >= ldev->ports)
1243         set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1244     mutex_unlock(&ldev->lock);
1245     mlx5_queue_bond_work(ldev, 0);
1246 }
1247 
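/* The mlx5_lag_is_* helpers below are exported for use by other mlx5
 * drivers; they take lag_lock so the answer is consistent with concurrent
 * netdev/mdev updates.
 */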
1248 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
1249 {
1250     struct mlx5_lag *ldev;
1251     unsigned long flags;
1252     bool res;
1253 
1254     spin_lock_irqsave(&lag_lock, flags);
1255     ldev = mlx5_lag_dev(dev);
1256     res  = ldev && __mlx5_lag_is_roce(ldev);
1257     spin_unlock_irqrestore(&lag_lock, flags);
1258 
1259     return res;
1260 }
1261 EXPORT_SYMBOL(mlx5_lag_is_roce);
1262 
1263 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
1264 {
1265     struct mlx5_lag *ldev;
1266     unsigned long flags;
1267     bool res;
1268 
1269     spin_lock_irqsave(&lag_lock, flags);
1270     ldev = mlx5_lag_dev(dev);
1271     res  = ldev && __mlx5_lag_is_active(ldev);
1272     spin_unlock_irqrestore(&lag_lock, flags);
1273 
1274     return res;
1275 }
1276 EXPORT_SYMBOL(mlx5_lag_is_active);
1277 
1278 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
1279 {
1280     struct mlx5_lag *ldev;
1281     unsigned long flags;
1282     bool res;
1283 
1284     spin_lock_irqsave(&lag_lock, flags);
1285     ldev = mlx5_lag_dev(dev);
1286     res = ldev && __mlx5_lag_is_active(ldev) &&
1287         dev == ldev->pf[MLX5_LAG_P1].dev;
1288     spin_unlock_irqrestore(&lag_lock, flags);
1289 
1290     return res;
1291 }
1292 EXPORT_SYMBOL(mlx5_lag_is_master);
1293 
1294 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
1295 {
1296     struct mlx5_lag *ldev;
1297     unsigned long flags;
1298     bool res;
1299 
1300     spin_lock_irqsave(&lag_lock, flags);
1301     ldev = mlx5_lag_dev(dev);
1302     res  = ldev && __mlx5_lag_is_sriov(ldev);
1303     spin_unlock_irqrestore(&lag_lock, flags);
1304 
1305     return res;
1306 }
1307 EXPORT_SYMBOL(mlx5_lag_is_sriov);
1308 
1309 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
1310 {
1311     struct mlx5_lag *ldev;
1312     unsigned long flags;
1313     bool res;
1314 
1315     spin_lock_irqsave(&lag_lock, flags);
1316     ldev = mlx5_lag_dev(dev);
1317     res = ldev && __mlx5_lag_is_sriov(ldev) &&
1318           test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1319     spin_unlock_irqrestore(&lag_lock, flags);
1320 
1321     return res;
1322 }
1323 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
1324 
1325 void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
1326 {
1327     struct mlx5_lag *ldev;
1328 
1329     ldev = mlx5_lag_dev(dev);
1330     if (!ldev)
1331         return;
1332 
1333     mlx5_dev_list_lock();
1334     mutex_lock(&ldev->lock);
1335 
1336     ldev->mode_changes_in_progress++;
1337     if (__mlx5_lag_is_active(ldev))
1338         mlx5_disable_lag(ldev);
1339 
1340     mutex_unlock(&ldev->lock);
1341     mlx5_dev_list_unlock();
1342 }
1343 
1344 void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
1345 {
1346     struct mlx5_lag *ldev;
1347 
1348     ldev = mlx5_lag_dev(dev);
1349     if (!ldev)
1350         return;
1351 
1352     mutex_lock(&ldev->lock);
1353     ldev->mode_changes_in_progress--;
1354     mutex_unlock(&ldev->lock);
1355     mlx5_queue_bond_work(ldev, 0);
1356 }
1357 
1358 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
1359 {
1360     struct net_device *ndev = NULL;
1361     struct mlx5_lag *ldev;
1362     unsigned long flags;
1363     int i;
1364 
1365     spin_lock_irqsave(&lag_lock, flags);
1366     ldev = mlx5_lag_dev(dev);
1367 
1368     if (!(ldev && __mlx5_lag_is_roce(ldev)))
1369         goto unlock;
1370 
1371     if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
1372         for (i = 0; i < ldev->ports; i++)
1373             if (ldev->tracker.netdev_state[i].tx_enabled)
1374                 ndev = ldev->pf[i].netdev;
1375         if (!ndev)
1376             ndev = ldev->pf[ldev->ports - 1].netdev;
1377     } else {
1378         ndev = ldev->pf[MLX5_LAG_P1].netdev;
1379     }
1380     if (ndev)
1381         dev_hold(ndev);
1382 
1383 unlock:
1384     spin_unlock_irqrestore(&lag_lock, flags);
1385 
1386     return ndev;
1387 }
1388 EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
1389 
1390 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
1391                struct net_device *slave)
1392 {
1393     struct mlx5_lag *ldev;
1394     unsigned long flags;
1395     u8 port = 0;
1396     int i;
1397 
1398     spin_lock_irqsave(&lag_lock, flags);
1399     ldev = mlx5_lag_dev(dev);
1400     if (!(ldev && __mlx5_lag_is_roce(ldev)))
1401         goto unlock;
1402 
1403     for (i = 0; i < ldev->ports; i++) {
1404         if (ldev->pf[i].netdev == slave) {
1405             port = i;
1406             break;
1407         }
1408     }
1409 
1410     port = ldev->v2p_map[port * ldev->buckets];
1411 
1412 unlock:
1413     spin_unlock_irqrestore(&lag_lock, flags);
1414     return port;
1415 }
1416 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
1417 
1418 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
1419 {
1420     struct mlx5_lag *ldev;
1421 
1422     ldev = mlx5_lag_dev(dev);
1423     if (!ldev)
1424         return 0;
1425 
1426     return ldev->ports;
1427 }
1428 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
1429 
1430 struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
1431 {
1432     struct mlx5_core_dev *peer_dev = NULL;
1433     struct mlx5_lag *ldev;
1434     unsigned long flags;
1435 
1436     spin_lock_irqsave(&lag_lock, flags);
1437     ldev = mlx5_lag_dev(dev);
1438     if (!ldev)
1439         goto unlock;
1440 
1441     peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
1442                ldev->pf[MLX5_LAG_P2].dev :
1443                ldev->pf[MLX5_LAG_P1].dev;
1444 
1445 unlock:
1446     spin_unlock_irqrestore(&lag_lock, flags);
1447     return peer_dev;
1448 }
1449 EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
1450 
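/* Illustrative (hypothetical) usage sketch, not taken from an in-tree
 * caller: the caller supplies byte offsets into query_cong_statistics_out
 * and gets back per-counter sums across all active LAG ports, e.g.
 *
 *	u64 vals[2];
 *	size_t offs[2] = { off_a, off_b };	// offsets are placeholders
 *	int err = mlx5_lag_query_cong_counters(mdev, vals, 2, offs);
 */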
1451 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
1452                  u64 *values,
1453                  int num_counters,
1454                  size_t *offsets)
1455 {
1456     int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
1457     struct mlx5_core_dev **mdev;
1458     struct mlx5_lag *ldev;
1459     unsigned long flags;
1460     int num_ports;
1461     int ret, i, j;
1462     void *out;
1463 
1464     out = kvzalloc(outlen, GFP_KERNEL);
1465     if (!out)
1466         return -ENOMEM;
1467 
1468     mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
1469     if (!mdev) {
1470         ret = -ENOMEM;
1471         goto free_out;
1472     }
1473 
1474     memset(values, 0, sizeof(*values) * num_counters);
1475 
1476     spin_lock_irqsave(&lag_lock, flags);
1477     ldev = mlx5_lag_dev(dev);
1478     if (ldev && __mlx5_lag_is_active(ldev)) {
1479         num_ports = ldev->ports;
1480         for (i = 0; i < ldev->ports; i++)
1481             mdev[i] = ldev->pf[i].dev;
1482     } else {
1483         num_ports = 1;
1484         mdev[MLX5_LAG_P1] = dev;
1485     }
1486     spin_unlock_irqrestore(&lag_lock, flags);
1487 
1488     for (i = 0; i < num_ports; ++i) {
1489         u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
1490 
1491         MLX5_SET(query_cong_statistics_in, in, opcode,
1492              MLX5_CMD_OP_QUERY_CONG_STATISTICS);
1493         ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
1494                       out);
1495         if (ret)
1496             goto free_mdev;
1497 
1498         for (j = 0; j < num_counters; ++j)
1499             values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
1500     }
1501 
1502 free_mdev:
1503     kvfree(mdev);
1504 free_out:
1505     kvfree(out);
1506     return ret;
1507 }
1508 EXPORT_SYMBOL(mlx5_lag_query_cong_counters);