Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
0002 /* Copyright (c) 2019 Mellanox Technologies. */
0003 
0004 #include <linux/netdevice.h>
0005 #include <net/nexthop.h>
0006 #include "lag/lag.h"
0007 #include "lag/mp.h"
0008 #include "mlx5_core.h"
0009 #include "eswitch.h"
0010 #include "lib/mlx5.h"
0011 
/* True when this LAG is currently bonded in multipath mode. */
static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
}
0016 
0017 static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
0018 {
0019     if (!mlx5_lag_is_ready(ldev))
0020         return false;
0021 
0022     if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
0023         return false;
0024 
0025     return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
0026                      ldev->pf[MLX5_LAG_P2].dev);
0027 }
0028 
0029 bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
0030 {
0031     struct mlx5_lag *ldev;
0032     bool res;
0033 
0034     ldev = mlx5_lag_dev(dev);
0035     res  = ldev && __mlx5_lag_is_multipath(ldev);
0036 
0037     return res;
0038 }
0039 
0040 /**
0041  * mlx5_lag_set_port_affinity
0042  *
0043  * @ldev: lag device
0044  * @port:
0045  *     0 - set normal affinity.
0046  *     1 - set affinity to port 1.
0047  *     2 - set affinity to port 2.
0048  *
0049  **/
0050 static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
0051                        enum mlx5_lag_port_affinity port)
0052 {
0053     struct lag_tracker tracker = {};
0054 
0055     if (!__mlx5_lag_is_multipath(ldev))
0056         return;
0057 
0058     switch (port) {
0059     case MLX5_LAG_NORMAL_AFFINITY:
0060         tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
0061         tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
0062         tracker.netdev_state[MLX5_LAG_P1].link_up = true;
0063         tracker.netdev_state[MLX5_LAG_P2].link_up = true;
0064         break;
0065     case MLX5_LAG_P1_AFFINITY:
0066         tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
0067         tracker.netdev_state[MLX5_LAG_P1].link_up = true;
0068         tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
0069         tracker.netdev_state[MLX5_LAG_P2].link_up = false;
0070         break;
0071     case MLX5_LAG_P2_AFFINITY:
0072         tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
0073         tracker.netdev_state[MLX5_LAG_P1].link_up = false;
0074         tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
0075         tracker.netdev_state[MLX5_LAG_P2].link_up = true;
0076         break;
0077     default:
0078         mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
0079                    "Invalid affinity port %d", port);
0080         return;
0081     }
0082 
0083     if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
0084         mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
0085                      MLX5_DEV_EVENT_PORT_AFFINITY,
0086                      (void *)0);
0087 
0088     if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
0089         mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
0090                      MLX5_DEV_EVENT_PORT_AFFINITY,
0091                      (void *)0);
0092 
0093     mlx5_modify_lag(ldev, &tracker);
0094 }
0095 
0096 static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
0097 {
0098     struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
0099 
0100     flush_workqueue(mp->wq);
0101 }
0102 
0103 static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
0104 {
0105     mp->fib.mfi = fi;
0106     mp->fib.priority = fi->fib_priority;
0107     mp->fib.dst = dst;
0108     mp->fib.dst_len = dst_len;
0109 }
0110 
/* Deferred FIB event: allocated in the (atomic) notifier callback and
 * processed on the lag_mp workqueue by mlx5_lag_fib_update().
 */
struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;		/* owning LAG device */
	unsigned long event;		/* FIB_EVENT_* being handled */
	union {				/* payload, selected by @event */
		struct fib_entry_notifier_info fen_info;	/* route add/replace/del */
		struct fib_nh_notifier_info fnh_info;		/* next hop add/del */
	};
};
0120 
/* Handle an IPv4 route add/replace/delete and decide whether to start,
 * update or stop tracking it for multipath LAG offload.
 * Runs on the lag_mp workqueue with RTNL held (see mlx5_lag_fib_update).
 */
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
				     struct fib_entry_notifier_info *fen_info)
{
	struct fib_info *fi = fen_info->fi;
	struct lag_mp *mp = &ldev->lag_mp;
	struct fib_nh *fib_nh0, *fib_nh1;
	unsigned int nhs;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* stop track */
		if (mp->fib.mfi == fi)
			mp->fib.mfi = NULL;
		return;
	}

	/* Handle multipath entry with lower priority value: a different
	 * route (different fib_info AND different dst/dst_len) only
	 * replaces the tracked one if it has a strictly better (lower)
	 * priority.
	 */
	if (mp->fib.mfi && mp->fib.mfi != fi &&
	    (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
	    fi->fib_priority >= mp->fib.priority)
		return;

	/* Handle add/replace event */
	nhs = fib_info_num_path(fi);
	if (nhs == 1) {
		/* Single-path route: if LAG is already active, pin all
		 * traffic to the port that owns the next hop's netdev.
		 */
		if (__mlx5_lag_is_active(ldev)) {
			struct fib_nh *nh = fib_info_nh(fi, 0);
			struct net_device *nh_dev = nh->fib_nh_dev;
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

			if (i < 0)
				return;

			/* netdev index is 0-based; affinity ports are 1-based */
			i++;
			mlx5_lag_set_port_affinity(ldev, i);
			mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
		}

		return;
	}

	if (nhs != 2)
		return;

	/* Verify next hops are ports of the same hca */
	fib_nh0 = fib_info_nh(fi, 0);
	fib_nh1 = fib_info_nh(fi, 1);
	if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
	    !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Multipath offload require two ports of the same HCA\n");
		return;
	}

	/* First time we see multipath route */
	if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		/* Activate LAG in multipath mode using the current link state. */
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}
0188 
/* React to a single next hop of the tracked multipath route going away
 * or coming back, and re-steer port affinity accordingly.
 * Runs on the lag_mp workqueue with RTNL held (see mlx5_lag_fib_update).
 */
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check the nh event is related to the route */
	if (!mp->fib.mfi || mp->fib.mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			/* Map the 0-based index of the removed nh to the
			 * 1-based id of the OTHER port (0 -> 2, 1 -> 1),
			 * steering all traffic to the surviving port.
			 */
			i = (i + 1) % 2 + 1; /* peer port */
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		/* Both paths available again: restore normal affinity. */
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}
0213 
0214 static void mlx5_lag_fib_update(struct work_struct *work)
0215 {
0216     struct mlx5_fib_event_work *fib_work =
0217         container_of(work, struct mlx5_fib_event_work, work);
0218     struct mlx5_lag *ldev = fib_work->ldev;
0219     struct fib_nh *fib_nh;
0220 
0221     /* Protect internal structures from changes */
0222     rtnl_lock();
0223     switch (fib_work->event) {
0224     case FIB_EVENT_ENTRY_REPLACE:
0225     case FIB_EVENT_ENTRY_DEL:
0226         mlx5_lag_fib_route_event(ldev, fib_work->event,
0227                      &fib_work->fen_info);
0228         fib_info_put(fib_work->fen_info.fi);
0229         break;
0230     case FIB_EVENT_NH_ADD:
0231     case FIB_EVENT_NH_DEL:
0232         fib_nh = fib_work->fnh_info.fib_nh;
0233         mlx5_lag_fib_nexthop_event(ldev,
0234                        fib_work->event,
0235                        fib_work->fnh_info.fib_nh,
0236                        fib_nh->nh_parent);
0237         fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
0238         break;
0239     }
0240 
0241     rtnl_unlock();
0242     kfree(fib_work);
0243 }
0244 
0245 static struct mlx5_fib_event_work *
0246 mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
0247 {
0248     struct mlx5_fib_event_work *fib_work;
0249 
0250     fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
0251     if (WARN_ON(!fib_work))
0252         return NULL;
0253 
0254     INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
0255     fib_work->ldev = ldev;
0256     fib_work->event = event;
0257 
0258     return fib_work;
0259 }
0260 
/* FIB notifier callback.  Runs in atomic context: filter out events we
 * don't care about, snapshot the notifier info into a work item (taking
 * a fib_info reference so it outlives the queueing), and defer the real
 * processing to the lag_mp workqueue.  The reference is released in
 * mlx5_lag_fib_update().
 */
static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct net_device *fib_dev;
	struct fib_info *fi;

	/* Only IPv4 routes are considered for multipath offload. */
	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		/* Routes using nexthop objects are not handled here. */
		if (fi->nh)
			return NOTIFY_DONE;
		/* Ignore routes whose first nh is not on one of our ports. */
		fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
		if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
		    fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
			return NOTIFY_DONE;
		}
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		/* Pin the parent fib_info until the work runs. */
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}
0320 
/* Forget the currently tracked multipath route. */
void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
	/* Clear mfi, as it might become stale when a route delete event
	 * has been missed, see mlx5_lag_fib_route_event().
	 */
	ldev->lag_mp.fib.mfi = NULL;
}
0328 
0329 int mlx5_lag_mp_init(struct mlx5_lag *ldev)
0330 {
0331     struct lag_mp *mp = &ldev->lag_mp;
0332     int err;
0333 
0334     /* always clear mfi, as it might become stale when a route delete event
0335      * has been missed
0336      */
0337     mp->fib.mfi = NULL;
0338 
0339     if (mp->fib_nb.notifier_call)
0340         return 0;
0341 
0342     mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
0343     if (!mp->wq)
0344         return -ENOMEM;
0345 
0346     mp->fib_nb.notifier_call = mlx5_lag_fib_event;
0347     err = register_fib_notifier(&init_net, &mp->fib_nb,
0348                     mlx5_lag_fib_event_flush, NULL);
0349     if (err) {
0350         destroy_workqueue(mp->wq);
0351         mp->fib_nb.notifier_call = NULL;
0352     }
0353 
0354     return err;
0355 }
0356 
/* Tear down multipath support: unregister the FIB notifier (which
 * flushes pending events via mlx5_lag_fib_event_flush) before the
 * workqueue is destroyed, then drop the cached route.  Safe to call
 * when init never ran or already cleaned up.
 */
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	destroy_workqueue(mp->wq);
	mp->fib_nb.notifier_call = NULL;
	mp->fib.mfi = NULL;
}