// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag/lag.h"
#include "lag/mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
}

static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	if (!mlx5_lag_is_ready(ldev))
		return false;

	if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
					 ldev->pf[MLX5_LAG_P2].dev);
}

bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_multipath(ldev);

	return res;
}

/**
 * mlx5_lag_set_port_affinity
 *
 * @ldev: lag device
 * @port:
 *     0 - set normal affinity.
 *     1 - set affinity to port 1.
 *     2 - set affinity to port 2.
 *
 **/
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
				       enum mlx5_lag_port_affinity port)
{
	struct lag_tracker tracker = {};

	if (!__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case MLX5_LAG_NORMAL_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	case MLX5_LAG_P1_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
		break;
	case MLX5_LAG_P2_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Invalid affinity port %d", port);
		return;
	}

	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

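/* Flush callback passed to register_fib_notifier(): the FIB notifier core
 * invokes it when it needs all events queued so far to be fully processed
 * (for example while replaying the existing FIB state at registration
 * time), so draining the multipath workqueue is sufficient here.
 */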
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

	flush_workqueue(mp->wq);
}

static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
{
	mp->fib.mfi = fi;
	mp->fib.priority = fi->fib_priority;
	mp->fib.dst = dst;
	mp->fib.dst_len = dst_len;
}

struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};
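
/* FIB events arrive in atomic (notifier) context. Each one is copied into a
 * struct mlx5_fib_event_work above and processed later from the
 * "mlx5_lag_mp" workqueue, so the handlers below run in process context
 * under RTNL (see mlx5_lag_fib_update()).
 */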

static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
				     struct fib_entry_notifier_info *fen_info)
{
	struct fib_info *fi = fen_info->fi;
	struct lag_mp *mp = &ldev->lag_mp;
	struct fib_nh *fib_nh0, *fib_nh1;
	unsigned int nhs;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* stop track */
		if (mp->fib.mfi == fi)
			mp->fib.mfi = NULL;
		return;
	}

	/* Handle multipath entry with lower priority value */
	if (mp->fib.mfi && mp->fib.mfi != fi &&
	    (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
	    fi->fib_priority >= mp->fib.priority)
		return;

	/* Handle add/replace event */
	nhs = fib_info_num_path(fi);
	if (nhs == 1) {
		if (__mlx5_lag_is_active(ldev)) {
			struct fib_nh *nh = fib_info_nh(fi, 0);
			struct net_device *nh_dev = nh->fib_nh_dev;
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

			if (i < 0)
				return;

			/* netdev index is 0-based, affinity ports are 1-based */
			i++;
			mlx5_lag_set_port_affinity(ldev, i);
			mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
		}

		return;
	}

	if (nhs != 2)
		return;

	/* Verify next hops are ports of the same HCA */
	fib_nh0 = fib_info_nh(fi, 0);
	fib_nh1 = fib_info_nh(fi, 1);
	if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
	    !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Multipath offload requires two ports of the same HCA\n");
		return;
	}

	/* First time we see multipath route */
	if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}

static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check the nh event is related to the route */
	if (!mp->fib.mfi || mp->fib.mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			/* steer traffic to the peer of the removed next hop:
			 * netdev index 0/1 maps to affinity port 2/1
			 */
			i = (i + 1) % 2 + 1;
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}

static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev,
					   fib_work->event,
					   fib_work->fnh_info.fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

	/* Atomic allocation: we may be called from a FIB notifier running
	 * in atomic context.
	 */
	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}

static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct net_device *fib_dev;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		if (fi->nh)
			return NOTIFY_DONE;
		fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
		if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
		    fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
			return NOTIFY_DONE;
		}
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}

void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
	/* Clear mfi, as it might become stale when a route delete event
	 * has been missed, see mlx5_lag_fib_route_event().
	 */
	ldev->lag_mp.fib.mfi = NULL;
}

int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	/* always clear mfi, as it might become stale when a route delete event
	 * has been missed
	 */
	mp->fib.mfi = NULL;

	if (mp->fib_nb.notifier_call)
		return 0;

	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
	if (!mp->wq)
		return -ENOMEM;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&init_net, &mp->fib_nb,
				    mlx5_lag_fib_event_flush, NULL);
	if (err) {
		destroy_workqueue(mp->wq);
		mp->fib_nb.notifier_call = NULL;
	}

	return err;
}
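
/* Note: mp->fib_nb.notifier_call doubles as the "initialized" flag, which
 * makes mlx5_lag_mp_init() above safe to call repeatedly and lets
 * mlx5_lag_mp_cleanup() below act as its inverse.
 */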

void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	destroy_workqueue(mp->wq);
	mp->fib_nb.notifier_call = NULL;
	mp->fib.mfi = NULL;
}