Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
0002 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
0003 
0004 #include <linux/kernel.h>
0005 #include <linux/types.h>
0006 #include <linux/rhashtable.h>
0007 #include <linux/bitops.h>
0008 #include <linux/in6.h>
0009 #include <linux/notifier.h>
0010 #include <linux/inetdevice.h>
0011 #include <linux/netdevice.h>
0012 #include <linux/if_bridge.h>
0013 #include <linux/socket.h>
0014 #include <linux/route.h>
0015 #include <linux/gcd.h>
0016 #include <linux/if_macvlan.h>
0017 #include <linux/refcount.h>
0018 #include <linux/jhash.h>
0019 #include <linux/net_namespace.h>
0020 #include <linux/mutex.h>
0021 #include <net/netevent.h>
0022 #include <net/neighbour.h>
0023 #include <net/arp.h>
0024 #include <net/inet_dscp.h>
0025 #include <net/ip_fib.h>
0026 #include <net/ip6_fib.h>
0027 #include <net/nexthop.h>
0028 #include <net/fib_rules.h>
0029 #include <net/ip_tunnels.h>
0030 #include <net/l3mdev.h>
0031 #include <net/addrconf.h>
0032 #include <net/ndisc.h>
0033 #include <net/ipv6.h>
0034 #include <net/fib_notifier.h>
0035 #include <net/switchdev.h>
0036 
0037 #include "spectrum.h"
0038 #include "core.h"
0039 #include "reg.h"
0040 #include "spectrum_cnt.h"
0041 #include "spectrum_dpipe.h"
0042 #include "spectrum_ipip.h"
0043 #include "spectrum_mr.h"
0044 #include "spectrum_mr_tcam.h"
0045 #include "spectrum_router.h"
0046 #include "spectrum_span.h"
0047 
0048 struct mlxsw_sp_fib;
0049 struct mlxsw_sp_vr;
0050 struct mlxsw_sp_lpm_tree;
0051 struct mlxsw_sp_rif_ops;
0052 
/* Router InterFace (RIF): the router-side representation of an L3-enabled
 * netdev. Referenced by nexthops and neighbour entries that egress via it.
 */
struct mlxsw_sp_rif {
	struct list_head nexthop_list;	/* nexthops using this RIF */
	struct list_head neigh_list;	/* neighbour entries on this RIF */
	struct net_device *dev; /* NULL for underlay RIF */
	struct mlxsw_sp_fid *fid;	/* FID the RIF is attached to */
	unsigned char addr[ETH_ALEN];	/* RIF MAC address */
	int mtu;
	u16 rif_index;		/* hardware RIF index */
	u8 mac_profile_id;	/* hardware MAC profile (see rif_mac_profile) */
	u16 vr_id;		/* virtual router the RIF belongs to */
	const struct mlxsw_sp_rif_ops *ops;	/* per-RIF-type operations */
	struct mlxsw_sp *mlxsw_sp;

	/* Optional hardware counters, one per direction; the *_valid flags
	 * track whether the corresponding counter is currently allocated.
	 */
	unsigned int counter_ingress;
	bool counter_ingress_valid;
	unsigned int counter_egress;
	bool counter_egress_valid;
};
0071 
/* Parameters describing the port binding of a RIF to be created. */
struct mlxsw_sp_rif_params {
	struct net_device *dev;
	union {			/* member selected by 'lag' below */
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;		/* true: lag_id valid; false: system_port valid */
};
0081 
/* Sub-port RIF: a RIF bound to a specific {port or LAG, VID} pair.
 * Mirrors the union/lag layout of struct mlxsw_sp_rif_params.
 */
struct mlxsw_sp_rif_subport {
	struct mlxsw_sp_rif common;	/* must be first (container_of use) - TODO confirm */
	refcount_t ref_count;
	union {			/* member selected by 'lag' below */
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;		/* true: lag_id valid; false: system_port valid */
};
0092 
/* Loopback RIF used for IP-in-IP tunnel underlay lookups. */
struct mlxsw_sp_rif_ipip_lb {
	struct mlxsw_sp_rif common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
	u16 ul_vr_id; /* Reserved for Spectrum-2. */
	u16 ul_rif_id; /* Reserved for Spectrum. */
};
0099 
/* Creation parameters for an IP-in-IP loopback RIF. */
struct mlxsw_sp_rif_params_ipip_lb {
	struct mlxsw_sp_rif_params common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
};
0104 
/* Per-RIF-type operations. 'setup' primes the RIF object from the creation
 * parameters, 'configure'/'deconfigure' program/unprogram the device,
 * 'fid_get' resolves the FID to attach, and 'fdb_del' removes an FDB entry
 * for the given MAC (optional hook - not all types provide it; confirm at
 * call sites).
 */
struct mlxsw_sp_rif_ops {
	enum mlxsw_sp_rif_type type;
	size_t rif_size;	/* allocation size of the type-specific struct */

	void (*setup)(struct mlxsw_sp_rif *rif,
		      const struct mlxsw_sp_rif_params *params);
	int (*configure)(struct mlxsw_sp_rif *rif,
			 struct netlink_ext_ack *extack);
	void (*deconfigure)(struct mlxsw_sp_rif *rif);
	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
					 struct netlink_ext_ack *extack);
	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
};
0118 
/* Reference-counted RIF MAC profile, keyed by MAC prefix. */
struct mlxsw_sp_rif_mac_profile {
	unsigned char mac_prefix[ETH_ALEN];
	refcount_t ref_count;
	u8 id;		/* hardware profile identifier */
};
0124 
/* ASIC-generation-specific router initialization hooks. */
struct mlxsw_sp_router_ops {
	int (*init)(struct mlxsw_sp *mlxsw_sp);
	int (*ipips_init)(struct mlxsw_sp *mlxsw_sp);
};
0129 
0130 static struct mlxsw_sp_rif *
0131 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
0132              const struct net_device *dev);
0133 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
0134 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
0135 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
0136                   struct mlxsw_sp_lpm_tree *lpm_tree);
0137 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
0138                      const struct mlxsw_sp_fib *fib,
0139                      u8 tree_id);
0140 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
0141                        const struct mlxsw_sp_fib *fib);
0142 
0143 static unsigned int *
0144 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
0145                enum mlxsw_sp_rif_counter_dir dir)
0146 {
0147     switch (dir) {
0148     case MLXSW_SP_RIF_COUNTER_EGRESS:
0149         return &rif->counter_egress;
0150     case MLXSW_SP_RIF_COUNTER_INGRESS:
0151         return &rif->counter_ingress;
0152     }
0153     return NULL;
0154 }
0155 
0156 static bool
0157 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
0158                    enum mlxsw_sp_rif_counter_dir dir)
0159 {
0160     switch (dir) {
0161     case MLXSW_SP_RIF_COUNTER_EGRESS:
0162         return rif->counter_egress_valid;
0163     case MLXSW_SP_RIF_COUNTER_INGRESS:
0164         return rif->counter_ingress_valid;
0165     }
0166     return false;
0167 }
0168 
0169 static void
0170 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
0171                    enum mlxsw_sp_rif_counter_dir dir,
0172                    bool valid)
0173 {
0174     switch (dir) {
0175     case MLXSW_SP_RIF_COUNTER_EGRESS:
0176         rif->counter_egress_valid = valid;
0177         break;
0178     case MLXSW_SP_RIF_COUNTER_INGRESS:
0179         rif->counter_ingress_valid = valid;
0180         break;
0181     }
0182 }
0183 
0184 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
0185                      unsigned int counter_index, bool enable,
0186                      enum mlxsw_sp_rif_counter_dir dir)
0187 {
0188     char ritr_pl[MLXSW_REG_RITR_LEN];
0189     bool is_egress = false;
0190     int err;
0191 
0192     if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
0193         is_egress = true;
0194     mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
0195     err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
0196     if (err)
0197         return err;
0198 
0199     mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
0200                     is_egress);
0201     return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
0202 }
0203 
0204 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
0205                    struct mlxsw_sp_rif *rif,
0206                    enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
0207 {
0208     char ricnt_pl[MLXSW_REG_RICNT_LEN];
0209     unsigned int *p_counter_index;
0210     bool valid;
0211     int err;
0212 
0213     valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
0214     if (!valid)
0215         return -EINVAL;
0216 
0217     p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
0218     if (!p_counter_index)
0219         return -EINVAL;
0220     mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
0221                  MLXSW_REG_RICNT_OPCODE_NOP);
0222     err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
0223     if (err)
0224         return err;
0225     *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
0226     return 0;
0227 }
0228 
/* Host-side copy of the basic RICNT counter set; field names match the
 * mlxsw_reg_ricnt_*_get() accessors they are extracted with.
 */
struct mlxsw_sp_rif_counter_set_basic {
	u64 good_unicast_packets;
	u64 good_multicast_packets;
	u64 good_broadcast_packets;
	u64 good_unicast_bytes;
	u64 good_multicast_bytes;
	u64 good_broadcast_bytes;
	u64 error_packets;
	u64 discard_packets;
	u64 error_bytes;
	u64 discard_bytes;
};
0241 
/* Query @rif's counter in @dir using the RICNT CLEAR opcode (fetch values
 * and reset - per the opcode name; confirm against the RICNT spec) and,
 * if @set is non-NULL, copy all fields into it. Returns -EINVAL when no
 * counter is allocated, otherwise 0 or a negative errno.
 */
static int
mlxsw_sp_rif_counter_fetch_clear(struct mlxsw_sp_rif *rif,
				 enum mlxsw_sp_rif_counter_dir dir,
				 struct mlxsw_sp_rif_counter_set_basic *set)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ricnt_pl[MLXSW_REG_RICNT_LEN];
	unsigned int *p_counter_index;
	int err;

	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
		return -EINVAL;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;

	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
			     MLXSW_REG_RICNT_OPCODE_CLEAR);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
	if (err)
		return err;

	/* Caller only wanted the clearing side effect. */
	if (!set)
		return 0;

	/* Copy each register field into the matching *set member. */
#define MLXSW_SP_RIF_COUNTER_EXTRACT(NAME)				\
		(set->NAME = mlxsw_reg_ricnt_ ## NAME ## _get(ricnt_pl))

	MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_packets);
	MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_packets);
	MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_packets);
	MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_bytes);
	MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_bytes);
	MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_bytes);
	MLXSW_SP_RIF_COUNTER_EXTRACT(error_packets);
	MLXSW_SP_RIF_COUNTER_EXTRACT(discard_packets);
	MLXSW_SP_RIF_COUNTER_EXTRACT(error_bytes);
	MLXSW_SP_RIF_COUNTER_EXTRACT(discard_bytes);

#undef MLXSW_SP_RIF_COUNTER_EXTRACT

	return 0;
}
0286 
0287 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
0288                       unsigned int counter_index)
0289 {
0290     char ricnt_pl[MLXSW_REG_RICNT_LEN];
0291 
0292     mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
0293                  MLXSW_REG_RICNT_OPCODE_CLEAR);
0294     return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
0295 }
0296 
/* Allocate, zero and enable a hardware counter for @rif in direction @dir.
 * Succeeds immediately if a counter is already allocated. Returns 0 or a
 * negative errno; on failure the allocated counter index is released.
 */
int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	unsigned int *p_counter_index;
	int err;

	/* Already allocated for this direction - nothing to do. */
	if (mlxsw_sp_rif_counter_valid_get(rif, dir))
		return 0;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;

	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
				     p_counter_index);
	if (err)
		return err;

	/* Start from zero before binding the counter to the RIF. */
	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
	if (err)
		goto err_counter_clear;

	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
					*p_counter_index, true, dir);
	if (err)
		goto err_counter_edit;
	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
	return 0;

err_counter_edit:
err_counter_clear:
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	return err;
}
0333 
/* Disable and release the hardware counter of @rif in direction @dir.
 * No-op if no counter is allocated for that direction.
 */
void mlxsw_sp_rif_counter_free(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	unsigned int *p_counter_index;

	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
		return;

	/* Valid flag is set, so a slot must exist; WARN guards the invariant. */
	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (WARN_ON(!p_counter_index))
		return;
	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
				  *p_counter_index, false, dir);
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
}
0352 
0353 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
0354 {
0355     struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
0356     struct devlink *devlink;
0357 
0358     devlink = priv_to_devlink(mlxsw_sp->core);
0359     if (!devlink_dpipe_table_counter_enabled(devlink,
0360                          MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
0361         return;
0362     mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
0363 }
0364 
/* Release the egress counter of @rif (counterpart of counters_alloc). */
static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
{
	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}
0369 
/* One bit per possible prefix length: 0..128 inclusive (IPv6 max), hence
 * the +1.
 */
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

/* Bitmap of prefix lengths in use by a FIB / LPM tree. */
struct mlxsw_sp_prefix_usage {
	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

/* Iterate over each prefix length set in @prefix_usage, ascending. */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
0378 
0379 static bool
0380 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
0381              struct mlxsw_sp_prefix_usage *prefix_usage2)
0382 {
0383     return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
0384 }
0385 
/* Copy prefix-usage bitmap @prefix_usage2 into @prefix_usage1. */
static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}
0392 
/* Mark @prefix_len as used in @prefix_usage. */
static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}
0399 
/* Mark @prefix_len as unused in @prefix_usage. */
static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}
0406 
/* FIB node hash key: address buffer sized for IPv6 (IPv4 fits too) plus
 * prefix length.
 */
struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};
0411 
/* How a FIB entry is programmed into the device. */
enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,		/* forward via nexthop group */
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,		/* deliver to the local host */
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,		/* punt packets to the CPU */
	MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,	/* silently drop */
	MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE,	/* drop; unreachable route */

	/* This is a special case of local delivery, where a packet should be
	 * decapsulated on reception. Note that there is no corresponding ENCAP,
	 * because that's a type of next hop, not of FIB entry. (There can be
	 * several next hops in a REMOTE entry, and some of them may be
	 * encapsulating entries.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
	MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
};
0428 
0429 struct mlxsw_sp_nexthop_group_info;
0430 struct mlxsw_sp_nexthop_group;
0431 struct mlxsw_sp_fib_entry;
0432 
/* A {prefix, prefix_len} node in a FIB, hashed by 'key'; holds at most
 * one programmed entry.
 */
struct mlxsw_sp_fib_node {
	struct mlxsw_sp_fib_entry *fib_entry;
	struct list_head list;		/* membership in fib->node_list */
	struct rhash_head ht_node;	/* membership in fib->ht */
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_fib_key key;
};
0440 
/* Decap state of an IPIP/NVE decap FIB entry. */
struct mlxsw_sp_fib_entry_decap {
	struct mlxsw_sp_ipip_entry *ipip_entry;
	u32 tunnel_index;
};
0445 
/* Protocol-agnostic part of a programmed FIB entry. */
struct mlxsw_sp_fib_entry {
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;	/* membership in nh_group's list */
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};
0453 
/* IPv4 FIB entry: common part plus the kernel route attributes it mirrors. */
struct mlxsw_sp_fib4_entry {
	struct mlxsw_sp_fib_entry common;
	struct fib_info *fi;
	u32 tb_id;	/* kernel routing table id */
	dscp_t dscp;
	u8 type;	/* kernel route type (RTN_*) - TODO confirm */
};
0461 
/* IPv6 FIB entry: common part plus the list of kernel fib6_info routes
 * ("rt6" wrappers) it aggregates.
 */
struct mlxsw_sp_fib6_entry {
	struct mlxsw_sp_fib_entry common;
	struct list_head rt6_list;
	unsigned int nrt6;	/* number of elements on rt6_list */
};
0467 
/* List wrapper around a kernel fib6_info route. */
struct mlxsw_sp_rt6 {
	struct list_head list;	/* membership in fib6_entry->rt6_list */
	struct fib6_info *rt;
};
0472 
/* A hardware LPM tree, shared by virtual routers with identical prefix
 * usage for the same protocol.
 */
struct mlxsw_sp_lpm_tree {
	u8 id; /* tree ID */
	unsigned int ref_count;	/* users; 0 means the slot is free */
	enum mlxsw_sp_l3proto proto;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];	/* per prefix-len users */
	struct mlxsw_sp_prefix_usage prefix_usage;
};
0480 
/* A per-VR, per-protocol FIB: nodes are hashed in 'ht' and also linked on
 * 'node_list'; bound to an LPM tree.
 */
struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head node_list;
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	enum mlxsw_sp_l3proto proto;
};
0488 
/* Virtual router: hardware counterpart of a kernel routing table, with
 * unicast FIBs for both protocols and multicast routing tables.
 */
struct mlxsw_sp_vr {
	u16 id; /* virtual router ID */
	u32 tb_id; /* kernel fib table id */
	unsigned int rif_count;	/* RIFs bound to this VR */
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
	struct mlxsw_sp_rif *ul_rif;	/* underlay RIF (IPIP) */
	refcount_t ul_rif_refcnt;
};
0499 
0500 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
0501 
/* Allocate a FIB for @vr/@proto, point it at the current per-protocol LPM
 * tree (taking a reference) and bind the VR to that tree in hardware.
 * Returns the new FIB or an ERR_PTR(); on failure everything is unwound.
 */
static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
						struct mlxsw_sp_vr *vr,
						enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_fib *fib;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->node_list);
	fib->proto = proto;
	fib->vr = vr;
	fib->lpm_tree = lpm_tree;
	mlxsw_sp_lpm_tree_hold(lpm_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
	if (err)
		goto err_lpm_tree_bind;
	return fib;

err_lpm_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}
0533 
/* Tear down @fib: unbind its VR from the LPM tree, drop the tree reference
 * and free the (expected-empty) node table.
 */
static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib *fib)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
	/* All nodes should have been removed before the FIB goes away. */
	WARN_ON(!list_empty(&fib->node_list));
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}
0543 
0544 static struct mlxsw_sp_lpm_tree *
0545 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
0546 {
0547     static struct mlxsw_sp_lpm_tree *lpm_tree;
0548     int i;
0549 
0550     for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
0551         lpm_tree = &mlxsw_sp->router->lpm.trees[i];
0552         if (lpm_tree->ref_count == 0)
0553             return lpm_tree;
0554     }
0555     return NULL;
0556 }
0557 
0558 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
0559                    struct mlxsw_sp_lpm_tree *lpm_tree)
0560 {
0561     char ralta_pl[MLXSW_REG_RALTA_LEN];
0562 
0563     mlxsw_reg_ralta_pack(ralta_pl, true,
0564                  (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
0565                  lpm_tree->id);
0566     return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
0567 }
0568 
0569 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
0570                    struct mlxsw_sp_lpm_tree *lpm_tree)
0571 {
0572     char ralta_pl[MLXSW_REG_RALTA_LEN];
0573 
0574     mlxsw_reg_ralta_pack(ralta_pl, false,
0575                  (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
0576                  lpm_tree->id);
0577     mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
0578 }
0579 
/* Program the tree's bin structure (RALST) from @prefix_usage: the highest
 * used prefix length becomes the root bin, and each used prefix length is
 * chained to the previously programmed one. Returns 0 or a negative errno.
 */
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	/* Iteration is ascending, so root_bin ends at the largest set bit. */
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		/* Prefix length 0 (default route) is not a bin. */
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}
0603 
/* Take a free LPM tree slot, allocate it in hardware, program its bin
 * structure from @prefix_usage and return it with ref_count == 1.
 * Returns ERR_PTR(-EBUSY) when no slot is free, or another ERR_PTR() on
 * device failure.
 */
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	/* Fresh tree - no prefix has any users yet. */
	memset(&lpm_tree->prefix_ref_count, 0,
	       sizeof(lpm_tree->prefix_ref_count));
	lpm_tree->ref_count = 1;
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}
0635 
/* Destroy an LPM tree whose reference count dropped to zero. */
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
0641 
0642 static struct mlxsw_sp_lpm_tree *
0643 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
0644               struct mlxsw_sp_prefix_usage *prefix_usage,
0645               enum mlxsw_sp_l3proto proto)
0646 {
0647     struct mlxsw_sp_lpm_tree *lpm_tree;
0648     int i;
0649 
0650     for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
0651         lpm_tree = &mlxsw_sp->router->lpm.trees[i];
0652         if (lpm_tree->ref_count != 0 &&
0653             lpm_tree->proto == proto &&
0654             mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
0655                          prefix_usage)) {
0656             mlxsw_sp_lpm_tree_hold(lpm_tree);
0657             return lpm_tree;
0658         }
0659     }
0660     return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
0661 }
0662 
/* Take a reference on @lpm_tree. Plain (non-atomic) counter - callers are
 * presumably serialized by the router lock; confirm before relying on it.
 */
static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
{
	lpm_tree->ref_count++;
}
0667 
0668 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
0669                   struct mlxsw_sp_lpm_tree *lpm_tree)
0670 {
0671     if (--lpm_tree->ref_count == 0)
0672         mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
0673 }
0674 
/* First usable LPM tree ID; lower IDs belong to the device. */
#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
0676 
/* Initialize the LPM tree pool: size it from the MAX_LPM_TREES resource
 * (minus the reserved tree 0) and create one empty default tree per
 * protocol. Returns 0 or a negative errno; unwinds fully on failure.
 */
static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
	struct mlxsw_sp_lpm_tree *lpm_tree;
	u64 max_trees;
	int err, i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
		return -EIO;

	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
					     sizeof(struct mlxsw_sp_lpm_tree),
					     GFP_KERNEL);
	if (!mlxsw_sp->router->lpm.trees)
		return -ENOMEM;

	/* Assign hardware IDs, skipping the reserved tree 0. */
	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}

	/* Default (empty prefix usage) tree for IPv4 ... */
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv4_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;

	/* ... and for IPv6. */
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv6_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;

	return 0;

err_ipv6_tree_get:
	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_ipv4_tree_get:
	kfree(mlxsw_sp->router->lpm.trees);
	return err;
}
0725 
0726 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
0727 {
0728     struct mlxsw_sp_lpm_tree *lpm_tree;
0729 
0730     lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
0731     mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
0732 
0733     lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
0734     mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
0735 
0736     kfree(mlxsw_sp->router->lpm.trees);
0737 }
0738 
0739 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
0740 {
0741     return !!vr->fib4 || !!vr->fib6 ||
0742            !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
0743            !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
0744 }
0745 
0746 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
0747 {
0748     struct mlxsw_sp_vr *vr;
0749     int i;
0750 
0751     for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
0752         vr = &mlxsw_sp->router->vrs[i];
0753         if (!mlxsw_sp_vr_is_used(vr))
0754             return vr;
0755     }
0756     return NULL;
0757 }
0758 
0759 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
0760                      const struct mlxsw_sp_fib *fib, u8 tree_id)
0761 {
0762     char raltb_pl[MLXSW_REG_RALTB_LEN];
0763 
0764     mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
0765                  (enum mlxsw_reg_ralxx_protocol) fib->proto,
0766                  tree_id);
0767     return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
0768 }
0769 
0770 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
0771                        const struct mlxsw_sp_fib *fib)
0772 {
0773     char raltb_pl[MLXSW_REG_RALTB_LEN];
0774 
0775     /* Bind to tree 0 which is default */
0776     mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
0777                  (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
0778     return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
0779 }
0780 
0781 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
0782 {
0783     /* For our purpose, squash main, default and local tables into one */
0784     if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
0785         tb_id = RT_TABLE_MAIN;
0786     return tb_id;
0787 }
0788 
0789 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
0790                         u32 tb_id)
0791 {
0792     struct mlxsw_sp_vr *vr;
0793     int i;
0794 
0795     tb_id = mlxsw_sp_fix_tb_id(tb_id);
0796 
0797     for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
0798         vr = &mlxsw_sp->router->vrs[i];
0799         if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
0800             return vr;
0801     }
0802     return NULL;
0803 }
0804 
0805 int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
0806                 u16 *vr_id)
0807 {
0808     struct mlxsw_sp_vr *vr;
0809     int err = 0;
0810 
0811     mutex_lock(&mlxsw_sp->router->lock);
0812     vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
0813     if (!vr) {
0814         err = -ESRCH;
0815         goto out;
0816     }
0817     *vr_id = vr->id;
0818 out:
0819     mutex_unlock(&mlxsw_sp->router->lock);
0820     return err;
0821 }
0822 
0823 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
0824                         enum mlxsw_sp_l3proto proto)
0825 {
0826     switch (proto) {
0827     case MLXSW_SP_L3_PROTO_IPV4:
0828         return vr->fib4;
0829     case MLXSW_SP_L3_PROTO_IPV6:
0830         return vr->fib6;
0831     }
0832     return NULL;
0833 }
0834 
/* Claim a free virtual router and populate it: IPv4 and IPv6 unicast FIBs
 * plus multicast tables for both protocols. Only commits the resources to
 * @vr after everything succeeds. Returns the VR or an ERR_PTR(); failures
 * are unwound in reverse order of creation.
 */
static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      u32 tb_id,
					      struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr) {
		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
		return ERR_PTR(-EBUSY);
	}
	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib4))
		return ERR_CAST(fib4);
	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib6)) {
		err = PTR_ERR(fib6);
		goto err_fib6_create;
	}
	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(mr4_table)) {
		err = PTR_ERR(mr4_table);
		goto err_mr4_table_create;
	}
	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(mr6_table)) {
		err = PTR_ERR(mr6_table);
		goto err_mr6_table_create;
	}

	/* Commit; setting fib4/fib6 marks the VR as used. */
	vr->fib4 = fib4;
	vr->fib6 = fib6;
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
	vr->tb_id = tb_id;
	return vr;

err_mr6_table_create:
	mlxsw_sp_mr_table_destroy(mr4_table);
err_mr4_table_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
err_fib6_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
	return ERR_PTR(err);
}
0886 
/* Release all of @vr's tables in reverse order of creation; NULLing the
 * pointers marks the VR as unused again.
 */
static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
	vr->fib6 = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
	vr->fib4 = NULL;
}
0899 
0900 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
0901                        struct netlink_ext_ack *extack)
0902 {
0903     struct mlxsw_sp_vr *vr;
0904 
0905     tb_id = mlxsw_sp_fix_tb_id(tb_id);
0906     vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
0907     if (!vr)
0908         vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
0909     return vr;
0910 }
0911 
0912 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
0913 {
0914     if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
0915         list_empty(&vr->fib6->node_list) &&
0916         mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
0917         mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
0918         mlxsw_sp_vr_destroy(mlxsw_sp, vr);
0919 }
0920 
0921 static bool
0922 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
0923                     enum mlxsw_sp_l3proto proto, u8 tree_id)
0924 {
0925     struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
0926 
0927     if (!mlxsw_sp_vr_is_used(vr))
0928         return false;
0929     if (fib->lpm_tree->id == tree_id)
0930         return true;
0931     return false;
0932 }
0933 
/* Rebind @fib from its current LPM tree to @new_tree.
 *
 * The FIB's tree pointer is swapped and a reference taken before the device
 * bind is attempted; if the bind fails, the extra reference is dropped and
 * the old pointer is restored, leaving the FIB untouched.
 */
static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib *fib,
					struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	int err;

	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err)
		goto err_tree_bind;
	/* Bound to the new tree; drop the reference held on the old one. */
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
	return 0;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
	fib->lpm_tree = old_tree;
	return err;
}
0954 
0955 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
0956                      struct mlxsw_sp_fib *fib,
0957                      struct mlxsw_sp_lpm_tree *new_tree)
0958 {
0959     enum mlxsw_sp_l3proto proto = fib->proto;
0960     struct mlxsw_sp_lpm_tree *old_tree;
0961     u8 old_id, new_id = new_tree->id;
0962     struct mlxsw_sp_vr *vr;
0963     int i, err;
0964 
0965     old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
0966     old_id = old_tree->id;
0967 
0968     for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
0969         vr = &mlxsw_sp->router->vrs[i];
0970         if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
0971             continue;
0972         err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
0973                            mlxsw_sp_vr_fib(vr, proto),
0974                            new_tree);
0975         if (err)
0976             goto err_tree_replace;
0977     }
0978 
0979     memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
0980            sizeof(new_tree->prefix_ref_count));
0981     mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
0982     mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
0983 
0984     return 0;
0985 
0986 err_tree_replace:
0987     for (i--; i >= 0; i--) {
0988         if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
0989             continue;
0990         mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
0991                          mlxsw_sp_vr_fib(vr, proto),
0992                          old_tree);
0993     }
0994     return err;
0995 }
0996 
0997 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
0998 {
0999     struct mlxsw_sp_vr *vr;
1000     u64 max_vrs;
1001     int i;
1002 
1003     if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
1004         return -EIO;
1005 
1006     max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
1007     mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
1008                     GFP_KERNEL);
1009     if (!mlxsw_sp->router->vrs)
1010         return -ENOMEM;
1011 
1012     for (i = 0; i < max_vrs; i++) {
1013         vr = &mlxsw_sp->router->vrs[i];
1014         vr->id = i;
1015     }
1016 
1017     return 0;
1018 }
1019 
1020 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
1021 
/* Undo mlxsw_sp_vrs_init(): drain queued FIB work, flush the device's
 * routing tables and free the VR array.
 */
static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	/* Safe only after the flush above: no VR may be referenced anymore. */
	kfree(mlxsw_sp->router->vrs);
}
1035 
1036 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
1037 {
1038     struct net_device *d;
1039     u32 tb_id;
1040 
1041     rcu_read_lock();
1042     d = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1043     if (d)
1044         tb_id = l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
1045     else
1046         tb_id = RT_TABLE_MAIN;
1047     rcu_read_unlock();
1048 
1049     return tb_id;
1050 }
1051 
1052 static struct mlxsw_sp_rif *
1053 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1054             const struct mlxsw_sp_rif_params *params,
1055             struct netlink_ext_ack *extack);
1056 
1057 static struct mlxsw_sp_rif_ipip_lb *
1058 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1059                 enum mlxsw_sp_ipip_type ipipt,
1060                 struct net_device *ol_dev,
1061                 struct netlink_ext_ack *extack)
1062 {
1063     struct mlxsw_sp_rif_params_ipip_lb lb_params;
1064     const struct mlxsw_sp_ipip_ops *ipip_ops;
1065     struct mlxsw_sp_rif *rif;
1066 
1067     ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1068     lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1069         .common.dev = ol_dev,
1070         .common.lag = false,
1071         .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1072     };
1073 
1074     rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1075     if (IS_ERR(rif))
1076         return ERR_CAST(rif);
1077     return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1078 }
1079 
/* Allocate and initialize an IPIP entry for overlay device @ol_dev of type
 * @ipipt: create the backing loopback RIF, initialize tunnel parameters from
 * the netdevice and program the remote (underlay) IP address.
 *
 * Returns the new entry or an ERR_PTR on failure.
 */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
			  enum mlxsw_sp_ipip_type ipipt,
			  struct net_device *ol_dev)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_ipip_entry *ret = NULL;
	int err;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
	if (!ipip_entry)
		return ERR_PTR(-ENOMEM);

	/* NULL extack: no netlink request context is available here. */
	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
							    ol_dev, NULL);
	if (IS_ERR(ipip_entry->ol_lb)) {
		ret = ERR_CAST(ipip_entry->ol_lb);
		goto err_ol_ipip_lb_create;
	}

	ipip_entry->ipipt = ipipt;
	ipip_entry->ol_dev = ol_dev;
	ipip_entry->parms = ipip_ops->parms_init(ol_dev);

	err = ipip_ops->rem_ip_addr_set(mlxsw_sp, ipip_entry);
	if (err) {
		ret = ERR_PTR(err);
		goto err_rem_ip_addr_set;
	}

	return ipip_entry;

err_rem_ip_addr_set:
	/* Unwind in reverse order of the setup above. */
	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
err_ol_ipip_lb_create:
	kfree(ipip_entry);
	return ret;
}
1120 
1121 static void mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp *mlxsw_sp,
1122                     struct mlxsw_sp_ipip_entry *ipip_entry)
1123 {
1124     const struct mlxsw_sp_ipip_ops *ipip_ops =
1125         mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1126 
1127     ipip_ops->rem_ip_addr_unset(mlxsw_sp, ipip_entry);
1128     mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1129     kfree(ipip_entry);
1130 }
1131 
1132 static bool
1133 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1134                   const enum mlxsw_sp_l3proto ul_proto,
1135                   union mlxsw_sp_l3addr saddr,
1136                   u32 ul_tb_id,
1137                   struct mlxsw_sp_ipip_entry *ipip_entry)
1138 {
1139     u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1140     enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1141     union mlxsw_sp_l3addr tun_saddr;
1142 
1143     if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1144         return false;
1145 
1146     tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1147     return tun_ul_tb_id == ul_tb_id &&
1148            mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1149 }
1150 
1151 static int mlxsw_sp_ipip_decap_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp,
1152                          enum mlxsw_sp_ipip_type ipipt)
1153 {
1154     const struct mlxsw_sp_ipip_ops *ipip_ops;
1155 
1156     ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1157 
1158     /* Not all tunnels require to increase the default pasing depth
1159      * (96 bytes).
1160      */
1161     if (ipip_ops->inc_parsing_depth)
1162         return mlxsw_sp_parsing_depth_inc(mlxsw_sp);
1163 
1164     return 0;
1165 }
1166 
1167 static void mlxsw_sp_ipip_decap_parsing_depth_dec(struct mlxsw_sp *mlxsw_sp,
1168                           enum mlxsw_sp_ipip_type ipipt)
1169 {
1170     const struct mlxsw_sp_ipip_ops *ipip_ops =
1171         mlxsw_sp->router->ipip_ops_arr[ipipt];
1172 
1173     if (ipip_ops->inc_parsing_depth)
1174         mlxsw_sp_parsing_depth_dec(mlxsw_sp);
1175 }
1176 
1177 static int
1178 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1179                   struct mlxsw_sp_fib_entry *fib_entry,
1180                   struct mlxsw_sp_ipip_entry *ipip_entry)
1181 {
1182     u32 tunnel_index;
1183     int err;
1184 
1185     err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1186                   1, &tunnel_index);
1187     if (err)
1188         return err;
1189 
1190     err = mlxsw_sp_ipip_decap_parsing_depth_inc(mlxsw_sp,
1191                             ipip_entry->ipipt);
1192     if (err)
1193         goto err_parsing_depth_inc;
1194 
1195     ipip_entry->decap_fib_entry = fib_entry;
1196     fib_entry->decap.ipip_entry = ipip_entry;
1197     fib_entry->decap.tunnel_index = tunnel_index;
1198 
1199     return 0;
1200 
1201 err_parsing_depth_inc:
1202     mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
1203                fib_entry->decap.tunnel_index);
1204     return err;
1205 }
1206 
1207 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1208                       struct mlxsw_sp_fib_entry *fib_entry)
1209 {
1210     enum mlxsw_sp_ipip_type ipipt = fib_entry->decap.ipip_entry->ipipt;
1211 
1212     /* Unlink this node from the IPIP entry that it's the decap entry of. */
1213     fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1214     fib_entry->decap.ipip_entry = NULL;
1215     mlxsw_sp_ipip_decap_parsing_depth_dec(mlxsw_sp, ipipt);
1216     mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1217                1, fib_entry->decap.tunnel_index);
1218 }
1219 
1220 static struct mlxsw_sp_fib_node *
1221 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1222              size_t addr_len, unsigned char prefix_len);
1223 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1224                      struct mlxsw_sp_fib_entry *fib_entry);
1225 
1226 static void
1227 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1228                  struct mlxsw_sp_ipip_entry *ipip_entry)
1229 {
1230     struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1231 
1232     mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1233     fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1234 
1235     mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1236 }
1237 
1238 static void
1239 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1240                   struct mlxsw_sp_ipip_entry *ipip_entry,
1241                   struct mlxsw_sp_fib_entry *decap_fib_entry)
1242 {
1243     if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1244                       ipip_entry))
1245         return;
1246     decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1247 
1248     if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1249         mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1250 }
1251 
/* Look up the FIB entry for host address @addr in table @tb_id. Returns the
 * entry only if it exists and is of the requested @type; NULL otherwise
 * (including when the VR is not in use).
 */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
				     enum mlxsw_sp_l3proto proto,
				     const union mlxsw_sp_l3addr *addr,
				     enum mlxsw_sp_fib_entry_type type)
{
	struct mlxsw_sp_fib_node *fib_node;
	unsigned char addr_prefix_len;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;
	const void *addrp;
	size_t addr_len;
	u32 addr4;

	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, proto);

	/* Build a full-length (host route) lookup key for the protocol.
	 * For IPv4, addrp aliases the local addr4, which must stay live
	 * until the lookup below.
	 */
	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		addr4 = be32_to_cpu(addr->addr4);
		addrp = &addr4;
		addr_len = 4;
		addr_prefix_len = 32;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		addrp = &addr->addr6;
		addr_len = 16;
		addr_prefix_len = 128;
		break;
	default:
		WARN_ON(1);
		return NULL;
	}

	fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
					    addr_prefix_len);
	if (!fib_node || fib_node->fib_entry->type != type)
		return NULL;

	return fib_node->fib_entry;
}
1295 
1296 /* Given an IPIP entry, find the corresponding decap route. */
1297 static struct mlxsw_sp_fib_entry *
1298 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1299                    struct mlxsw_sp_ipip_entry *ipip_entry)
1300 {
1301     static struct mlxsw_sp_fib_node *fib_node;
1302     const struct mlxsw_sp_ipip_ops *ipip_ops;
1303     unsigned char saddr_prefix_len;
1304     union mlxsw_sp_l3addr saddr;
1305     struct mlxsw_sp_fib *ul_fib;
1306     struct mlxsw_sp_vr *ul_vr;
1307     const void *saddrp;
1308     size_t saddr_len;
1309     u32 ul_tb_id;
1310     u32 saddr4;
1311 
1312     ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1313 
1314     ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1315     ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1316     if (!ul_vr)
1317         return NULL;
1318 
1319     ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1320     saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1321                        ipip_entry->ol_dev);
1322 
1323     switch (ipip_ops->ul_proto) {
1324     case MLXSW_SP_L3_PROTO_IPV4:
1325         saddr4 = be32_to_cpu(saddr.addr4);
1326         saddrp = &saddr4;
1327         saddr_len = 4;
1328         saddr_prefix_len = 32;
1329         break;
1330     case MLXSW_SP_L3_PROTO_IPV6:
1331         saddrp = &saddr.addr6;
1332         saddr_len = 16;
1333         saddr_prefix_len = 128;
1334         break;
1335     default:
1336         WARN_ON(1);
1337         return NULL;
1338     }
1339 
1340     fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1341                         saddr_prefix_len);
1342     if (!fib_node ||
1343         fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1344         return NULL;
1345 
1346     return fib_node->fib_entry;
1347 }
1348 
1349 static struct mlxsw_sp_ipip_entry *
1350 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1351                enum mlxsw_sp_ipip_type ipipt,
1352                struct net_device *ol_dev)
1353 {
1354     struct mlxsw_sp_ipip_entry *ipip_entry;
1355 
1356     ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1357     if (IS_ERR(ipip_entry))
1358         return ipip_entry;
1359 
1360     list_add_tail(&ipip_entry->ipip_list_node,
1361               &mlxsw_sp->router->ipip_list);
1362 
1363     return ipip_entry;
1364 }
1365 
1366 static void
1367 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1368                 struct mlxsw_sp_ipip_entry *ipip_entry)
1369 {
1370     list_del(&ipip_entry->ipip_list_node);
1371     mlxsw_sp_ipip_entry_dealloc(mlxsw_sp, ipip_entry);
1372 }
1373 
1374 static bool
1375 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1376                   const struct net_device *ul_dev,
1377                   enum mlxsw_sp_l3proto ul_proto,
1378                   union mlxsw_sp_l3addr ul_dip,
1379                   struct mlxsw_sp_ipip_entry *ipip_entry)
1380 {
1381     u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1382     enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1383 
1384     if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1385         return false;
1386 
1387     return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1388                          ul_tb_id, ipip_entry);
1389 }
1390 
/* Given decap parameters, find the corresponding IPIP entry. */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, int ul_dev_ifindex,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip)
{
	/* NULL covers the no-underlay-device case below; on a match the loop
	 * jumps out with the cursor still pointing at the matching entry.
	 */
	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
	struct net_device *ul_dev;

	/* RCU protects the ifindex -> netdevice lookup. */
	rcu_read_lock();

	ul_dev = dev_get_by_index_rcu(mlxsw_sp_net(mlxsw_sp), ul_dev_ifindex);
	if (!ul_dev)
		goto out_unlock;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
						      ul_proto, ul_dip,
						      ipip_entry))
			goto out_unlock;

	/* Loop ran to completion: no entry matched. */
	rcu_read_unlock();

	return NULL;

out_unlock:
	rcu_read_unlock();
	return ipip_entry;
}
1421 
1422 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1423                       const struct net_device *dev,
1424                       enum mlxsw_sp_ipip_type *p_type)
1425 {
1426     struct mlxsw_sp_router *router = mlxsw_sp->router;
1427     const struct mlxsw_sp_ipip_ops *ipip_ops;
1428     enum mlxsw_sp_ipip_type ipipt;
1429 
1430     for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1431         ipip_ops = router->ipip_ops_arr[ipipt];
1432         if (dev->type == ipip_ops->dev_type) {
1433             if (p_type)
1434                 *p_type = ipipt;
1435             return true;
1436         }
1437     }
1438     return false;
1439 }
1440 
1441 static bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1442                        const struct net_device *dev)
1443 {
1444     return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1445 }
1446 
1447 static struct mlxsw_sp_ipip_entry *
1448 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1449                    const struct net_device *ol_dev)
1450 {
1451     struct mlxsw_sp_ipip_entry *ipip_entry;
1452 
1453     list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1454                 ipip_list_node)
1455         if (ipip_entry->ol_dev == ol_dev)
1456             return ipip_entry;
1457 
1458     return NULL;
1459 }
1460 
/* Find the next IPIP entry after @start whose underlay device is @ul_dev.
 * Pass @start as NULL to search from the head of the list; pass a previous
 * result to continue the search, allowing callers to iterate over all
 * entries sharing the same underlay. Returns NULL when there are no more
 * matches.
 */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ul_dev,
				   struct mlxsw_sp_ipip_entry *start)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	/* list_prepare_entry() makes list_for_each_entry_continue() start
	 * either at the head (start == NULL) or just after @start.
	 */
	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
					ipip_list_node);
	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
				     ipip_list_node) {
		struct net_device *ol_dev = ipip_entry->ol_dev;
		struct net_device *ipip_ul_dev;

		rcu_read_lock();
		ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
		rcu_read_unlock();

		if (ipip_ul_dev == ul_dev)
			return ipip_entry;
	}

	return NULL;
}
1485 
1486 static bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp,
1487                        const struct net_device *dev)
1488 {
1489     return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1490 }
1491 
1492 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1493                         const struct net_device *ol_dev,
1494                         enum mlxsw_sp_ipip_type ipipt)
1495 {
1496     const struct mlxsw_sp_ipip_ops *ops
1497         = mlxsw_sp->router->ipip_ops_arr[ipipt];
1498 
1499     return ops->can_offload(mlxsw_sp, ol_dev);
1500 }
1501 
1502 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1503                         struct net_device *ol_dev)
1504 {
1505     enum mlxsw_sp_ipip_type ipipt = MLXSW_SP_IPIP_TYPE_MAX;
1506     struct mlxsw_sp_ipip_entry *ipip_entry;
1507     enum mlxsw_sp_l3proto ul_proto;
1508     union mlxsw_sp_l3addr saddr;
1509     u32 ul_tb_id;
1510 
1511     mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1512     if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1513         ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1514         ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1515         saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1516         if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1517                               saddr, ul_tb_id,
1518                               NULL)) {
1519             ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1520                                 ol_dev);
1521             if (IS_ERR(ipip_entry))
1522                 return PTR_ERR(ipip_entry);
1523         }
1524     }
1525 
1526     return 0;
1527 }
1528 
/* A tunnel netdevice was unregistered: drop its offload, if any. */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
1538 
/* The overlay device came up: if a matching local route exists, start
 * offloading decap through it.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry;

	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (!decap_fib_entry)
		return;

	mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
					  decap_fib_entry);
}
1550 
/* Program (or disable, per @enable) the loopback RIF of @lb_rif via the RITR
 * register, binding it to underlay VR @ul_vr_id and underlay RIF @ul_rif_id.
 * The IPv4/IPv6 variant is chosen by the loopback config's underlay
 * protocol. Returns the register-write result.
 */
static int
mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
			u16 ul_rif_id, bool enable)
{
	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
	enum mlxsw_reg_ritr_loopback_ipip_options ipip_options;
	struct mlxsw_sp_rif *rif = &lb_rif->common;
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ritr_pl[MLXSW_REG_RITR_LEN];
	struct in6_addr *saddr6;
	u32 saddr4;

	ipip_options = MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET;
	switch (lb_cf.ul_protocol) {
	case MLXSW_SP_L3_PROTO_IPV4:
		/* Register expects the local address in host byte order. */
		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
				    rif->rif_index, rif->vr_id, rif->dev->mtu);
		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
						   ipip_options, ul_vr_id,
						   ul_rif_id, saddr4,
						   lb_cf.okey);
		break;

	case MLXSW_SP_L3_PROTO_IPV6:
		saddr6 = &lb_cf.saddr.addr6;
		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
				    rif->rif_index, rif->vr_id, rif->dev->mtu);
		mlxsw_reg_ritr_loopback_ipip6_pack(ritr_pl, lb_cf.lb_ipipt,
						   ipip_options, ul_vr_id,
						   ul_rif_id, saddr6,
						   lb_cf.okey);
		break;
	}

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
1588 
1589 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1590                          struct net_device *ol_dev)
1591 {
1592     struct mlxsw_sp_ipip_entry *ipip_entry;
1593     struct mlxsw_sp_rif_ipip_lb *lb_rif;
1594     int err = 0;
1595 
1596     ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1597     if (ipip_entry) {
1598         lb_rif = ipip_entry->ol_lb;
1599         err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1600                           lb_rif->ul_rif_id, true);
1601         if (err)
1602             goto out;
1603         lb_rif->common.mtu = ol_dev->mtu;
1604     }
1605 
1606 out:
1607     return err;
1608 }
1609 
/* Overlay device went up: only offloaded tunnels need to react. */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}
1619 
1620 static void
1621 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1622                   struct mlxsw_sp_ipip_entry *ipip_entry)
1623 {
1624     if (ipip_entry->decap_fib_entry)
1625         mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1626 }
1627 
/* Overlay device went down: only offloaded tunnels need to react. */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}
1637 
1638 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1639                      struct mlxsw_sp_rif *old_rif,
1640                      struct mlxsw_sp_rif *new_rif);
/* Replace @ipip_entry's loopback RIF with a freshly created one. When
 * @keep_encap is set, next hops using the tunnel are migrated to the new
 * RIF before the old one is destroyed, so encapsulation keeps working.
 */
static int
mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry,
				 bool keep_encap,
				 struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;

	/* Build the replacement first; only retire the old RIF afterwards. */
	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
						     ipip_entry->ipipt,
						     ipip_entry->ol_dev,
						     extack);
	if (IS_ERR(new_lb_rif))
		return PTR_ERR(new_lb_rif);
	ipip_entry->ol_lb = new_lb_rif;

	if (keep_encap)
		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
					     &new_lb_rif->common);

	mlxsw_sp_rif_destroy(&old_lb_rif->common);

	return 0;
}
1666 
1667 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1668                     struct mlxsw_sp_rif *rif);
1669 
1670 /**
1671  * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry.
1672  * @mlxsw_sp: mlxsw_sp.
1673  * @ipip_entry: IPIP entry.
1674  * @recreate_loopback: Recreates the associated loopback RIF.
1675  * @keep_encap: Updates next hops that use the tunnel netdevice. This is only
1676  *              relevant when recreate_loopback is true.
1677  * @update_nexthops: Updates next hops, keeping the current loopback RIF. This
1678  *                   is only relevant when recreate_loopback is false.
1679  * @extack: extack.
1680  *
1681  * Return: Non-zero value on failure.
1682  */
1683 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1684                     struct mlxsw_sp_ipip_entry *ipip_entry,
1685                     bool recreate_loopback,
1686                     bool keep_encap,
1687                     bool update_nexthops,
1688                     struct netlink_ext_ack *extack)
1689 {
1690     int err;
1691 
1692     /* RIFs can't be edited, so to update loopback, we need to destroy and
1693      * recreate it. That creates a window of opportunity where RALUE and
1694      * RATR registers end up referencing a RIF that's already gone. RATRs
1695      * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1696      * of RALUE, demote the decap route back.
1697      */
1698     if (ipip_entry->decap_fib_entry)
1699         mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1700 
1701     if (recreate_loopback) {
1702         err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1703                                keep_encap, extack);
1704         if (err)
1705             return err;
1706     } else if (update_nexthops) {
1707         mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1708                         &ipip_entry->ol_lb->common);
1709     }
1710 
1711     if (ipip_entry->ol_dev->flags & IFF_UP)
1712         mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1713 
1714     return 0;
1715 }
1716 
1717 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1718                         struct net_device *ol_dev,
1719                         struct netlink_ext_ack *extack)
1720 {
1721     struct mlxsw_sp_ipip_entry *ipip_entry =
1722         mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1723 
1724     if (!ipip_entry)
1725         return 0;
1726 
1727     return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1728                            true, false, false, extack);
1729 }
1730 
1731 static int
1732 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1733                      struct mlxsw_sp_ipip_entry *ipip_entry,
1734                      struct net_device *ul_dev,
1735                      bool *demote_this,
1736                      struct netlink_ext_ack *extack)
1737 {
1738     u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1739     enum mlxsw_sp_l3proto ul_proto;
1740     union mlxsw_sp_l3addr saddr;
1741 
1742     /* Moving underlay to a different VRF might cause local address
1743      * conflict, and the conflicting tunnels need to be demoted.
1744      */
1745     ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1746     saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1747     if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1748                          saddr, ul_tb_id,
1749                          ipip_entry)) {
1750         *demote_this = true;
1751         return 0;
1752     }
1753 
1754     return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1755                            true, true, false, extack);
1756 }
1757 
1758 static int
1759 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1760                     struct mlxsw_sp_ipip_entry *ipip_entry,
1761                     struct net_device *ul_dev)
1762 {
1763     return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1764                            false, false, true, NULL);
1765 }
1766 
1767 static int
1768 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1769                       struct mlxsw_sp_ipip_entry *ipip_entry,
1770                       struct net_device *ul_dev)
1771 {
1772     /* A down underlay device causes encapsulated packets to not be
1773      * forwarded, but decap still works. So refresh next hops without
1774      * touching anything else.
1775      */
1776     return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1777                            false, false, true, NULL);
1778 }
1779 
1780 static int
1781 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1782                     struct net_device *ol_dev,
1783                     struct netlink_ext_ack *extack)
1784 {
1785     const struct mlxsw_sp_ipip_ops *ipip_ops;
1786     struct mlxsw_sp_ipip_entry *ipip_entry;
1787     int err;
1788 
1789     ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1790     if (!ipip_entry)
1791         /* A change might make a tunnel eligible for offloading, but
1792          * that is currently not implemented. What falls to slow path
1793          * stays there.
1794          */
1795         return 0;
1796 
1797     /* A change might make a tunnel not eligible for offloading. */
1798     if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1799                          ipip_entry->ipipt)) {
1800         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1801         return 0;
1802     }
1803 
1804     ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1805     err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1806     return err;
1807 }
1808 
/* Stop offloading a tunnel: if the overlay device is up, first tear down
 * the entry's operational state, then destroy the entry itself. Traffic
 * for the tunnel then falls back to the kernel's slow path.
 */
void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
                       struct mlxsw_sp_ipip_entry *ipip_entry)
{
    struct net_device *ol_dev = ipip_entry->ol_dev;

    if (ol_dev->flags & IFF_UP)
        mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
    mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
1818 
1819 /* The configuration where several tunnels have the same local address in the
1820  * same underlay table needs special treatment in the HW. That is currently not
1821  * implemented in the driver. This function finds and demotes the first tunnel
1822  * with a given source address, except the one passed in in the argument
1823  * `except'.
1824  */
1825 bool
1826 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1827                      enum mlxsw_sp_l3proto ul_proto,
1828                      union mlxsw_sp_l3addr saddr,
1829                      u32 ul_tb_id,
1830                      const struct mlxsw_sp_ipip_entry *except)
1831 {
1832     struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1833 
1834     list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1835                  ipip_list_node) {
1836         if (ipip_entry != except &&
1837             mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1838                               ul_tb_id, ipip_entry)) {
1839             mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1840             return true;
1841         }
1842     }
1843 
1844     return false;
1845 }
1846 
1847 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1848                              struct net_device *ul_dev)
1849 {
1850     struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1851 
1852     list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1853                  ipip_list_node) {
1854         struct net_device *ol_dev = ipip_entry->ol_dev;
1855         struct net_device *ipip_ul_dev;
1856 
1857         rcu_read_lock();
1858         ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1859         rcu_read_unlock();
1860         if (ipip_ul_dev == ul_dev)
1861             mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1862     }
1863 }
1864 
/* Dispatch a netdev notifier event concerning a tunnel's overlay device
 * to the matching handler. Events not listed here are ignored.
 */
static int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
                        struct net_device *ol_dev,
                        unsigned long event,
                        struct netdev_notifier_info *info)
{
    struct netdev_notifier_changeupper_info *chup;
    struct netlink_ext_ack *extack;
    int err = 0;

    switch (event) {
    case NETDEV_REGISTER:
        err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
        break;
    case NETDEV_UNREGISTER:
        mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
        break;
    case NETDEV_UP:
        mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
        break;
    case NETDEV_DOWN:
        mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
        break;
    case NETDEV_CHANGEUPPER:
        chup = container_of(info, typeof(*chup), info);
        extack = info->extack;
        /* Only a change of the tunnel's VRF membership is relevant. */
        if (netif_is_l3_master(chup->upper_dev))
            err = mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
                                   ol_dev,
                                   extack);
        break;
    case NETDEV_CHANGE:
        extack = info->extack;
        err = mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
                                  ol_dev, extack);
        break;
    case NETDEV_CHANGEMTU:
        err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
        break;
    }
    return err;
}
1906 
/* Dispatch a netdev notifier event concerning the underlay device of one
 * particular tunnel. On NETDEV_CHANGEUPPER the handler may ask the caller
 * to demote the tunnel via *demote_this. Unhandled events return 0.
 */
static int
__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
                   struct mlxsw_sp_ipip_entry *ipip_entry,
                   struct net_device *ul_dev,
                   bool *demote_this,
                   unsigned long event,
                   struct netdev_notifier_info *info)
{
    struct netdev_notifier_changeupper_info *chup;
    struct netlink_ext_ack *extack;

    switch (event) {
    case NETDEV_CHANGEUPPER:
        chup = container_of(info, typeof(*chup), info);
        extack = info->extack;
        /* Only a change of the underlay's VRF membership is relevant. */
        if (netif_is_l3_master(chup->upper_dev))
            return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
                                    ipip_entry,
                                    ul_dev,
                                    demote_this,
                                    extack);
        break;

    case NETDEV_UP:
        return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
                               ul_dev);
    case NETDEV_DOWN:
        return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
                                 ipip_entry,
                                 ul_dev);
    }
    return 0;
}
1940 
/* Deliver a netdev notifier event about ul_dev to every tunnel that uses
 * it as underlay. A handler error demotes all tunnels on this underlay.
 * A handler may also ask that just the current tunnel be demoted; the
 * iterator is then rewound to the previous entry so the walk can resume
 * after the demoted entry is unlinked.
 */
static int
mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
                 struct net_device *ul_dev,
                 unsigned long event,
                 struct netdev_notifier_info *info)
{
    struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
    int err;

    while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
                                ul_dev,
                                ipip_entry))) {
        struct mlxsw_sp_ipip_entry *prev;
        bool demote_this = false;

        err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
                             ul_dev, &demote_this,
                             event, info);
        if (err) {
            mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
                                 ul_dev);
            return err;
        }

        if (demote_this) {
            if (list_is_first(&ipip_entry->ipip_list_node,
                      &mlxsw_sp->router->ipip_list))
                prev = NULL;
            else
                /* This can't be cached from previous iteration,
                 * because that entry could be gone now.
                 */
                prev = list_prev_entry(ipip_entry,
                               ipip_list_node);
            mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
            ipip_entry = prev;
        }
    }

    return 0;
}
1982 
/* Record the NVE decap configuration and, if a matching local-IP ("trap")
 * FIB entry already exists, convert it to an NVE decap entry in the
 * device. Only one NVE decap configuration may be valid at a time.
 * On device update failure the FIB entry is rolled back to trap type.
 */
int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
                      enum mlxsw_sp_l3proto ul_proto,
                      const union mlxsw_sp_l3addr *ul_sip,
                      u32 tunnel_index)
{
    enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
    struct mlxsw_sp_router *router = mlxsw_sp->router;
    struct mlxsw_sp_fib_entry *fib_entry;
    int err = 0;

    mutex_lock(&mlxsw_sp->router->lock);

    /* Promoting while another decap configuration is valid is a bug. */
    if (WARN_ON_ONCE(router->nve_decap_config.valid)) {
        err = -EINVAL;
        goto out;
    }

    router->nve_decap_config.ul_tb_id = ul_tb_id;
    router->nve_decap_config.tunnel_index = tunnel_index;
    router->nve_decap_config.ul_proto = ul_proto;
    router->nve_decap_config.ul_sip = *ul_sip;
    router->nve_decap_config.valid = true;

    /* It is valid to create a tunnel with a local IP and only later
     * assign this IP address to a local interface
     */
    fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
                             ul_proto, ul_sip,
                             type);
    if (!fib_entry)
        goto out;

    fib_entry->decap.tunnel_index = tunnel_index;
    fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;

    err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
    if (err)
        goto err_fib_entry_update;

    goto out;

err_fib_entry_update:
    /* Restore the entry's previous type and re-program it. */
    fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
    mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
out:
    mutex_unlock(&mlxsw_sp->router->lock);
    return err;
}
2031 
/* Invalidate the NVE decap configuration and, if a matching NVE decap
 * FIB entry exists, convert it back to a trap entry in the device.
 */
void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
                      enum mlxsw_sp_l3proto ul_proto,
                      const union mlxsw_sp_l3addr *ul_sip)
{
    enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
    struct mlxsw_sp_router *router = mlxsw_sp->router;
    struct mlxsw_sp_fib_entry *fib_entry;

    mutex_lock(&mlxsw_sp->router->lock);

    /* Demoting without a valid decap configuration is a bug. */
    if (WARN_ON_ONCE(!router->nve_decap_config.valid))
        goto out;

    router->nve_decap_config.valid = false;

    fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
                             ul_proto, ul_sip,
                             type);
    if (!fib_entry)
        goto out;

    fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
    mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
out:
    mutex_unlock(&mlxsw_sp->router->lock);
}
2058 
2059 static bool mlxsw_sp_router_nve_is_decap(struct mlxsw_sp *mlxsw_sp,
2060                      u32 ul_tb_id,
2061                      enum mlxsw_sp_l3proto ul_proto,
2062                      const union mlxsw_sp_l3addr *ul_sip)
2063 {
2064     struct mlxsw_sp_router *router = mlxsw_sp->router;
2065 
2066     return router->nve_decap_config.valid &&
2067            router->nve_decap_config.ul_tb_id == ul_tb_id &&
2068            router->nve_decap_config.ul_proto == ul_proto &&
2069            !memcmp(&router->nve_decap_config.ul_sip, ul_sip,
2070                sizeof(*ul_sip));
2071 }
2072 
/* Hash table key for neighbour entries: the kernel neighbour itself. */
struct mlxsw_sp_neigh_key {
    struct neighbour *n;
};
2076 
/* Driver-side representation of a kernel neighbour programmed (or being
 * programmed) to the device's adjacency (RAUHT) table.
 */
struct mlxsw_sp_neigh_entry {
    struct list_head rif_list_node; /* member of the RIF's neigh_list */
    struct rhash_head ht_node; /* member of router->neigh_ht */
    struct mlxsw_sp_neigh_key key; /* the kernel neighbour */
    u16 rif; /* index of the RIF this neighbour resides on */
    bool connected; /* last state programmed to the device:
             * true after an add, false after a delete
             */
    unsigned char ha[ETH_ALEN]; /* hardware address last programmed */
    struct list_head nexthop_list; /* list of nexthops using
                    * this neigh entry
                    */
    struct list_head nexthop_neighs_list_node;
    unsigned int counter_index; /* flow counter; valid only when
                     * counter_valid is set
                     */
    bool counter_valid;
};
2091 
/* rhashtable layout for router->neigh_ht, keyed by the neighbour pointer. */
static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
    .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
    .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
    .key_len = sizeof(struct mlxsw_sp_neigh_key),
};
2097 
2098 struct mlxsw_sp_neigh_entry *
2099 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
2100             struct mlxsw_sp_neigh_entry *neigh_entry)
2101 {
2102     if (!neigh_entry) {
2103         if (list_empty(&rif->neigh_list))
2104             return NULL;
2105         else
2106             return list_first_entry(&rif->neigh_list,
2107                         typeof(*neigh_entry),
2108                         rif_list_node);
2109     }
2110     if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
2111         return NULL;
2112     return list_next_entry(neigh_entry, rif_list_node);
2113 }
2114 
/* Return the address family of the neighbour's table (AF_INET/AF_INET6). */
int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
{
    return neigh_entry->key.n->tbl->family;
}
2119 
/* Return the hardware address cached in the neighbour entry. */
unsigned char *
mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
{
    return neigh_entry->ha;
}
2125 
2126 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2127 {
2128     struct neighbour *n;
2129 
2130     n = neigh_entry->key.n;
2131     return ntohl(*((__be32 *) n->primary_key));
2132 }
2133 
2134 struct in6_addr *
2135 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2136 {
2137     struct neighbour *n;
2138 
2139     n = neigh_entry->key.n;
2140     return (struct in6_addr *) &n->primary_key;
2141 }
2142 
/* Read the neighbour's flow counter into *p_counter. Returns -EINVAL if
 * no counter was allocated for this entry.
 */
int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
                   struct mlxsw_sp_neigh_entry *neigh_entry,
                   u64 *p_counter)
{
    if (!neigh_entry->counter_valid)
        return -EINVAL;

    return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
                     p_counter, NULL);
}
2153 
2154 static struct mlxsw_sp_neigh_entry *
2155 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
2156                u16 rif)
2157 {
2158     struct mlxsw_sp_neigh_entry *neigh_entry;
2159 
2160     neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
2161     if (!neigh_entry)
2162         return NULL;
2163 
2164     neigh_entry->key.n = n;
2165     neigh_entry->rif = rif;
2166     INIT_LIST_HEAD(&neigh_entry->nexthop_list);
2167 
2168     return neigh_entry;
2169 }
2170 
/* Free a neighbour entry previously allocated by
 * mlxsw_sp_neigh_entry_alloc().
 */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
    kfree(neigh_entry);
}
2175 
/* Insert a neighbour entry into the router's neighbour hash table. */
static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
                struct mlxsw_sp_neigh_entry *neigh_entry)
{
    return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
                      &neigh_entry->ht_node,
                      mlxsw_sp_neigh_ht_params);
}
2184 
/* Remove a neighbour entry from the router's neighbour hash table. */
static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
                struct mlxsw_sp_neigh_entry *neigh_entry)
{
    rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
                   &neigh_entry->ht_node,
                   mlxsw_sp_neigh_ht_params);
}
2193 
/* Decide whether a flow counter should be allocated for this neighbour:
 * only if counters are enabled on the matching dpipe host table
 * (host4 for ARP neighbours, host6 for ND neighbours).
 */
static bool
mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
                    struct mlxsw_sp_neigh_entry *neigh_entry)
{
    struct devlink *devlink;
    const char *table_name;

    switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
    case AF_INET:
        table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
        break;
    case AF_INET6:
        table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
        break;
    default:
        WARN_ON(1);
        return false;
    }

    devlink = priv_to_devlink(mlxsw_sp->core);
    return devlink_dpipe_table_counter_enabled(devlink, table_name);
}
2216 
2217 static void
2218 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2219                  struct mlxsw_sp_neigh_entry *neigh_entry)
2220 {
2221     if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2222         return;
2223 
2224     if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2225         return;
2226 
2227     neigh_entry->counter_valid = true;
2228 }
2229 
2230 static void
2231 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2232                 struct mlxsw_sp_neigh_entry *neigh_entry)
2233 {
2234     if (!neigh_entry->counter_valid)
2235         return;
2236     mlxsw_sp_flow_counter_free(mlxsw_sp,
2237                    neigh_entry->counter_index);
2238     neigh_entry->counter_valid = false;
2239 }
2240 
/* Create a driver neighbour entry for kernel neighbour n: allocate it,
 * insert it into the hash table, optionally attach a flow counter, and
 * link it on its RIF's neighbour list. Fails if n's device has no RIF.
 */
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
    struct mlxsw_sp_neigh_entry *neigh_entry;
    struct mlxsw_sp_rif *rif;
    int err;

    rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
    if (!rif)
        return ERR_PTR(-EINVAL);

    neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
    if (!neigh_entry)
        return ERR_PTR(-ENOMEM);

    err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
    if (err)
        goto err_neigh_entry_insert;

    mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
    /* neigh_count gates the periodic RAUHTD dump; see
     * mlxsw_sp_router_neighs_update_rauhtd().
     */
    atomic_inc(&mlxsw_sp->router->neighs_update.neigh_count);
    list_add(&neigh_entry->rif_list_node, &rif->neigh_list);

    return neigh_entry;

err_neigh_entry_insert:
    mlxsw_sp_neigh_entry_free(neigh_entry);
    return ERR_PTR(err);
}
2270 
/* Tear down a neighbour entry in reverse order of creation. */
static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
                 struct mlxsw_sp_neigh_entry *neigh_entry)
{
    list_del(&neigh_entry->rif_list_node);
    atomic_dec(&mlxsw_sp->router->neighs_update.neigh_count);
    mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
    mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
    mlxsw_sp_neigh_entry_free(neigh_entry);
}
2281 
2282 static struct mlxsw_sp_neigh_entry *
2283 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2284 {
2285     struct mlxsw_sp_neigh_key key;
2286 
2287     key.n = n;
2288     return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2289                       &key, mlxsw_sp_neigh_ht_params);
2290 }
2291 
/* Derive the neighbour activity polling interval (in msecs) from the
 * kernel neighbour tables' DELAY_PROBE_TIME; with IPv6 enabled, use the
 * smaller of the ARP and ND values.
 */
static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
    unsigned long interval;

#if IS_ENABLED(CONFIG_IPV6)
    interval = min_t(unsigned long,
             NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
             NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
#else
    interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
#endif
    mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
}
2306 
/* Process one IPv4 entry of a RAUHTD dump: look up the corresponding
 * kernel ARP neighbour on the RIF's device and poke it so the kernel
 * considers it active.
 */
static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
                           char *rauhtd_pl,
                           int ent_index)
{
    u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
    struct net_device *dev;
    struct neighbour *n;
    __be32 dipn;
    u32 dip;
    u16 rif;

    mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

    /* Sanity-check the RIF index reported by the device. */
    if (WARN_ON_ONCE(rif >= max_rifs))
        return;
    if (!mlxsw_sp->router->rifs[rif]) {
        dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
        return;
    }

    dipn = htonl(dip);
    dev = mlxsw_sp->router->rifs[rif]->dev;
    n = neigh_lookup(&arp_tbl, &dipn, dev);
    if (!n)
        return;

    netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
    neigh_event_send(n, NULL);
    neigh_release(n);
}
2337 
#if IS_ENABLED(CONFIG_IPV6)
/* Process one IPv6 record of a RAUHTD dump: look up the corresponding
 * kernel ND neighbour on the RIF's device and poke it so the kernel
 * considers it active.
 */
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
                           char *rauhtd_pl,
                           int rec_index)
{
    struct net_device *dev;
    struct neighbour *n;
    struct in6_addr dip;
    u16 rif;

    mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
                     (char *) &dip);

    if (!mlxsw_sp->router->rifs[rif]) {
        dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
        return;
    }

    dev = mlxsw_sp->router->rifs[rif]->dev;
    n = neigh_lookup(&nd_tbl, &dip, dev);
    if (!n)
        return;

    netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
    neigh_event_send(n, NULL);
    neigh_release(n);
}
#else
/* Stub for kernels built without IPv6 support. */
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
                           char *rauhtd_pl,
                           int rec_index)
{
}
#endif
2372 
2373 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2374                            char *rauhtd_pl,
2375                            int rec_index)
2376 {
2377     u8 num_entries;
2378     int i;
2379 
2380     num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2381                                 rec_index);
2382     /* Hardware starts counting at 0, so add 1. */
2383     num_entries++;
2384 
2385     /* Each record consists of several neighbour entries. */
2386     for (i = 0; i < num_entries; i++) {
2387         int ent_index;
2388 
2389         ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2390         mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2391                                ent_index);
2392     }
2393 
2394 }
2395 
/* Process one IPv6 RAUHTD record. */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
                           char *rauhtd_pl,
                           int rec_index)
{
    /* One record contains one entry. */
    mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
                           rec_index);
}
2404 
/* Dispatch one RAUHTD dump record to the IPv4 or IPv6 handler; records
 * of any other type are silently ignored.
 */
static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
                          char *rauhtd_pl, int rec_index)
{
    switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
    case MLXSW_REG_RAUHTD_TYPE_IPV4:
        mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
                               rec_index);
        break;
    case MLXSW_REG_RAUHTD_TYPE_IPV6:
        mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
                               rec_index);
        break;
    }
}
2419 
2420 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2421 {
2422     u8 num_rec, last_rec_index, num_entries;
2423 
2424     num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2425     last_rec_index = num_rec - 1;
2426 
2427     if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2428         return false;
2429     if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2430         MLXSW_REG_RAUHTD_TYPE_IPV6)
2431         return true;
2432 
2433     num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2434                                 last_rec_index);
2435     if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2436         return true;
2437     return false;
2438 }
2439 
/* Repeatedly query the device's activity dump (RAUHTD) for the given
 * record type and process every returned record, until the response is
 * no longer full. Returns the last query error, if any.
 */
static int
__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
                       char *rauhtd_pl,
                       enum mlxsw_reg_rauhtd_type type)
{
    int i, num_rec;
    int err;

    /* Ensure the RIF we read from the device does not change mid-dump. */
    mutex_lock(&mlxsw_sp->router->lock);
    do {
        mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
                      rauhtd_pl);
        if (err) {
            dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
            break;
        }
        num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
        for (i = 0; i < num_rec; i++)
            mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
                              i);
    } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
    mutex_unlock(&mlxsw_sp->router->lock);

    return err;
}
2467 
/* Dump neighbour activity from the device, first IPv4 then IPv6.
 * Skipped entirely when no neighbour entries exist.
 */
static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
{
    enum mlxsw_reg_rauhtd_type type;
    char *rauhtd_pl;
    int err;

    if (!atomic_read(&mlxsw_sp->router->neighs_update.neigh_count))
        return 0;

    rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
    if (!rauhtd_pl)
        return -ENOMEM;

    type = MLXSW_REG_RAUHTD_TYPE_IPV4;
    err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
    if (err)
        goto out;

    type = MLXSW_REG_RAUHTD_TYPE_IPV6;
    err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
out:
    kfree(rauhtd_pl);
    return err;
}
2492 
/* Poke every neighbour that is used by a nexthop. */
static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
    struct mlxsw_sp_neigh_entry *neigh_entry;

    mutex_lock(&mlxsw_sp->router->lock);
    list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
                nexthop_neighs_list_node)
        /* If this neigh have nexthops, make the kernel think this neigh
         * is active regardless of the traffic.
         */
        neigh_event_send(neigh_entry->key.n, NULL);
    mutex_unlock(&mlxsw_sp->router->lock);
}
2506 
2507 static void
2508 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2509 {
2510     unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2511 
2512     mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2513                    msecs_to_jiffies(interval));
2514 }
2515 
/* Periodic work: reflect device-side neighbour activity back to the
 * kernel, keep nexthop neighbours active, then re-schedule itself.
 */
static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
{
    struct mlxsw_sp_router *router;
    int err;

    router = container_of(work, struct mlxsw_sp_router,
                  neighs_update.dw.work);
    err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
    if (err)
        dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");

    mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);

    mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
}
2531 
/* Periodic work: trigger resolution of nexthop neighbours that are not
 * yet connected, then re-schedule itself.
 */
static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
    struct mlxsw_sp_neigh_entry *neigh_entry;
    struct mlxsw_sp_router *router;

    router = container_of(work, struct mlxsw_sp_router,
                  nexthop_probe_dw.work);
    /* Iterate over nexthop neighbours, find those who are unresolved and
     * send arp on them. This solves the chicken-egg problem when
     * the nexthop wouldn't get offloaded until the neighbor is resolved
     * but it wouldn't get resolved ever in case traffic is flowing in HW
     * using different nexthop.
     */
    mutex_lock(&router->lock);
    list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
                nexthop_neighs_list_node)
        if (!neigh_entry->connected)
            neigh_event_send(neigh_entry->key.n, NULL);
    mutex_unlock(&router->lock);

    mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
                   MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}
2555 
2556 static void
2557 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2558                   struct mlxsw_sp_neigh_entry *neigh_entry,
2559                   bool removing, bool dead);
2560 
2561 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2562 {
2563     return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2564             MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2565 }
2566 
/* Write (add or delete) an IPv4 neighbour to the device's RAUHT table,
 * attaching the entry's flow counter when one is allocated.
 */
static int
mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
                struct mlxsw_sp_neigh_entry *neigh_entry,
                enum mlxsw_reg_rauht_op op)
{
    struct neighbour *n = neigh_entry->key.n;
    u32 dip = ntohl(*((__be32 *) n->primary_key));
    char rauht_pl[MLXSW_REG_RAUHT_LEN];

    mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
                  dip);
    if (neigh_entry->counter_valid)
        mlxsw_reg_rauht_pack_counter(rauht_pl,
                         neigh_entry->counter_index);
    return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
2583 
/* Write (add or delete) an IPv6 neighbour to the device's RAUHT table,
 * attaching the entry's flow counter when one is allocated.
 */
static int
mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
                struct mlxsw_sp_neigh_entry *neigh_entry,
                enum mlxsw_reg_rauht_op op)
{
    struct neighbour *n = neigh_entry->key.n;
    char rauht_pl[MLXSW_REG_RAUHT_LEN];
    const char *dip = n->primary_key;

    mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
                  dip);
    if (neigh_entry->counter_valid)
        mlxsw_reg_rauht_pack_counter(rauht_pl,
                         neigh_entry->counter_index);
    return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
2600 
2601 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2602 {
2603     struct neighbour *n = neigh_entry->key.n;
2604 
2605     /* Packets with a link-local destination address are trapped
2606      * after LPM lookup and never reach the neighbour table, so
2607      * there is no need to program such neighbours to the device.
2608      */
2609     if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2610         IPV6_ADDR_LINKLOCAL)
2611         return true;
2612     return false;
2613 }
2614 
/* Program the neighbour entry to the device (adding=true) or remove it
 * (adding=false), tracking the result in neigh_entry->connected and
 * mirroring the offload state into the kernel neighbour's NTF_OFFLOADED
 * flag. Removing an entry that was never programmed is a no-op.
 */
static void
mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
                struct mlxsw_sp_neigh_entry *neigh_entry,
                bool adding)
{
    enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
    int err;

    if (!adding && !neigh_entry->connected)
        return;
    neigh_entry->connected = adding;
    if (neigh_entry->key.n->tbl->family == AF_INET) {
        err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
                              op);
        if (err)
            return;
    } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
        /* Link-local destinations are never programmed; see
         * mlxsw_sp_neigh_ipv6_ignore().
         */
        if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
            return;
        err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
                              op);
        if (err)
            return;
    } else {
        WARN_ON_ONCE(1);
        return;
    }

    if (adding)
        neigh_entry->key.n->flags |= NTF_OFFLOADED;
    else
        neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
}
2648 
/* Attach (adding=true) or detach (adding=false) a flow counter to the
 * neighbour entry, then re-program the entry so the new counter binding
 * takes effect in the device.
 */
void
mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
                    struct mlxsw_sp_neigh_entry *neigh_entry,
                    bool adding)
{
    if (adding)
        mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
    else
        mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
    mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
}
2660 
/* Deferred-work context for netevent notifications. */
struct mlxsw_sp_netevent_work {
    struct work_struct work;
    struct mlxsw_sp *mlxsw_sp;
    struct neighbour *n; /* held reference, released by the work item */
};
2666 
/* Deferred handler for a neighbour update netevent: snapshot the
 * neighbour's state, create/update/remove the corresponding driver entry
 * in the device, and propagate the change to nexthops using it. Releases
 * the neighbour reference and frees the work context before returning.
 */
static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
    struct mlxsw_sp_netevent_work *net_work =
        container_of(work, struct mlxsw_sp_netevent_work, work);
    struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
    struct mlxsw_sp_neigh_entry *neigh_entry;
    struct neighbour *n = net_work->n;
    unsigned char ha[ETH_ALEN];
    bool entry_connected;
    u8 nud_state, dead;

    /* If these parameters are changed after we release the lock,
     * then we are guaranteed to receive another event letting us
     * know about it.
     */
    read_lock_bh(&n->lock);
    memcpy(ha, n->ha, ETH_ALEN);
    nud_state = n->nud_state;
    dead = n->dead;
    read_unlock_bh(&n->lock);

    mutex_lock(&mlxsw_sp->router->lock);
    mlxsw_sp_span_respin(mlxsw_sp);

    entry_connected = nud_state & NUD_VALID && !dead;
    neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
    /* Nothing to do for an unknown, non-connected neighbour. */
    if (!entry_connected && !neigh_entry)
        goto out;
    if (!neigh_entry) {
        neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
        if (IS_ERR(neigh_entry))
            goto out;
    }

    /* Skip the device write if neither state nor address changed. */
    if (neigh_entry->connected && entry_connected &&
        !memcmp(neigh_entry->ha, ha, ETH_ALEN))
        goto out;

    memcpy(neigh_entry->ha, ha, ETH_ALEN);
    mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
    mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
                      dead);

    /* Drop entries that are disconnected and no longer referenced. */
    if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
        mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

out:
    mutex_unlock(&mlxsw_sp->router->lock);
    neigh_release(n);
    kfree(net_work);
}
2718 
2719 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2720 
2721 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2722 {
2723     struct mlxsw_sp_netevent_work *net_work =
2724         container_of(work, struct mlxsw_sp_netevent_work, work);
2725     struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2726 
2727     mlxsw_sp_mp_hash_init(mlxsw_sp);
2728     kfree(net_work);
2729 }
2730 
2731 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2732 
2733 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2734 {
2735     struct mlxsw_sp_netevent_work *net_work =
2736         container_of(work, struct mlxsw_sp_netevent_work, work);
2737     struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2738 
2739     __mlxsw_sp_router_init(mlxsw_sp);
2740     kfree(net_work);
2741 }
2742 
2743 static int mlxsw_sp_router_schedule_work(struct net *net,
2744                      struct notifier_block *nb,
2745                      void (*cb)(struct work_struct *))
2746 {
2747     struct mlxsw_sp_netevent_work *net_work;
2748     struct mlxsw_sp_router *router;
2749 
2750     router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2751     if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp)))
2752         return NOTIFY_DONE;
2753 
2754     net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2755     if (!net_work)
2756         return NOTIFY_BAD;
2757 
2758     INIT_WORK(&net_work->work, cb);
2759     net_work->mlxsw_sp = router->mlxsw_sp;
2760     mlxsw_core_schedule_work(&net_work->work);
2761     return NOTIFY_DONE;
2762 }
2763 
/* Netevent notifier callback. Runs in atomic context, so anything that
 * needs sleeping locks or register access is deferred to a work item.
 */
static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
                      unsigned long event, void *ptr)
{
    struct mlxsw_sp_netevent_work *net_work;
    struct mlxsw_sp_port *mlxsw_sp_port;
    struct mlxsw_sp *mlxsw_sp;
    unsigned long interval;
    struct neigh_parms *p;
    struct neighbour *n;

    switch (event) {
    case NETEVENT_DELAY_PROBE_TIME_UPDATE:
        p = ptr;

        /* We don't care about changes in the default table. */
        if (!p->dev || (p->tbl->family != AF_INET &&
                p->tbl->family != AF_INET6))
            return NOTIFY_DONE;

        /* We are in atomic context and can't take RTNL mutex,
         * so use RCU variant to walk the device chain.
         */
        mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
        if (!mlxsw_sp_port)
            return NOTIFY_DONE;

        /* Track the neighbour table's probe time as our polling
         * interval.
         */
        mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
        mlxsw_sp->router->neighs_update.interval = interval;

        mlxsw_sp_port_dev_put(mlxsw_sp_port);
        break;
    case NETEVENT_NEIGH_UPDATE:
        n = ptr;

        /* Only IPv4/IPv6 neighbours are handled. */
        if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
            return NOTIFY_DONE;

        mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
        if (!mlxsw_sp_port)
            return NOTIFY_DONE;

        net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
        if (!net_work) {
            mlxsw_sp_port_dev_put(mlxsw_sp_port);
            return NOTIFY_BAD;
        }

        INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
        net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        net_work->n = n;

        /* Take a reference to ensure the neighbour won't be
         * destructed until we drop the reference in delayed
         * work.
         */
        neigh_clone(n);
        mlxsw_core_schedule_work(&net_work->work);
        mlxsw_sp_port_dev_put(mlxsw_sp_port);
        break;
    case NETEVENT_IPV4_MPATH_HASH_UPDATE:
    case NETEVENT_IPV6_MPATH_HASH_UPDATE:
        return mlxsw_sp_router_schedule_work(ptr, nb,
                mlxsw_sp_router_mp_hash_event_work);

    case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
        return mlxsw_sp_router_schedule_work(ptr, nb,
                mlxsw_sp_router_update_priority_work);
    }

    return NOTIFY_DONE;
}
2836 
/* Set up neighbour tracking: the neighbour hash table, the polling
 * interval, and the two periodic works (neighbour activity update and
 * unresolved nexthop probing), both kicked off immediately.
 */
static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
    int err;

    err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
                  &mlxsw_sp_neigh_ht_params);
    if (err)
        return err;

    /* Initialize the polling interval according to the default
     * table.
     */
    mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);

    /* Create the delayed works for the activity_update */
    INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
              mlxsw_sp_router_neighs_update_work);
    INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
              mlxsw_sp_router_probe_unresolved_nexthops);
    atomic_set(&mlxsw_sp->router->neighs_update.neigh_count, 0);
    mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
    mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
    return 0;
}
2861 
/* Reverse of mlxsw_sp_neigh_init(): stop both periodic works before
 * destroying the neighbour hash table.
 */
static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
    cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
    cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
    rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
}
2868 
/* Flush every neighbour entry still attached to a RIF that is going away:
 * un-program each from the device, then destroy it. The _safe iterator is
 * required because entries are destroyed while walking the list.
 */
static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
                     struct mlxsw_sp_rif *rif)
{
    struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;

    list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
                 rif_list_node) {
        mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
        mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
    }
}
2880 
/* Kind of object a nexthop egresses through. */
enum mlxsw_sp_nexthop_type {
    MLXSW_SP_NEXTHOP_TYPE_ETH,
    MLXSW_SP_NEXTHOP_TYPE_IPIP,
};
2885 
/* How the device should treat packets hitting the nexthop. */
enum mlxsw_sp_nexthop_action {
    /* Nexthop forwards packets to an egress RIF */
    MLXSW_SP_NEXTHOP_ACTION_FORWARD,
    /* Nexthop discards packets */
    MLXSW_SP_NEXTHOP_ACTION_DISCARD,
    /* Nexthop traps packets */
    MLXSW_SP_NEXTHOP_ACTION_TRAP,
};
2894 
/* Hash table key for a nexthop: the kernel FIB nexthop it mirrors. */
struct mlxsw_sp_nexthop_key {
    struct fib_nh *fib_nh;
};
2898 
/* A single device nexthop, member of a nexthop group. */
struct mlxsw_sp_nexthop {
    struct list_head neigh_list_node; /* member of neigh entry list */
    struct list_head rif_list_node;
    struct list_head router_list_node;
    struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group
                           * this nexthop belongs to
                           */
    struct rhash_head ht_node;
    struct neigh_table *neigh_tbl;
    struct mlxsw_sp_nexthop_key key;
    unsigned char gw_addr[sizeof(struct in6_addr)]; /* sized to fit an
                             * IPv6 gateway
                             */
    int ifindex;
    int nh_weight;
    int norm_nh_weight;
    int num_adj_entries; /* adjacency entries occupied by this nexthop */
    struct mlxsw_sp_rif *rif;
    u8 should_offload:1, /* set indicates this nexthop should be written
                  * to the adjacency table.
                  */
       offloaded:1, /* set indicates this nexthop was written to the
             * adjacency table.
             */
       update:1; /* set indicates this nexthop should be updated in the
              * adjacency table (f.e., its MAC changed).
              */
    enum mlxsw_sp_nexthop_action action;
    enum mlxsw_sp_nexthop_type type;
    union {
        struct mlxsw_sp_neigh_entry *neigh_entry; /* TYPE_ETH */
        struct mlxsw_sp_ipip_entry *ipip_entry; /* TYPE_IPIP */
    };
    unsigned int counter_index;
    bool counter_valid;
};
2933 
/* Flavour of the kernel object a nexthop group mirrors; determines which
 * union member keys the group in the group hash table.
 */
enum mlxsw_sp_nexthop_group_type {
    MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4,
    MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6,
    MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ,
};
2939 
2940 struct mlxsw_sp_nexthop_group_info {
2941     struct mlxsw_sp_nexthop_group *nh_grp;
2942     u32 adj_index;
2943     u16 ecmp_size;
2944     u16 count;
2945     int sum_norm_weight;
2946     u8 adj_index_valid:1,
2947        gateway:1, /* routes using the group use a gateway */
2948        is_resilient:1;
2949     struct list_head list; /* member in nh_res_grp_list */
2950     struct mlxsw_sp_nexthop nexthops[0];
2951 #define nh_rif  nexthops[0].rif
2952 };
2953 
/* Key identifying one virtual router / protocol pair using a group. */
struct mlxsw_sp_nexthop_group_vr_key {
    u16 vr_id;
    enum mlxsw_sp_l3proto proto;
};
2958 
/* Reference-counted record of one VR/proto pair using a nexthop group. */
struct mlxsw_sp_nexthop_group_vr_entry {
    struct list_head list; /* member in vr_list */
    struct rhash_head ht_node; /* member in vr_ht */
    refcount_t ref_count;
    struct mlxsw_sp_nexthop_group_vr_key key;
};
2965 
/* A nexthop group; the union member in use depends on "type". */
struct mlxsw_sp_nexthop_group {
    struct rhash_head ht_node;
    struct list_head fib_list; /* list of fib entries that use this group */
    union {
        struct {
            struct fib_info *fi; /* TYPE_IPV4 */
        } ipv4;
        struct {
            u32 id; /* TYPE_OBJ: nexthop object ID */
        } obj;
    };
    struct mlxsw_sp_nexthop_group_info *nhgi;
    struct list_head vr_list; /* VRs using this group */
    struct rhashtable vr_ht;
    enum mlxsw_sp_nexthop_group_type type;
    bool can_destroy;
};
2983 
2984 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2985                     struct mlxsw_sp_nexthop *nh)
2986 {
2987     struct devlink *devlink;
2988 
2989     devlink = priv_to_devlink(mlxsw_sp->core);
2990     if (!devlink_dpipe_table_counter_enabled(devlink,
2991                          MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2992         return;
2993 
2994     if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2995         return;
2996 
2997     nh->counter_valid = true;
2998 }
2999 
3000 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
3001                    struct mlxsw_sp_nexthop *nh)
3002 {
3003     if (!nh->counter_valid)
3004         return;
3005     mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
3006     nh->counter_valid = false;
3007 }
3008 
3009 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
3010                  struct mlxsw_sp_nexthop *nh, u64 *p_counter)
3011 {
3012     if (!nh->counter_valid)
3013         return -EINVAL;
3014 
3015     return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
3016                      p_counter, NULL);
3017 }
3018 
3019 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
3020                            struct mlxsw_sp_nexthop *nh)
3021 {
3022     if (!nh) {
3023         if (list_empty(&router->nexthop_list))
3024             return NULL;
3025         else
3026             return list_first_entry(&router->nexthop_list,
3027                         typeof(*nh), router_list_node);
3028     }
3029     if (list_is_last(&nh->router_list_node, &router->nexthop_list))
3030         return NULL;
3031     return list_next_entry(nh, router_list_node);
3032 }
3033 
3034 bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh)
3035 {
3036     return nh->offloaded && nh->action == MLXSW_SP_NEXTHOP_ACTION_FORWARD;
3037 }
3038 
3039 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
3040 {
3041     if (nh->type != MLXSW_SP_NEXTHOP_TYPE_ETH ||
3042         !mlxsw_sp_nexthop_is_forward(nh))
3043         return NULL;
3044     return nh->neigh_entry->ha;
3045 }
3046 
3047 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
3048                  u32 *p_adj_size, u32 *p_adj_hash_index)
3049 {
3050     struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3051     u32 adj_hash_index = 0;
3052     int i;
3053 
3054     if (!nh->offloaded || !nhgi->adj_index_valid)
3055         return -EINVAL;
3056 
3057     *p_adj_index = nhgi->adj_index;
3058     *p_adj_size = nhgi->ecmp_size;
3059 
3060     for (i = 0; i < nhgi->count; i++) {
3061         struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3062 
3063         if (nh_iter == nh)
3064             break;
3065         if (nh_iter->offloaded)
3066             adj_hash_index += nh_iter->num_adj_entries;
3067     }
3068 
3069     *p_adj_hash_index = adj_hash_index;
3070     return 0;
3071 }
3072 
3073 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
3074 {
3075     return nh->rif;
3076 }
3077 
3078 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
3079 {
3080     struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3081     int i;
3082 
3083     for (i = 0; i < nhgi->count; i++) {
3084         struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3085 
3086         if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
3087             return true;
3088     }
3089     return false;
3090 }
3091 
/* Per-group VR table: entries keyed by the full (vr_id, proto) struct. */
static const struct rhashtable_params mlxsw_sp_nexthop_group_vr_ht_params = {
    .key_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, key),
    .head_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, ht_node),
    .key_len = sizeof(struct mlxsw_sp_nexthop_group_vr_key),
    .automatic_shrinking = true,
};
3098 
/* Find the VR entry of (fib->vr->id, fib->proto) in the group's VR table.
 * The key is memset() first because the whole key_len is hashed,
 * including any struct padding.
 */
static struct mlxsw_sp_nexthop_group_vr_entry *
mlxsw_sp_nexthop_group_vr_entry_lookup(struct mlxsw_sp_nexthop_group *nh_grp,
                       const struct mlxsw_sp_fib *fib)
{
    struct mlxsw_sp_nexthop_group_vr_key key;

    memset(&key, 0, sizeof(key));
    key.vr_id = fib->vr->id;
    key.proto = fib->proto;
    return rhashtable_lookup_fast(&nh_grp->vr_ht, &key,
                      mlxsw_sp_nexthop_group_vr_ht_params);
}
3111 
3112 static int
3113 mlxsw_sp_nexthop_group_vr_entry_create(struct mlxsw_sp_nexthop_group *nh_grp,
3114                        const struct mlxsw_sp_fib *fib)
3115 {
3116     struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3117     int err;
3118 
3119     vr_entry = kzalloc(sizeof(*vr_entry), GFP_KERNEL);
3120     if (!vr_entry)
3121         return -ENOMEM;
3122 
3123     vr_entry->key.vr_id = fib->vr->id;
3124     vr_entry->key.proto = fib->proto;
3125     refcount_set(&vr_entry->ref_count, 1);
3126 
3127     err = rhashtable_insert_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3128                      mlxsw_sp_nexthop_group_vr_ht_params);
3129     if (err)
3130         goto err_hashtable_insert;
3131 
3132     list_add(&vr_entry->list, &nh_grp->vr_list);
3133 
3134     return 0;
3135 
3136 err_hashtable_insert:
3137     kfree(vr_entry);
3138     return err;
3139 }
3140 
3141 static void
3142 mlxsw_sp_nexthop_group_vr_entry_destroy(struct mlxsw_sp_nexthop_group *nh_grp,
3143                     struct mlxsw_sp_nexthop_group_vr_entry *vr_entry)
3144 {
3145     list_del(&vr_entry->list);
3146     rhashtable_remove_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3147                    mlxsw_sp_nexthop_group_vr_ht_params);
3148     kfree(vr_entry);
3149 }
3150 
3151 static int
3152 mlxsw_sp_nexthop_group_vr_link(struct mlxsw_sp_nexthop_group *nh_grp,
3153                    const struct mlxsw_sp_fib *fib)
3154 {
3155     struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3156 
3157     vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3158     if (vr_entry) {
3159         refcount_inc(&vr_entry->ref_count);
3160         return 0;
3161     }
3162 
3163     return mlxsw_sp_nexthop_group_vr_entry_create(nh_grp, fib);
3164 }
3165 
3166 static void
3167 mlxsw_sp_nexthop_group_vr_unlink(struct mlxsw_sp_nexthop_group *nh_grp,
3168                  const struct mlxsw_sp_fib *fib)
3169 {
3170     struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3171 
3172     vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3173     if (WARN_ON_ONCE(!vr_entry))
3174         return;
3175 
3176     if (!refcount_dec_and_test(&vr_entry->ref_count))
3177         return;
3178 
3179     mlxsw_sp_nexthop_group_vr_entry_destroy(nh_grp, vr_entry);
3180 }
3181 
/* Lookup key for the nexthop group hash table; the valid union member
 * depends on "type".
 */
struct mlxsw_sp_nexthop_group_cmp_arg {
    enum mlxsw_sp_nexthop_group_type type;
    union {
        struct fib_info *fi;
        struct mlxsw_sp_fib6_entry *fib6_entry;
        u32 id;
    };
};
3190 
3191 static bool
3192 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
3193                     const struct in6_addr *gw, int ifindex,
3194                     int weight)
3195 {
3196     int i;
3197 
3198     for (i = 0; i < nh_grp->nhgi->count; i++) {
3199         const struct mlxsw_sp_nexthop *nh;
3200 
3201         nh = &nh_grp->nhgi->nexthops[i];
3202         if (nh->ifindex == ifindex && nh->nh_weight == weight &&
3203             ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
3204             return true;
3205     }
3206 
3207     return false;
3208 }
3209 
3210 static bool
3211 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
3212                 const struct mlxsw_sp_fib6_entry *fib6_entry)
3213 {
3214     struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3215 
3216     if (nh_grp->nhgi->count != fib6_entry->nrt6)
3217         return false;
3218 
3219     list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3220         struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3221         struct in6_addr *gw;
3222         int ifindex, weight;
3223 
3224         ifindex = fib6_nh->fib_nh_dev->ifindex;
3225         weight = fib6_nh->fib_nh_weight;
3226         gw = &fib6_nh->fib_nh_gw6;
3227         if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
3228                              weight))
3229             return false;
3230     }
3231 
3232     return true;
3233 }
3234 
3235 static int
3236 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
3237 {
3238     const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
3239     const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
3240 
3241     if (nh_grp->type != cmp_arg->type)
3242         return 1;
3243 
3244     switch (cmp_arg->type) {
3245     case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3246         return cmp_arg->fi != nh_grp->ipv4.fi;
3247     case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3248         return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
3249                             cmp_arg->fib6_entry);
3250     case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3251         return cmp_arg->id != nh_grp->obj.id;
3252     default:
3253         WARN_ON(1);
3254         return 1;
3255     }
3256 }
3257 
3258 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
3259 {
3260     const struct mlxsw_sp_nexthop_group *nh_grp = data;
3261     const struct mlxsw_sp_nexthop *nh;
3262     struct fib_info *fi;
3263     unsigned int val;
3264     int i;
3265 
3266     switch (nh_grp->type) {
3267     case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3268         fi = nh_grp->ipv4.fi;
3269         return jhash(&fi, sizeof(fi), seed);
3270     case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3271         val = nh_grp->nhgi->count;
3272         for (i = 0; i < nh_grp->nhgi->count; i++) {
3273             nh = &nh_grp->nhgi->nexthops[i];
3274             val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
3275             val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed);
3276         }
3277         return jhash(&val, sizeof(val), seed);
3278     case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3279         return jhash(&nh_grp->obj.id, sizeof(nh_grp->obj.id), seed);
3280     default:
3281         WARN_ON(1);
3282         return 0;
3283     }
3284 }
3285 
3286 static u32
3287 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
3288 {
3289     unsigned int val = fib6_entry->nrt6;
3290     struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3291 
3292     list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3293         struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3294         struct net_device *dev = fib6_nh->fib_nh_dev;
3295         struct in6_addr *gw = &fib6_nh->fib_nh_gw6;
3296 
3297         val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed);
3298         val ^= jhash(gw, sizeof(*gw), seed);
3299     }
3300 
3301     return jhash(&val, sizeof(val), seed);
3302 }
3303 
3304 static u32
3305 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
3306 {
3307     const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
3308 
3309     switch (cmp_arg->type) {
3310     case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3311         return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
3312     case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3313         return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
3314     case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3315         return jhash(&cmp_arg->id, sizeof(cmp_arg->id), seed);
3316     default:
3317         WARN_ON(1);
3318         return 0;
3319     }
3320 }
3321 
/* Nexthop group table uses custom hash/compare callbacks because the
 * lookup key (struct mlxsw_sp_nexthop_group_cmp_arg) differs from the
 * stored object.
 */
static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
    .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
    .hashfn      = mlxsw_sp_nexthop_group_hash,
    .obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
    .obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
};
3328 
3329 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
3330                      struct mlxsw_sp_nexthop_group *nh_grp)
3331 {
3332     if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3333         !nh_grp->nhgi->gateway)
3334         return 0;
3335 
3336     return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
3337                       &nh_grp->ht_node,
3338                       mlxsw_sp_nexthop_group_ht_params);
3339 }
3340 
3341 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3342                       struct mlxsw_sp_nexthop_group *nh_grp)
3343 {
3344     if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3345         !nh_grp->nhgi->gateway)
3346         return;
3347 
3348     rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3349                    &nh_grp->ht_node,
3350                    mlxsw_sp_nexthop_group_ht_params);
3351 }
3352 
3353 static struct mlxsw_sp_nexthop_group *
3354 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3355                    struct fib_info *fi)
3356 {
3357     struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3358 
3359     cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
3360     cmp_arg.fi = fi;
3361     return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3362                       &cmp_arg,
3363                       mlxsw_sp_nexthop_group_ht_params);
3364 }
3365 
3366 static struct mlxsw_sp_nexthop_group *
3367 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3368                    struct mlxsw_sp_fib6_entry *fib6_entry)
3369 {
3370     struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3371 
3372     cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
3373     cmp_arg.fib6_entry = fib6_entry;
3374     return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3375                       &cmp_arg,
3376                       mlxsw_sp_nexthop_group_ht_params);
3377 }
3378 
/* Nexthops are keyed by struct mlxsw_sp_nexthop_key (the kernel fib_nh
 * pointer).
 */
static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
    .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
    .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
    .key_len = sizeof(struct mlxsw_sp_nexthop_key),
};
3384 
3385 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3386                    struct mlxsw_sp_nexthop *nh)
3387 {
3388     return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3389                       &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3390 }
3391 
3392 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3393                     struct mlxsw_sp_nexthop *nh)
3394 {
3395     rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3396                    mlxsw_sp_nexthop_ht_params);
3397 }
3398 
3399 static struct mlxsw_sp_nexthop *
3400 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3401             struct mlxsw_sp_nexthop_key key)
3402 {
3403     return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3404                       mlxsw_sp_nexthop_ht_params);
3405 }
3406 
3407 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3408                          enum mlxsw_sp_l3proto proto,
3409                          u16 vr_id,
3410                          u32 adj_index, u16 ecmp_size,
3411                          u32 new_adj_index,
3412                          u16 new_ecmp_size)
3413 {
3414     char raleu_pl[MLXSW_REG_RALEU_LEN];
3415 
3416     mlxsw_reg_raleu_pack(raleu_pl,
3417                  (enum mlxsw_reg_ralxx_protocol) proto, vr_id,
3418                  adj_index, ecmp_size, new_adj_index,
3419                  new_ecmp_size);
3420     return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3421 }
3422 
/* Re-point every VR using the group from the old adjacency block to the
 * group's current one. On failure, already-updated VRs are rolled back in
 * reverse order so all VRs are left consistently on the old block.
 */
static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
                      struct mlxsw_sp_nexthop_group *nh_grp,
                      u32 old_adj_index, u16 old_ecmp_size)
{
    struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
    struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
    int err;

    list_for_each_entry(vr_entry, &nh_grp->vr_list, list) {
        err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp,
                            vr_entry->key.proto,
                            vr_entry->key.vr_id,
                            old_adj_index,
                            old_ecmp_size,
                            nhgi->adj_index,
                            nhgi->ecmp_size);
        if (err)
            goto err_mass_update_vr;
    }
    return 0;

err_mass_update_vr:
    /* Roll back: move the VRs updated so far back to the old block. */
    list_for_each_entry_continue_reverse(vr_entry, &nh_grp->vr_list, list)
        mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr_entry->key.proto,
                          vr_entry->key.vr_id,
                          nhgi->adj_index,
                          nhgi->ecmp_size,
                          old_adj_index, old_ecmp_size);
    return err;
}
3453 
/* Write one Ethernet adjacency entry for the nexthop at "adj_index".
 * "force" selects an unconditional write over write-on-activity. A
 * nexthop without an egress RIF is programmed with the router's
 * lb_rif_index instead.
 */
static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
                     u32 adj_index,
                     struct mlxsw_sp_nexthop *nh,
                     bool force, char *ratr_pl)
{
    struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
    enum mlxsw_reg_ratr_op op;
    u16 rif_index;

    rif_index = nh->rif ? nh->rif->rif_index :
                  mlxsw_sp->router->lb_rif_index;
    op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY :
             MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY;
    mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET,
                adj_index, rif_index);
    switch (nh->action) {
    case MLXSW_SP_NEXTHOP_ACTION_FORWARD:
        mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
        break;
    case MLXSW_SP_NEXTHOP_ACTION_DISCARD:
        mlxsw_reg_ratr_trap_action_set(ratr_pl,
                           MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
        break;
    case MLXSW_SP_NEXTHOP_ACTION_TRAP:
        mlxsw_reg_ratr_trap_action_set(ratr_pl,
                           MLXSW_REG_RATR_TRAP_ACTION_TRAP);
        mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
        break;
    default:
        WARN_ON_ONCE(1);
        return -EINVAL;
    }
    /* Bind (or explicitly unbind) the nexthop's flow counter. */
    if (nh->counter_valid)
        mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
    else
        mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);

    return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}
3493 
3494 int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3495                 struct mlxsw_sp_nexthop *nh, bool force,
3496                 char *ratr_pl)
3497 {
3498     int i;
3499 
3500     for (i = 0; i < nh->num_adj_entries; i++) {
3501         int err;
3502 
3503         err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i,
3504                             nh, force, ratr_pl);
3505         if (err)
3506             return err;
3507     }
3508 
3509     return 0;
3510 }
3511 
3512 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3513                       u32 adj_index,
3514                       struct mlxsw_sp_nexthop *nh,
3515                       bool force, char *ratr_pl)
3516 {
3517     const struct mlxsw_sp_ipip_ops *ipip_ops;
3518 
3519     ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3520     return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry,
3521                     force, ratr_pl);
3522 }
3523 
3524 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3525                     u32 adj_index,
3526                     struct mlxsw_sp_nexthop *nh, bool force,
3527                     char *ratr_pl)
3528 {
3529     int i;
3530 
3531     for (i = 0; i < nh->num_adj_entries; i++) {
3532         int err;
3533 
3534         err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3535                              nh, force, ratr_pl);
3536         if (err)
3537             return err;
3538     }
3539 
3540     return 0;
3541 }
3542 
3543 static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3544                    struct mlxsw_sp_nexthop *nh, bool force,
3545                    char *ratr_pl)
3546 {
3547     /* When action is discard or trap, the nexthop must be
3548      * programmed as an Ethernet nexthop.
3549      */
3550     if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH ||
3551         nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD ||
3552         nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
3553         return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh,
3554                            force, ratr_pl);
3555     else
3556         return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh,
3557                             force, ratr_pl);
3558 }
3559 
3560 static int
3561 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3562                   struct mlxsw_sp_nexthop_group_info *nhgi,
3563                   bool reallocate)
3564 {
3565     char ratr_pl[MLXSW_REG_RATR_LEN];
3566     u32 adj_index = nhgi->adj_index; /* base */
3567     struct mlxsw_sp_nexthop *nh;
3568     int i;
3569 
3570     for (i = 0; i < nhgi->count; i++) {
3571         nh = &nhgi->nexthops[i];
3572 
3573         if (!nh->should_offload) {
3574             nh->offloaded = 0;
3575             continue;
3576         }
3577 
3578         if (nh->update || reallocate) {
3579             int err = 0;
3580 
3581             err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh,
3582                               true, ratr_pl);
3583             if (err)
3584                 return err;
3585             nh->update = 0;
3586             nh->offloaded = 1;
3587         }
3588         adj_index += nh->num_adj_entries;
3589     }
3590     return 0;
3591 }
3592 
3593 static int
3594 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3595                     struct mlxsw_sp_nexthop_group *nh_grp)
3596 {
3597     struct mlxsw_sp_fib_entry *fib_entry;
3598     int err;
3599 
3600     list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3601         err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3602         if (err)
3603             return err;
3604     }
3605     return 0;
3606 }
3607 
/* A contiguous range of adjacency group sizes supported by the device. */
struct mlxsw_sp_adj_grp_size_range {
    u16 start; /* Inclusive */
    u16 end; /* Inclusive */
};
3612 
/* Adjacency group sizes supported by the Spectrum-1 router variant.
 * Ordered by range start value; the round-up/round-down helpers below
 * rely on this ordering.
 */
static const struct mlxsw_sp_adj_grp_size_range
mlxsw_sp1_adj_grp_size_ranges[] = {
    { .start = 1, .end = 64 },
    { .start = 512, .end = 512 },
    { .start = 1024, .end = 1024 },
    { .start = 2048, .end = 2048 },
    { .start = 4096, .end = 4096 },
};
3622 
/* Adjacency group sizes supported by the Spectrum-2 (and later) router
 * variant. Ordered by range start value; the round-up/round-down helpers
 * below rely on this ordering.
 */
static const struct mlxsw_sp_adj_grp_size_range
mlxsw_sp2_adj_grp_size_ranges[] = {
    { .start = 1, .end = 128 },
    { .start = 256, .end = 256 },
    { .start = 512, .end = 512 },
    { .start = 1024, .end = 1024 },
    { .start = 2048, .end = 2048 },
    { .start = 4096, .end = 4096 },
};
3633 
3634 static void mlxsw_sp_adj_grp_size_round_up(const struct mlxsw_sp *mlxsw_sp,
3635                        u16 *p_adj_grp_size)
3636 {
3637     int i;
3638 
3639     for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
3640         const struct mlxsw_sp_adj_grp_size_range *size_range;
3641 
3642         size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
3643 
3644         if (*p_adj_grp_size >= size_range->start &&
3645             *p_adj_grp_size <= size_range->end)
3646             return;
3647 
3648         if (*p_adj_grp_size <= size_range->end) {
3649             *p_adj_grp_size = size_range->end;
3650             return;
3651         }
3652     }
3653 }
3654 
3655 static void mlxsw_sp_adj_grp_size_round_down(const struct mlxsw_sp *mlxsw_sp,
3656                          u16 *p_adj_grp_size,
3657                          unsigned int alloc_size)
3658 {
3659     int i;
3660 
3661     for (i = mlxsw_sp->router->adj_grp_size_ranges_count - 1; i >= 0; i--) {
3662         const struct mlxsw_sp_adj_grp_size_range *size_range;
3663 
3664         size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
3665 
3666         if (alloc_size >= size_range->end) {
3667             *p_adj_grp_size = size_range->end;
3668             return;
3669         }
3670     }
3671 }
3672 
3673 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3674                      u16 *p_adj_grp_size)
3675 {
3676     unsigned int alloc_size;
3677     int err;
3678 
3679     /* Round up the requested group size to the next size supported
3680      * by the device and make sure the request can be satisfied.
3681      */
3682     mlxsw_sp_adj_grp_size_round_up(mlxsw_sp, p_adj_grp_size);
3683     err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3684                           MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3685                           *p_adj_grp_size, &alloc_size);
3686     if (err)
3687         return err;
3688     /* It is possible the allocation results in more allocated
3689      * entries than requested. Try to use as much of them as
3690      * possible.
3691      */
3692     mlxsw_sp_adj_grp_size_round_down(mlxsw_sp, p_adj_grp_size, alloc_size);
3693 
3694     return 0;
3695 }
3696 
3697 static void
3698 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group_info *nhgi)
3699 {
3700     int i, g = 0, sum_norm_weight = 0;
3701     struct mlxsw_sp_nexthop *nh;
3702 
3703     for (i = 0; i < nhgi->count; i++) {
3704         nh = &nhgi->nexthops[i];
3705 
3706         if (!nh->should_offload)
3707             continue;
3708         if (g > 0)
3709             g = gcd(nh->nh_weight, g);
3710         else
3711             g = nh->nh_weight;
3712     }
3713 
3714     for (i = 0; i < nhgi->count; i++) {
3715         nh = &nhgi->nexthops[i];
3716 
3717         if (!nh->should_offload)
3718             continue;
3719         nh->norm_nh_weight = nh->nh_weight / g;
3720         sum_norm_weight += nh->norm_nh_weight;
3721     }
3722 
3723     nhgi->sum_norm_weight = sum_norm_weight;
3724 }
3725 
3726 static void
3727 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group_info *nhgi)
3728 {
3729     int i, weight = 0, lower_bound = 0;
3730     int total = nhgi->sum_norm_weight;
3731     u16 ecmp_size = nhgi->ecmp_size;
3732 
3733     for (i = 0; i < nhgi->count; i++) {
3734         struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
3735         int upper_bound;
3736 
3737         if (!nh->should_offload)
3738             continue;
3739         weight += nh->norm_nh_weight;
3740         upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3741         nh->num_adj_entries = upper_bound - lower_bound;
3742         lower_bound = upper_bound;
3743     }
3744 }
3745 
/* Defined later in this file; needed here by the IPv6 offload-refresh
 * helpers below.
 */
static struct mlxsw_sp_nexthop *
mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
             const struct mlxsw_sp_rt6 *mlxsw_sp_rt6);
3749 
3750 static void
3751 mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3752                     struct mlxsw_sp_nexthop_group *nh_grp)
3753 {
3754     int i;
3755 
3756     for (i = 0; i < nh_grp->nhgi->count; i++) {
3757         struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
3758 
3759         if (nh->offloaded)
3760             nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3761         else
3762             nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3763     }
3764 }
3765 
3766 static void
3767 __mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp,
3768                       struct mlxsw_sp_fib6_entry *fib6_entry)
3769 {
3770     struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3771 
3772     list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3773         struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3774         struct mlxsw_sp_nexthop *nh;
3775 
3776         nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3777         if (nh && nh->offloaded)
3778             fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3779         else
3780             fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3781     }
3782 }
3783 
3784 static void
3785 mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3786                     struct mlxsw_sp_nexthop_group *nh_grp)
3787 {
3788     struct mlxsw_sp_fib6_entry *fib6_entry;
3789 
3790     /* Unfortunately, in IPv6 the route and the nexthop are described by
3791      * the same struct, so we need to iterate over all the routes using the
3792      * nexthop group and set / clear the offload indication for them.
3793      */
3794     list_for_each_entry(fib6_entry, &nh_grp->fib_list,
3795                 common.nexthop_group_node)
3796         __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
3797 }
3798 
3799 static void
3800 mlxsw_sp_nexthop_bucket_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3801                     const struct mlxsw_sp_nexthop *nh,
3802                     u16 bucket_index)
3803 {
3804     struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp;
3805     bool offload = false, trap = false;
3806 
3807     if (nh->offloaded) {
3808         if (nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
3809             trap = true;
3810         else
3811             offload = true;
3812     }
3813     nexthop_bucket_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
3814                     bucket_index, offload, trap);
3815 }
3816 
3817 static void
3818 mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3819                        struct mlxsw_sp_nexthop_group *nh_grp)
3820 {
3821     int i;
3822 
3823     /* Do not update the flags if the nexthop group is being destroyed
3824      * since:
3825      * 1. The nexthop objects is being deleted, in which case the flags are
3826      * irrelevant.
3827      * 2. The nexthop group was replaced by a newer group, in which case
3828      * the flags of the nexthop object were already updated based on the
3829      * new group.
3830      */
3831     if (nh_grp->can_destroy)
3832         return;
3833 
3834     nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
3835                  nh_grp->nhgi->adj_index_valid, false);
3836 
3837     /* Update flags of individual nexthop buckets in case of a resilient
3838      * nexthop group.
3839      */
3840     if (!nh_grp->nhgi->is_resilient)
3841         return;
3842 
3843     for (i = 0; i < nh_grp->nhgi->count; i++) {
3844         struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
3845 
3846         mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, i);
3847     }
3848 }
3849 
3850 static void
3851 mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3852                        struct mlxsw_sp_nexthop_group *nh_grp)
3853 {
3854     switch (nh_grp->type) {
3855     case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3856         mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp);
3857         break;
3858     case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3859         mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
3860         break;
3861     case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3862         mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, nh_grp);
3863         break;
3864     }
3865 }
3866 
/* Re-program a nexthop group in the device after its composition or the
 * state of its members changed. When the set of offloadable nexthops
 * changed, a new adjacency group is allocated, the FIB entries using the
 * group are migrated to it and the old allocation is released. On any
 * failure the group degrades to trapping packets to the CPU ('set_trap')
 * so traffic keeps flowing through the kernel.
 */
static int
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
                   struct mlxsw_sp_nexthop_group *nh_grp)
{
    struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
    u16 ecmp_size, old_ecmp_size;
    struct mlxsw_sp_nexthop *nh;
    bool offload_change = false;
    u32 adj_index;
    bool old_adj_index_valid;
    u32 old_adj_index;
    int i, err2, err;

    /* Gateway-less groups have no adjacency entries; just refresh the
     * FIB entries that use them.
     */
    if (!nhgi->gateway)
        return mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);

    /* Determine whether the set of offloaded nexthops changed and mark
     * newly offloadable nexthops for programming.
     */
    for (i = 0; i < nhgi->count; i++) {
        nh = &nhgi->nexthops[i];

        if (nh->should_offload != nh->offloaded) {
            offload_change = true;
            if (nh->should_offload)
                nh->update = 1;
        }
    }
    if (!offload_change) {
        /* Nothing was added or removed, so no need to reallocate. Just
         * update MAC on existing adjacency indexes.
         */
        err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, false);
        if (err) {
            dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
            goto set_trap;
        }
        /* Flags of individual nexthop buckets might need to be
         * updated.
         */
        mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
        return 0;
    }
    mlxsw_sp_nexthop_group_normalize(nhgi);
    if (!nhgi->sum_norm_weight) {
        /* No neigh of this group is connected so we just set
         * the trap and let everything flow through kernel.
         */
        err = 0;
        goto set_trap;
    }

    ecmp_size = nhgi->sum_norm_weight;
    err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
    if (err)
        /* No valid allocation size available. */
        goto set_trap;

    err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
                  ecmp_size, &adj_index);
    if (err) {
        /* We ran out of KVD linear space, just set the
         * trap and let everything flow through kernel.
         */
        dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
        goto set_trap;
    }
    /* Switch the group to the new allocation, remembering the old one
     * so the FIB entries can be migrated and it can be freed.
     */
    old_adj_index_valid = nhgi->adj_index_valid;
    old_adj_index = nhgi->adj_index;
    old_ecmp_size = nhgi->ecmp_size;
    nhgi->adj_index_valid = 1;
    nhgi->adj_index = adj_index;
    nhgi->ecmp_size = ecmp_size;
    mlxsw_sp_nexthop_group_rebalance(nhgi);
    err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, true);
    if (err) {
        dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
        goto set_trap;
    }

    mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);

    if (!old_adj_index_valid) {
        /* The trap was set for fib entries, so we have to call
         * fib entry update to unset it and use adjacency index.
         */
        err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
        if (err) {
            dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
            goto set_trap;
        }
        return 0;
    }

    /* Atomically repoint the FIB entries from the old adjacency range
     * to the new one, then free the old range regardless of outcome.
     */
    err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
                         old_adj_index, old_ecmp_size);
    mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
               old_ecmp_size, old_adj_index);
    if (err) {
        dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
        goto set_trap;
    }

    return 0;

set_trap:
    /* Degrade the group: invalidate its adjacency index, point the FIB
     * entries at the trap and release the group's KVDL allocation.
     */
    old_adj_index_valid = nhgi->adj_index_valid;
    nhgi->adj_index_valid = 0;
    for (i = 0; i < nhgi->count; i++) {
        nh = &nhgi->nexthops[i];
        nh->offloaded = 0;
    }
    err2 = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
    if (err2)
        dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
    mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
    if (old_adj_index_valid)
        mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
                   nhgi->ecmp_size, nhgi->adj_index);
    return err;
}
3985 
3986 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3987                         bool removing)
3988 {
3989     if (!removing) {
3990         nh->action = MLXSW_SP_NEXTHOP_ACTION_FORWARD;
3991         nh->should_offload = 1;
3992     } else if (nh->nhgi->is_resilient) {
3993         nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
3994         nh->should_offload = 1;
3995     } else {
3996         nh->should_offload = 0;
3997     }
3998     nh->update = 1;
3999 }
4000 
/* A neighbour used by nexthops was marked dead. Look up (or create) a live
 * neighbour for the same gateway address and device, re-key the driver's
 * neigh entry to it, and transfer every nexthop's neighbour reference from
 * the old neighbour to the new one, refreshing the affected groups.
 */
static int
mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
                    struct mlxsw_sp_neigh_entry *neigh_entry)
{
    struct neighbour *n, *old_n = neigh_entry->key.n;
    struct mlxsw_sp_nexthop *nh;
    bool entry_connected;
    u8 nud_state, dead;
    int err;

    /* Any nexthop on the list can be used to look the neighbour up;
     * take the first.
     */
    nh = list_first_entry(&neigh_entry->nexthop_list,
                  struct mlxsw_sp_nexthop, neigh_list_node);

    n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
    if (!n) {
        n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
        if (IS_ERR(n))
            return PTR_ERR(n);
        /* Kick off resolution of the freshly created neighbour. */
        neigh_event_send(n, NULL);
    }

    /* Re-insert the driver's entry under the new neighbour key. */
    mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
    neigh_entry->key.n = n;
    err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
    if (err)
        goto err_neigh_entry_insert;

    /* Snapshot the neighbour's state under its lock to decide whether
     * the nexthops can be offloaded.
     */
    read_lock_bh(&n->lock);
    nud_state = n->nud_state;
    dead = n->dead;
    read_unlock_bh(&n->lock);
    entry_connected = nud_state & NUD_VALID && !dead;

    /* Swap each nexthop's reference: drop one on the old neighbour,
     * take one on the new.
     */
    list_for_each_entry(nh, &neigh_entry->nexthop_list,
                neigh_list_node) {
        neigh_release(old_n);
        neigh_clone(n);
        __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
        mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
    }

    /* Drop the reference taken by neigh_lookup()/neigh_create(). */
    neigh_release(n);

    return 0;

err_neigh_entry_insert:
    /* Restore the old key and drop the new neighbour's reference. */
    neigh_entry->key.n = old_n;
    mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
    neigh_release(n);
    return err;
}
4052 
4053 static void
4054 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
4055                   struct mlxsw_sp_neigh_entry *neigh_entry,
4056                   bool removing, bool dead)
4057 {
4058     struct mlxsw_sp_nexthop *nh;
4059 
4060     if (list_empty(&neigh_entry->nexthop_list))
4061         return;
4062 
4063     if (dead) {
4064         int err;
4065 
4066         err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
4067                               neigh_entry);
4068         if (err)
4069             dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
4070         return;
4071     }
4072 
4073     list_for_each_entry(nh, &neigh_entry->nexthop_list,
4074                 neigh_list_node) {
4075         __mlxsw_sp_nexthop_neigh_update(nh, removing);
4076         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4077     }
4078 }
4079 
4080 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
4081                       struct mlxsw_sp_rif *rif)
4082 {
4083     if (nh->rif)
4084         return;
4085 
4086     nh->rif = rif;
4087     list_add(&nh->rif_list_node, &rif->nexthop_list);
4088 }
4089 
4090 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
4091 {
4092     if (!nh->rif)
4093         return;
4094 
4095     list_del(&nh->rif_list_node);
4096     nh->rif = NULL;
4097 }
4098 
/* Resolve and attach the neighbour entry backing an Ethernet nexthop. A
 * reference on the neighbour is held for the nexthop's lifetime. Returns
 * 0 on success and also when no neighbour is needed.
 */
static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
                       struct mlxsw_sp_nexthop *nh)
{
    struct mlxsw_sp_neigh_entry *neigh_entry;
    struct neighbour *n;
    u8 nud_state, dead;
    int err;

    /* Gateway-less groups need no neighbour; also skip if one is
     * already attached.
     */
    if (!nh->nhgi->gateway || nh->neigh_entry)
        return 0;

    /* Take a reference of neigh here ensuring that neigh would
     * not be destructed before the nexthop entry is finished.
     * The reference is taken either in neigh_lookup() or
     * in neigh_create() in case n is not found.
     */
    n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
    if (!n) {
        n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
        if (IS_ERR(n))
            return PTR_ERR(n);
        neigh_event_send(n, NULL);
    }
    neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
    if (!neigh_entry) {
        neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
        if (IS_ERR(neigh_entry)) {
            /* NOTE(review): the PTR_ERR() code is collapsed to
             * -EINVAL here — confirm callers do not need the
             * original error code.
             */
            err = -EINVAL;
            goto err_neigh_entry_create;
        }
    }

    /* If that is the first nexthop connected to that neigh, add to
     * nexthop_neighs_list
     */
    if (list_empty(&neigh_entry->nexthop_list))
        list_add_tail(&neigh_entry->nexthop_neighs_list_node,
                  &mlxsw_sp->router->nexthop_neighs_list);

    nh->neigh_entry = neigh_entry;
    list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
    /* Snapshot the neighbour state under its lock to decide whether
     * the nexthop can be offloaded right away.
     */
    read_lock_bh(&n->lock);
    nud_state = n->nud_state;
    dead = n->dead;
    read_unlock_bh(&n->lock);
    __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));

    return 0;

err_neigh_entry_create:
    neigh_release(n);
    return err;
}
4152 
4153 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
4154                     struct mlxsw_sp_nexthop *nh)
4155 {
4156     struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
4157     struct neighbour *n;
4158 
4159     if (!neigh_entry)
4160         return;
4161     n = neigh_entry->key.n;
4162 
4163     __mlxsw_sp_nexthop_neigh_update(nh, true);
4164     list_del(&nh->neigh_list_node);
4165     nh->neigh_entry = NULL;
4166 
4167     /* If that is the last nexthop connected to that neigh, remove from
4168      * nexthop_neighs_list
4169      */
4170     if (list_empty(&neigh_entry->nexthop_list))
4171         list_del(&neigh_entry->nexthop_neighs_list_node);
4172 
4173     if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
4174         mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
4175 
4176     neigh_release(n);
4177 }
4178 
4179 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
4180 {
4181     struct net_device *ul_dev;
4182     bool is_up;
4183 
4184     rcu_read_lock();
4185     ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
4186     is_up = ul_dev ? (ul_dev->flags & IFF_UP) : true;
4187     rcu_read_unlock();
4188 
4189     return is_up;
4190 }
4191 
4192 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
4193                        struct mlxsw_sp_nexthop *nh,
4194                        struct mlxsw_sp_ipip_entry *ipip_entry)
4195 {
4196     bool removing;
4197 
4198     if (!nh->nhgi->gateway || nh->ipip_entry)
4199         return;
4200 
4201     nh->ipip_entry = ipip_entry;
4202     removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
4203     __mlxsw_sp_nexthop_neigh_update(nh, removing);
4204     mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
4205 }
4206 
4207 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
4208                        struct mlxsw_sp_nexthop *nh)
4209 {
4210     struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
4211 
4212     if (!ipip_entry)
4213         return;
4214 
4215     __mlxsw_sp_nexthop_neigh_update(nh, true);
4216     nh->ipip_entry = NULL;
4217 }
4218 
4219 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4220                     const struct fib_nh *fib_nh,
4221                     enum mlxsw_sp_ipip_type *p_ipipt)
4222 {
4223     struct net_device *dev = fib_nh->fib_nh_dev;
4224 
4225     return dev &&
4226            fib_nh->nh_parent->fib_type == RTN_UNICAST &&
4227            mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
4228 }
4229 
4230 static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp,
4231                       struct mlxsw_sp_nexthop *nh,
4232                       const struct net_device *dev)
4233 {
4234     const struct mlxsw_sp_ipip_ops *ipip_ops;
4235     struct mlxsw_sp_ipip_entry *ipip_entry;
4236     struct mlxsw_sp_rif *rif;
4237     int err;
4238 
4239     ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4240     if (ipip_entry) {
4241         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4242         if (ipip_ops->can_offload(mlxsw_sp, dev)) {
4243             nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4244             mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4245             return 0;
4246         }
4247     }
4248 
4249     nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4250     rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4251     if (!rif)
4252         return 0;
4253 
4254     mlxsw_sp_nexthop_rif_init(nh, rif);
4255     err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4256     if (err)
4257         goto err_neigh_init;
4258 
4259     return 0;
4260 
4261 err_neigh_init:
4262     mlxsw_sp_nexthop_rif_fini(nh);
4263     return err;
4264 }
4265 
4266 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
4267                        struct mlxsw_sp_nexthop *nh)
4268 {
4269     switch (nh->type) {
4270     case MLXSW_SP_NEXTHOP_TYPE_ETH:
4271         mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
4272         mlxsw_sp_nexthop_rif_fini(nh);
4273         break;
4274     case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4275         mlxsw_sp_nexthop_rif_fini(nh);
4276         mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
4277         break;
4278     }
4279 }
4280 
/* Initialize a nexthop from an IPv4 fib_nh: register it in the router's
 * tables and — unless the route is link-down and such routes are ignored —
 * resolve its type (Ethernet / IP-in-IP) and backing objects.
 */
static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
                  struct mlxsw_sp_nexthop_group *nh_grp,
                  struct mlxsw_sp_nexthop *nh,
                  struct fib_nh *fib_nh)
{
    struct net_device *dev = fib_nh->fib_nh_dev;
    struct in_device *in_dev;
    int err;

    nh->nhgi = nh_grp->nhgi;
    nh->key.fib_nh = fib_nh;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
    nh->nh_weight = fib_nh->fib_nh_weight;
#else
    /* Without multipath support every nexthop has equal weight. */
    nh->nh_weight = 1;
#endif
    memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
    nh->neigh_tbl = &arp_tbl;
    err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
    if (err)
        return err;

    mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
    list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);

    if (!dev)
        return 0;
    nh->ifindex = dev->ifindex;

    /* Respect ignore_routes_with_linkdown: a link-down nexthop is
     * registered but its type is not resolved.
     */
    rcu_read_lock();
    in_dev = __in_dev_get_rcu(dev);
    if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
        fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
        rcu_read_unlock();
        return 0;
    }
    rcu_read_unlock();

    err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
    if (err)
        goto err_nexthop_neigh_init;

    return 0;

err_nexthop_neigh_init:
    /* Roll back in reverse order of initialization. */
    list_del(&nh->router_list_node);
    mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
    mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
    return err;
}
4331 
/* Tear down a nexthop created by mlxsw_sp_nexthop4_init(), in reverse
 * order of initialization.
 */
static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
                   struct mlxsw_sp_nexthop *nh)
{
    mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
    list_del(&nh->router_list_node);
    mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
    mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
4340 
4341 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
4342                     unsigned long event, struct fib_nh *fib_nh)
4343 {
4344     struct mlxsw_sp_nexthop_key key;
4345     struct mlxsw_sp_nexthop *nh;
4346 
4347     key.fib_nh = fib_nh;
4348     nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
4349     if (!nh)
4350         return;
4351 
4352     switch (event) {
4353     case FIB_EVENT_NH_ADD:
4354         mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, fib_nh->fib_nh_dev);
4355         break;
4356     case FIB_EVENT_NH_DEL:
4357         mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4358         break;
4359     }
4360 
4361     mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4362 }
4363 
4364 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
4365                     struct mlxsw_sp_rif *rif)
4366 {
4367     struct mlxsw_sp_nexthop *nh;
4368     bool removing;
4369 
4370     list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
4371         switch (nh->type) {
4372         case MLXSW_SP_NEXTHOP_TYPE_ETH:
4373             removing = false;
4374             break;
4375         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4376             removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
4377             break;
4378         default:
4379             WARN_ON(1);
4380             continue;
4381         }
4382 
4383         __mlxsw_sp_nexthop_neigh_update(nh, removing);
4384         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4385     }
4386 }
4387 
/* Move all nexthops tracked on old_rif over to new_rif, re-point them at
 * it and refresh them against the new RIF's state.
 */
static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
                     struct mlxsw_sp_rif *old_rif,
                     struct mlxsw_sp_rif *new_rif)
{
    struct mlxsw_sp_nexthop *nh;

    list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
    list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
        nh->rif = new_rif;
    mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
}
4399 
4400 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
4401                        struct mlxsw_sp_rif *rif)
4402 {
4403     struct mlxsw_sp_nexthop *nh, *tmp;
4404 
4405     list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
4406         mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4407         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4408     }
4409 }
4410 
/* Program the shared "trap" adjacency entry: a single KVDL adjacency
 * entry whose action is to trap packets to the CPU. It is used while a
 * nexthop group has no valid hardware state.
 */
static int mlxsw_sp_adj_trap_entry_init(struct mlxsw_sp *mlxsw_sp)
{
    enum mlxsw_reg_ratr_trap_action trap_action;
    char ratr_pl[MLXSW_REG_RATR_LEN];
    int err;

    err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
                  &mlxsw_sp->router->adj_trap_index);
    if (err)
        return err;

    trap_action = MLXSW_REG_RATR_TRAP_ACTION_TRAP;
    mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
                MLXSW_REG_RATR_TYPE_ETHERNET,
                mlxsw_sp->router->adj_trap_index,
                mlxsw_sp->router->lb_rif_index);
    mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
    mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
    err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
    if (err)
        goto err_ratr_write;

    return 0;

err_ratr_write:
    /* Release the KVDL entry if the register write failed. */
    mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
               mlxsw_sp->router->adj_trap_index);
    return err;
}
4440 
/* Free the shared trap adjacency entry allocated by
 * mlxsw_sp_adj_trap_entry_init().
 */
static void mlxsw_sp_adj_trap_entry_fini(struct mlxsw_sp *mlxsw_sp)
{
    mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
               mlxsw_sp->router->adj_trap_index);
}
4446 
4447 static int mlxsw_sp_nexthop_group_inc(struct mlxsw_sp *mlxsw_sp)
4448 {
4449     int err;
4450 
4451     if (refcount_inc_not_zero(&mlxsw_sp->router->num_groups))
4452         return 0;
4453 
4454     err = mlxsw_sp_adj_trap_entry_init(mlxsw_sp);
4455     if (err)
4456         return err;
4457 
4458     refcount_set(&mlxsw_sp->router->num_groups, 1);
4459 
4460     return 0;
4461 }
4462 
4463 static void mlxsw_sp_nexthop_group_dec(struct mlxsw_sp *mlxsw_sp)
4464 {
4465     if (!refcount_dec_and_test(&mlxsw_sp->router->num_groups))
4466         return;
4467 
4468     mlxsw_sp_adj_trap_entry_fini(mlxsw_sp);
4469 }
4470 
4471 static void
4472 mlxsw_sp_nh_grp_activity_get(struct mlxsw_sp *mlxsw_sp,
4473                  const struct mlxsw_sp_nexthop_group *nh_grp,
4474                  unsigned long *activity)
4475 {
4476     char *ratrad_pl;
4477     int i, err;
4478 
4479     ratrad_pl = kmalloc(MLXSW_REG_RATRAD_LEN, GFP_KERNEL);
4480     if (!ratrad_pl)
4481         return;
4482 
4483     mlxsw_reg_ratrad_pack(ratrad_pl, nh_grp->nhgi->adj_index,
4484                   nh_grp->nhgi->count);
4485     err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratrad), ratrad_pl);
4486     if (err)
4487         goto out;
4488 
4489     for (i = 0; i < nh_grp->nhgi->count; i++) {
4490         if (!mlxsw_reg_ratrad_activity_vector_get(ratrad_pl, i))
4491             continue;
4492         bitmap_set(activity, i, 1);
4493     }
4494 
4495 out:
4496     kfree(ratrad_pl);
4497 }
4498 
4499 #define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */
4500 
4501 static void
4502 mlxsw_sp_nh_grp_activity_update(struct mlxsw_sp *mlxsw_sp,
4503                 const struct mlxsw_sp_nexthop_group *nh_grp)
4504 {
4505     unsigned long *activity;
4506 
4507     activity = bitmap_zalloc(nh_grp->nhgi->count, GFP_KERNEL);
4508     if (!activity)
4509         return;
4510 
4511     mlxsw_sp_nh_grp_activity_get(mlxsw_sp, nh_grp, activity);
4512     nexthop_res_grp_activity_update(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
4513                     nh_grp->nhgi->count, activity);
4514 
4515     bitmap_free(activity);
4516 }
4517 
4518 static void
4519 mlxsw_sp_nh_grp_activity_work_schedule(struct mlxsw_sp *mlxsw_sp)
4520 {
4521     unsigned int interval = MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL;
4522 
4523     mlxsw_core_schedule_dw(&mlxsw_sp->router->nh_grp_activity_dw,
4524                    msecs_to_jiffies(interval));
4525 }
4526 
4527 static void mlxsw_sp_nh_grp_activity_work(struct work_struct *work)
4528 {
4529     struct mlxsw_sp_nexthop_group_info *nhgi;
4530     struct mlxsw_sp_router *router;
4531     bool reschedule = false;
4532 
4533     router = container_of(work, struct mlxsw_sp_router,
4534                   nh_grp_activity_dw.work);
4535 
4536     mutex_lock(&router->lock);
4537 
4538     list_for_each_entry(nhgi, &router->nh_res_grp_list, list) {
4539         mlxsw_sp_nh_grp_activity_update(router->mlxsw_sp, nhgi->nh_grp);
4540         reschedule = true;
4541     }
4542 
4543     mutex_unlock(&router->lock);
4544 
4545     if (!reschedule)
4546         return;
4547     mlxsw_sp_nh_grp_activity_work_schedule(router->mlxsw_sp);
4548 }
4549 
4550 static int
4551 mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
4552                      const struct nh_notifier_single_info *nh,
4553                      struct netlink_ext_ack *extack)
4554 {
4555     int err = -EINVAL;
4556 
4557     if (nh->is_fdb)
4558         NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported");
4559     else if (nh->has_encap)
4560         NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported");
4561     else
4562         err = 0;
4563 
4564     return err;
4565 }
4566 
4567 static int
4568 mlxsw_sp_nexthop_obj_group_entry_validate(struct mlxsw_sp *mlxsw_sp,
4569                       const struct nh_notifier_single_info *nh,
4570                       struct netlink_ext_ack *extack)
4571 {
4572     int err;
4573 
4574     err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh, extack);
4575     if (err)
4576         return err;
4577 
4578     /* Device only nexthops with an IPIP device are programmed as
4579      * encapsulating adjacency entries.
4580      */
4581     if (!nh->gw_family && !nh->is_reject &&
4582         !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
4583         NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
4584         return -EINVAL;
4585     }
4586 
4587     return 0;
4588 }
4589 
4590 static int
4591 mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
4592                     const struct nh_notifier_grp_info *nh_grp,
4593                     struct netlink_ext_ack *extack)
4594 {
4595     int i;
4596 
4597     if (nh_grp->is_fdb) {
4598         NL_SET_ERR_MSG_MOD(extack, "FDB nexthop groups are not supported");
4599         return -EINVAL;
4600     }
4601 
4602     for (i = 0; i < nh_grp->num_nh; i++) {
4603         const struct nh_notifier_single_info *nh;
4604         int err;
4605 
4606         nh = &nh_grp->nh_entries[i].nh;
4607         err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4608                                 extack);
4609         if (err)
4610             return err;
4611     }
4612 
4613     return 0;
4614 }
4615 
4616 static int
4617 mlxsw_sp_nexthop_obj_res_group_size_validate(struct mlxsw_sp *mlxsw_sp,
4618                          const struct nh_notifier_res_table_info *nh_res_table,
4619                          struct netlink_ext_ack *extack)
4620 {
4621     unsigned int alloc_size;
4622     bool valid_size = false;
4623     int err, i;
4624 
4625     if (nh_res_table->num_nh_buckets < 32) {
4626         NL_SET_ERR_MSG_MOD(extack, "Minimum number of buckets is 32");
4627         return -EINVAL;
4628     }
4629 
4630     for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
4631         const struct mlxsw_sp_adj_grp_size_range *size_range;
4632 
4633         size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
4634 
4635         if (nh_res_table->num_nh_buckets >= size_range->start &&
4636             nh_res_table->num_nh_buckets <= size_range->end) {
4637             valid_size = true;
4638             break;
4639         }
4640     }
4641 
4642     if (!valid_size) {
4643         NL_SET_ERR_MSG_MOD(extack, "Invalid number of buckets");
4644         return -EINVAL;
4645     }
4646 
4647     err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
4648                           MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4649                           nh_res_table->num_nh_buckets,
4650                           &alloc_size);
4651     if (err || nh_res_table->num_nh_buckets != alloc_size) {
4652         NL_SET_ERR_MSG_MOD(extack, "Number of buckets does not fit allocation size of any KVDL partition");
4653         return -EINVAL;
4654     }
4655 
4656     return 0;
4657 }
4658 
4659 static int
4660 mlxsw_sp_nexthop_obj_res_group_validate(struct mlxsw_sp *mlxsw_sp,
4661                     const struct nh_notifier_res_table_info *nh_res_table,
4662                     struct netlink_ext_ack *extack)
4663 {
4664     int err;
4665     u16 i;
4666 
4667     err = mlxsw_sp_nexthop_obj_res_group_size_validate(mlxsw_sp,
4668                                nh_res_table,
4669                                extack);
4670     if (err)
4671         return err;
4672 
4673     for (i = 0; i < nh_res_table->num_nh_buckets; i++) {
4674         const struct nh_notifier_single_info *nh;
4675         int err;
4676 
4677         nh = &nh_res_table->nhs[i];
4678         err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4679                                 extack);
4680         if (err)
4681             return err;
4682     }
4683 
4684     return 0;
4685 }
4686 
4687 static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
4688                      unsigned long event,
4689                      struct nh_notifier_info *info)
4690 {
4691     struct nh_notifier_single_info *nh;
4692 
4693     if (event != NEXTHOP_EVENT_REPLACE &&
4694         event != NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE &&
4695         event != NEXTHOP_EVENT_BUCKET_REPLACE)
4696         return 0;
4697 
4698     switch (info->type) {
4699     case NH_NOTIFIER_INFO_TYPE_SINGLE:
4700         return mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, info->nh,
4701                                 info->extack);
4702     case NH_NOTIFIER_INFO_TYPE_GRP:
4703         return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp,
4704                                info->nh_grp,
4705                                info->extack);
4706     case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4707         return mlxsw_sp_nexthop_obj_res_group_validate(mlxsw_sp,
4708                                    info->nh_res_table,
4709                                    info->extack);
4710     case NH_NOTIFIER_INFO_TYPE_RES_BUCKET:
4711         nh = &info->nh_res_bucket->new_nh;
4712         return mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4713                                  info->extack);
4714     default:
4715         NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type");
4716         return -EOPNOTSUPP;
4717     }
4718 }
4719 
4720 static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
4721                         const struct nh_notifier_info *info)
4722 {
4723     const struct net_device *dev;
4724 
4725     switch (info->type) {
4726     case NH_NOTIFIER_INFO_TYPE_SINGLE:
4727         dev = info->nh->dev;
4728         return info->nh->gw_family || info->nh->is_reject ||
4729                mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
4730     case NH_NOTIFIER_INFO_TYPE_GRP:
4731     case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4732         /* Already validated earlier. */
4733         return true;
4734     default:
4735         return false;
4736     }
4737 }
4738 
4739 static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp,
4740                         struct mlxsw_sp_nexthop *nh)
4741 {
4742     u16 lb_rif_index = mlxsw_sp->router->lb_rif_index;
4743 
4744     nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD;
4745     nh->should_offload = 1;
4746     /* While nexthops that discard packets do not forward packets
4747      * via an egress RIF, they still need to be programmed using a
4748      * valid RIF, so use the loopback RIF created during init.
4749      */
4750     nh->rif = mlxsw_sp->router->rifs[lb_rif_index];
4751 }
4752 
4753 static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp,
4754                         struct mlxsw_sp_nexthop *nh)
4755 {
4756     nh->rif = NULL;
4757     nh->should_offload = 0;
4758 }
4759 
/* Initialize one nexthop from a nexthop object notification: record its
 * group, weight, gateway address and neighbour table, allocate a counter,
 * link it into the router's nexthop list and resolve its type against the
 * egress netdev. Returns 0 on success or a negative errno, in which case
 * all acquired resources are released.
 */
static int
mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
			  struct mlxsw_sp_nexthop_group *nh_grp,
			  struct mlxsw_sp_nexthop *nh,
			  struct nh_notifier_single_info *nh_obj, int weight)
{
	struct net_device *dev = nh_obj->dev;
	int err;

	nh->nhgi = nh_grp->nhgi;
	nh->nh_weight = weight;

	switch (nh_obj->gw_family) {
	case AF_INET:
		memcpy(&nh->gw_addr, &nh_obj->ipv4, sizeof(nh_obj->ipv4));
		nh->neigh_tbl = &arp_tbl;
		break;
	case AF_INET6:
		memcpy(&nh->gw_addr, &nh_obj->ipv6, sizeof(nh_obj->ipv6));
#if IS_ENABLED(CONFIG_IPV6)
		nh->neigh_tbl = &nd_tbl;
#endif
		break;
	}

	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
	nh->ifindex = dev->ifindex;

	err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
	if (err)
		goto err_type_init;

	/* Blackhole nexthops are programmed to discard via the loopback RIF. */
	if (nh_obj->is_reject)
		mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);

	/* In a resilient nexthop group, all the nexthops must be written to
	 * the adjacency table. Even if they do not have a valid neighbour or
	 * RIF.
	 */
	if (nh_grp->nhgi->is_resilient && !nh->should_offload) {
		nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
		nh->should_offload = 1;
	}

	return 0;

err_type_init:
	/* Unwind the counter allocation and list insertion done above. */
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
	return err;
}
4812 
/* Tear down a nexthop set up by mlxsw_sp_nexthop_obj_init(), releasing
 * resources in reverse order of acquisition.
 */
static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_nexthop *nh)
{
	/* Blackhole nexthops borrowed the loopback RIF; release it first. */
	if (nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD)
		mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
	nh->should_offload = 0;
}
4823 
/* Allocate and populate the group info (nexthop array) of a nexthop
 * object group from the notification, initialize every member nexthop,
 * take the router-wide group reference and program the adjacency entries.
 * Resilient groups are additionally registered for periodic activity
 * polling. Returns 0 on success or a negative errno after full unwind.
 */
static int
mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_nexthop_group *nh_grp,
				     struct nh_notifier_info *info)
{
	struct mlxsw_sp_nexthop_group_info *nhgi;
	struct mlxsw_sp_nexthop *nh;
	bool is_resilient = false;
	unsigned int nhs;
	int err, i;

	/* Determine how many nexthop slots the notification describes. */
	switch (info->type) {
	case NH_NOTIFIER_INFO_TYPE_SINGLE:
		nhs = 1;
		break;
	case NH_NOTIFIER_INFO_TYPE_GRP:
		nhs = info->nh_grp->num_nh;
		break;
	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
		nhs = info->nh_res_table->num_nh_buckets;
		is_resilient = true;
		break;
	default:
		return -EINVAL;
	}

	nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
	if (!nhgi)
		return -ENOMEM;
	nh_grp->nhgi = nhgi;
	nhgi->nh_grp = nh_grp;
	nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info);
	nhgi->is_resilient = is_resilient;
	nhgi->count = nhs;
	for (i = 0; i < nhgi->count; i++) {
		struct nh_notifier_single_info *nh_obj;
		int weight;

		nh = &nhgi->nexthops[i];
		/* Pick the i-th member and its weight per notification type. */
		switch (info->type) {
		case NH_NOTIFIER_INFO_TYPE_SINGLE:
			nh_obj = info->nh;
			weight = 1;
			break;
		case NH_NOTIFIER_INFO_TYPE_GRP:
			nh_obj = &info->nh_grp->nh_entries[i].nh;
			weight = info->nh_grp->nh_entries[i].weight;
			break;
		case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
			nh_obj = &info->nh_res_table->nhs[i];
			weight = 1;
			break;
		default:
			err = -EINVAL;
			goto err_nexthop_obj_init;
		}
		err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj,
						weight);
		if (err)
			goto err_nexthop_obj_init;
	}
	err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
	if (err)
		goto err_group_inc;
	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	if (err) {
		NL_SET_ERR_MSG_MOD(info->extack, "Failed to write adjacency entries to the device");
		goto err_group_refresh;
	}

	/* Add resilient nexthop groups to a list so that the activity of their
	 * nexthop buckets will be periodically queried and cleared.
	 */
	if (nhgi->is_resilient) {
		if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
			mlxsw_sp_nh_grp_activity_work_schedule(mlxsw_sp);
		list_add(&nhgi->list, &mlxsw_sp->router->nh_res_grp_list);
	}

	return 0;

err_group_refresh:
	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
err_group_inc:
	/* All nexthops were initialized; unwind them all below. */
	i = nhgi->count;
err_nexthop_obj_init:
	/* Tear down the nexthops initialized before the failure (i is the
	 * index of the first uninitialized slot).
	 */
	for (i--; i >= 0; i--) {
		nh = &nhgi->nexthops[i];
		mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
	}
	kfree(nhgi);
	return err;
}
4917 
/* Destroy the group info created by mlxsw_sp_nexthop_obj_group_info_init():
 * unregister resilient groups from activity polling (stopping the work if
 * this was the last one), drop the group reference, tear down all member
 * nexthops and flush the group's adjacency entries.
 */
static void
mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	int i;

	if (nhgi->is_resilient) {
		list_del(&nhgi->list);
		if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
			cancel_delayed_work(&router->nh_grp_activity_dw);
	}

	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
	for (i = nhgi->count - 1; i >= 0; i--) {
		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];

		mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
	}
	/* With all nexthops gone, the refresh releases the adjacency range. */
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(nhgi->adj_index_valid);
	kfree(nhgi);
}
4942 
/* Allocate a nexthop group of type OBJ from a notification and initialize
 * its group info. The group is created with can_destroy cleared; the
 * caller decides when it may be destroyed. Returns the group or ERR_PTR.
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_obj_group_create(struct mlxsw_sp *mlxsw_sp,
				  struct nh_notifier_info *info)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	int err;

	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->vr_list);
	err = rhashtable_init(&nh_grp->vr_ht,
			      &mlxsw_sp_nexthop_group_vr_ht_params);
	if (err)
		goto err_nexthop_group_vr_ht_init;
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
	nh_grp->obj.id = info->id;

	err = mlxsw_sp_nexthop_obj_group_info_init(mlxsw_sp, nh_grp, info);
	if (err)
		goto err_nexthop_group_info_init;

	nh_grp->can_destroy = false;

	return nh_grp;

err_nexthop_group_info_init:
	rhashtable_destroy(&nh_grp->vr_ht);
err_nexthop_group_vr_ht_init:
	kfree(nh_grp);
	return ERR_PTR(err);
}
4976 
/* Destroy a nexthop object group. A no-op unless the caller has marked
 * the group destroyable (destruction may be deferred while routes still
 * reference it).
 */
static void
mlxsw_sp_nexthop_obj_group_destroy(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop_group *nh_grp)
{
	if (!nh_grp->can_destroy)
		return;
	mlxsw_sp_nexthop_obj_group_info_fini(mlxsw_sp, nh_grp);
	/* By now no route or VR should be using the group. */
	WARN_ON_ONCE(!list_empty(&nh_grp->fib_list));
	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
	rhashtable_destroy(&nh_grp->vr_ht);
	kfree(nh_grp);
}
4989 
4990 static struct mlxsw_sp_nexthop_group *
4991 mlxsw_sp_nexthop_obj_group_lookup(struct mlxsw_sp *mlxsw_sp, u32 id)
4992 {
4993     struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
4994 
4995     cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
4996     cmp_arg.id = id;
4997     return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
4998                       &cmp_arg,
4999                       mlxsw_sp_nexthop_group_ht_params);
5000 }
5001 
/* Insert a new nexthop object group into the router's group hash table. */
static int mlxsw_sp_nexthop_obj_group_add(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp)
{
	return mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
}
5007 
/* Replace the contents of an existing nexthop object group with the newly
 * created one by swapping their group infos, so that routes keep pointing
 * at 'old_nh_grp'. Depending on which adjacency indices are valid, either
 * mass-update the adjacency index or re-program the routes. On success the
 * shell group 'nh_grp' (now holding the old info) is destroyed; on failure
 * the swap is reverted. Returns 0 or a negative errno.
 */
static int
mlxsw_sp_nexthop_obj_group_replace(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop_group *nh_grp,
				   struct mlxsw_sp_nexthop_group *old_nh_grp,
				   struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_nexthop_group_info *old_nhgi = old_nh_grp->nhgi;
	struct mlxsw_sp_nexthop_group_info *new_nhgi = nh_grp->nhgi;
	int err;

	/* Swap the group infos between the two groups. */
	old_nh_grp->nhgi = new_nhgi;
	new_nhgi->nh_grp = old_nh_grp;
	nh_grp->nhgi = old_nhgi;
	old_nhgi->nh_grp = nh_grp;

	if (old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
		/* Both the old adjacency index and the new one are valid.
		 * Routes are currently using the old one. Tell the device to
		 * replace the old adjacency index with the new one.
		 */
		err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, old_nh_grp,
						     old_nhgi->adj_index,
						     old_nhgi->ecmp_size);
		if (err) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to replace old adjacency index with new one");
			goto err_out;
		}
	} else if (old_nhgi->adj_index_valid && !new_nhgi->adj_index_valid) {
		/* The old adjacency index is valid, while the new one is not.
		 * Iterate over all the routes using the group and change them
		 * to trap packets to the CPU.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
		if (err) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to trap packets");
			goto err_out;
		}
	} else if (!old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
		/* The old adjacency index is invalid, while the new one is.
		 * Iterate over all the routes using the group and change them
		 * to forward packets using the new valid index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
		if (err) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to forward packets");
			goto err_out;
		}
	}

	/* Make sure the flags are set / cleared based on the new nexthop group
	 * information.
	 */
	mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, old_nh_grp);

	/* At this point 'nh_grp' is just a shell that is not used by anyone
	 * and its nexthop group info is the old info that was just replaced
	 * with the new one. Remove it.
	 */
	nh_grp->can_destroy = true;
	mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);

	return 0;

err_out:
	/* Revert the group info swap performed above. */
	old_nhgi->nh_grp = old_nh_grp;
	nh_grp->nhgi = new_nhgi;
	new_nhgi->nh_grp = nh_grp;
	old_nh_grp->nhgi = old_nhgi;
	return err;
}
5078 
5079 static int mlxsw_sp_nexthop_obj_new(struct mlxsw_sp *mlxsw_sp,
5080                     struct nh_notifier_info *info)
5081 {
5082     struct mlxsw_sp_nexthop_group *nh_grp, *old_nh_grp;
5083     struct netlink_ext_ack *extack = info->extack;
5084     int err;
5085 
5086     nh_grp = mlxsw_sp_nexthop_obj_group_create(mlxsw_sp, info);
5087     if (IS_ERR(nh_grp))
5088         return PTR_ERR(nh_grp);
5089 
5090     old_nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5091     if (!old_nh_grp)
5092         err = mlxsw_sp_nexthop_obj_group_add(mlxsw_sp, nh_grp);
5093     else
5094         err = mlxsw_sp_nexthop_obj_group_replace(mlxsw_sp, nh_grp,
5095                              old_nh_grp, extack);
5096 
5097     if (err) {
5098         nh_grp->can_destroy = true;
5099         mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5100     }
5101 
5102     return err;
5103 }
5104 
/* Handle a nexthop object delete notification: unlink the group from the
 * hash table and destroy it, unless routes still reference it, in which
 * case destruction is deferred to the last route removal.
 */
static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp,
				     struct nh_notifier_info *info)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
	if (!nh_grp)
		return;

	nh_grp->can_destroy = true;
	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);

	/* If the group still has routes using it, then defer the delete
	 * operation until the last route using it is deleted.
	 */
	if (!list_empty(&nh_grp->fib_list))
		return;
	mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
}
5124 
5125 static int mlxsw_sp_nexthop_obj_bucket_query(struct mlxsw_sp *mlxsw_sp,
5126                          u32 adj_index, char *ratr_pl)
5127 {
5128     MLXSW_REG_ZERO(ratr, ratr_pl);
5129     mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
5130     mlxsw_reg_ratr_adjacency_index_low_set(ratr_pl, adj_index);
5131     mlxsw_reg_ratr_adjacency_index_high_set(ratr_pl, adj_index >> 16);
5132 
5133     return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
5134 }
5135 
5136 static int mlxsw_sp_nexthop_obj_bucket_compare(char *ratr_pl, char *ratr_pl_new)
5137 {
5138     /* Clear the opcode and activity on both the old and new payload as
5139      * they are irrelevant for the comparison.
5140      */
5141     mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
5142     mlxsw_reg_ratr_a_set(ratr_pl, 0);
5143     mlxsw_reg_ratr_op_set(ratr_pl_new, MLXSW_REG_RATR_OP_QUERY_READ);
5144     mlxsw_reg_ratr_a_set(ratr_pl_new, 0);
5145 
5146     /* If the contents of the adjacency entry are consistent with the
5147      * replacement request, then replacement was successful.
5148      */
5149     if (!memcmp(ratr_pl, ratr_pl_new, MLXSW_REG_RATR_LEN))
5150         return 0;
5151 
5152     return -EINVAL;
5153 }
5154 
/* Program the adjacency entry of a single bucket in a resilient group.
 * When 'force' is not set, the hardware only overwrites an inactive entry;
 * the entry is then read back and compared to verify the replacement
 * actually took effect. Returns 0 on success or a negative errno.
 */
static int
mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh,
				       struct nh_notifier_info *info)
{
	u16 bucket_index = info->nh_res_bucket->bucket_index;
	struct netlink_ext_ack *extack = info->extack;
	bool force = info->nh_res_bucket->force;
	char ratr_pl_new[MLXSW_REG_RATR_LEN];
	char ratr_pl[MLXSW_REG_RATR_LEN];
	u32 adj_index;
	int err;

	/* No point in trying an atomic replacement if the idle timer interval
	 * is smaller than the interval in which we query and clear activity.
	 */
	if (!force && info->nh_res_bucket->idle_timer_ms <
	    MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL)
		force = true;

	adj_index = nh->nhgi->adj_index + bucket_index;
	err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to overwrite nexthop bucket");
		return err;
	}

	if (!force) {
		/* Verify the non-forced (activity-aware) write went through. */
		err = mlxsw_sp_nexthop_obj_bucket_query(mlxsw_sp, adj_index,
							ratr_pl_new);
		if (err) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to query nexthop bucket state after replacement. State might be inconsistent");
			return err;
		}

		err = mlxsw_sp_nexthop_obj_bucket_compare(ratr_pl, ratr_pl_new);
		if (err) {
			NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket was not replaced because it was active during replacement");
			return err;
		}
	}

	nh->update = 0;
	nh->offloaded = 1;
	mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, bucket_index);

	return 0;
}
5203 
/* Handle a resilient nexthop bucket replace notification: tear down the
 * current nexthop occupying the bucket, initialize the new one and program
 * its adjacency entry. On failure the bucket is re-initialized with the
 * old nexthop so that software state matches the unchanged hardware entry.
 */
static int mlxsw_sp_nexthop_obj_bucket_replace(struct mlxsw_sp *mlxsw_sp,
					       struct nh_notifier_info *info)
{
	u16 bucket_index = info->nh_res_bucket->bucket_index;
	struct netlink_ext_ack *extack = info->extack;
	struct mlxsw_sp_nexthop_group_info *nhgi;
	struct nh_notifier_single_info *nh_obj;
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_nexthop *nh;
	int err;

	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
	if (!nh_grp) {
		NL_SET_ERR_MSG_MOD(extack, "Nexthop group was not found");
		return -EINVAL;
	}

	nhgi = nh_grp->nhgi;

	if (bucket_index >= nhgi->count) {
		NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket index out of range");
		return -EINVAL;
	}

	nh = &nhgi->nexthops[bucket_index];
	mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);

	nh_obj = &info->nh_res_bucket->new_nh;
	err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to initialize nexthop object for nexthop bucket replacement");
		goto err_nexthop_obj_init;
	}

	err = mlxsw_sp_nexthop_obj_bucket_adj_update(mlxsw_sp, nh, info);
	if (err)
		goto err_nexthop_obj_bucket_adj_update;

	return 0;

err_nexthop_obj_bucket_adj_update:
	mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
err_nexthop_obj_init:
	/* Restore the previous occupant of the bucket in software. */
	nh_obj = &info->nh_res_bucket->old_nh;
	mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
	/* The old adjacency entry was not overwritten */
	nh->update = 0;
	nh->offloaded = 1;
	return err;
}
5254 
5255 static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
5256                       unsigned long event, void *ptr)
5257 {
5258     struct nh_notifier_info *info = ptr;
5259     struct mlxsw_sp_router *router;
5260     int err = 0;
5261 
5262     router = container_of(nb, struct mlxsw_sp_router, nexthop_nb);
5263     err = mlxsw_sp_nexthop_obj_validate(router->mlxsw_sp, event, info);
5264     if (err)
5265         goto out;
5266 
5267     mutex_lock(&router->lock);
5268 
5269     switch (event) {
5270     case NEXTHOP_EVENT_REPLACE:
5271         err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info);
5272         break;
5273     case NEXTHOP_EVENT_DEL:
5274         mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info);
5275         break;
5276     case NEXTHOP_EVENT_BUCKET_REPLACE:
5277         err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp,
5278                               info);
5279         break;
5280     default:
5281         break;
5282     }
5283 
5284     mutex_unlock(&router->lock);
5285 
5286 out:
5287     return notifier_from_errno(err);
5288 }
5289 
5290 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5291                    struct fib_info *fi)
5292 {
5293     const struct fib_nh *nh = fib_info_nh(fi, 0);
5294 
5295     return nh->fib_nh_gw_family ||
5296            mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
5297 }
5298 
/* Allocate and populate the group info of an IPv4 (fib_info based)
 * nexthop group: initialize one nexthop per fib path, take the router-wide
 * group reference and program the adjacency entries. Returns 0 on success
 * or a negative errno after full unwind.
 */
static int
mlxsw_sp_nexthop4_group_info_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp)
{
	unsigned int nhs = fib_info_num_path(nh_grp->ipv4.fi);
	struct mlxsw_sp_nexthop_group_info *nhgi;
	struct mlxsw_sp_nexthop *nh;
	int err, i;

	nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
	if (!nhgi)
		return -ENOMEM;
	nh_grp->nhgi = nhgi;
	nhgi->nh_grp = nh_grp;
	nhgi->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, nh_grp->ipv4.fi);
	nhgi->count = nhs;
	for (i = 0; i < nhgi->count; i++) {
		struct fib_nh *fib_nh;

		nh = &nhgi->nexthops[i];
		fib_nh = fib_info_nh(nh_grp->ipv4.fi, i);
		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
		if (err)
			goto err_nexthop4_init;
	}
	err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
	if (err)
		goto err_group_inc;
	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	if (err)
		goto err_group_refresh;

	return 0;

err_group_refresh:
	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
err_group_inc:
	/* All nexthops were initialized; unwind them all below. */
	i = nhgi->count;
err_nexthop4_init:
	/* Tear down the nexthops initialized before the failure. */
	for (i--; i >= 0; i--) {
		nh = &nhgi->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	kfree(nhgi);
	return err;
}
5345 
/* Destroy the group info created by mlxsw_sp_nexthop4_group_info_init():
 * drop the group reference, tear down all member nexthops and flush the
 * group's adjacency entries.
 */
static void
mlxsw_sp_nexthop4_group_info_fini(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
	int i;

	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
	for (i = nhgi->count - 1; i >= 0; i--) {
		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];

		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	/* With all nexthops gone, the refresh releases the adjacency range. */
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(nhgi->adj_index_valid);
	kfree(nhgi);
}
5363 
/* Create an IPv4 nexthop group for the given fib_info (a reference to
 * which is held for the group's lifetime), initialize its group info and
 * insert it into the router's group hash table. Returns the group or
 * ERR_PTR on failure.
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	int err;

	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->vr_list);
	err = rhashtable_init(&nh_grp->vr_ht,
			      &mlxsw_sp_nexthop_group_vr_ht_params);
	if (err)
		goto err_nexthop_group_vr_ht_init;
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
	nh_grp->ipv4.fi = fi;
	fib_info_hold(fi);

	err = mlxsw_sp_nexthop4_group_info_init(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_info_init;

	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;

	nh_grp->can_destroy = true;

	return nh_grp;

err_nexthop_group_insert:
	mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
err_nexthop_group_info_init:
	fib_info_put(fi);
	rhashtable_destroy(&nh_grp->vr_ht);
err_nexthop_group_vr_ht_init:
	kfree(nh_grp);
	return ERR_PTR(err);
}
5404 
/* Destroy a group made by mlxsw_sp_nexthop4_group_create(). Groups whose
 * construction never completed (can_destroy unset) are left alone. Drops
 * the fib_info reference taken at creation time.
 */
static void
mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	if (!nh_grp->can_destroy)
		return;
	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
	fib_info_put(nh_grp->ipv4.fi);
	/* All VRs must have unlinked from the group by now. */
	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
	rhashtable_destroy(&nh_grp->vr_ht);
	kfree(nh_grp);
}
5418 
5419 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
5420                        struct mlxsw_sp_fib_entry *fib_entry,
5421                        struct fib_info *fi)
5422 {
5423     struct mlxsw_sp_nexthop_group *nh_grp;
5424 
5425     if (fi->nh) {
5426         nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
5427                                fi->nh->id);
5428         if (WARN_ON_ONCE(!nh_grp))
5429             return -EINVAL;
5430         goto out;
5431     }
5432 
5433     nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
5434     if (!nh_grp) {
5435         nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
5436         if (IS_ERR(nh_grp))
5437             return PTR_ERR(nh_grp);
5438     }
5439 out:
5440     list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
5441     fib_entry->nh_group = nh_grp;
5442     return 0;
5443 }
5444 
5445 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
5446                     struct mlxsw_sp_fib_entry *fib_entry)
5447 {
5448     struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5449 
5450     list_del(&fib_entry->nexthop_group_node);
5451     if (!list_empty(&nh_grp->fib_list))
5452         return;
5453 
5454     if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
5455         mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5456         return;
5457     }
5458 
5459     mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
5460 }
5461 
5462 static bool
5463 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
5464 {
5465     struct mlxsw_sp_fib4_entry *fib4_entry;
5466 
5467     fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5468                   common);
5469     return !fib4_entry->dscp;
5470 }
5471 
5472 static bool
5473 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
5474 {
5475     struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
5476 
5477     switch (fib_entry->fib_node->fib->proto) {
5478     case MLXSW_SP_L3_PROTO_IPV4:
5479         if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
5480             return false;
5481         break;
5482     case MLXSW_SP_L3_PROTO_IPV6:
5483         break;
5484     }
5485 
5486     switch (fib_entry->type) {
5487     case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
5488         return !!nh_group->nhgi->adj_index_valid;
5489     case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
5490         return !!nh_group->nhgi->nh_rif;
5491     case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
5492     case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
5493     case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
5494         return true;
5495     default:
5496         return false;
5497     }
5498 }
5499 
5500 static struct mlxsw_sp_nexthop *
5501 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
5502              const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
5503 {
5504     int i;
5505 
5506     for (i = 0; i < nh_grp->nhgi->count; i++) {
5507         struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
5508         struct fib6_info *rt = mlxsw_sp_rt6->rt;
5509 
5510         if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev &&
5511             ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
5512                     &rt->fib6_nh->fib_nh_gw6))
5513             return nh;
5514     }
5515 
5516     return NULL;
5517 }
5518 
5519 static void
5520 mlxsw_sp_fib4_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5521                       struct fib_entry_notifier_info *fen_info)
5522 {
5523     u32 *p_dst = (u32 *) &fen_info->dst;
5524     struct fib_rt_info fri;
5525 
5526     fri.fi = fen_info->fi;
5527     fri.tb_id = fen_info->tb_id;
5528     fri.dst = cpu_to_be32(*p_dst);
5529     fri.dst_len = fen_info->dst_len;
5530     fri.dscp = fen_info->dscp;
5531     fri.type = fen_info->type;
5532     fri.offload = false;
5533     fri.trap = false;
5534     fri.offload_failed = true;
5535     fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5536 }
5537 
5538 static void
5539 mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5540                  struct mlxsw_sp_fib_entry *fib_entry)
5541 {
5542     u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
5543     int dst_len = fib_entry->fib_node->key.prefix_len;
5544     struct mlxsw_sp_fib4_entry *fib4_entry;
5545     struct fib_rt_info fri;
5546     bool should_offload;
5547 
5548     should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
5549     fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5550                   common);
5551     fri.fi = fib4_entry->fi;
5552     fri.tb_id = fib4_entry->tb_id;
5553     fri.dst = cpu_to_be32(*p_dst);
5554     fri.dst_len = dst_len;
5555     fri.dscp = fib4_entry->dscp;
5556     fri.type = fib4_entry->type;
5557     fri.offload = should_offload;
5558     fri.trap = !should_offload;
5559     fri.offload_failed = false;
5560     fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5561 }
5562 
5563 static void
5564 mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5565                    struct mlxsw_sp_fib_entry *fib_entry)
5566 {
5567     u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
5568     int dst_len = fib_entry->fib_node->key.prefix_len;
5569     struct mlxsw_sp_fib4_entry *fib4_entry;
5570     struct fib_rt_info fri;
5571 
5572     fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5573                   common);
5574     fri.fi = fib4_entry->fi;
5575     fri.tb_id = fib4_entry->tb_id;
5576     fri.dst = cpu_to_be32(*p_dst);
5577     fri.dst_len = dst_len;
5578     fri.dscp = fib4_entry->dscp;
5579     fri.type = fib4_entry->type;
5580     fri.offload = false;
5581     fri.trap = false;
5582     fri.offload_failed = false;
5583     fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5584 }
5585 
5586 #if IS_ENABLED(CONFIG_IPV6)
5587 static void
5588 mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5589                       struct fib6_info **rt_arr,
5590                       unsigned int nrt6)
5591 {
5592     int i;
5593 
5594     /* In IPv6 a multipath route is represented using multiple routes, so
5595      * we need to set the flags on all of them.
5596      */
5597     for (i = 0; i < nrt6; i++)
5598         fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), rt_arr[i],
5599                        false, false, true);
5600 }
5601 #else
5602 static void
5603 mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5604                       struct fib6_info **rt_arr,
5605                       unsigned int nrt6)
5606 {
5607 }
5608 #endif
5609 
5610 #if IS_ENABLED(CONFIG_IPV6)
5611 static void
5612 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5613                  struct mlxsw_sp_fib_entry *fib_entry)
5614 {
5615     struct mlxsw_sp_fib6_entry *fib6_entry;
5616     struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5617     bool should_offload;
5618 
5619     should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
5620 
5621     /* In IPv6 a multipath route is represented using multiple routes, so
5622      * we need to set the flags on all of them.
5623      */
5624     fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
5625                   common);
5626     list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
5627         fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
5628                        should_offload, !should_offload, false);
5629 }
5630 #else
5631 static void
5632 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5633                  struct mlxsw_sp_fib_entry *fib_entry)
5634 {
5635 }
5636 #endif
5637 
5638 #if IS_ENABLED(CONFIG_IPV6)
5639 static void
5640 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5641                    struct mlxsw_sp_fib_entry *fib_entry)
5642 {
5643     struct mlxsw_sp_fib6_entry *fib6_entry;
5644     struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5645 
5646     fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
5647                   common);
5648     list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
5649         fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
5650                        false, false, false);
5651 }
5652 #else
5653 static void
5654 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5655                    struct mlxsw_sp_fib_entry *fib_entry)
5656 {
5657 }
5658 #endif
5659 
5660 static void
5661 mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5662                 struct mlxsw_sp_fib_entry *fib_entry)
5663 {
5664     switch (fib_entry->fib_node->fib->proto) {
5665     case MLXSW_SP_L3_PROTO_IPV4:
5666         mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry);
5667         break;
5668     case MLXSW_SP_L3_PROTO_IPV6:
5669         mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry);
5670         break;
5671     }
5672 }
5673 
5674 static void
5675 mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5676                   struct mlxsw_sp_fib_entry *fib_entry)
5677 {
5678     switch (fib_entry->fib_node->fib->proto) {
5679     case MLXSW_SP_L3_PROTO_IPV4:
5680         mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5681         break;
5682     case MLXSW_SP_L3_PROTO_IPV6:
5683         mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5684         break;
5685     }
5686 }
5687 
5688 static void
5689 mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
5690                     struct mlxsw_sp_fib_entry *fib_entry,
5691                     enum mlxsw_reg_ralue_op op)
5692 {
5693     switch (op) {
5694     case MLXSW_REG_RALUE_OP_WRITE_WRITE:
5695         mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
5696         break;
5697     case MLXSW_REG_RALUE_OP_WRITE_DELETE:
5698         mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5699         break;
5700     default:
5701         break;
5702     }
5703 }
5704 
5705 static void
5706 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
5707                   const struct mlxsw_sp_fib_entry *fib_entry,
5708                   enum mlxsw_reg_ralue_op op)
5709 {
5710     struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
5711     enum mlxsw_reg_ralxx_protocol proto;
5712     u32 *p_dip;
5713 
5714     proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
5715 
5716     switch (fib->proto) {
5717     case MLXSW_SP_L3_PROTO_IPV4:
5718         p_dip = (u32 *) fib_entry->fib_node->key.addr;
5719         mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
5720                       fib_entry->fib_node->key.prefix_len,
5721                       *p_dip);
5722         break;
5723     case MLXSW_SP_L3_PROTO_IPV6:
5724         mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
5725                       fib_entry->fib_node->key.prefix_len,
5726                       fib_entry->fib_node->key.addr);
5727         break;
5728     }
5729 }
5730 
/* Program a remote (gateway) route: point the RALUE entry at the nexthop
 * group's adjacency entries when offloadable, otherwise fall back to the
 * shared trap adjacency or a plain trap action.
 */
static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
	struct mlxsw_sp_nexthop_group_info *nhgi = nh_group->nhgi;
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;

	/* In case the nexthop group adjacency index is valid, use it
	 * with provided ECMP size. Otherwise, setup trap and pass
	 * traffic to kernel.
	 */
	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = nhgi->adj_index;
		ecmp_size = nhgi->ecmp_size;
	} else if (!nhgi->adj_index_valid && nhgi->count && nhgi->nh_rif) {
		/* No adjacency programmed yet, but the group has nexthops
		 * and a RIF: use the router's shared trap adjacency entry
		 * (adj_trap_index) as a single-sized group.
		 */
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = mlxsw_sp->router->adj_trap_index;
		ecmp_size = 1;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
					adjacency_index, ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
5765 
5766 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
5767                        struct mlxsw_sp_fib_entry *fib_entry,
5768                        enum mlxsw_reg_ralue_op op)
5769 {
5770     struct mlxsw_sp_rif *rif = fib_entry->nh_group->nhgi->nh_rif;
5771     enum mlxsw_reg_ralue_trap_action trap_action;
5772     char ralue_pl[MLXSW_REG_RALUE_LEN];
5773     u16 trap_id = 0;
5774     u16 rif_index = 0;
5775 
5776     if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
5777         trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5778         rif_index = rif->rif_index;
5779     } else {
5780         trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5781         trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
5782     }
5783 
5784     mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5785     mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
5786                        rif_index);
5787     return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5788 }
5789 
/* Program a trap entry: matching packets are delivered to the local CPU
 * (IP2ME action).
 */
static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_entry *fib_entry,
				      enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
5800 
/* Program a blackhole route: packets are dropped in hardware using the
 * DISCARD_ERROR trap action (local action with no RIF or trap ID).
 */
static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
					   struct mlxsw_sp_fib_entry *fib_entry,
					   enum mlxsw_reg_ralue_op op)
{
	enum mlxsw_reg_ralue_trap_action trap_action;
	char ralue_pl[MLXSW_REG_RALUE_LEN];

	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
5813 
/* Program an unreachable/prohibit route: trap packets to the kernel via a
 * dedicated trap ID (RTR_INGRESS1), packed as a local action so these can
 * be handled at lower priority than host-directed traffic.
 */
static int
mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry,
				  enum mlxsw_reg_ralue_op op)
{
	enum mlxsw_reg_ralue_trap_action trap_action;
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u16 trap_id;

	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
	trap_id = MLXSW_TRAP_ID_RTR_INGRESS1;

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
5830 
/* Program an IP-in-IP decap entry. The tunnel type's decap parameters are
 * first committed via the ipip ops (decap_config) for the entry's tunnel
 * index, and only then is the RALUE entry pointed at that index.
 */
static int
mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	int err;

	/* Entries of this type must have been bound to a tunnel. */
	if (WARN_ON(!ipip_entry))
		return -EINVAL;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
	err = ipip_ops->decap_config(mlxsw_sp, ipip_entry,
				     fib_entry->decap.tunnel_index);
	if (err)
		return err;

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
					   fib_entry->decap.tunnel_index);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
5855 
/* Program an NVE decap entry pointing at the tunnel index recorded on the
 * entry (taken from the router's NVE decap configuration at type-set time).
 */
static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
					   struct mlxsw_sp_fib_entry *fib_entry,
					   enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
					   fib_entry->decap.tunnel_index);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
5867 
/* Dispatch the RALUE operation to the handler matching the entry type.
 * Returns -EINVAL for an unknown type.
 */
static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op)
{
	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
		return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE:
		return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry,
							 op);
	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
							fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
		return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
	}
	return -EINVAL;
}
5892 
5893 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
5894                  struct mlxsw_sp_fib_entry *fib_entry,
5895                  enum mlxsw_reg_ralue_op op)
5896 {
5897     int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
5898 
5899     if (err)
5900         return err;
5901 
5902     mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op);
5903 
5904     return err;
5905 }
5906 
/* Write (create or update) the entry in the device's LPM table. */
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
}
5913 
/* Remove the entry from the device's LPM table. */
static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
}
5920 
/* Derive the hardware entry type for an IPv4 route from the kernel route
 * type. RTN_LOCAL addresses may actually terminate an IP-in-IP or NVE
 * tunnel, in which case the entry becomes a decap entry rather than a
 * plain trap. Returns 0 on success or a negative error.
 */
static int
mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
			     const struct fib_entry_notifier_info *fen_info,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
	int ifindex = nhgi->nexthops[0].ifindex;
	struct mlxsw_sp_ipip_entry *ipip_entry;

	switch (fen_info->type) {
	case RTN_LOCAL:
		/* IP-in-IP decap only applies while the overlay device
		 * is administratively up.
		 */
		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
							       MLXSW_SP_L3_PROTO_IPV4, dip);
		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
							     fib_entry,
							     ipip_entry);
		}
		if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
						 MLXSW_SP_L3_PROTO_IPV4,
						 &dip)) {
			u32 tunnel_index;

			tunnel_index = router->nve_decap_config.tunnel_index;
			fib_entry->decap.tunnel_index = tunnel_index;
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
			return 0;
		}
		/* Not a tunnel endpoint - handle like broadcast: trap. */
		fallthrough;
	case RTN_BROADCAST:
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	case RTN_BLACKHOLE:
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
		return 0;
	case RTN_UNREACHABLE:
	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * can do so with a lower priority than packets directed
		 * at the host, so use action type local instead of trap.
		 */
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
		return 0;
	case RTN_UNICAST:
		/* Gateway routes resolve through an adjacency group;
		 * directly-connected ones only need an egress RIF.
		 */
		if (nhgi->gateway)
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
		else
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	default:
		return -EINVAL;
	}
}
5978 
5979 static void
5980 mlxsw_sp_fib_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
5981                   struct mlxsw_sp_fib_entry *fib_entry)
5982 {
5983     switch (fib_entry->type) {
5984     case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
5985         mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
5986         break;
5987     default:
5988         break;
5989     }
5990 }
5991 
/* IPv4 wrapper around the common type teardown. */
static void
mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib4_entry *fib4_entry)
{
	mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib4_entry->common);
}
5998 
/* Allocate an IPv4 FIB entry for @fib_node from a FIB notification:
 * attach it to a nexthop group, link the group to the node's virtual
 * router and classify the entry type. Holds a reference on the route's
 * fib_info. Returns the entry or an ERR_PTR(); the error path unwinds in
 * reverse order of construction.
 */
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
	if (!fib4_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib4_entry->common;

	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
	if (err)
		goto err_nexthop4_group_get;

	err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
					     fib_node->fib);
	if (err)
		goto err_nexthop_group_vr_link;

	/* Must follow group_get: type selection reads the group's info. */
	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
	if (err)
		goto err_fib4_entry_type_set;

	/* Cache the notification data; fi is held until entry destroy. */
	fib4_entry->fi = fen_info->fi;
	fib_info_hold(fib4_entry->fi);
	fib4_entry->tb_id = fen_info->tb_id;
	fib4_entry->type = fen_info->type;
	fib4_entry->dscp = fen_info->dscp;

	fib_entry->fib_node = fib_node;

	return fib4_entry;

err_fib4_entry_type_set:
	mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
err_nexthop_group_vr_link:
	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
err_nexthop4_group_get:
	kfree(fib4_entry);
	return ERR_PTR(err);
}
6044 
/* Reverse of mlxsw_sp_fib4_entry_create(): drop the fib_info reference,
 * release type-specific state, unlink the group from the node's virtual
 * router and put the nexthop group.
 */
static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib4_entry *fib4_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;

	fib_info_put(fib4_entry->fi);
	mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib4_entry);
	mlxsw_sp_nexthop_group_vr_unlink(fib4_entry->common.nh_group,
					 fib_node->fib);
	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
	kfree(fib4_entry);
}
6057 
6058 static struct mlxsw_sp_fib4_entry *
6059 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
6060                const struct fib_entry_notifier_info *fen_info)
6061 {
6062     struct mlxsw_sp_fib4_entry *fib4_entry;
6063     struct mlxsw_sp_fib_node *fib_node;
6064     struct mlxsw_sp_fib *fib;
6065     struct mlxsw_sp_vr *vr;
6066 
6067     vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
6068     if (!vr)
6069         return NULL;
6070     fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
6071 
6072     fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
6073                         sizeof(fen_info->dst),
6074                         fen_info->dst_len);
6075     if (!fib_node)
6076         return NULL;
6077 
6078     fib4_entry = container_of(fib_node->fib_entry,
6079                   struct mlxsw_sp_fib4_entry, common);
6080     if (fib4_entry->tb_id == fen_info->tb_id &&
6081         fib4_entry->dscp == fen_info->dscp &&
6082         fib4_entry->type == fen_info->type &&
6083         fib4_entry->fi == fen_info->fi)
6084         return fib4_entry;
6085 
6086     return NULL;
6087 }
6088 
/* Hash table geometry for FIB nodes: keyed by the full struct
 * mlxsw_sp_fib_key (prefix bytes + prefix length), one table per FIB.
 */
static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
	.key_len = sizeof(struct mlxsw_sp_fib_key),
	.automatic_shrinking = true,
};
6095 
/* Hash the node into its FIB's node table. */
static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
				    struct mlxsw_sp_fib_node *fib_node)
{
	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
				      mlxsw_sp_fib_ht_params);
}
6102 
/* Unhash the node from its FIB's node table. */
static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
				     struct mlxsw_sp_fib_node *fib_node)
{
	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
			       mlxsw_sp_fib_ht_params);
}
6109 
/* Look up the node for @addr/@prefix_len in @fib. The key is zeroed with
 * memset() before being filled so that padding and any address bytes past
 * @addr_len hash consistently (the whole struct is the hash key).
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}
6121 
6122 static struct mlxsw_sp_fib_node *
6123 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
6124              size_t addr_len, unsigned char prefix_len)
6125 {
6126     struct mlxsw_sp_fib_node *fib_node;
6127 
6128     fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
6129     if (!fib_node)
6130         return NULL;
6131 
6132     list_add(&fib_node->list, &fib->node_list);
6133     memcpy(fib_node->key.addr, addr, addr_len);
6134     fib_node->key.prefix_len = prefix_len;
6135 
6136     return fib_node;
6137 }
6138 
/* Unlink the node from its FIB's node list and free it. */
static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
{
	list_del(&fib_node->list);
	kfree(fib_node);
}
6144 
/* Account the node's prefix length in the LPM tree bound to its protocol.
 * If the current tree does not yet cover this prefix length, get (or
 * build) a tree that does and migrate the FIB's virtual routers to it.
 * The reference count is only bumped once a suitable tree is in place.
 */
static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
	/* Prefix length already present in the tree - just take a ref. */
	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		goto out;

	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return PTR_ERR(lpm_tree);

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

out:
	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
	return 0;

err_lpm_tree_replace:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
	return err;
}
6176 
/* Drop the node's reference on its prefix length. When the last user of a
 * prefix length goes away, opportunistically shrink the LPM tree by
 * rebuilding it without that length; failures here are benign - the FIB
 * simply keeps using the old (larger) tree.
 */
static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	int err;

	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		return;
	/* Try to construct a new LPM tree from the current prefix usage
	 * minus the unused one. If we fail, continue using the old one.
	 */
	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
				    fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return;

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

	return;

err_lpm_tree_replace:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
}
6207 
/* Bind a freshly created node to @fib: hash it into the node table and
 * account its prefix length in the LPM tree. On failure the node is
 * unhashed again and left unbound (fib pointer cleared).
 */
static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node,
				  struct mlxsw_sp_fib *fib)
{
	int err;

	err = mlxsw_sp_fib_node_insert(fib, fib_node);
	if (err)
		return err;
	fib_node->fib = fib;

	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
	if (err)
		goto err_fib_lpm_tree_link;

	return 0;

err_fib_lpm_tree_link:
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
	return err;
}
6230 
/* Reverse of mlxsw_sp_fib_node_init(): release the LPM tree accounting and
 * unhash the node. The unlink must run before fib_node->fib is cleared
 * since it dereferences that pointer.
 */
static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib *fib = fib_node->fib;

	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
}
6240 
/* Look up - or create - the FIB node for the given table / prefix. Takes a
 * reference on the virtual router; the reference is kept while the node
 * exists (an existing node already holds one) and is dropped on error.
 * Returns the node or an ERR_PTR().
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
		      size_t addr_len, unsigned char prefix_len,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	fib = mlxsw_sp_vr_fib(vr, proto);

	/* Existing node: the VR reference taken above pairs with the one
	 * dropped in mlxsw_sp_fib_node_put() for this caller.
	 */
	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
	if (fib_node)
		return fib_node;

	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
	if (!fib_node) {
		err = -ENOMEM;
		goto err_fib_node_create;
	}

	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
	if (err)
		goto err_fib_node_init;

	return fib_node;

err_fib_node_init:
	mlxsw_sp_fib_node_destroy(fib_node);
err_fib_node_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}
6278 
/* Release a reference on @fib_node. The node is only torn down (and the
 * virtual router reference dropped) once no FIB entry is linked to it.
 */
static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_vr *vr = fib_node->fib->vr;

	/* Still in use by a linked entry; keep the node alive. */
	if (fib_node->fib_entry)
		return;
	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
	mlxsw_sp_fib_node_destroy(fib_node);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
6290 
/* Attach @fib_entry as the node's single entry and program it to the
 * device. On failure the node is restored to having no entry.
 */
static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
	int err;

	fib_node->fib_entry = fib_entry;

	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
	if (err)
		goto err_fib_entry_update;

	return 0;

err_fib_entry_update:
	fib_node->fib_entry = NULL;
	return err;
}
6309 
/* Remove @fib_entry from the device and detach it from its node. */
static void
mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
	fib_node->fib_entry = NULL;
}
6319 
/* Decide whether @fib4_entry may replace the entry currently linked to its
 * node. A route from the main table must not replace one from the local
 * table, as local routes have higher precedence in the kernel FIB.
 */
static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *fib4_replaced;

	if (!fib_node->fib_entry)
		return true;

	fib4_replaced = container_of(fib_node->fib_entry,
				     struct mlxsw_sp_fib4_entry, common);
	if (fib4_entry->tb_id == RT_TABLE_MAIN &&
	    fib4_replaced->tb_id == RT_TABLE_LOCAL)
		return false;

	return true;
}
6336 
/* Handle an IPv4 route replace notification: create the new entry, link it
 * to the FIB node and destroy the entry it replaced, if any. Rollback on
 * failure restores the previously linked entry. Returns 0 or negative
 * errno.
 */
static int
mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
			     const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced;
	struct mlxsw_sp_fib_entry *replaced;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	/* Routes using an unknown nexthop object were vetoed earlier;
	 * silently ignore them here.
	 */
	if (fen_info->fi->nh &&
	    !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, fen_info->fi->nh->id))
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
					 &fen_info->dst, sizeof(fen_info->dst),
					 fen_info->dst_len,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib_node)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
		return PTR_ERR(fib_node);
	}

	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
	if (IS_ERR(fib4_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
		err = PTR_ERR(fib4_entry);
		goto err_fib4_entry_create;
	}

	/* Do not let a main-table route displace a local-table one. */
	if (!mlxsw_sp_fib4_allow_replace(fib4_entry)) {
		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		return 0;
	}

	replaced = fib_node->fib_entry;
	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
		goto err_fib_node_entry_link;
	}

	/* Nothing to replace */
	if (!replaced)
		return 0;

	/* The old entry is no longer offloaded; clear its flags before it is
	 * freed so user space sees consistent state.
	 */
	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
	fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry,
				     common);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced);

	return 0;

err_fib_node_entry_link:
	/* Restore the entry that was linked before we tried to replace it. */
	fib_node->fib_entry = replaced;
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
err_fib4_entry_create:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
6397 
/* Handle an IPv4 route delete notification: unlink and destroy the
 * corresponding entry, then release the FIB node. Silently ignores routes
 * that were never offloaded.
 */
static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;

	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
	if (!fib4_entry)
		return;
	fib_node = fib4_entry->common.fib_node;

	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
6413 
6414 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
6415 {
6416     /* Multicast routes aren't supported, so ignore them. Neighbour
6417      * Discovery packets are specifically trapped.
6418      */
6419     if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
6420         return true;
6421 
6422     /* Cloned routes are irrelevant in the forwarding path. */
6423     if (rt->fib6_flags & RTF_CACHE)
6424         return true;
6425 
6426     return false;
6427 }
6428 
/* Allocate a driver-side wrapper for a kernel IPv6 route. Returns the
 * wrapper or ERR_PTR(-ENOMEM). The wrapper holds a reference on @rt which
 * is released in mlxsw_sp_rt6_destroy().
 */
static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
	if (!mlxsw_sp_rt6)
		return ERR_PTR(-ENOMEM);

	/* In case of route replace, replaced route is deleted with
	 * no notification. Take reference to prevent accessing freed
	 * memory.
	 */
	mlxsw_sp_rt6->rt = rt;
	fib6_info_hold(rt);

	return mlxsw_sp_rt6;
}
6446 
#if IS_ENABLED(CONFIG_IPV6)
/* Drop the reference taken in mlxsw_sp_rt6_create(). */
static void mlxsw_sp_rt6_release(struct fib6_info *rt)
{
	fib6_info_release(rt);
}
#else
/* No-op stub: fib6_info_release() is unavailable without CONFIG_IPV6. */
static void mlxsw_sp_rt6_release(struct fib6_info *rt)
{
}
#endif
6457 
/* Free a route wrapper: clear the nexthop offload flag (unless the route
 * uses a shared nexthop object, whose flags are managed separately), drop
 * the fib6_info reference and free the wrapper.
 */
static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
{
	struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;

	if (!mlxsw_sp_rt6->rt->nh)
		fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
	kfree(mlxsw_sp_rt6);
}
6467 
6468 static struct fib6_info *
6469 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
6470 {
6471     return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
6472                 list)->rt;
6473 }
6474 
6475 static struct mlxsw_sp_rt6 *
6476 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
6477                 const struct fib6_info *rt)
6478 {
6479     struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6480 
6481     list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
6482         if (mlxsw_sp_rt6->rt == rt)
6483             return mlxsw_sp_rt6;
6484     }
6485 
6486     return NULL;
6487 }
6488 
6489 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
6490                     const struct fib6_info *rt,
6491                     enum mlxsw_sp_ipip_type *ret)
6492 {
6493     return rt->fib6_nh->fib_nh_dev &&
6494            mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
6495 }
6496 
/* Initialize one nexthop of an IPv6 group from kernel route @rt: copy the
 * weight and gateway, allocate a counter, register the nexthop on the
 * router's global list and resolve its type from the egress device.
 * Returns 0 or negative errno; unwinds on failure.
 */
static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  struct mlxsw_sp_nexthop *nh,
				  const struct fib6_info *rt)
{
	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
	int err;

	nh->nhgi = nh_grp->nhgi;
	nh->nh_weight = rt->fib6_nh->fib_nh_weight;
	memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
#if IS_ENABLED(CONFIG_IPV6)
	/* Neighbour resolution for IPv6 gateways uses the ND table. */
	nh->neigh_tbl = &nd_tbl;
#endif
	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);

	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);

	/* A device-less nexthop (e.g. blackhole) needs no type init. */
	if (!dev)
		return 0;
	nh->ifindex = dev->ifindex;

	err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
	if (err)
		goto err_nexthop_type_init;

	return 0;

err_nexthop_type_init:
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
	return err;
}
6530 
/* Reverse of mlxsw_sp_nexthop6_init(): release the nexthop's type state,
 * unregister it from the router's list and free its counter.
 */
static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
}
6538 
6539 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
6540                     const struct fib6_info *rt)
6541 {
6542     return rt->fib6_nh->fib_nh_gw_family ||
6543            mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
6544 }
6545 
6546 static int
6547 mlxsw_sp_nexthop6_group_info_init(struct mlxsw_sp *mlxsw_sp,
6548                   struct mlxsw_sp_nexthop_group *nh_grp,
6549                   struct mlxsw_sp_fib6_entry *fib6_entry)
6550 {
6551     struct mlxsw_sp_nexthop_group_info *nhgi;
6552     struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6553     struct mlxsw_sp_nexthop *nh;
6554     int err, i;
6555 
6556     nhgi = kzalloc(struct_size(nhgi, nexthops, fib6_entry->nrt6),
6557                GFP_KERNEL);
6558     if (!nhgi)
6559         return -ENOMEM;
6560     nh_grp->nhgi = nhgi;
6561     nhgi->nh_grp = nh_grp;
6562     mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
6563                     struct mlxsw_sp_rt6, list);
6564     nhgi->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
6565     nhgi->count = fib6_entry->nrt6;
6566     for (i = 0; i < nhgi->count; i++) {
6567         struct fib6_info *rt = mlxsw_sp_rt6->rt;
6568 
6569         nh = &nhgi->nexthops[i];
6570         err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
6571         if (err)
6572             goto err_nexthop6_init;
6573         mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
6574     }
6575     nh_grp->nhgi = nhgi;
6576     err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
6577     if (err)
6578         goto err_group_inc;
6579     err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
6580     if (err)
6581         goto err_group_refresh;
6582 
6583     return 0;
6584 
6585 err_group_refresh:
6586     mlxsw_sp_nexthop_group_dec(mlxsw_sp);
6587 err_group_inc:
6588     i = nhgi->count;
6589 err_nexthop6_init:
6590     for (i--; i >= 0; i--) {
6591         nh = &nhgi->nexthops[i];
6592         mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
6593     }
6594     kfree(nhgi);
6595     return err;
6596 }
6597 
/* Tear down an IPv6 nexthop group's info: unaccount the group, finalize
 * each nexthop in reverse order, let a final refresh release the adjacency
 * entries and free the info struct.
 */
static void
mlxsw_sp_nexthop6_group_info_fini(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
	int i;

	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
	for (i = nhgi->count - 1; i >= 0; i--) {
		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];

		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
	}
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	/* After all nexthops are gone the refresh must have released the
	 * adjacency index.
	 */
	WARN_ON_ONCE(nhgi->adj_index_valid);
	kfree(nhgi);
}
6615 
/* Allocate and set up a nexthop group for an IPv6 FIB entry and insert it
 * into the router's group table so it can be shared by identical entries.
 * Returns the group or an ERR_PTR(); unwinds on failure.
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	int err;

	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->vr_list);
	err = rhashtable_init(&nh_grp->vr_ht,
			      &mlxsw_sp_nexthop_group_vr_ht_params);
	if (err)
		goto err_nexthop_group_vr_ht_init;
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;

	err = mlxsw_sp_nexthop6_group_info_init(mlxsw_sp, nh_grp, fib6_entry);
	if (err)
		goto err_nexthop_group_info_init;

	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;

	/* Unlike nexthop-object groups, groups created here are owned by
	 * the driver and may be destroyed when their last user is gone.
	 */
	nh_grp->can_destroy = true;

	return nh_grp;

err_nexthop_group_insert:
	mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
err_nexthop_group_info_init:
	rhashtable_destroy(&nh_grp->vr_ht);
err_nexthop_group_vr_ht_init:
	kfree(nh_grp);
	return ERR_PTR(err);
}
6654 
/* Destroy a driver-owned IPv6 nexthop group. Groups backed by a kernel
 * nexthop object (can_destroy unset) are skipped - their lifetime is
 * managed by nexthop-object notifications.
 */
static void
mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	if (!nh_grp->can_destroy)
		return;
	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
	/* All virtual routers must have been unlinked by now. */
	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
	rhashtable_destroy(&nh_grp->vr_ht);
	kfree(nh_grp);
}
6667 
/* Bind @fib6_entry to a nexthop group: reuse the group of the kernel
 * nexthop object if the route uses one, otherwise look up or create a
 * driver-owned group matching the entry's nexthops. Returns 0 or negative
 * errno.
 */
static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
	struct mlxsw_sp_nexthop_group *nh_grp;

	if (rt->nh) {
		/* The group must exist - its creation was notified before
		 * any route could reference it.
		 */
		nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
							   rt->nh->id);
		if (WARN_ON_ONCE(!nh_grp))
			return -EINVAL;
		goto out;
	}

	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}

	/* The route and the nexthop are described by the same struct, so we
	 * need to the update the nexthop offload indication for the new route.
	 */
	__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);

out:
	list_add_tail(&fib6_entry->common.nexthop_group_node,
		      &nh_grp->fib_list);
	fib6_entry->common.nh_group = nh_grp;

	return 0;
}
6701 
/* Detach @fib_entry from its nexthop group and destroy the group if this
 * was its last user. Object-backed groups use a dedicated destructor.
 */
static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;

	list_del(&fib_entry->nexthop_group_node);
	if (!list_empty(&nh_grp->fib_list))
		return;

	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
		return;
	}

	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
}
6718 
/* Rebind @fib6_entry to a nexthop group matching its (changed) route list
 * and reprogram the entry in hardware. On any failure the entry is fully
 * restored to its old group. Returns 0 or negative errno.
 */
static int
mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
	int err;

	/* Detach from the old group first so the lookup/create below does
	 * not match it.
	 */
	mlxsw_sp_nexthop_group_vr_unlink(old_nh_grp, fib_node->fib);
	fib6_entry->common.nh_group = NULL;
	list_del(&fib6_entry->common.nexthop_group_node);

	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	err = mlxsw_sp_nexthop_group_vr_link(fib6_entry->common.nh_group,
					     fib_node->fib);
	if (err)
		goto err_nexthop_group_vr_link;

	/* In case this entry is offloaded, then the adjacency index
	 * currently associated with it in the device's table is that
	 * of the old group. Start using the new one instead.
	 */
	err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common);
	if (err)
		goto err_fib_entry_update;

	/* The old group is destroyed only if this entry was its last user;
	 * note it was already delisted above.
	 */
	if (list_empty(&old_nh_grp->fib_list))
		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);

	return 0;

err_fib_entry_update:
	mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
					 fib_node->fib);
err_nexthop_group_vr_link:
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
err_nexthop6_group_get:
	/* Re-attach the entry to its old group, undoing the detach above. */
	list_add_tail(&fib6_entry->common.nexthop_group_node,
		      &old_nh_grp->fib_list);
	fib6_entry->common.nh_group = old_nh_grp;
	mlxsw_sp_nexthop_group_vr_link(old_nh_grp, fib_node->fib);
	return err;
}
6765 
/* Append @nrt6 kernel routes to @fib6_entry's route list and migrate the
 * entry to a nexthop group reflecting the enlarged list. On failure all
 * wrappers added here are removed again. Returns 0 or negative errno.
 */
static int
mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct fib6_info **rt_arr, unsigned int nrt6)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err, i;

	for (i = 0; i < nrt6; i++) {
		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
		if (IS_ERR(mlxsw_sp_rt6)) {
			err = PTR_ERR(mlxsw_sp_rt6);
			goto err_rt6_unwind;
		}

		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
		fib6_entry->nrt6++;
	}

	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
	if (err)
		goto err_rt6_unwind;

	return 0;

err_rt6_unwind:
	/* i holds the number of wrappers successfully added; pop exactly
	 * that many from the tail of the list.
	 */
	for (; i > 0; i--) {
		fib6_entry->nrt6--;
		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
					       struct mlxsw_sp_rt6, list);
		list_del(&mlxsw_sp_rt6->list);
		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	}
	return err;
}
6801 
/* Remove @nrt6 kernel routes from @fib6_entry's route list and migrate the
 * entry to a nexthop group reflecting the shrunken list.
 */
static void
mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct fib6_info **rt_arr, unsigned int nrt6)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int i;

	for (i = 0; i < nrt6; i++) {
		mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
							   rt_arr[i]);
		/* A route we were never tracking indicates a driver bug,
		 * but is harmless to skip.
		 */
		if (WARN_ON_ONCE(!mlxsw_sp_rt6))
			continue;

		fib6_entry->nrt6--;
		list_del(&mlxsw_sp_rt6->list);
		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	}

	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
}
6823 
/* Classify a local IPv6 route: defaults to a trap, but is promoted to an
 * IP-in-IP or NVE decapsulation entry when its destination is a tunnel
 * decap address. Returns 0 or negative errno.
 */
static int
mlxsw_sp_fib6_entry_type_set_local(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry *fib_entry,
				   const struct fib6_info *rt)
{
	struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
	union mlxsw_sp_l3addr dip = { .addr6 = rt->fib6_dst.addr };
	u32 tb_id = mlxsw_sp_fix_tb_id(rt->fib6_table->tb6_id);
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	int ifindex = nhgi->nexthops[0].ifindex;
	struct mlxsw_sp_ipip_entry *ipip_entry;

	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
	ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
						       MLXSW_SP_L3_PROTO_IPV6,
						       dip);

	/* Only decapsulate when the overlay device is administratively up. */
	if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
		return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, fib_entry,
						     ipip_entry);
	}
	if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
					 MLXSW_SP_L3_PROTO_IPV6, &dip)) {
		u32 tunnel_index;

		tunnel_index = router->nve_decap_config.tunnel_index;
		fib_entry->decap.tunnel_index = tunnel_index;
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
	}

	return 0;
}
6857 
/* Derive the hardware entry type from the kernel route's flags: local and
 * anycast routes trap to the CPU, blackhole/reject routes drop, gateway
 * routes forward via an adjacency and the rest are directly connected.
 * Returns 0 or negative errno (only the local case can fail).
 */
static int mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					const struct fib6_info *rt)
{
	if (rt->fib6_flags & RTF_LOCAL)
		return mlxsw_sp_fib6_entry_type_set_local(mlxsw_sp, fib_entry,
							  rt);
	if (rt->fib6_flags & RTF_ANYCAST)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
	else if (rt->fib6_type == RTN_BLACKHOLE)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
	else if (rt->fib6_flags & RTF_REJECT)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
	else if (fib_entry->nh_group->nhgi->gateway)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
	else
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;

	return 0;
}
6878 
/* Drop and free every route wrapper on the entry's list, leaving nrt6 at
 * zero.
 */
static void
mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;

	/* _safe variant since each wrapper is freed while iterating. */
	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
				 list) {
		fib6_entry->nrt6--;
		list_del(&mlxsw_sp_rt6->list);
		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	}
}
6891 
/* Create an IPv6 FIB entry covering the @nrt6 sibling routes in @rt_arr:
 * wrap each route, bind the entry to a nexthop group, link the group to
 * the node's virtual router and classify the entry type. Returns the entry
 * or an ERR_PTR(); unwinds on failure.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   struct fib6_info **rt_arr, unsigned int nrt6)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err, i;

	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
	if (!fib6_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib6_entry->common;

	INIT_LIST_HEAD(&fib6_entry->rt6_list);

	for (i = 0; i < nrt6; i++) {
		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
		if (IS_ERR(mlxsw_sp_rt6)) {
			err = PTR_ERR(mlxsw_sp_rt6);
			goto err_rt6_unwind;
		}
		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
		fib6_entry->nrt6++;
	}

	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_rt6_unwind;

	err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
					     fib_node->fib);
	if (err)
		goto err_nexthop_group_vr_link;

	/* Type classification reads the nexthop group, so it must come
	 * after the group is bound.
	 */
	err = mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
	if (err)
		goto err_fib6_entry_type_set;

	fib_entry->fib_node = fib_node;

	return fib6_entry;

err_fib6_entry_type_set:
	mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
err_nexthop_group_vr_link:
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, fib_entry);
err_rt6_unwind:
	/* i holds the number of wrappers created; pop them from the tail. */
	for (; i > 0; i--) {
		fib6_entry->nrt6--;
		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
					       struct mlxsw_sp_rt6, list);
		list_del(&mlxsw_sp_rt6->list);
		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	}
	kfree(fib6_entry);
	return ERR_PTR(err);
}
6951 
/* Undo any type-specific state set up by mlxsw_sp_fib6_entry_type_set()
 * (e.g. decap resources) via the protocol-agnostic helper.
 */
static void
mlxsw_sp_fib6_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib6_entry->common);
}
6958 
/* Destroy an IPv6 FIB entry, reversing mlxsw_sp_fib6_entry_create() in
 * strict reverse order: type state, VR link, nexthop group, route wrappers
 * and finally the entry itself.
 */
static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;

	mlxsw_sp_fib6_entry_type_unset(mlxsw_sp, fib6_entry);
	mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
					 fib_node->fib);
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
	/* The destroy-all pass above must have drained the route list. */
	WARN_ON(fib6_entry->nrt6);
	kfree(fib6_entry);
}
6972 
/* Find the offloaded entry corresponding to kernel route @rt, matching on
 * table, destination prefix, metric and membership of @rt in the entry's
 * route list. Returns NULL when the route was never offloaded.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct fib6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct fib6_info *cmp_rt;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);

	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
					    sizeof(rt->fib6_dst.addr),
					    rt->fib6_dst.plen);
	if (!fib_node)
		return NULL;

	fib6_entry = container_of(fib_node->fib_entry,
				  struct mlxsw_sp_fib6_entry, common);
	cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
	if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id &&
	    rt->fib6_metric == cmp_rt->fib6_metric &&
	    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
		return fib6_entry;

	return NULL;
}
7004 
/* Decide whether @fib6_entry may replace the entry currently linked to its
 * node. Mirrors the IPv4 rule: a main-table route must not replace a
 * local-table route, which has higher precedence in the kernel.
 */
static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
	struct mlxsw_sp_fib6_entry *fib6_replaced;
	struct fib6_info *rt, *rt_replaced;

	if (!fib_node->fib_entry)
		return true;

	fib6_replaced = container_of(fib_node->fib_entry,
				     struct mlxsw_sp_fib6_entry,
				     common);
	rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
	rt_replaced = mlxsw_sp_fib6_entry_rt(fib6_replaced);
	if (rt->fib6_table->tb6_id == RT_TABLE_MAIN &&
	    rt_replaced->fib6_table->tb6_id == RT_TABLE_LOCAL)
		return false;

	return true;
}
7025 
/* Handle an IPv6 route replace notification for the @nrt6 sibling routes
 * in @rt_arr: create a new entry, link it to the FIB node and destroy the
 * entry it replaced, if any. Rollback on failure restores the previously
 * linked entry. Returns 0 or negative errno.
 */
static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
					struct fib6_info **rt_arr,
					unsigned int nrt6)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced;
	struct mlxsw_sp_fib_entry *replaced;
	struct mlxsw_sp_fib_node *fib_node;
	struct fib6_info *rt = rt_arr[0];
	int err;

	/* Source-specific routing is not supported by the device. */
	if (rt->fib6_src.plen)
		return -EINVAL;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return 0;

	/* Routes using an unknown nexthop object were vetoed earlier;
	 * silently ignore them here.
	 */
	if (rt->nh && !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, rt->nh->id))
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
					 &rt->fib6_dst.addr,
					 sizeof(rt->fib6_dst.addr),
					 rt->fib6_dst.plen,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib_node))
		return PTR_ERR(fib_node);

	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
						nrt6);
	if (IS_ERR(fib6_entry)) {
		err = PTR_ERR(fib6_entry);
		goto err_fib6_entry_create;
	}

	/* Do not let a main-table route displace a local-table one. */
	if (!mlxsw_sp_fib6_allow_replace(fib6_entry)) {
		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		return 0;
	}

	replaced = fib_node->fib_entry;
	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common);
	if (err)
		goto err_fib_node_entry_link;

	/* Nothing to replace */
	if (!replaced)
		return 0;

	/* The old entry is no longer offloaded; clear its flags before it
	 * is freed so user space sees consistent state.
	 */
	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
	fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry,
				     common);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced);

	return 0;

err_fib_node_entry_link:
	/* Restore the entry that was linked before we tried to replace it. */
	fib_node->fib_entry = replaced;
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
err_fib6_entry_create:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
7089 
/* Handle an IPv6 route append notification: add the @nrt6 sibling routes
 * in @rt_arr to the existing entry for their prefix. Returns 0 or negative
 * errno.
 */
static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
				       struct fib6_info **rt_arr,
				       unsigned int nrt6)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct fib6_info *rt = rt_arr[0];
	int err;

	/* Source-specific routing is not supported by the device. */
	if (rt->fib6_src.plen)
		return -EINVAL;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
					 &rt->fib6_dst.addr,
					 sizeof(rt->fib6_dst.addr),
					 rt->fib6_dst.plen,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib_node))
		return PTR_ERR(fib_node);

	/* An append must target an existing entry; a bare node here means
	 * the kernel and driver state diverged.
	 */
	if (WARN_ON_ONCE(!fib_node->fib_entry)) {
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		return -EINVAL;
	}

	fib6_entry = container_of(fib_node->fib_entry,
				  struct mlxsw_sp_fib6_entry, common);
	err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr,
					      nrt6);
	if (err)
		goto err_fib6_entry_nexthop_add;

	return 0;

err_fib6_entry_nexthop_add:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
7131 
/* Handle an IPv6 route delete notification for the @nrt6 sibling routes in
 * @rt_arr: either shrink the entry's nexthop group or, when all of its
 * routes are going away, destroy the entry entirely.
 */
static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib6_info **rt_arr,
				     unsigned int nrt6)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct fib6_info *rt = rt_arr[0];

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return;

	/* Multipath routes are first added to the FIB trie and only then
	 * notified. If we vetoed the addition, we will get a delete
	 * notification for a route we do not have. Therefore, do not warn if
	 * route was not found.
	 */
	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
	if (!fib6_entry)
		return;

	/* If not all the nexthops are deleted, then only reduce the nexthop
	 * group.
	 */
	if (nrt6 != fib6_entry->nrt6) {
		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
						nrt6);
		return;
	}

	fib_node = fib6_entry->common.fib_node;

	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
7167 
7168 static struct mlxsw_sp_mr_table *
7169 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
7170 {
7171     if (family == RTNL_FAMILY_IPMR)
7172         return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
7173     else
7174         return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
7175 }
7176 
/* Program a multicast route into the MR table of its virtual router.
 * Called from the deferred FIB event work with RTNL and the router lock
 * held.
 */
static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
				     struct mfc_entry_notifier_info *men_info,
				     bool replace)
{
	struct mlxsw_sp_mr_table *mrt;
	struct mlxsw_sp_vr *vr;

	/* Find or create the VR for the route's table. The del path
	 * balances this with mlxsw_sp_vr_put().
	 */
	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
	if (IS_ERR(vr))
		return PTR_ERR(vr);

	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
	/* NOTE(review): on route-add failure the VR is not released here;
	 * presumably reclaimed on flush - confirm this is intentional.
	 */
	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
}
7191 
/* Remove a multicast route from its MR table. Called from the deferred
 * FIB event work with RTNL and the router lock held.
 */
static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
				      struct mfc_entry_notifier_info *men_info)
{
	struct mlxsw_sp_mr_table *mrt;
	struct mlxsw_sp_vr *vr;

	/* The VR was created (or found) when the route was added. */
	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
	/* Balances mlxsw_sp_vr_get() in mlxsw_sp_router_fibmr_add(). */
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
7206 
/* Register a multicast VIF with the MR table of its virtual router.
 * Called from the deferred FIB event work with RTNL and the router lock
 * held.
 */
static int
mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
			      struct vif_entry_notifier_info *ven_info)
{
	struct mlxsw_sp_mr_table *mrt;
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
	if (IS_ERR(vr))
		return PTR_ERR(vr);

	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
	/* May be NULL when the netdev currently has no RIF. */
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
				   ven_info->vif_index,
				   ven_info->vif_flags, rif);
}
7225 
/* Unregister a multicast VIF and release the VR reference taken when it
 * was added. Called from the deferred FIB event work with RTNL and the
 * router lock held.
 */
static void
mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
			      struct vif_entry_notifier_info *ven_info)
{
	struct mlxsw_sp_mr_table *mrt;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
	/* Balances mlxsw_sp_vr_get() in mlxsw_sp_router_fibmr_vif_add(). */
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
7241 
7242 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
7243                      struct mlxsw_sp_fib_node *fib_node)
7244 {
7245     struct mlxsw_sp_fib4_entry *fib4_entry;
7246 
7247     fib4_entry = container_of(fib_node->fib_entry,
7248                   struct mlxsw_sp_fib4_entry, common);
7249     mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
7250     mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
7251     mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7252 }
7253 
7254 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
7255                      struct mlxsw_sp_fib_node *fib_node)
7256 {
7257     struct mlxsw_sp_fib6_entry *fib6_entry;
7258 
7259     fib6_entry = container_of(fib_node->fib_entry,
7260                   struct mlxsw_sp_fib6_entry, common);
7261     mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
7262     mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7263     mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7264 }
7265 
7266 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
7267                     struct mlxsw_sp_fib_node *fib_node)
7268 {
7269     switch (fib_node->fib->proto) {
7270     case MLXSW_SP_L3_PROTO_IPV4:
7271         mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
7272         break;
7273     case MLXSW_SP_L3_PROTO_IPV6:
7274         mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
7275         break;
7276     }
7277 }
7278 
/* Tear down every FIB node of the given protocol in a virtual router.
 * The explicit do_break test stops the walk right after the last node
 * is flushed, before the loop macro would evaluate the list head again.
 * NOTE(review): presumably flushing the last node can release the
 * fib/VR and invalidate fib->node_list - confirm that is the reason.
 */
static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_vr *vr,
				  enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
	struct mlxsw_sp_fib_node *fib_node, *tmp;

	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
		/* tmp pointing at the head means fib_node is the last one. */
		bool do_break = &tmp->list == &fib->node_list;

		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
		if (do_break)
			break;
	}
}
7294 
7295 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
7296 {
7297     int i, j;
7298 
7299     for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
7300         struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
7301 
7302         if (!mlxsw_sp_vr_is_used(vr))
7303             continue;
7304 
7305         for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
7306             mlxsw_sp_mr_table_flush(vr->mr_table[j]);
7307         mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
7308 
7309         /* If virtual router was only used for IPv4, then it's no
7310          * longer used.
7311          */
7312         if (!mlxsw_sp_vr_is_used(vr))
7313             continue;
7314         mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
7315     }
7316 }
7317 
/* Deferred IPv6 FIB event payload: the notified route and its ECMP
 * siblings, each with a reference taken in the notifier.
 */
struct mlxsw_sp_fib6_event_work {
	struct fib6_info **rt_arr;	/* kcalloc'ed; freed in work_fini */
	unsigned int nrt6;		/* number of entries in rt_arr */
};
7322 
/* A FIB notifier event deferred to process context. Which union member
 * is valid depends on the event and address family.
 * NOTE(review): fr_info appears unused by the visible handlers (rule
 * events are handled synchronously) - confirm before removing.
 */
struct mlxsw_sp_fib_event_work {
	struct work_struct work;
	union {
		struct mlxsw_sp_fib6_event_work fib6_work;
		struct fib_entry_notifier_info fen_info;
		struct fib_rule_notifier_info fr_info;
		struct fib_nh_notifier_info fnh_info;
		struct mfc_entry_notifier_info men_info;
		struct vif_entry_notifier_info ven_info;
	};
	struct mlxsw_sp *mlxsw_sp;
	unsigned long event;
};
7336 
/* Snapshot an IPv6 route and its ECMP siblings into the work item,
 * taking a reference on each so they outlive the (atomic) notifier
 * context. References are dropped by mlxsw_sp_router_fib6_work_fini().
 * Returns 0 or -ENOMEM.
 */
static int
mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
			       struct fib6_entry_notifier_info *fen6_info)
{
	struct fib6_info *rt = fen6_info->rt;
	struct fib6_info **rt_arr;
	struct fib6_info *iter;
	unsigned int nrt6;
	int i = 0;

	/* The notified route itself plus its siblings. */
	nrt6 = fen6_info->nsiblings + 1;

	/* GFP_ATOMIC: called from the FIB notifier under rcu_read_lock. */
	rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
	if (!rt_arr)
		return -ENOMEM;

	fib6_work->rt_arr = rt_arr;
	fib6_work->nrt6 = nrt6;

	rt_arr[0] = rt;
	fib6_info_hold(rt);

	if (!fen6_info->nsiblings)
		return 0;

	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
		if (i == fen6_info->nsiblings)
			break;

		rt_arr[i + 1] = iter;
		fib6_info_hold(iter);
		i++;
	}
	/* Fewer siblings than advertised would mean the list changed
	 * underneath us.
	 */
	WARN_ON_ONCE(i != fen6_info->nsiblings);

	return 0;
}
7374 
7375 static void
7376 mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
7377 {
7378     int i;
7379 
7380     for (i = 0; i < fib6_work->nrt6; i++)
7381         mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
7382     kfree(fib6_work->rt_arr);
7383 }
7384 
/* Process-context handler for deferred IPv4 FIB events. Applies the
 * event to hardware under the router lock and drops the fib_info
 * reference taken when the event was queued, then frees the work item.
 */
static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	int err;

	mutex_lock(&mlxsw_sp->router->lock);
	/* Routing changes may affect SPAN sessions; re-evaluate them. */
	mlxsw_sp_span_respin(mlxsw_sp);

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
		err = mlxsw_sp_router_fib4_replace(mlxsw_sp,
						   &fib_work->fen_info);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
			/* Flag the route so user space sees it is not
			 * offloaded.
			 */
			mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp,
							      &fib_work->fen_info);
		}
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
					fib_work->fnh_info.fib_nh);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
	mutex_unlock(&mlxsw_sp->router->lock);
	kfree(fib_work);
}
7420 
/* Process-context handler for deferred IPv6 FIB events. Applies the
 * event to hardware under the router lock, releases the route
 * references taken at queue time and frees the work item.
 */
static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
			container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp_fib6_event_work *fib6_work = &fib_work->fib6_work;
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	int err;

	mutex_lock(&mlxsw_sp->router->lock);
	/* Routing changes may affect SPAN sessions; re-evaluate them. */
	mlxsw_sp_span_respin(mlxsw_sp);

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
		err = mlxsw_sp_router_fib6_replace(mlxsw_sp,
						   fib6_work->rt_arr,
						   fib6_work->nrt6);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
			/* Flag the routes so user space sees they are not
			 * offloaded.
			 */
			mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
							      fib6_work->rt_arr,
							      fib6_work->nrt6);
		}
		mlxsw_sp_router_fib6_work_fini(fib6_work);
		break;
	case FIB_EVENT_ENTRY_APPEND:
		err = mlxsw_sp_router_fib6_append(mlxsw_sp,
						  fib6_work->rt_arr,
						  fib6_work->nrt6);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "FIB append failed.\n");
			mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
							      fib6_work->rt_arr,
							      fib6_work->nrt6);
		}
		mlxsw_sp_router_fib6_work_fini(fib6_work);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib6_del(mlxsw_sp,
					 fib6_work->rt_arr,
					 fib6_work->nrt6);
		mlxsw_sp_router_fib6_work_fini(fib6_work);
		break;
	}
	mutex_unlock(&mlxsw_sp->router->lock);
	kfree(fib_work);
}
7467 
/* Process-context handler for deferred multicast FIB events. Unlike the
 * unicast handlers, RTNL is taken in addition to the router lock.
 * Drops the MFC/netdev reference taken at queue time and frees the
 * work item.
 */
static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace;
	int err;

	/* RTNL before the router lock - keep this ordering consistent. */
	rtnl_lock();
	mutex_lock(&mlxsw_sp->router->lock);
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;

		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
						replace);
		if (err)
			dev_warn(mlxsw_sp->bus_info->dev, "MR entry add failed.\n");
		mr_cache_put(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
		mr_cache_put(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_VIF_ADD:
		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
						    &fib_work->ven_info);
		if (err)
			dev_warn(mlxsw_sp->bus_info->dev, "MR VIF add failed.\n");
		dev_put(fib_work->ven_info.dev);
		break;
	case FIB_EVENT_VIF_DEL:
		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
					      &fib_work->ven_info);
		dev_put(fib_work->ven_info.dev);
		break;
	}
	mutex_unlock(&mlxsw_sp->router->lock);
	rtnl_unlock();
	kfree(fib_work);
}
7510 
7511 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
7512                        struct fib_notifier_info *info)
7513 {
7514     struct fib_entry_notifier_info *fen_info;
7515     struct fib_nh_notifier_info *fnh_info;
7516 
7517     switch (fib_work->event) {
7518     case FIB_EVENT_ENTRY_REPLACE:
7519     case FIB_EVENT_ENTRY_DEL:
7520         fen_info = container_of(info, struct fib_entry_notifier_info,
7521                     info);
7522         fib_work->fen_info = *fen_info;
7523         /* Take reference on fib_info to prevent it from being
7524          * freed while work is queued. Release it afterwards.
7525          */
7526         fib_info_hold(fib_work->fen_info.fi);
7527         break;
7528     case FIB_EVENT_NH_ADD:
7529     case FIB_EVENT_NH_DEL:
7530         fnh_info = container_of(info, struct fib_nh_notifier_info,
7531                     info);
7532         fib_work->fnh_info = *fnh_info;
7533         fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
7534         break;
7535     }
7536 }
7537 
7538 static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
7539                       struct fib_notifier_info *info)
7540 {
7541     struct fib6_entry_notifier_info *fen6_info;
7542     int err;
7543 
7544     switch (fib_work->event) {
7545     case FIB_EVENT_ENTRY_REPLACE:
7546     case FIB_EVENT_ENTRY_APPEND:
7547     case FIB_EVENT_ENTRY_DEL:
7548         fen6_info = container_of(info, struct fib6_entry_notifier_info,
7549                      info);
7550         err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
7551                              fen6_info);
7552         if (err)
7553             return err;
7554         break;
7555     }
7556 
7557     return 0;
7558 }
7559 
7560 static void
7561 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
7562                 struct fib_notifier_info *info)
7563 {
7564     switch (fib_work->event) {
7565     case FIB_EVENT_ENTRY_REPLACE:
7566     case FIB_EVENT_ENTRY_ADD:
7567     case FIB_EVENT_ENTRY_DEL:
7568         memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
7569         mr_cache_hold(fib_work->men_info.mfc);
7570         break;
7571     case FIB_EVENT_VIF_ADD:
7572     case FIB_EVENT_VIF_DEL:
7573         memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
7574         dev_hold(fib_work->ven_info.dev);
7575         break;
7576     }
7577 }
7578 
7579 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
7580                       struct fib_notifier_info *info,
7581                       struct mlxsw_sp *mlxsw_sp)
7582 {
7583     struct netlink_ext_ack *extack = info->extack;
7584     struct fib_rule_notifier_info *fr_info;
7585     struct fib_rule *rule;
7586     int err = 0;
7587 
7588     /* nothing to do at the moment */
7589     if (event == FIB_EVENT_RULE_DEL)
7590         return 0;
7591 
7592     fr_info = container_of(info, struct fib_rule_notifier_info, info);
7593     rule = fr_info->rule;
7594 
7595     /* Rule only affects locally generated traffic */
7596     if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex)
7597         return 0;
7598 
7599     switch (info->family) {
7600     case AF_INET:
7601         if (!fib4_rule_default(rule) && !rule->l3mdev)
7602             err = -EOPNOTSUPP;
7603         break;
7604     case AF_INET6:
7605         if (!fib6_rule_default(rule) && !rule->l3mdev)
7606             err = -EOPNOTSUPP;
7607         break;
7608     case RTNL_FAMILY_IPMR:
7609         if (!ipmr_rule_default(rule) && !rule->l3mdev)
7610             err = -EOPNOTSUPP;
7611         break;
7612     case RTNL_FAMILY_IP6MR:
7613         if (!ip6mr_rule_default(rule) && !rule->l3mdev)
7614             err = -EOPNOTSUPP;
7615         break;
7616     }
7617 
7618     if (err < 0)
7619         NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
7620 
7621     return err;
7622 }
7623 
/* Called with rcu_read_lock() */
/* FIB notifier callback. Runs in atomic context, so only validation,
 * reference taking and work scheduling happen here; the hardware update
 * is deferred to a work item. Rule events are the exception: they are
 * handled (and possibly vetoed) synchronously.
 */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct mlxsw_sp_fib_event_work *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlxsw_sp_router *router;
	int err;

	/* Ignore families this driver does not offload. */
	if ((info->family != AF_INET && info->family != AF_INET6 &&
	     info->family != RTNL_FAMILY_IPMR &&
	     info->family != RTNL_FAMILY_IP6MR))
		return NOTIFY_DONE;

	router = container_of(nb, struct mlxsw_sp_router, fib_nb);

	switch (event) {
	case FIB_EVENT_RULE_ADD:
	case FIB_EVENT_RULE_DEL:
		err = mlxsw_sp_router_fib_rule_event(event, info,
						     router->mlxsw_sp);
		return notifier_from_errno(err);
	case FIB_EVENT_ENTRY_ADD:
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_APPEND:
		if (info->family == AF_INET) {
			struct fib_entry_notifier_info *fen_info = ptr;

			/* Veto IPv4 routes with an IPv6 gateway up front. */
			if (fen_info->fi->fib_nh_is_v6) {
				NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
				return notifier_from_errno(-EINVAL);
			}
		}
		break;
	}

	/* GFP_ATOMIC: we are under rcu_read_lock(). */
	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (!fib_work)
		return NOTIFY_BAD;

	fib_work->mlxsw_sp = router->mlxsw_sp;
	fib_work->event = event;

	switch (info->family) {
	case AF_INET:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
		mlxsw_sp_router_fib4_event(fib_work, info);
		break;
	case AF_INET6:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
		/* Only the IPv6 snapshot can fail (route array allocation). */
		err = mlxsw_sp_router_fib6_event(fib_work, info);
		if (err)
			goto err_fib_event;
		break;
	case RTNL_FAMILY_IP6MR:
	case RTNL_FAMILY_IPMR:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
		mlxsw_sp_router_fibmr_event(fib_work, info);
		break;
	}

	mlxsw_core_schedule_work(&fib_work->work);

	return NOTIFY_DONE;

err_fib_event:
	kfree(fib_work);
	return NOTIFY_BAD;
}
7693 
7694 static struct mlxsw_sp_rif *
7695 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
7696              const struct net_device *dev)
7697 {
7698     int i;
7699 
7700     for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7701         if (mlxsw_sp->router->rifs[i] &&
7702             mlxsw_sp->router->rifs[i]->dev == dev)
7703             return mlxsw_sp->router->rifs[i];
7704 
7705     return NULL;
7706 }
7707 
7708 bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
7709              const struct net_device *dev)
7710 {
7711     struct mlxsw_sp_rif *rif;
7712 
7713     mutex_lock(&mlxsw_sp->router->lock);
7714     rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7715     mutex_unlock(&mlxsw_sp->router->lock);
7716 
7717     return rif;
7718 }
7719 
7720 u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
7721 {
7722     struct mlxsw_sp_rif *rif;
7723     u16 vid = 0;
7724 
7725     mutex_lock(&mlxsw_sp->router->lock);
7726     rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7727     if (!rif)
7728         goto out;
7729 
7730     /* We only return the VID for VLAN RIFs. Otherwise we return an
7731      * invalid value (0).
7732      */
7733     if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)
7734         goto out;
7735 
7736     vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7737 
7738 out:
7739     mutex_unlock(&mlxsw_sp->router->lock);
7740     return vid;
7741 }
7742 
7743 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
7744 {
7745     char ritr_pl[MLXSW_REG_RITR_LEN];
7746     int err;
7747 
7748     mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
7749     err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7750     if (err)
7751         return err;
7752 
7753     mlxsw_reg_ritr_enable_set(ritr_pl, false);
7754     return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7755 }
7756 
/* Quiesce a RIF that is going away: disable it in hardware first, then
 * detach the nexthops and neighbour entries that referenced it.
 */
static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_rif *rif)
{
	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
}
7764 
7765 static bool
7766 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
7767                unsigned long event)
7768 {
7769     struct inet6_dev *inet6_dev;
7770     bool addr_list_empty = true;
7771     struct in_device *idev;
7772 
7773     switch (event) {
7774     case NETDEV_UP:
7775         return rif == NULL;
7776     case NETDEV_DOWN:
7777         rcu_read_lock();
7778         idev = __in_dev_get_rcu(dev);
7779         if (idev && idev->ifa_list)
7780             addr_list_empty = false;
7781 
7782         inet6_dev = __in6_dev_get(dev);
7783         if (addr_list_empty && inet6_dev &&
7784             !list_empty(&inet6_dev->addr_list))
7785             addr_list_empty = false;
7786         rcu_read_unlock();
7787 
7788         /* macvlans do not have a RIF, but rather piggy back on the
7789          * RIF of their lower device.
7790          */
7791         if (netif_is_macvlan(dev) && addr_list_empty)
7792             return true;
7793 
7794         if (rif && addr_list_empty &&
7795             !netif_is_l3_slave(rif->dev))
7796             return true;
7797         /* It is possible we already removed the RIF ourselves
7798          * if it was assigned to a netdev that is now a bridge
7799          * or LAG slave.
7800          */
7801         return false;
7802     }
7803 
7804     return false;
7805 }
7806 
7807 static enum mlxsw_sp_rif_type
7808 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
7809               const struct net_device *dev)
7810 {
7811     enum mlxsw_sp_fid_type type;
7812 
7813     if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
7814         return MLXSW_SP_RIF_TYPE_IPIP_LB;
7815 
7816     /* Otherwise RIF type is derived from the type of the underlying FID. */
7817     if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
7818         type = MLXSW_SP_FID_TYPE_8021Q;
7819     else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
7820         type = MLXSW_SP_FID_TYPE_8021Q;
7821     else if (netif_is_bridge_master(dev))
7822         type = MLXSW_SP_FID_TYPE_8021D;
7823     else
7824         type = MLXSW_SP_FID_TYPE_RFID;
7825 
7826     return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
7827 }
7828 
7829 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
7830 {
7831     int i;
7832 
7833     for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
7834         if (!mlxsw_sp->router->rifs[i]) {
7835             *p_rif_index = i;
7836             return 0;
7837         }
7838     }
7839 
7840     return -ENOBUFS;
7841 }
7842 
7843 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
7844                            u16 vr_id,
7845                            struct net_device *l3_dev)
7846 {
7847     struct mlxsw_sp_rif *rif;
7848 
7849     rif = kzalloc(rif_size, GFP_KERNEL);
7850     if (!rif)
7851         return NULL;
7852 
7853     INIT_LIST_HEAD(&rif->nexthop_list);
7854     INIT_LIST_HEAD(&rif->neigh_list);
7855     if (l3_dev) {
7856         ether_addr_copy(rif->addr, l3_dev->dev_addr);
7857         rif->mtu = l3_dev->mtu;
7858         rif->dev = l3_dev;
7859     }
7860     rif->vr_id = vr_id;
7861     rif->rif_index = rif_index;
7862 
7863     return rif;
7864 }
7865 
/* Look up a RIF by its index; NULL when the slot is unused. */
struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
					   u16 rif_index)
{
	return mlxsw_sp->router->rifs[rif_index];
}
7871 
/* Return the hardware index of @rif. */
u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
{
	return rif->rif_index;
}
7876 
/* Return the RIF index of an IP-in-IP loopback RIF. */
u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
	return lb_rif->common.rif_index;
}
7881 
/* Return the id of the underlay virtual router used by an IP-in-IP
 * loopback RIF, or 0 if the VR cannot be looked up.
 */
u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
	struct mlxsw_sp_vr *ul_vr;

	/* NOTE(review): the reference taken by this vr_get is apparently
	 * never put - confirm getting an already-existing VR is
	 * side-effect free.
	 */
	ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
	if (WARN_ON(IS_ERR(ul_vr)))
		return 0;

	return ul_vr->id;
}
7893 
/* Return the underlay RIF id of an IP-in-IP loopback RIF. */
u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
	return lb_rif->ul_rif_id;
}
7898 
7899 static bool
7900 mlxsw_sp_router_port_l3_stats_enabled(struct mlxsw_sp_rif *rif)
7901 {
7902     return mlxsw_sp_rif_counter_valid_get(rif,
7903                           MLXSW_SP_RIF_COUNTER_EGRESS) &&
7904            mlxsw_sp_rif_counter_valid_get(rif,
7905                           MLXSW_SP_RIF_COUNTER_INGRESS);
7906 }
7907 
7908 static int
7909 mlxsw_sp_router_port_l3_stats_enable(struct mlxsw_sp_rif *rif)
7910 {
7911     int err;
7912 
7913     err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
7914     if (err)
7915         return err;
7916 
7917     /* Clear stale data. */
7918     err = mlxsw_sp_rif_counter_fetch_clear(rif,
7919                            MLXSW_SP_RIF_COUNTER_INGRESS,
7920                            NULL);
7921     if (err)
7922         goto err_clear_ingress;
7923 
7924     err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
7925     if (err)
7926         goto err_alloc_egress;
7927 
7928     /* Clear stale data. */
7929     err = mlxsw_sp_rif_counter_fetch_clear(rif,
7930                            MLXSW_SP_RIF_COUNTER_EGRESS,
7931                            NULL);
7932     if (err)
7933         goto err_clear_egress;
7934 
7935     return 0;
7936 
7937 err_clear_egress:
7938     mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
7939 err_alloc_egress:
7940 err_clear_ingress:
7941     mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
7942     return err;
7943 }
7944 
/* Free both RIF counters allocated by
 * mlxsw_sp_router_port_l3_stats_enable(), in reverse allocation order.
 */
static void
mlxsw_sp_router_port_l3_stats_disable(struct mlxsw_sp_rif *rif)
{
	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
}
7951 
7952 static void
7953 mlxsw_sp_router_port_l3_stats_report_used(struct mlxsw_sp_rif *rif,
7954                       struct netdev_notifier_offload_xstats_info *info)
7955 {
7956     if (!mlxsw_sp_router_port_l3_stats_enabled(rif))
7957         return;
7958     netdev_offload_xstats_report_used(info->report_used);
7959 }
7960 
7961 static int
7962 mlxsw_sp_router_port_l3_stats_fetch(struct mlxsw_sp_rif *rif,
7963                     struct rtnl_hw_stats64 *p_stats)
7964 {
7965     struct mlxsw_sp_rif_counter_set_basic ingress;
7966     struct mlxsw_sp_rif_counter_set_basic egress;
7967     int err;
7968 
7969     err = mlxsw_sp_rif_counter_fetch_clear(rif,
7970                            MLXSW_SP_RIF_COUNTER_INGRESS,
7971                            &ingress);
7972     if (err)
7973         return err;
7974 
7975     err = mlxsw_sp_rif_counter_fetch_clear(rif,
7976                            MLXSW_SP_RIF_COUNTER_EGRESS,
7977                            &egress);
7978     if (err)
7979         return err;
7980 
7981 #define MLXSW_SP_ROUTER_ALL_GOOD(SET, SFX)      \
7982         ((SET.good_unicast_ ## SFX) +       \
7983          (SET.good_multicast_ ## SFX) +     \
7984          (SET.good_broadcast_ ## SFX))
7985 
7986     p_stats->rx_packets = MLXSW_SP_ROUTER_ALL_GOOD(ingress, packets);
7987     p_stats->tx_packets = MLXSW_SP_ROUTER_ALL_GOOD(egress, packets);
7988     p_stats->rx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(ingress, bytes);
7989     p_stats->tx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(egress, bytes);
7990     p_stats->rx_errors = ingress.error_packets;
7991     p_stats->tx_errors = egress.error_packets;
7992     p_stats->rx_dropped = ingress.discard_packets;
7993     p_stats->tx_dropped = egress.discard_packets;
7994     p_stats->multicast = ingress.good_multicast_packets +
7995                  ingress.good_broadcast_packets;
7996 
7997 #undef MLXSW_SP_ROUTER_ALL_GOOD
7998 
7999     return 0;
8000 }
8001 
8002 static int
8003 mlxsw_sp_router_port_l3_stats_report_delta(struct mlxsw_sp_rif *rif,
8004                        struct netdev_notifier_offload_xstats_info *info)
8005 {
8006     struct rtnl_hw_stats64 stats = {};
8007     int err;
8008 
8009     if (!mlxsw_sp_router_port_l3_stats_enabled(rif))
8010         return 0;
8011 
8012     err = mlxsw_sp_router_port_l3_stats_fetch(rif, &stats);
8013     if (err)
8014         return err;
8015 
8016     netdev_offload_xstats_report_delta(info->report_delta, &stats);
8017     return 0;
8018 }
8019 
/* Work item used to defer the rtnetlink hw-stats notification for a
 * netdev (the device reference is held while queued).
 */
struct mlxsw_sp_router_hwstats_notify_work {
	struct work_struct work;
	struct net_device *dev;
};
8024 
/* Deferred hw-stats notification: emit the rtnetlink offload-xstats
 * notification under RTNL, drop the device reference taken at
 * scheduling time and free the work item.
 */
static void mlxsw_sp_router_hwstats_notify_work(struct work_struct *work)
{
	struct mlxsw_sp_router_hwstats_notify_work *hws_work =
		container_of(work, struct mlxsw_sp_router_hwstats_notify_work,
			     work);

	rtnl_lock();
	rtnl_offload_xstats_notify(hws_work->dev);
	rtnl_unlock();
	dev_put(hws_work->dev);
	kfree(hws_work);
}
8037 
8038 static void
8039 mlxsw_sp_router_hwstats_notify_schedule(struct net_device *dev)
8040 {
8041     struct mlxsw_sp_router_hwstats_notify_work *hws_work;
8042 
8043     /* To collect notification payload, the core ends up sending another
8044      * notifier block message, which would deadlock on the attempt to
8045      * acquire the router lock again. Just postpone the notification until
8046      * later.
8047      */
8048 
8049     hws_work = kzalloc(sizeof(*hws_work), GFP_KERNEL);
8050     if (!hws_work)
8051         return;
8052 
8053     INIT_WORK(&hws_work->work, mlxsw_sp_router_hwstats_notify_work);
8054     dev_hold(dev);
8055     hws_work->dev = dev;
8056     mlxsw_core_schedule_work(&hws_work->work);
8057 }
8058 
/* Return the ifindex of the netdev backing @rif. Must not be called for
 * an underlay RIF, whose dev is NULL.
 */
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
{
	return rif->dev->ifindex;
}
8063 
/* Return the netdev backing @rif; NULL for an underlay RIF. */
const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
{
	return rif->dev;
}
8068 
8069 static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif)
8070 {
8071     struct rtnl_hw_stats64 stats = {};
8072 
8073     if (!mlxsw_sp_router_port_l3_stats_fetch(rif, &stats))
8074         netdev_offload_xstats_push_delta(rif->dev,
8075                          NETDEV_OFFLOAD_XSTATS_TYPE_L3,
8076                          &stats);
8077 }
8078 
/* Create a router interface (RIF) for @params->dev: bind it to the
 * virtual router derived from the device's L3 master FIB table, attach
 * a FID when the RIF type provides one, configure the hardware entry
 * and register the RIF with every multicast routing table of the VR.
 *
 * Returns the new RIF or an ERR_PTR() on failure; on error all steps
 * are unwound in reverse order via the err_* labels.
 */
static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
            const struct mlxsw_sp_rif_params *params,
            struct netlink_ext_ack *extack)
{
    u32 tb_id = l3mdev_fib_table(params->dev);
    const struct mlxsw_sp_rif_ops *ops;
    struct mlxsw_sp_fid *fid = NULL;
    enum mlxsw_sp_rif_type type;
    struct mlxsw_sp_rif *rif;
    struct mlxsw_sp_vr *vr;
    u16 rif_index;
    int i, err;

    type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
    ops = mlxsw_sp->router->rif_ops_arr[type];

    /* Fall back to the main table when the device has no L3 master. */
    vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
    if (IS_ERR(vr))
        return ERR_CAST(vr);
    vr->rif_count++;

    err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
    if (err) {
        NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
        goto err_rif_index_alloc;
    }

    rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
    if (!rif) {
        err = -ENOMEM;
        goto err_rif_alloc;
    }
    /* Reference dropped in mlxsw_sp_rif_destroy() / error path. */
    dev_hold(rif->dev);
    mlxsw_sp->router->rifs[rif_index] = rif;
    rif->mlxsw_sp = mlxsw_sp;
    rif->ops = ops;

    /* RIF types without fid_get (e.g. loopback) have no FID. */
    if (ops->fid_get) {
        fid = ops->fid_get(rif, extack);
        if (IS_ERR(fid)) {
            err = PTR_ERR(fid);
            goto err_fid_get;
        }
        rif->fid = fid;
    }

    if (ops->setup)
        ops->setup(rif, params);

    err = ops->configure(rif, extack);
    if (err)
        goto err_configure;

    for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
        err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
        if (err)
            goto err_mr_rif_add;
    }

    /* Use dedicated L3 stats when userspace enabled offload-xstats on
     * the device; otherwise allocate the generic RIF counters.
     */
    if (netdev_offload_xstats_enabled(rif->dev,
                      NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
        err = mlxsw_sp_router_port_l3_stats_enable(rif);
        if (err)
            goto err_stats_enable;
        mlxsw_sp_router_hwstats_notify_schedule(rif->dev);
    } else {
        mlxsw_sp_rif_counters_alloc(rif);
    }

    atomic_inc(&mlxsw_sp->router->rifs_count);
    return rif;

err_stats_enable:
err_mr_rif_add:
    /* Roll back only the mr_table entries added so far; when arriving
     * from err_stats_enable, i == MLXSW_SP_L3_PROTO_MAX so all entries
     * are removed.
     */
    for (i--; i >= 0; i--)
        mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
    ops->deconfigure(rif);
err_configure:
    if (fid)
        mlxsw_sp_fid_put(fid);
err_fid_get:
    mlxsw_sp->router->rifs[rif_index] = NULL;
    dev_put(rif->dev);
    kfree(rif);
err_rif_alloc:
err_rif_index_alloc:
    vr->rif_count--;
    mlxsw_sp_vr_put(mlxsw_sp, vr);
    return ERR_PTR(err);
}
8170 
/* Tear down @rif in reverse order of mlxsw_sp_rif_create(): sync away
 * router state referencing the RIF, flush/free its counters, remove it
 * from every multicast routing table, deconfigure the hardware entry,
 * release the FID (if any) and finally drop the VR reference.
 */
static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
{
    const struct mlxsw_sp_rif_ops *ops = rif->ops;
    struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
    struct mlxsw_sp_fid *fid = rif->fid;
    struct mlxsw_sp_vr *vr;
    int i;

    atomic_dec(&mlxsw_sp->router->rifs_count);
    mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
    vr = &mlxsw_sp->router->vrs[rif->vr_id];

    if (netdev_offload_xstats_enabled(rif->dev,
                      NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
        /* Push the final stats delta before disabling collection. */
        mlxsw_sp_rif_push_l3_stats(rif);
        mlxsw_sp_router_port_l3_stats_disable(rif);
        mlxsw_sp_router_hwstats_notify_schedule(rif->dev);
    } else {
        mlxsw_sp_rif_counters_free(rif);
    }

    for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
        mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
    ops->deconfigure(rif);
    if (fid)
        /* Loopback RIFs are not associated with a FID. */
        mlxsw_sp_fid_put(fid);
    mlxsw_sp->router->rifs[rif->rif_index] = NULL;
    /* Drops the reference taken in mlxsw_sp_rif_create(). */
    dev_put(rif->dev);
    kfree(rif);
    vr->rif_count--;
    mlxsw_sp_vr_put(mlxsw_sp, vr);
}
8204 
8205 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
8206                  struct net_device *dev)
8207 {
8208     struct mlxsw_sp_rif *rif;
8209 
8210     mutex_lock(&mlxsw_sp->router->lock);
8211     rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8212     if (!rif)
8213         goto out;
8214     mlxsw_sp_rif_destroy(rif);
8215 out:
8216     mutex_unlock(&mlxsw_sp->router->lock);
8217 }
8218 
8219 static void
8220 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
8221                  struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8222 {
8223     struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8224 
8225     params->vid = mlxsw_sp_port_vlan->vid;
8226     params->lag = mlxsw_sp_port->lagged;
8227     if (params->lag)
8228         params->lag_id = mlxsw_sp_port->lag_id;
8229     else
8230         params->system_port = mlxsw_sp_port->local_port;
8231 }
8232 
/* Downcast a generic RIF embedded as 'common' inside a subport RIF. */
static struct mlxsw_sp_rif_subport *
mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
{
    return container_of(rif, struct mlxsw_sp_rif_subport, common);
}
8238 
8239 static struct mlxsw_sp_rif *
8240 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
8241              const struct mlxsw_sp_rif_params *params,
8242              struct netlink_ext_ack *extack)
8243 {
8244     struct mlxsw_sp_rif_subport *rif_subport;
8245     struct mlxsw_sp_rif *rif;
8246 
8247     rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
8248     if (!rif)
8249         return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
8250 
8251     rif_subport = mlxsw_sp_rif_subport_rif(rif);
8252     refcount_inc(&rif_subport->ref_count);
8253     return rif;
8254 }
8255 
8256 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
8257 {
8258     struct mlxsw_sp_rif_subport *rif_subport;
8259 
8260     rif_subport = mlxsw_sp_rif_subport_rif(rif);
8261     if (!refcount_dec_and_test(&rif_subport->ref_count))
8262         return;
8263 
8264     mlxsw_sp_rif_destroy(rif);
8265 }
8266 
8267 static int mlxsw_sp_rif_mac_profile_index_alloc(struct mlxsw_sp *mlxsw_sp,
8268                         struct mlxsw_sp_rif_mac_profile *profile,
8269                         struct netlink_ext_ack *extack)
8270 {
8271     u8 max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile;
8272     struct mlxsw_sp_router *router = mlxsw_sp->router;
8273     int id;
8274 
8275     id = idr_alloc(&router->rif_mac_profiles_idr, profile, 0,
8276                max_rif_mac_profiles, GFP_KERNEL);
8277 
8278     if (id >= 0) {
8279         profile->id = id;
8280         return 0;
8281     }
8282 
8283     if (id == -ENOSPC)
8284         NL_SET_ERR_MSG_MOD(extack,
8285                    "Exceeded number of supported router interface MAC profiles");
8286 
8287     return id;
8288 }
8289 
8290 static struct mlxsw_sp_rif_mac_profile *
8291 mlxsw_sp_rif_mac_profile_index_free(struct mlxsw_sp *mlxsw_sp, u8 mac_profile)
8292 {
8293     struct mlxsw_sp_rif_mac_profile *profile;
8294 
8295     profile = idr_remove(&mlxsw_sp->router->rif_mac_profiles_idr,
8296                  mac_profile);
8297     WARN_ON(!profile);
8298     return profile;
8299 }
8300 
8301 static struct mlxsw_sp_rif_mac_profile *
8302 mlxsw_sp_rif_mac_profile_alloc(const char *mac)
8303 {
8304     struct mlxsw_sp_rif_mac_profile *profile;
8305 
8306     profile = kzalloc(sizeof(*profile), GFP_KERNEL);
8307     if (!profile)
8308         return NULL;
8309 
8310     ether_addr_copy(profile->mac_prefix, mac);
8311     refcount_set(&profile->ref_count, 1);
8312     return profile;
8313 }
8314 
8315 static struct mlxsw_sp_rif_mac_profile *
8316 mlxsw_sp_rif_mac_profile_find(const struct mlxsw_sp *mlxsw_sp, const char *mac)
8317 {
8318     struct mlxsw_sp_router *router = mlxsw_sp->router;
8319     struct mlxsw_sp_rif_mac_profile *profile;
8320     int id;
8321 
8322     idr_for_each_entry(&router->rif_mac_profiles_idr, profile, id) {
8323         if (ether_addr_equal_masked(profile->mac_prefix, mac,
8324                         mlxsw_sp->mac_mask))
8325             return profile;
8326     }
8327 
8328     return NULL;
8329 }
8330 
/* Occupancy getter: number of RIF MAC profiles currently allocated.
 * @priv is the mlxsw_sp instance.
 */
static u64 mlxsw_sp_rif_mac_profiles_occ_get(void *priv)
{
    const struct mlxsw_sp *mlxsw_sp = priv;

    return atomic_read(&mlxsw_sp->router->rif_mac_profiles_count);
}
8337 
/* Occupancy getter: number of RIFs currently allocated.
 * @priv is the mlxsw_sp instance.
 */
static u64 mlxsw_sp_rifs_occ_get(void *priv)
{
    const struct mlxsw_sp *mlxsw_sp = priv;

    return atomic_read(&mlxsw_sp->router->rifs_count);
}
8344 
8345 static struct mlxsw_sp_rif_mac_profile *
8346 mlxsw_sp_rif_mac_profile_create(struct mlxsw_sp *mlxsw_sp, const char *mac,
8347                 struct netlink_ext_ack *extack)
8348 {
8349     struct mlxsw_sp_rif_mac_profile *profile;
8350     int err;
8351 
8352     profile = mlxsw_sp_rif_mac_profile_alloc(mac);
8353     if (!profile)
8354         return ERR_PTR(-ENOMEM);
8355 
8356     err = mlxsw_sp_rif_mac_profile_index_alloc(mlxsw_sp, profile, extack);
8357     if (err)
8358         goto profile_index_alloc_err;
8359 
8360     atomic_inc(&mlxsw_sp->router->rif_mac_profiles_count);
8361     return profile;
8362 
8363 profile_index_alloc_err:
8364     kfree(profile);
8365     return ERR_PTR(err);
8366 }
8367 
8368 static void mlxsw_sp_rif_mac_profile_destroy(struct mlxsw_sp *mlxsw_sp,
8369                          u8 mac_profile)
8370 {
8371     struct mlxsw_sp_rif_mac_profile *profile;
8372 
8373     atomic_dec(&mlxsw_sp->router->rif_mac_profiles_count);
8374     profile = mlxsw_sp_rif_mac_profile_index_free(mlxsw_sp, mac_profile);
8375     kfree(profile);
8376 }
8377 
8378 static int mlxsw_sp_rif_mac_profile_get(struct mlxsw_sp *mlxsw_sp,
8379                     const char *mac, u8 *p_mac_profile,
8380                     struct netlink_ext_ack *extack)
8381 {
8382     struct mlxsw_sp_rif_mac_profile *profile;
8383 
8384     profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, mac);
8385     if (profile) {
8386         refcount_inc(&profile->ref_count);
8387         goto out;
8388     }
8389 
8390     profile = mlxsw_sp_rif_mac_profile_create(mlxsw_sp, mac, extack);
8391     if (IS_ERR(profile))
8392         return PTR_ERR(profile);
8393 
8394 out:
8395     *p_mac_profile = profile->id;
8396     return 0;
8397 }
8398 
8399 static void mlxsw_sp_rif_mac_profile_put(struct mlxsw_sp *mlxsw_sp,
8400                      u8 mac_profile)
8401 {
8402     struct mlxsw_sp_rif_mac_profile *profile;
8403 
8404     profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8405                mac_profile);
8406     if (WARN_ON(!profile))
8407         return;
8408 
8409     if (!refcount_dec_and_test(&profile->ref_count))
8410         return;
8411 
8412     mlxsw_sp_rif_mac_profile_destroy(mlxsw_sp, mac_profile);
8413 }
8414 
8415 static bool mlxsw_sp_rif_mac_profile_is_shared(const struct mlxsw_sp_rif *rif)
8416 {
8417     struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8418     struct mlxsw_sp_rif_mac_profile *profile;
8419 
8420     profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8421                rif->mac_profile_id);
8422     if (WARN_ON(!profile))
8423         return false;
8424 
8425     return refcount_read(&profile->ref_count) > 1;
8426 }
8427 
8428 static int mlxsw_sp_rif_mac_profile_edit(struct mlxsw_sp_rif *rif,
8429                      const char *new_mac)
8430 {
8431     struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8432     struct mlxsw_sp_rif_mac_profile *profile;
8433 
8434     profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8435                rif->mac_profile_id);
8436     if (WARN_ON(!profile))
8437         return -EINVAL;
8438 
8439     ether_addr_copy(profile->mac_prefix, new_mac);
8440     return 0;
8441 }
8442 
8443 static int
8444 mlxsw_sp_rif_mac_profile_replace(struct mlxsw_sp *mlxsw_sp,
8445                  struct mlxsw_sp_rif *rif,
8446                  const char *new_mac,
8447                  struct netlink_ext_ack *extack)
8448 {
8449     u8 mac_profile;
8450     int err;
8451 
8452     if (!mlxsw_sp_rif_mac_profile_is_shared(rif) &&
8453         !mlxsw_sp_rif_mac_profile_find(mlxsw_sp, new_mac))
8454         return mlxsw_sp_rif_mac_profile_edit(rif, new_mac);
8455 
8456     err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, new_mac,
8457                        &mac_profile, extack);
8458     if (err)
8459         return err;
8460 
8461     mlxsw_sp_rif_mac_profile_put(mlxsw_sp, rif->mac_profile_id);
8462     rif->mac_profile_id = mac_profile;
8463     return 0;
8464 }
8465 
/* Join @mlxsw_sp_port_vlan to the router via @l3_dev: take a reference
 * on the subport RIF (creating it if needed), map the {port, VID} pair
 * to the RIF's FID, disable learning and set the VID to forwarding.
 * Errors are unwound in reverse order. Callers hold the router lock.
 */
static int
__mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
                 struct net_device *l3_dev,
                 struct netlink_ext_ack *extack)
{
    struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
    struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
    struct mlxsw_sp_rif_params params = {
        .dev = l3_dev,
    };
    u16 vid = mlxsw_sp_port_vlan->vid;
    struct mlxsw_sp_rif *rif;
    struct mlxsw_sp_fid *fid;
    int err;

    mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
    rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
    if (IS_ERR(rif))
        return PTR_ERR(rif);

    /* FID was already created, just take a reference */
    fid = rif->ops->fid_get(rif, extack);
    err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
    if (err)
        goto err_fid_port_vid_map;

    err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
    if (err)
        goto err_port_vid_learning_set;

    err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
                    BR_STATE_FORWARDING);
    if (err)
        goto err_port_vid_stp_set;

    mlxsw_sp_port_vlan->fid = fid;

    return 0;

err_port_vid_stp_set:
    mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
err_port_vid_learning_set:
    mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
err_fid_port_vid_map:
    mlxsw_sp_fid_put(fid);
    mlxsw_sp_rif_subport_put(rif);
    return err;
}
8514 
/* Reverse of __mlxsw_sp_port_vlan_router_join(): block the VID,
 * re-enable learning, unmap the {port, VID} from the FID and drop the
 * FID and subport RIF references, in reverse order of the join.
 */
static void
__mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
{
    struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
    struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
    struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
    u16 vid = mlxsw_sp_port_vlan->vid;

    /* Only router FIDs (RFID) are expected here. */
    if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
        return;

    mlxsw_sp_port_vlan->fid = NULL;
    mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
    mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
    mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
    mlxsw_sp_fid_put(fid);
    mlxsw_sp_rif_subport_put(rif);
}
8533 
8534 int
8535 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
8536                    struct net_device *l3_dev,
8537                    struct netlink_ext_ack *extack)
8538 {
8539     struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
8540     struct mlxsw_sp_rif *rif;
8541     int err = 0;
8542 
8543     mutex_lock(&mlxsw_sp->router->lock);
8544     rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
8545     if (!rif)
8546         goto out;
8547 
8548     err = __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev,
8549                            extack);
8550 out:
8551     mutex_unlock(&mlxsw_sp->router->lock);
8552     return err;
8553 }
8554 
/* Locked wrapper around __mlxsw_sp_port_vlan_router_leave(). */
void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
{
    struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;

    mutex_lock(&mlxsw_sp->router->lock);
    __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
    mutex_unlock(&mlxsw_sp->router->lock);
}
8564 
8565 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
8566                          struct net_device *port_dev,
8567                          unsigned long event, u16 vid,
8568                          struct netlink_ext_ack *extack)
8569 {
8570     struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
8571     struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
8572 
8573     mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
8574     if (WARN_ON(!mlxsw_sp_port_vlan))
8575         return -EINVAL;
8576 
8577     switch (event) {
8578     case NETDEV_UP:
8579         return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
8580                             l3_dev, extack);
8581     case NETDEV_DOWN:
8582         __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
8583         break;
8584     }
8585 
8586     return 0;
8587 }
8588 
8589 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
8590                     unsigned long event,
8591                     struct netlink_ext_ack *extack)
8592 {
8593     if (netif_is_any_bridge_port(port_dev) || netif_is_lag_port(port_dev))
8594         return 0;
8595 
8596     return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
8597                          MLXSW_SP_DEFAULT_VID, extack);
8598 }
8599 
8600 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
8601                      struct net_device *lag_dev,
8602                      unsigned long event, u16 vid,
8603                      struct netlink_ext_ack *extack)
8604 {
8605     struct net_device *port_dev;
8606     struct list_head *iter;
8607     int err;
8608 
8609     netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
8610         if (mlxsw_sp_port_dev_check(port_dev)) {
8611             err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
8612                                 port_dev,
8613                                 event, vid,
8614                                 extack);
8615             if (err)
8616                 return err;
8617         }
8618     }
8619 
8620     return 0;
8621 }
8622 
8623 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
8624                        unsigned long event,
8625                        struct netlink_ext_ack *extack)
8626 {
8627     if (netif_is_bridge_port(lag_dev))
8628         return 0;
8629 
8630     return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
8631                          MLXSW_SP_DEFAULT_VID, extack);
8632 }
8633 
8634 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
8635                       struct net_device *l3_dev,
8636                       unsigned long event,
8637                       struct netlink_ext_ack *extack)
8638 {
8639     struct mlxsw_sp_rif_params params = {
8640         .dev = l3_dev,
8641     };
8642     struct mlxsw_sp_rif *rif;
8643 
8644     switch (event) {
8645     case NETDEV_UP:
8646         if (netif_is_bridge_master(l3_dev) && br_vlan_enabled(l3_dev)) {
8647             u16 proto;
8648 
8649             br_vlan_get_proto(l3_dev, &proto);
8650             if (proto == ETH_P_8021AD) {
8651                 NL_SET_ERR_MSG_MOD(extack, "Adding an IP address to 802.1ad bridge is not supported");
8652                 return -EOPNOTSUPP;
8653             }
8654         }
8655         rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
8656         if (IS_ERR(rif))
8657             return PTR_ERR(rif);
8658         break;
8659     case NETDEV_DOWN:
8660         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
8661         mlxsw_sp_rif_destroy(rif);
8662         break;
8663     }
8664 
8665     return 0;
8666 }
8667 
8668 static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
8669                     struct net_device *vlan_dev,
8670                     unsigned long event,
8671                     struct netlink_ext_ack *extack)
8672 {
8673     struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
8674     u16 vid = vlan_dev_vlan_id(vlan_dev);
8675 
8676     if (netif_is_bridge_port(vlan_dev))
8677         return 0;
8678 
8679     if (mlxsw_sp_port_dev_check(real_dev))
8680         return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
8681                              event, vid, extack);
8682     else if (netif_is_lag_master(real_dev))
8683         return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
8684                              vid, extack);
8685     else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
8686         return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
8687                               extack);
8688 
8689     return 0;
8690 }
8691 
8692 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
8693 {
8694     u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
8695     u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
8696 
8697     return ether_addr_equal_masked(mac, vrrp4, mask);
8698 }
8699 
8700 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
8701 {
8702     u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
8703     u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
8704 
8705     return ether_addr_equal_masked(mac, vrrp6, mask);
8706 }
8707 
8708 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
8709                 const u8 *mac, bool adding)
8710 {
8711     char ritr_pl[MLXSW_REG_RITR_LEN];
8712     u8 vrrp_id = adding ? mac[5] : 0;
8713     int err;
8714 
8715     if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
8716         !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
8717         return 0;
8718 
8719     mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
8720     err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8721     if (err)
8722         return err;
8723 
8724     if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
8725         mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
8726     else
8727         mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
8728 
8729     return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8730 }
8731 
8732 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
8733                     const struct net_device *macvlan_dev,
8734                     struct netlink_ext_ack *extack)
8735 {
8736     struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
8737     struct mlxsw_sp_rif *rif;
8738     int err;
8739 
8740     rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
8741     if (!rif) {
8742         NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
8743         return -EOPNOTSUPP;
8744     }
8745 
8746     err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8747                   mlxsw_sp_fid_index(rif->fid), true);
8748     if (err)
8749         return err;
8750 
8751     err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
8752                    macvlan_dev->dev_addr, true);
8753     if (err)
8754         goto err_rif_vrrp_add;
8755 
8756     /* Make sure the bridge driver does not have this MAC pointing at
8757      * some other port.
8758      */
8759     if (rif->ops->fdb_del)
8760         rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
8761 
8762     return 0;
8763 
8764 err_rif_vrrp_add:
8765     mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8766                 mlxsw_sp_fid_index(rif->fid), false);
8767     return err;
8768 }
8769 
8770 static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
8771                        const struct net_device *macvlan_dev)
8772 {
8773     struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
8774     struct mlxsw_sp_rif *rif;
8775 
8776     rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
8777     /* If we do not have a RIF, then we already took care of
8778      * removing the macvlan's MAC during RIF deletion.
8779      */
8780     if (!rif)
8781         return;
8782     mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
8783                  false);
8784     mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8785                 mlxsw_sp_fid_index(rif->fid), false);
8786 }
8787 
/* Locked wrapper around __mlxsw_sp_rif_macvlan_del(). */
void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
                  const struct net_device *macvlan_dev)
{
    mutex_lock(&mlxsw_sp->router->lock);
    __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
    mutex_unlock(&mlxsw_sp->router->lock);
}
8795 
8796 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
8797                        struct net_device *macvlan_dev,
8798                        unsigned long event,
8799                        struct netlink_ext_ack *extack)
8800 {
8801     switch (event) {
8802     case NETDEV_UP:
8803         return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
8804     case NETDEV_DOWN:
8805         __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
8806         break;
8807     }
8808 
8809     return 0;
8810 }
8811 
/* Dispatch an address event to the handler matching @dev's kind
 * (port, LAG, bridge, VLAN or macvlan); other devices are ignored.
 * Must be called with the router lock held.
 */
static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
                     struct net_device *dev,
                     unsigned long event,
                     struct netlink_ext_ack *extack)
{
    if (mlxsw_sp_port_dev_check(dev))
        return mlxsw_sp_inetaddr_port_event(dev, event, extack);
    if (netif_is_lag_master(dev))
        return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
    if (netif_is_bridge_master(dev))
        return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
                              extack);
    if (is_vlan_dev(dev))
        return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
                            extack);
    if (netif_is_macvlan(dev))
        return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
                               extack);

    return 0;
}
8833 
8834 static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
8835                    unsigned long event, void *ptr)
8836 {
8837     struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
8838     struct net_device *dev = ifa->ifa_dev->dev;
8839     struct mlxsw_sp_router *router;
8840     struct mlxsw_sp_rif *rif;
8841     int err = 0;
8842 
8843     /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
8844     if (event == NETDEV_UP)
8845         return NOTIFY_DONE;
8846 
8847     router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
8848     mutex_lock(&router->lock);
8849     rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
8850     if (!mlxsw_sp_rif_should_config(rif, dev, event))
8851         goto out;
8852 
8853     err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
8854 out:
8855     mutex_unlock(&router->lock);
8856     return notifier_from_errno(err);
8857 }
8858 
8859 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
8860                   unsigned long event, void *ptr)
8861 {
8862     struct in_validator_info *ivi = (struct in_validator_info *) ptr;
8863     struct net_device *dev = ivi->ivi_dev->dev;
8864     struct mlxsw_sp *mlxsw_sp;
8865     struct mlxsw_sp_rif *rif;
8866     int err = 0;
8867 
8868     mlxsw_sp = mlxsw_sp_lower_get(dev);
8869     if (!mlxsw_sp)
8870         return NOTIFY_DONE;
8871 
8872     mutex_lock(&mlxsw_sp->router->lock);
8873     rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8874     if (!mlxsw_sp_rif_should_config(rif, dev, event))
8875         goto out;
8876 
8877     err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
8878 out:
8879     mutex_unlock(&mlxsw_sp->router->lock);
8880     return notifier_from_errno(err);
8881 }
8882 
/* Deferred IPv6 address event: carries the event context from the
 * atomic inet6addr notifier into process context.
 */
struct mlxsw_sp_inet6addr_event_work {
    struct work_struct work;
    struct mlxsw_sp *mlxsw_sp;
    struct net_device *dev; /* held via dev_hold() until the work runs */
    unsigned long event;
};
8889 
8890 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
8891 {
8892     struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
8893         container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
8894     struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
8895     struct net_device *dev = inet6addr_work->dev;
8896     unsigned long event = inet6addr_work->event;
8897     struct mlxsw_sp_rif *rif;
8898 
8899     rtnl_lock();
8900     mutex_lock(&mlxsw_sp->router->lock);
8901 
8902     rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8903     if (!mlxsw_sp_rif_should_config(rif, dev, event))
8904         goto out;
8905 
8906     __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
8907 out:
8908     mutex_unlock(&mlxsw_sp->router->lock);
8909     rtnl_unlock();
8910     dev_put(dev);
8911     kfree(inet6addr_work);
8912 }
8913 
8914 /* Called with rcu_read_lock() */
8915 static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
8916                     unsigned long event, void *ptr)
8917 {
8918     struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
8919     struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
8920     struct net_device *dev = if6->idev->dev;
8921     struct mlxsw_sp_router *router;
8922 
8923     /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
8924     if (event == NETDEV_UP)
8925         return NOTIFY_DONE;
8926 
8927     inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
8928     if (!inet6addr_work)
8929         return NOTIFY_BAD;
8930 
8931     router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
8932     INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
8933     inet6addr_work->mlxsw_sp = router->mlxsw_sp;
8934     inet6addr_work->dev = dev;
8935     inet6addr_work->event = event;
8936     dev_hold(dev);
8937     mlxsw_core_schedule_work(&inet6addr_work->work);
8938 
8939     return NOTIFY_DONE;
8940 }
8941 
8942 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
8943                    unsigned long event, void *ptr)
8944 {
8945     struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
8946     struct net_device *dev = i6vi->i6vi_dev->dev;
8947     struct mlxsw_sp *mlxsw_sp;
8948     struct mlxsw_sp_rif *rif;
8949     int err = 0;
8950 
8951     mlxsw_sp = mlxsw_sp_lower_get(dev);
8952     if (!mlxsw_sp)
8953         return NOTIFY_DONE;
8954 
8955     mutex_lock(&mlxsw_sp->router->lock);
8956     rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8957     if (!mlxsw_sp_rif_should_config(rif, dev, event))
8958         goto out;
8959 
8960     err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
8961 out:
8962     mutex_unlock(&mlxsw_sp->router->lock);
8963     return notifier_from_errno(err);
8964 }
8965 
/* Re-program an existing RIF with a new MAC, MTU and MAC profile.
 * The RITR register is queried first so every other field of the
 * interface is preserved by the subsequent write.
 */
static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
                 const char *mac, int mtu, u8 mac_profile)
{
    char ritr_pl[MLXSW_REG_RITR_LEN];
    int err;

    mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
    err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
    if (err)
        return err;

    mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
    mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
    mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, mac_profile);
    /* Re-issuing the create op updates the already-existing RIF. */
    mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
    return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
8983 
/* Handle NETDEV_CHANGEADDR / NETDEV_CHANGEMTU on a RIF's netdev:
 * re-program the RIF with the device's current MAC and MTU.
 *
 * Sequence: remove the old MAC from the FDB, swap the MAC profile,
 * rewrite the RIF entry, install the new MAC in the FDB, and update
 * the multicast router tables if the MTU changed. Each failure unwinds
 * the previous steps in exact reverse order so the old state (rif->addr,
 * rif->mtu, old_mac_profile) is restored. Returns 0 or -errno.
 */
static int
mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
                  struct mlxsw_sp_rif *rif,
                  struct netlink_ext_ack *extack)
{
    struct net_device *dev = rif->dev;
    u8 old_mac_profile;
    u16 fid_index;
    int err;

    fid_index = mlxsw_sp_fid_index(rif->fid);

    /* Stop trapping the old router MAC before changing anything. */
    err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
    if (err)
        return err;

    old_mac_profile = rif->mac_profile_id;
    err = mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, dev->dev_addr,
                           extack);
    if (err)
        goto err_rif_mac_profile_replace;

    err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
                dev->mtu, rif->mac_profile_id);
    if (err)
        goto err_rif_edit;

    err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
    if (err)
        goto err_rif_fdb_op;

    if (rif->mtu != dev->mtu) {
        struct mlxsw_sp_vr *vr;
        int i;

        /* The RIF is relevant only to its mr_table instance, as unlike
         * unicast routing, in multicast routing a RIF cannot be shared
         * between several multicast routing tables.
         */
        vr = &mlxsw_sp->router->vrs[rif->vr_id];
        for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
            mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
                           rif, dev->mtu);
    }

    /* Commit the new state only after all hardware ops succeeded. */
    ether_addr_copy(rif->addr, dev->dev_addr);
    rif->mtu = dev->mtu;

    netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);

    return 0;

err_rif_fdb_op:
    mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu,
              old_mac_profile);
err_rif_edit:
    mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, rif->addr, extack);
err_rif_mac_profile_replace:
    mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
    return err;
}
9045 
9046 static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
9047                 struct netdev_notifier_pre_changeaddr_info *info)
9048 {
9049     struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9050     struct mlxsw_sp_rif_mac_profile *profile;
9051     struct netlink_ext_ack *extack;
9052     u8 max_rif_mac_profiles;
9053     u64 occ;
9054 
9055     extack = netdev_notifier_info_to_extack(&info->info);
9056 
9057     profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, info->dev_addr);
9058     if (profile)
9059         return 0;
9060 
9061     max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile;
9062     occ = mlxsw_sp_rif_mac_profiles_occ_get(mlxsw_sp);
9063     if (occ < max_rif_mac_profiles)
9064         return 0;
9065 
9066     if (!mlxsw_sp_rif_mac_profile_is_shared(rif))
9067         return 0;
9068 
9069     NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interface MAC profiles");
9070     return -ENOBUFS;
9071 }
9072 
9073 static bool mlxsw_sp_is_offload_xstats_event(unsigned long event)
9074 {
9075     switch (event) {
9076     case NETDEV_OFFLOAD_XSTATS_ENABLE:
9077     case NETDEV_OFFLOAD_XSTATS_DISABLE:
9078     case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
9079     case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
9080         return true;
9081     }
9082 
9083     return false;
9084 }
9085 
9086 static int
9087 mlxsw_sp_router_port_offload_xstats_cmd(struct mlxsw_sp_rif *rif,
9088                     unsigned long event,
9089                     struct netdev_notifier_offload_xstats_info *info)
9090 {
9091     switch (info->type) {
9092     case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
9093         break;
9094     default:
9095         return 0;
9096     }
9097 
9098     switch (event) {
9099     case NETDEV_OFFLOAD_XSTATS_ENABLE:
9100         return mlxsw_sp_router_port_l3_stats_enable(rif);
9101     case NETDEV_OFFLOAD_XSTATS_DISABLE:
9102         mlxsw_sp_router_port_l3_stats_disable(rif);
9103         return 0;
9104     case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
9105         mlxsw_sp_router_port_l3_stats_report_used(rif, info);
9106         return 0;
9107     case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
9108         return mlxsw_sp_router_port_l3_stats_report_delta(rif, info);
9109     }
9110 
9111     WARN_ON_ONCE(1);
9112     return 0;
9113 }
9114 
/* Forward an offloaded-xstats command to the RIF backing @dev, if any.
 * Devices without a RIF have nothing offloaded, so they succeed trivially.
 */
static int
mlxsw_sp_netdevice_offload_xstats_cmd(struct mlxsw_sp *mlxsw_sp,
                      struct net_device *dev,
                      unsigned long event,
                      struct netdev_notifier_offload_xstats_info *info)
{
    struct mlxsw_sp_rif *rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);

    return rif ? mlxsw_sp_router_port_offload_xstats_cmd(rif, event,
                                 info) : 0;
}
9129 
9130 static bool mlxsw_sp_is_router_event(unsigned long event)
9131 {
9132     switch (event) {
9133     case NETDEV_PRE_CHANGEADDR:
9134     case NETDEV_CHANGEADDR:
9135     case NETDEV_CHANGEMTU:
9136         return true;
9137     default:
9138         return false;
9139     }
9140 }
9141 
9142 static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
9143                         unsigned long event, void *ptr)
9144 {
9145     struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
9146     struct mlxsw_sp *mlxsw_sp;
9147     struct mlxsw_sp_rif *rif;
9148 
9149     mlxsw_sp = mlxsw_sp_lower_get(dev);
9150     if (!mlxsw_sp)
9151         return 0;
9152 
9153     rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9154     if (!rif)
9155         return 0;
9156 
9157     switch (event) {
9158     case NETDEV_CHANGEMTU:
9159     case NETDEV_CHANGEADDR:
9160         return mlxsw_sp_router_port_change_event(mlxsw_sp, rif, extack);
9161     case NETDEV_PRE_CHANGEADDR:
9162         return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
9163     default:
9164         WARN_ON_ONCE(1);
9165         break;
9166     }
9167 
9168     return 0;
9169 }
9170 
/* Move @l3_dev into a VRF. A RIF is bound to its virtual router at
 * creation time, so an existing RIF must be torn down and re-created
 * for the device to land in the VRF's virtual router.
 * Returns 0 or -errno from the RIF re-creation.
 */
static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
                  struct net_device *l3_dev,
                  struct netlink_ext_ack *extack)
{
    struct mlxsw_sp_rif *rif;

    /* If netdev is already associated with a RIF, then we need to
     * destroy it and create a new one with the new virtual router ID.
     */
    rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
    if (rif)
        __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
                      extack);

    return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
}
9187 
9188 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
9189                     struct net_device *l3_dev)
9190 {
9191     struct mlxsw_sp_rif *rif;
9192 
9193     rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
9194     if (!rif)
9195         return;
9196     __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
9197 }
9198 
9199 static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
9200 {
9201     struct netdev_notifier_changeupper_info *info = ptr;
9202 
9203     if (event != NETDEV_PRECHANGEUPPER && event != NETDEV_CHANGEUPPER)
9204         return false;
9205     return netif_is_l3_master(info->upper_dev);
9206 }
9207 
9208 static int
9209 mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
9210                  struct netdev_notifier_changeupper_info *info)
9211 {
9212     struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
9213     int err = 0;
9214 
9215     /* We do not create a RIF for a macvlan, but only use it to
9216      * direct more MAC addresses to the router.
9217      */
9218     if (!mlxsw_sp || netif_is_macvlan(l3_dev))
9219         return 0;
9220 
9221     switch (event) {
9222     case NETDEV_PRECHANGEUPPER:
9223         break;
9224     case NETDEV_CHANGEUPPER:
9225         if (info->linking) {
9226             struct netlink_ext_ack *extack;
9227 
9228             extack = netdev_notifier_info_to_extack(&info->info);
9229             err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
9230         } else {
9231             mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
9232         }
9233         break;
9234     }
9235 
9236     return err;
9237 }
9238 
/* Router-side netdevice notifier. Under the router lock, dispatch to
 * the first matching handler: offloaded-xstats commands, IP-in-IP
 * overlay devices, IP-in-IP underlay devices, RIF address/MTU changes,
 * and finally VRF (changeupper) events. The else-if chain encodes
 * priority: an event is consumed by the most specific classification.
 */
static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb,
                       unsigned long event, void *ptr)
{
    struct net_device *dev = netdev_notifier_info_to_dev(ptr);
    struct mlxsw_sp_router *router;
    struct mlxsw_sp *mlxsw_sp;
    int err = 0;

    router = container_of(nb, struct mlxsw_sp_router, netdevice_nb);
    mlxsw_sp = router->mlxsw_sp;

    mutex_lock(&mlxsw_sp->router->lock);

    if (mlxsw_sp_is_offload_xstats_event(event))
        err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev,
                                event, ptr);
    else if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
        err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
                               event, ptr);
    else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev))
        err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev,
                               event, ptr);
    else if (mlxsw_sp_is_router_event(event))
        err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr);
    else if (mlxsw_sp_is_vrf_event(event, ptr))
        err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr);

    mutex_unlock(&mlxsw_sp->router->lock);

    return notifier_from_errno(err);
}
9270 
9271 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev,
9272                     struct netdev_nested_priv *priv)
9273 {
9274     struct mlxsw_sp_rif *rif = (struct mlxsw_sp_rif *)priv->data;
9275 
9276     if (!netif_is_macvlan(dev))
9277         return 0;
9278 
9279     return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9280                    mlxsw_sp_fid_index(rif->fid), false);
9281 }
9282 
/* Remove the router FDB entries installed for all macvlan uppers of the
 * RIF's netdev. Called on RIF destruction; the macvlans themselves stay
 * but lose offload, hence the warning.
 */
static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
{
    struct netdev_nested_priv priv = {
        .data = (void *)rif,
    };

    if (!netif_is_macvlan_port(rif->dev))
        return 0;

    netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
    return netdev_walk_all_upper_dev_rcu(rif->dev,
                         __mlxsw_sp_rif_macvlan_flush, &priv);
}
9296 
/* Initialize the sub-port specific part of a freshly allocated RIF from
 * the creation parameters. Only one of lag_id / system_port is set,
 * depending on whether the RIF sits on a LAG or a single port.
 */
static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
                       const struct mlxsw_sp_rif_params *params)
{
    struct mlxsw_sp_rif_subport *rif_subport;

    rif_subport = mlxsw_sp_rif_subport_rif(rif);
    refcount_set(&rif_subport->ref_count, 1);
    rif_subport->vid = params->vid;
    rif_subport->lag = params->lag;
    if (params->lag)
        rif_subport->lag_id = params->lag_id;
    else
        rif_subport->system_port = params->system_port;
}
9311 
/* Write (enable=true) or invalidate (enable=false) the sub-port RIF in
 * the RITR register, keyed by either LAG id or system port.
 */
static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
{
    struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
    struct mlxsw_sp_rif_subport *rif_subport;
    char ritr_pl[MLXSW_REG_RITR_LEN];
    u16 efid;

    rif_subport = mlxsw_sp_rif_subport_rif(rif);
    mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
                rif->rif_index, rif->vr_id, rif->dev->mtu);
    mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
    mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id);
    /* Egress FID is the RIF's own (r)FID. */
    efid = mlxsw_sp_fid_index(rif->fid);
    mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
                  rif_subport->lag ? rif_subport->lag_id :
                             rif_subport->system_port,
                  efid, 0);
    return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
9331 
/* Configure a sub-port RIF in hardware: take a MAC profile, program the
 * RITR entry, install the router MAC in the FDB and bind the FID to the
 * RIF. Failures unwind in exact reverse order (goto chain).
 * Returns 0 or -errno.
 */
static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif,
                      struct netlink_ext_ack *extack)
{
    u8 mac_profile;
    int err;

    err = mlxsw_sp_rif_mac_profile_get(rif->mlxsw_sp, rif->addr,
                       &mac_profile, extack);
    if (err)
        return err;
    rif->mac_profile_id = mac_profile;

    err = mlxsw_sp_rif_subport_op(rif, true);
    if (err)
        goto err_rif_subport_op;

    err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
                  mlxsw_sp_fid_index(rif->fid), true);
    if (err)
        goto err_rif_fdb_op;

    err = mlxsw_sp_fid_rif_set(rif->fid, rif);
    if (err)
        goto err_fid_rif_set;

    return 0;

err_fid_rif_set:
    mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
                mlxsw_sp_fid_index(rif->fid), false);
err_rif_fdb_op:
    mlxsw_sp_rif_subport_op(rif, false);
err_rif_subport_op:
    mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, mac_profile);
    return err;
}
9368 
/* Tear down a sub-port RIF: exact reverse of
 * mlxsw_sp_rif_subport_configure(), plus flushing FDB entries of any
 * macvlan uppers before the RITR entry is invalidated.
 */
static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
{
    struct mlxsw_sp_fid *fid = rif->fid;

    mlxsw_sp_fid_rif_unset(fid);
    mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
                mlxsw_sp_fid_index(fid), false);
    mlxsw_sp_rif_macvlan_flush(rif);
    mlxsw_sp_rif_subport_op(rif, false);
    mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
}
9380 
/* Sub-port RIFs use a router FID (rFID) keyed by the RIF index. */
static struct mlxsw_sp_fid *
mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
                 struct netlink_ext_ack *extack)
{
    return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
}
9387 
/* Operations for RIFs on top of a port/LAG (optionally with a VLAN). */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
    .type           = MLXSW_SP_RIF_TYPE_SUBPORT,
    .rif_size       = sizeof(struct mlxsw_sp_rif_subport),
    .setup          = mlxsw_sp_rif_subport_setup,
    .configure      = mlxsw_sp_rif_subport_configure,
    .deconfigure        = mlxsw_sp_rif_subport_deconfigure,
    .fid_get        = mlxsw_sp_rif_subport_fid_get,
};
9396 
9397 static int mlxsw_sp_rif_fid_op(struct mlxsw_sp_rif *rif, u16 fid, bool enable)
9398 {
9399     enum mlxsw_reg_ritr_if_type type = MLXSW_REG_RITR_FID_IF;
9400     struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9401     char ritr_pl[MLXSW_REG_RITR_LEN];
9402 
9403     mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
9404                 rif->dev->mtu);
9405     mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
9406     mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id);
9407     mlxsw_reg_ritr_fid_if_fid_set(ritr_pl, fid);
9408 
9409     return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9410 }
9411 
/* The "router port" is a virtual port one past the device's last
 * physical port; it is the flood-table member used below to deliver
 * BC/MC flooded packets to the router block.
 */
u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
{
    return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
}
9416 
/* Configure a FID RIF in hardware: take a MAC profile, program the RITR
 * entry, add the router port to the FID's MC and BC flood tables,
 * install the router MAC in the FDB and bind the FID to the RIF.
 * Failures unwind in exact reverse order (goto chain).
 * Returns 0 or -errno.
 */
static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif,
                      struct netlink_ext_ack *extack)
{
    struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
    u16 fid_index = mlxsw_sp_fid_index(rif->fid);
    u8 mac_profile;
    int err;

    err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr,
                       &mac_profile, extack);
    if (err)
        return err;
    rif->mac_profile_id = mac_profile;

    err = mlxsw_sp_rif_fid_op(rif, fid_index, true);
    if (err)
        goto err_rif_fid_op;

    err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                     mlxsw_sp_router_port(mlxsw_sp), true);
    if (err)
        goto err_fid_mc_flood_set;

    err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                     mlxsw_sp_router_port(mlxsw_sp), true);
    if (err)
        goto err_fid_bc_flood_set;

    err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
                  mlxsw_sp_fid_index(rif->fid), true);
    if (err)
        goto err_rif_fdb_op;

    err = mlxsw_sp_fid_rif_set(rif->fid, rif);
    if (err)
        goto err_fid_rif_set;

    return 0;

err_fid_rif_set:
    mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
                mlxsw_sp_fid_index(rif->fid), false);
err_rif_fdb_op:
    mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                   mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
    mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                   mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
    mlxsw_sp_rif_fid_op(rif, fid_index, false);
err_rif_fid_op:
    mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile);
    return err;
}
9471 
/* Tear down a FID RIF: exact reverse of mlxsw_sp_rif_fid_configure(),
 * plus flushing FDB entries of any macvlan uppers.
 */
static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
{
    u16 fid_index = mlxsw_sp_fid_index(rif->fid);
    struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
    struct mlxsw_sp_fid *fid = rif->fid;

    mlxsw_sp_fid_rif_unset(fid);
    mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
                mlxsw_sp_fid_index(fid), false);
    mlxsw_sp_rif_macvlan_flush(rif);
    mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                   mlxsw_sp_router_port(mlxsw_sp), false);
    mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                   mlxsw_sp_router_port(mlxsw_sp), false);
    mlxsw_sp_rif_fid_op(rif, fid_index, false);
    mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
}
9489 
/* FID RIFs use an 802.1D FID keyed by the netdev's ifindex. */
static struct mlxsw_sp_fid *
mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
             struct netlink_ext_ack *extack)
{
    return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
}
9496 
9497 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
9498 {
9499     struct switchdev_notifier_fdb_info info = {};
9500     struct net_device *dev;
9501 
9502     dev = br_fdb_find_port(rif->dev, mac, 0);
9503     if (!dev)
9504         return;
9505 
9506     info.addr = mac;
9507     info.vid = 0;
9508     call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
9509                  NULL);
9510 }
9511 
/* Operations for RIFs on top of a VLAN-unaware bridge. */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
    .type           = MLXSW_SP_RIF_TYPE_FID,
    .rif_size       = sizeof(struct mlxsw_sp_rif),
    .configure      = mlxsw_sp_rif_fid_configure,
    .deconfigure        = mlxsw_sp_rif_fid_deconfigure,
    .fid_get        = mlxsw_sp_rif_fid_fid_get,
    .fdb_del        = mlxsw_sp_rif_fid_fdb_del,
};
9520 
9521 static struct mlxsw_sp_fid *
9522 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
9523               struct netlink_ext_ack *extack)
9524 {
9525     struct net_device *br_dev;
9526     u16 vid;
9527     int err;
9528 
9529     if (is_vlan_dev(rif->dev)) {
9530         vid = vlan_dev_vlan_id(rif->dev);
9531         br_dev = vlan_dev_real_dev(rif->dev);
9532         if (WARN_ON(!netif_is_bridge_master(br_dev)))
9533             return ERR_PTR(-EINVAL);
9534     } else {
9535         err = br_vlan_get_pvid(rif->dev, &vid);
9536         if (err < 0 || !vid) {
9537             NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
9538             return ERR_PTR(-EINVAL);
9539         }
9540     }
9541 
9542     return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
9543 }
9544 
9545 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
9546 {
9547     struct switchdev_notifier_fdb_info info = {};
9548     u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
9549     struct net_device *br_dev;
9550     struct net_device *dev;
9551 
9552     br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
9553     dev = br_fdb_find_port(br_dev, mac, vid);
9554     if (!dev)
9555         return;
9556 
9557     info.addr = mac;
9558     info.vid = vid;
9559     call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
9560                  NULL);
9561 }
9562 
/* Write (enable=true) or invalidate (enable=false) the RIF as a VLAN
 * router interface for @vid, with egress FID @efid.
 */
static int mlxsw_sp_rif_vlan_op(struct mlxsw_sp_rif *rif, u16 vid, u16 efid,
                bool enable)
{
    struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
    char ritr_pl[MLXSW_REG_RITR_LEN];

    mlxsw_reg_ritr_vlan_if_pack(ritr_pl, enable, rif->rif_index, rif->vr_id,
                    rif->dev->mtu, rif->dev->dev_addr,
                    rif->mac_profile_id, vid, efid);

    return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
9575 
/* Configure a VLAN RIF in hardware: take a MAC profile, program the
 * RITR entry with egress FID @efid, add the router port to the FID's MC
 * and BC flood tables, install the router MAC in the FDB and bind the
 * FID to the RIF. Failures unwind in exact reverse order (goto chain);
 * the disabling vlan_op uses eFID 0, matching
 * mlxsw_sp_rif_vlan_deconfigure(). Returns 0 or -errno.
 */
static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid,
                       struct netlink_ext_ack *extack)
{
    u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
    struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
    u8 mac_profile;
    int err;

    err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr,
                       &mac_profile, extack);
    if (err)
        return err;
    rif->mac_profile_id = mac_profile;

    err = mlxsw_sp_rif_vlan_op(rif, vid, efid, true);
    if (err)
        goto err_rif_vlan_fid_op;

    err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                     mlxsw_sp_router_port(mlxsw_sp), true);
    if (err)
        goto err_fid_mc_flood_set;

    err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                     mlxsw_sp_router_port(mlxsw_sp), true);
    if (err)
        goto err_fid_bc_flood_set;

    err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
                  mlxsw_sp_fid_index(rif->fid), true);
    if (err)
        goto err_rif_fdb_op;

    err = mlxsw_sp_fid_rif_set(rif->fid, rif);
    if (err)
        goto err_fid_rif_set;

    return 0;

err_fid_rif_set:
    mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
                mlxsw_sp_fid_index(rif->fid), false);
err_rif_fdb_op:
    mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                   mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
    mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                   mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
    mlxsw_sp_rif_vlan_op(rif, vid, 0, false);
err_rif_vlan_fid_op:
    mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile);
    return err;
}
9630 
/* Tear down a VLAN RIF: exact reverse of mlxsw_sp_rif_vlan_configure(),
 * plus flushing FDB entries of any macvlan uppers.
 */
static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
{
    u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
    struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;

    mlxsw_sp_fid_rif_unset(rif->fid);
    mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
                mlxsw_sp_fid_index(rif->fid), false);
    mlxsw_sp_rif_macvlan_flush(rif);
    mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                   mlxsw_sp_router_port(mlxsw_sp), false);
    mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                   mlxsw_sp_router_port(mlxsw_sp), false);
    mlxsw_sp_rif_vlan_op(rif, vid, 0, false);
    mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
}
9647 
/* Spectrum-1 VLAN RIFs are programmed with eFID 0 (contrast with
 * mlxsw_sp2_rif_vlan_configure(), which passes the FID index).
 */
static int mlxsw_sp1_rif_vlan_configure(struct mlxsw_sp_rif *rif,
                    struct netlink_ext_ack *extack)
{
    return mlxsw_sp_rif_vlan_configure(rif, 0, extack);
}
9653 
/* Spectrum-1 operations for RIFs on top of a VLAN-aware bridge. */
static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_vlan_ops = {
    .type           = MLXSW_SP_RIF_TYPE_VLAN,
    .rif_size       = sizeof(struct mlxsw_sp_rif),
    .configure      = mlxsw_sp1_rif_vlan_configure,
    .deconfigure        = mlxsw_sp_rif_vlan_deconfigure,
    .fid_get        = mlxsw_sp_rif_vlan_fid_get,
    .fdb_del        = mlxsw_sp_rif_vlan_fdb_del,
};
9662 
9663 static int mlxsw_sp2_rif_vlan_configure(struct mlxsw_sp_rif *rif,
9664                     struct netlink_ext_ack *extack)
9665 {
9666     u16 efid = mlxsw_sp_fid_index(rif->fid);
9667 
9668     return mlxsw_sp_rif_vlan_configure(rif, efid, extack);
9669 }
9670 
/* Spectrum-2+ operations for RIFs on top of a VLAN-aware bridge. */
static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_vlan_ops = {
    .type           = MLXSW_SP_RIF_TYPE_VLAN,
    .rif_size       = sizeof(struct mlxsw_sp_rif),
    .configure      = mlxsw_sp2_rif_vlan_configure,
    .deconfigure        = mlxsw_sp_rif_vlan_deconfigure,
    .fid_get        = mlxsw_sp_rif_vlan_fid_get,
    .fdb_del        = mlxsw_sp_rif_vlan_fdb_del,
};
9679 
/* Downcast a generic RIF embedded in an IP-in-IP loopback RIF. */
static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
{
    return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}
9685 
9686 static void
9687 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
9688                const struct mlxsw_sp_rif_params *params)
9689 {
9690     struct mlxsw_sp_rif_params_ipip_lb *params_lb;
9691     struct mlxsw_sp_rif_ipip_lb *rif_lb;
9692 
9693     params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
9694                  common);
9695     rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
9696     rif_lb->lb_config = params_lb->lb_config;
9697 }
9698 
/* Spectrum-1 IP-in-IP loopback configuration: resolve (and reference)
 * the underlay virtual router from the tunnel's underlay table ID and
 * program the loopback against it. The VR reference and rif_count are
 * released by mlxsw_sp1_rif_ipip_lb_deconfigure(). Returns 0 or -errno.
 */
static int
mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif,
                struct netlink_ext_ack *extack)
{
    struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
    u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
    struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
    struct mlxsw_sp_vr *ul_vr;
    int err;

    ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
    if (IS_ERR(ul_vr))
        return PTR_ERR(ul_vr);

    err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
    if (err)
        goto err_loopback_op;

    /* Spectrum-1 references the underlay by VR; no underlay RIF used. */
    lb_rif->ul_vr_id = ul_vr->id;
    lb_rif->ul_rif_id = 0;
    ++ul_vr->rif_count;
    return 0;

err_loopback_op:
    mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
    return err;
}
9726 
/* Reverse of mlxsw_sp1_rif_ipip_lb_configure(): disable the loopback
 * and drop the reference on the underlay virtual router.
 */
static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
{
    struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
    struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
    struct mlxsw_sp_vr *ul_vr;

    ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
    mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);

    --ul_vr->rif_count;
    mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
}
9739 
/* Spectrum-1 operations for IP-in-IP loopback RIFs. */
static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
    .type           = MLXSW_SP_RIF_TYPE_IPIP_LB,
    .rif_size       = sizeof(struct mlxsw_sp_rif_ipip_lb),
    .setup                  = mlxsw_sp_rif_ipip_lb_setup,
    .configure      = mlxsw_sp1_rif_ipip_lb_configure,
    .deconfigure        = mlxsw_sp1_rif_ipip_lb_deconfigure,
};
9747 
/* Spectrum-1 RIF operations, indexed by enum mlxsw_sp_rif_type. */
static const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
    [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
    [MLXSW_SP_RIF_TYPE_VLAN]    = &mlxsw_sp1_rif_vlan_ops,
    [MLXSW_SP_RIF_TYPE_FID]     = &mlxsw_sp_rif_fid_ops,
    [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp1_rif_ipip_lb_ops,
};
9754 
/* Write (enable=true) or invalidate (enable=false) an underlay RIF as a
 * generic loopback interface. It has no netdev, so the MTU is pinned to
 * IP_MAX_MTU.
 */
static int
mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
{
    struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
    char ritr_pl[MLXSW_REG_RITR_LEN];

    mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
                ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
    mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
                         MLXSW_REG_RITR_LOOPBACK_GENERIC);

    return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
9768 
/* Create the underlay RIF for virtual router @vr: allocate a RIF index,
 * allocate a netdev-less RIF object, publish it in the rifs[] array and
 * enable it in hardware. Returns the RIF or ERR_PTR().
 *
 * NOTE(review): if mlxsw_sp_rif_alloc() fails, the index obtained from
 * mlxsw_sp_rif_index_alloc() is not explicitly released — confirm the
 * allocator derives free indexes from the rifs[] array (which is left
 * NULL here), so no leak occurs.
 */
static struct mlxsw_sp_rif *
mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
               struct netlink_ext_ack *extack)
{
    struct mlxsw_sp_rif *ul_rif;
    u16 rif_index;
    int err;

    err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
    if (err) {
        NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
        return ERR_PTR(err);
    }

    /* Underlay RIFs have no backing netdev, hence the NULL dev. */
    ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
    if (!ul_rif)
        return ERR_PTR(-ENOMEM);

    mlxsw_sp->router->rifs[rif_index] = ul_rif;
    ul_rif->mlxsw_sp = mlxsw_sp;
    err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
    if (err)
        goto ul_rif_op_err;

    atomic_inc(&mlxsw_sp->router->rifs_count);
    return ul_rif;

ul_rif_op_err:
    mlxsw_sp->router->rifs[rif_index] = NULL;
    kfree(ul_rif);
    return ERR_PTR(err);
}
9801 
/* Reverse of mlxsw_sp_ul_rif_create(): disable the underlay RIF in
 * hardware, unpublish it from rifs[] and free it.
 */
static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
{
    struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;

    atomic_dec(&mlxsw_sp->router->rifs_count);
    mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
    mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
    kfree(ul_rif);
}
9811 
/* Get a reference on the underlay RIF of the virtual router bound to
 * @tb_id, creating both on first use. The VR reference taken here is
 * kept while the underlay RIF exists and dropped by
 * mlxsw_sp_ul_rif_put(). Callers presumably hold the router lock —
 * refcount_inc_not_zero() alone does not serialize against creation.
 * Returns the RIF or ERR_PTR().
 */
static struct mlxsw_sp_rif *
mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
            struct netlink_ext_ack *extack)
{
    struct mlxsw_sp_vr *vr;
    int err;

    vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
    if (IS_ERR(vr))
        return ERR_CAST(vr);

    /* Fast path: the underlay RIF already exists; just take a ref. */
    if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
        return vr->ul_rif;

    vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
    if (IS_ERR(vr->ul_rif)) {
        err = PTR_ERR(vr->ul_rif);
        goto err_ul_rif_create;
    }

    vr->rif_count++;
    refcount_set(&vr->ul_rif_refcnt, 1);

    return vr->ul_rif;

err_ul_rif_create:
    mlxsw_sp_vr_put(mlxsw_sp, vr);
    return ERR_PTR(err);
}
9841 
/* Drop a reference on an underlay RIF; on the last one, destroy it and
 * release the virtual-router reference taken by mlxsw_sp_ul_rif_get().
 */
static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
{
    struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
    struct mlxsw_sp_vr *vr;

    vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];

    if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
        return;

    vr->rif_count--;
    mlxsw_sp_ul_rif_destroy(ul_rif);
    mlxsw_sp_vr_put(mlxsw_sp, vr);
}
9856 
9857 int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
9858                    u16 *ul_rif_index)
9859 {
9860     struct mlxsw_sp_rif *ul_rif;
9861     int err = 0;
9862 
9863     mutex_lock(&mlxsw_sp->router->lock);
9864     ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
9865     if (IS_ERR(ul_rif)) {
9866         err = PTR_ERR(ul_rif);
9867         goto out;
9868     }
9869     *ul_rif_index = ul_rif->rif_index;
9870 out:
9871     mutex_unlock(&mlxsw_sp->router->lock);
9872     return err;
9873 }
9874 
9875 void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
9876 {
9877     struct mlxsw_sp_rif *ul_rif;
9878 
9879     mutex_lock(&mlxsw_sp->router->lock);
9880     ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
9881     if (WARN_ON(!ul_rif))
9882         goto out;
9883 
9884     mlxsw_sp_ul_rif_put(ul_rif);
9885 out:
9886     mutex_unlock(&mlxsw_sp->router->lock);
9887 }
9888 
9889 static int
9890 mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif,
9891                 struct netlink_ext_ack *extack)
9892 {
9893     struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
9894     u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
9895     struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9896     struct mlxsw_sp_rif *ul_rif;
9897     int err;
9898 
9899     ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
9900     if (IS_ERR(ul_rif))
9901         return PTR_ERR(ul_rif);
9902 
9903     err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
9904     if (err)
9905         goto err_loopback_op;
9906 
9907     lb_rif->ul_vr_id = 0;
9908     lb_rif->ul_rif_id = ul_rif->rif_index;
9909 
9910     return 0;
9911 
9912 err_loopback_op:
9913     mlxsw_sp_ul_rif_put(ul_rif);
9914     return err;
9915 }
9916 
9917 static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
9918 {
9919     struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
9920     struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9921     struct mlxsw_sp_rif *ul_rif;
9922 
9923     ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
9924     mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
9925     mlxsw_sp_ul_rif_put(ul_rif);
9926 }
9927 
9928 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
9929     .type           = MLXSW_SP_RIF_TYPE_IPIP_LB,
9930     .rif_size       = sizeof(struct mlxsw_sp_rif_ipip_lb),
9931     .setup                  = mlxsw_sp_rif_ipip_lb_setup,
9932     .configure      = mlxsw_sp2_rif_ipip_lb_configure,
9933     .deconfigure        = mlxsw_sp2_rif_ipip_lb_deconfigure,
9934 };
9935 
/* RIF-type to ops mapping used on Spectrum-2. */
static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
    [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
    [MLXSW_SP_RIF_TYPE_VLAN]    = &mlxsw_sp2_rif_vlan_ops,
    [MLXSW_SP_RIF_TYPE_FID]     = &mlxsw_sp_rif_fid_ops,
    [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp2_rif_ipip_lb_ops,
};
9942 
9943 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
9944 {
9945     u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
9946     struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
9947     struct mlxsw_core *core = mlxsw_sp->core;
9948 
9949     if (!MLXSW_CORE_RES_VALID(core, MAX_RIF_MAC_PROFILES))
9950         return -EIO;
9951     mlxsw_sp->router->max_rif_mac_profile =
9952         MLXSW_CORE_RES_GET(core, MAX_RIF_MAC_PROFILES);
9953 
9954     mlxsw_sp->router->rifs = kcalloc(max_rifs,
9955                      sizeof(struct mlxsw_sp_rif *),
9956                      GFP_KERNEL);
9957     if (!mlxsw_sp->router->rifs)
9958         return -ENOMEM;
9959 
9960     idr_init(&mlxsw_sp->router->rif_mac_profiles_idr);
9961     atomic_set(&mlxsw_sp->router->rif_mac_profiles_count, 0);
9962     atomic_set(&mlxsw_sp->router->rifs_count, 0);
9963     devl_resource_occ_get_register(devlink,
9964                        MLXSW_SP_RESOURCE_RIF_MAC_PROFILES,
9965                        mlxsw_sp_rif_mac_profiles_occ_get,
9966                        mlxsw_sp);
9967     devl_resource_occ_get_register(devlink,
9968                        MLXSW_SP_RESOURCE_RIFS,
9969                        mlxsw_sp_rifs_occ_get,
9970                        mlxsw_sp);
9971 
9972     return 0;
9973 }
9974 
9975 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
9976 {
9977     struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
9978     int i;
9979 
9980     WARN_ON_ONCE(atomic_read(&mlxsw_sp->router->rifs_count));
9981     for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
9982         WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
9983 
9984     devl_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_RIFS);
9985     devl_resource_occ_get_unregister(devlink,
9986                      MLXSW_SP_RESOURCE_RIF_MAC_PROFILES);
9987     WARN_ON(!idr_is_empty(&mlxsw_sp->router->rif_mac_profiles_idr));
9988     idr_destroy(&mlxsw_sp->router->rif_mac_profiles_idr);
9989     kfree(mlxsw_sp->router->rifs);
9990 }
9991 
/* Program global IP-in-IP tunnel configuration via the TIGCR register.
 * NOTE(review): packs (true, 0) - presumably TTL handling plus a zero
 * constant TTL; confirm against the TIGCR register documentation.
 */
static int
mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
{
    char tigcr_pl[MLXSW_REG_TIGCR_LEN];

    mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
    return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
}
10000 
10001 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
10002 {
10003     int err;
10004 
10005     INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
10006 
10007     err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp);
10008     if (err)
10009         return err;
10010     err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp);
10011     if (err)
10012         return err;
10013 
10014     return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
10015 }
10016 
/* Select the Spectrum-1 IP-in-IP ops and run common initialization. */
static int mlxsw_sp1_ipips_init(struct mlxsw_sp *mlxsw_sp)
{
    mlxsw_sp->router->ipip_ops_arr = mlxsw_sp1_ipip_ops_arr;
    return mlxsw_sp_ipips_init(mlxsw_sp);
}
10022 
/* Select the Spectrum-2 IP-in-IP ops and run common initialization. */
static int mlxsw_sp2_ipips_init(struct mlxsw_sp *mlxsw_sp)
{
    mlxsw_sp->router->ipip_ops_arr = mlxsw_sp2_ipip_ops_arr;
    return mlxsw_sp_ipips_init(mlxsw_sp);
}
10028 
/* All tunnel entries must have been removed before teardown. */
static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
{
    WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
}
10033 
10034 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
10035 {
10036     struct mlxsw_sp_router *router;
10037 
10038     /* Flush pending FIB notifications and then flush the device's
10039      * table before requesting another dump. The FIB notification
10040      * block is unregistered, so no need to take RTNL.
10041      */
10042     mlxsw_core_flush_owq();
10043     router = container_of(nb, struct mlxsw_sp_router, fib_nb);
10044     mlxsw_sp_router_fib_flush(router->mlxsw_sp);
10045 }
10046 
10047 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Accumulated ECMP hash configuration, eventually programmed into the
 * RECR2 register by mlxsw_sp_mp_hash_init().
 */
struct mlxsw_sp_mp_hash_config {
    DECLARE_BITMAP(headers, __MLXSW_REG_RECR2_HEADER_CNT);
    DECLARE_BITMAP(fields, __MLXSW_REG_RECR2_FIELD_CNT);
    DECLARE_BITMAP(inner_headers, __MLXSW_REG_RECR2_HEADER_CNT);
    DECLARE_BITMAP(inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT);
    /* Set when hashing on inner headers requires a larger parsing depth. */
    bool inc_parsing_depth;
};
10055 
/* Helpers to set one (or a range of) RECR2 header/field enable bits in
 * the bitmaps of struct mlxsw_sp_mp_hash_config.
 */
#define MLXSW_SP_MP_HASH_HEADER_SET(_headers, _header) \
    bitmap_set(_headers, MLXSW_REG_RECR2_##_header, 1)

#define MLXSW_SP_MP_HASH_FIELD_SET(_fields, _field) \
    bitmap_set(_fields, MLXSW_REG_RECR2_##_field, 1)

#define MLXSW_SP_MP_HASH_FIELD_RANGE_SET(_fields, _field, _nr) \
    bitmap_set(_fields, MLXSW_REG_RECR2_##_field, _nr)
10064 
/* Enable hashing on all inner L3 fields (addresses, and for IPv6 also
 * next-header and flow label) for both inner IPv4 and inner IPv6.
 */
static void mlxsw_sp_mp_hash_inner_l3(struct mlxsw_sp_mp_hash_config *config)
{
    unsigned long *inner_headers = config->inner_headers;
    unsigned long *inner_fields = config->inner_fields;

    /* IPv4 inner */
    MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
    MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
    MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
    MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
    /* IPv6 inner */
    MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
    MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
    MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
    MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
    MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
    MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
    MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
    MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
}
10085 
/* Enable hashing on the outer IPv4 source and destination addresses. */
static void mlxsw_sp_mp4_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
{
    unsigned long *headers = config->headers;
    unsigned long *fields = config->fields;

    MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
    MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
    MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
    MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
}
10096 
/* Enable the inner-packet hash inputs selected by the user-configured
 * @hash_fields mask (FIB_MULTIPATH_HASH_FIELD_INNER_* bits).
 */
static void
mlxsw_sp_mp_hash_inner_custom(struct mlxsw_sp_mp_hash_config *config,
                  u32 hash_fields)
{
    unsigned long *inner_headers = config->inner_headers;
    unsigned long *inner_fields = config->inner_fields;

    /* IPv4 Inner */
    MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
    MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
    if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
        MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
    if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
        MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
    if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
        MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV4_PROTOCOL);
    /* IPv6 inner */
    MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
    MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
    if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) {
        MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
        MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
    }
    if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) {
        MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
        MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
    }
    if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
        MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
    if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
        MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
    /* L4 inner */
    MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV4);
    MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV6);
    if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
        MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_SPORT);
    if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
        MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_DPORT);
}
10136 
/* Build the IPv4 ECMP hash configuration mirroring the kernel's
 * net.ipv4.fib_multipath_hash_policy sysctl (see ip-sysctl.rst):
 * 0 - L3, 1 - L4, 2 - L3 or inner L3, 3 - custom field set.
 */
static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
                   struct mlxsw_sp_mp_hash_config *config)
{
    struct net *net = mlxsw_sp_net(mlxsw_sp);
    unsigned long *headers = config->headers;
    unsigned long *fields = config->fields;
    u32 hash_fields;

    switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
    case 0:
        /* L3: addresses only */
        mlxsw_sp_mp4_hash_outer_addr(config);
        break;
    case 1:
        /* L4: addresses, protocol and ports */
        mlxsw_sp_mp4_hash_outer_addr(config);
        MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
        MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
        MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
        MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
        break;
    case 2:
        /* L3 or inner L3 if present */
        /* Outer */
        mlxsw_sp_mp4_hash_outer_addr(config);
        /* Inner */
        mlxsw_sp_mp_hash_inner_l3(config);
        break;
    case 3:
        /* Custom: fields selected by fib_multipath_hash_fields */
        hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
        /* Outer */
        MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
        MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
        MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
        if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
            MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
        if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
            MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
        if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
            MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
        if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
            MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
        if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
            MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
        /* Inner */
        mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
        break;
    }
}
10183 
/* Enable hashing on the outer IPv6 source and destination addresses. */
static void mlxsw_sp_mp6_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
{
    unsigned long *headers = config->headers;
    unsigned long *fields = config->fields;

    MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
    MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
    MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
    MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
    MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
    MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
}
10196 
/* Build the IPv6 ECMP hash configuration mirroring the kernel's
 * net.ipv6.fib_multipath_hash_policy sysctl (see ip-sysctl.rst):
 * 0 - L3, 1 - L4, 2 - L3 or inner L3, 3 - custom field set.
 */
static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
                   struct mlxsw_sp_mp_hash_config *config)
{
    u32 hash_fields = ip6_multipath_hash_fields(mlxsw_sp_net(mlxsw_sp));
    unsigned long *headers = config->headers;
    unsigned long *fields = config->fields;

    switch (ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp))) {
    case 0:
        /* L3: addresses, next-header and flow label */
        mlxsw_sp_mp6_hash_outer_addr(config);
        MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
        MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
        break;
    case 1:
        /* L4: addresses, next-header and ports */
        mlxsw_sp_mp6_hash_outer_addr(config);
        MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
        MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
        MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
        MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
        break;
    case 2:
        /* L3 or inner L3 if present */
        /* Outer */
        mlxsw_sp_mp6_hash_outer_addr(config);
        MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
        MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
        /* Inner */
        mlxsw_sp_mp_hash_inner_l3(config);
        /* Inner headers lie beyond the default parsing depth. */
        config->inc_parsing_depth = true;
        break;
    case 3:
        /* Custom: fields selected by fib_multipath_hash_fields */
        /* Outer */
        MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
        MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
        MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
        if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) {
            MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
            MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
        }
        if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) {
            MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
            MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
        }
        if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
            MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
        if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
            MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
        if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
            MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
        if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
            MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
        /* Inner */
        mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
        if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)
            config->inc_parsing_depth = true;
        break;
    }
}
10254 
10255 static int mlxsw_sp_mp_hash_parsing_depth_adjust(struct mlxsw_sp *mlxsw_sp,
10256                          bool old_inc_parsing_depth,
10257                          bool new_inc_parsing_depth)
10258 {
10259     int err;
10260 
10261     if (!old_inc_parsing_depth && new_inc_parsing_depth) {
10262         err = mlxsw_sp_parsing_depth_inc(mlxsw_sp);
10263         if (err)
10264             return err;
10265         mlxsw_sp->router->inc_parsing_depth = true;
10266     } else if (old_inc_parsing_depth && !new_inc_parsing_depth) {
10267         mlxsw_sp_parsing_depth_dec(mlxsw_sp);
10268         mlxsw_sp->router->inc_parsing_depth = false;
10269     }
10270 
10271     return 0;
10272 }
10273 
/* Translate the kernel's IPv4/IPv6 multipath hash policies into a RECR2
 * register write, adjusting the parser depth first when inner-header
 * hashing is requested. Restores the old parsing depth on write failure.
 */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
    bool old_inc_parsing_depth, new_inc_parsing_depth;
    struct mlxsw_sp_mp_hash_config config = {};
    char recr2_pl[MLXSW_REG_RECR2_LEN];
    unsigned long bit;
    u32 seed;
    int err;

    /* Seed is derived from the device base MAC. */
    seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
    mlxsw_reg_recr2_pack(recr2_pl, seed);
    mlxsw_sp_mp4_hash_init(mlxsw_sp, &config);
    mlxsw_sp_mp6_hash_init(mlxsw_sp, &config);

    old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth;
    new_inc_parsing_depth = config.inc_parsing_depth;
    err = mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp,
                            old_inc_parsing_depth,
                            new_inc_parsing_depth);
    if (err)
        return err;

    /* Copy the accumulated bitmaps into the register payload. */
    for_each_set_bit(bit, config.headers, __MLXSW_REG_RECR2_HEADER_CNT)
        mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, bit, 1);
    for_each_set_bit(bit, config.fields, __MLXSW_REG_RECR2_FIELD_CNT)
        mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, bit, 1);
    for_each_set_bit(bit, config.inner_headers, __MLXSW_REG_RECR2_HEADER_CNT)
        mlxsw_reg_recr2_inner_header_enables_set(recr2_pl, bit, 1);
    for_each_set_bit(bit, config.inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT)
        mlxsw_reg_recr2_inner_header_fields_enable_set(recr2_pl, bit, 1);

    err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
    if (err)
        goto err_reg_write;

    return 0;

err_reg_write:
    /* Roll the parsing depth back to its previous state. */
    mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, new_inc_parsing_depth,
                          old_inc_parsing_depth);
    return err;
}
10316 #else
/* Without CONFIG_IP_ROUTE_MULTIPATH there is no hash policy to mirror. */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
    return 0;
}
10321 #endif
10322 
/* Program the RDPM register with a DSCP to switch-priority mapping that
 * matches the kernel's ToS-based prioritization.
 */
static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
{
    char rdpm_pl[MLXSW_REG_RDPM_LEN];
    unsigned int i;

    MLXSW_REG_ZERO(rdpm, rdpm_pl);

    /* HW is determining switch priority based on DSCP-bits, but the
     * kernel is still doing that based on the ToS. Since there's a
     * mismatch in bits we need to make sure to translate the right
     * value ToS would observe, skipping the 2 least-significant ECN bits.
     */
    for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
        mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));

    return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
}
10340 
10341 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
10342 {
10343     struct net *net = mlxsw_sp_net(mlxsw_sp);
10344     char rgcr_pl[MLXSW_REG_RGCR_LEN];
10345     u64 max_rifs;
10346     bool usp;
10347 
10348     if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
10349         return -EIO;
10350     max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
10351     usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority);
10352 
10353     mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
10354     mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
10355     mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
10356     return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
10357 }
10358 
/* Disable the HW router via RGCR (both enable flags cleared). The write
 * result is intentionally ignored - there is no recovery on teardown.
 */
static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
    char rgcr_pl[MLXSW_REG_RGCR_LEN];

    mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
    mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}
10366 
10367 static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp)
10368 {
10369     u16 lb_rif_index;
10370     int err;
10371 
10372     /* Create a generic loopback RIF associated with the main table
10373      * (default VRF). Any table can be used, but the main table exists
10374      * anyway, so we do not waste resources.
10375      */
10376     err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN,
10377                      &lb_rif_index);
10378     if (err)
10379         return err;
10380 
10381     mlxsw_sp->router->lb_rif_index = lb_rif_index;
10382 
10383     return 0;
10384 }
10385 
/* Release the generic loopback RIF created by mlxsw_sp_lb_rif_init(). */
static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp)
{
    mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index);
}
10390 
10391 static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp)
10392 {
10393     size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp1_adj_grp_size_ranges);
10394 
10395     mlxsw_sp->router->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
10396     mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp1_adj_grp_size_ranges;
10397     mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;
10398 
10399     return 0;
10400 }
10401 
/* Spectrum-1 router operations. */
const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops = {
    .init = mlxsw_sp1_router_init,
    .ipips_init = mlxsw_sp1_ipips_init,
};
10406 
10407 static int mlxsw_sp2_router_init(struct mlxsw_sp *mlxsw_sp)
10408 {
10409     size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp2_adj_grp_size_ranges);
10410 
10411     mlxsw_sp->router->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
10412     mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp2_adj_grp_size_ranges;
10413     mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;
10414 
10415     return 0;
10416 }
10417 
/* Spectrum-2 router operations. */
const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops = {
    .init = mlxsw_sp2_router_init,
    .ipips_init = mlxsw_sp2_ipips_init,
};
10422 
/* Initialize the router subsystem. Sub-systems are brought up in
 * dependency order and torn down in exact reverse order by the error
 * ladder at the bottom; notifiers are registered last, once the router
 * can service events. Returns 0 on success, negative errno on failure.
 */
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
             struct netlink_ext_ack *extack)
{
    struct mlxsw_sp_router *router;
    int err;

    router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
    if (!router)
        return -ENOMEM;
    mutex_init(&router->lock);
    mlxsw_sp->router = router;
    router->mlxsw_sp = mlxsw_sp;

    err = mlxsw_sp->router_ops->init(mlxsw_sp);
    if (err)
        goto err_router_ops_init;

    INIT_LIST_HEAD(&mlxsw_sp->router->nh_res_grp_list);
    INIT_DELAYED_WORK(&mlxsw_sp->router->nh_grp_activity_dw,
              mlxsw_sp_nh_grp_activity_work);
    INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
    err = __mlxsw_sp_router_init(mlxsw_sp);
    if (err)
        goto err_router_init;

    err = mlxsw_sp_rifs_init(mlxsw_sp);
    if (err)
        goto err_rifs_init;

    err = mlxsw_sp->router_ops->ipips_init(mlxsw_sp);
    if (err)
        goto err_ipips_init;

    err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
                  &mlxsw_sp_nexthop_ht_params);
    if (err)
        goto err_nexthop_ht_init;

    err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
                  &mlxsw_sp_nexthop_group_ht_params);
    if (err)
        goto err_nexthop_group_ht_init;

    INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
    err = mlxsw_sp_lpm_init(mlxsw_sp);
    if (err)
        goto err_lpm_init;

    err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
    if (err)
        goto err_mr_init;

    err = mlxsw_sp_vrs_init(mlxsw_sp);
    if (err)
        goto err_vrs_init;

    err = mlxsw_sp_lb_rif_init(mlxsw_sp);
    if (err)
        goto err_lb_rif_init;

    err = mlxsw_sp_neigh_init(mlxsw_sp);
    if (err)
        goto err_neigh_init;

    err = mlxsw_sp_mp_hash_init(mlxsw_sp);
    if (err)
        goto err_mp_hash_init;

    err = mlxsw_sp_dscp_init(mlxsw_sp);
    if (err)
        goto err_dscp_init;

    /* From here on, register notifiers - the router is operational. */
    router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
    err = register_inetaddr_notifier(&router->inetaddr_nb);
    if (err)
        goto err_register_inetaddr_notifier;

    router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
    err = register_inet6addr_notifier(&router->inet6addr_nb);
    if (err)
        goto err_register_inet6addr_notifier;

    mlxsw_sp->router->netevent_nb.notifier_call =
        mlxsw_sp_router_netevent_event;
    err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
    if (err)
        goto err_register_netevent_notifier;

    mlxsw_sp->router->nexthop_nb.notifier_call =
        mlxsw_sp_nexthop_obj_event;
    err = register_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
                    &mlxsw_sp->router->nexthop_nb,
                    extack);
    if (err)
        goto err_register_nexthop_notifier;

    mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
    err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
                    &mlxsw_sp->router->fib_nb,
                    mlxsw_sp_router_fib_dump_flush, extack);
    if (err)
        goto err_register_fib_notifier;

    mlxsw_sp->router->netdevice_nb.notifier_call =
        mlxsw_sp_router_netdevice_event;
    err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
                          &mlxsw_sp->router->netdevice_nb);
    if (err)
        goto err_register_netdev_notifier;

    return 0;

err_register_netdev_notifier:
    unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp),
                &mlxsw_sp->router->fib_nb);
err_register_fib_notifier:
    unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
                    &mlxsw_sp->router->nexthop_nb);
err_register_nexthop_notifier:
    unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
err_register_netevent_notifier:
    unregister_inet6addr_notifier(&router->inet6addr_nb);
err_register_inet6addr_notifier:
    unregister_inetaddr_notifier(&router->inetaddr_nb);
err_register_inetaddr_notifier:
    /* Drain work queued by notifiers before tearing sub-systems down. */
    mlxsw_core_flush_owq();
err_dscp_init:
err_mp_hash_init:
    mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
    mlxsw_sp_lb_rif_fini(mlxsw_sp);
err_lb_rif_init:
    mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
    mlxsw_sp_mr_fini(mlxsw_sp);
err_mr_init:
    mlxsw_sp_lpm_fini(mlxsw_sp);
err_lpm_init:
    rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
err_nexthop_group_ht_init:
    rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
err_nexthop_ht_init:
    mlxsw_sp_ipips_fini(mlxsw_sp);
err_ipips_init:
    mlxsw_sp_rifs_fini(mlxsw_sp);
err_rifs_init:
    __mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
    cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
err_router_ops_init:
    mutex_destroy(&mlxsw_sp->router->lock);
    kfree(mlxsw_sp->router);
    return err;
}
10577 
/* Tear down the router subsystem in exact reverse order of
 * mlxsw_sp_router_init(): unregister notifiers first, drain their
 * pending work, then dismantle the sub-systems.
 */
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
    unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
                      &mlxsw_sp->router->netdevice_nb);
    unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp),
                &mlxsw_sp->router->fib_nb);
    unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
                    &mlxsw_sp->router->nexthop_nb);
    unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
    unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
    unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
    /* Drain work queued by the just-unregistered notifiers. */
    mlxsw_core_flush_owq();
    mlxsw_sp_neigh_fini(mlxsw_sp);
    mlxsw_sp_lb_rif_fini(mlxsw_sp);
    mlxsw_sp_vrs_fini(mlxsw_sp);
    mlxsw_sp_mr_fini(mlxsw_sp);
    mlxsw_sp_lpm_fini(mlxsw_sp);
    rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
    rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
    mlxsw_sp_ipips_fini(mlxsw_sp);
    mlxsw_sp_rifs_fini(mlxsw_sp);
    __mlxsw_sp_router_fini(mlxsw_sp);
    cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
    mutex_destroy(&mlxsw_sp->router->lock);
    kfree(mlxsw_sp->router);
}