Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * VXLAN: Virtual eXtensible Local Area Network
0004  *
0005  * Copyright (c) 2012-2013 Vyatta Inc.
0006  */
0007 
0008 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0009 
0010 #include <linux/kernel.h>
0011 #include <linux/module.h>
0012 #include <linux/errno.h>
0013 #include <linux/slab.h>
0014 #include <linux/udp.h>
0015 #include <linux/igmp.h>
0016 #include <linux/if_ether.h>
0017 #include <linux/ethtool.h>
0018 #include <net/arp.h>
0019 #include <net/ndisc.h>
0020 #include <net/gro.h>
0021 #include <net/ipv6_stubs.h>
0022 #include <net/ip.h>
0023 #include <net/icmp.h>
0024 #include <net/rtnetlink.h>
0025 #include <net/inet_ecn.h>
0026 #include <net/net_namespace.h>
0027 #include <net/netns/generic.h>
0028 #include <net/tun_proto.h>
0029 #include <net/vxlan.h>
0030 #include <net/nexthop.h>
0031 
0032 #if IS_ENABLED(CONFIG_IPV6)
0033 #include <net/ip6_tunnel.h>
0034 #include <net/ip6_checksum.h>
0035 #endif
0036 
0037 #include "vxlan_private.h"
0038 
0039 #define VXLAN_VERSION   "0.1"
0040 
0041 #define FDB_AGE_DEFAULT 300 /* 5 min */
0042 #define FDB_AGE_INTERVAL (10 * HZ)  /* rescan interval */
0043 
0044 /* UDP port for VXLAN traffic.
0045  * The IANA assigned port is 4789, but the Linux default is 8472
0046  * for compatibility with early adopters.
0047  */
0048 static unsigned short vxlan_port __read_mostly = 8472;
0049 module_param_named(udp_port, vxlan_port, ushort, 0444); /* exposed as "udp_port", mode 0444 (no write bits) */
0050 MODULE_PARM_DESC(udp_port, "Destination UDP port");
0051 
0052 static bool log_ecn_error = true;
0053 module_param(log_ecn_error, bool, 0644); /* mode 0644: owner-writable parameter */
0054 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
0055 
0056 unsigned int vxlan_net_id; /* non-static; presumably the per-netns generic id - TODO confirm at registration site */
0057 
0058 const u8 all_zeros_mac[ETH_ALEN + 2]; /* zero MAC padded by 2 bytes so the 8-byte load in eth_hash() stays inside the array */
0059 static struct rtnl_link_ops vxlan_link_ops;
0060 
0061 static int vxlan_sock_add(struct vxlan_dev *vxlan);
0062 
0063 static void vxlan_vs_del_dev(struct vxlan_dev *vxlan);
0064 
0065 /* salt for hash table: passed as the seed to jhash_2words() in eth_vni_hash() */
0066 static u32 vxlan_salt __read_mostly;
0067 
0068 static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
0069 {
0070     return vs->flags & VXLAN_F_COLLECT_METADATA ||
0071            ip_tunnel_collect_metadata();
0072 }
0073 
0074 #if IS_ENABLED(CONFIG_IPV6)
0075 static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
0076 {
0077     if (nla_len(nla) >= sizeof(struct in6_addr)) {
0078         ip->sin6.sin6_addr = nla_get_in6_addr(nla);
0079         ip->sa.sa_family = AF_INET6;
0080         return 0;
0081     } else if (nla_len(nla) >= sizeof(__be32)) {
0082         ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
0083         ip->sa.sa_family = AF_INET;
0084         return 0;
0085     } else {
0086         return -EAFNOSUPPORT;
0087     }
0088 }
0089 
0090 static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
0091                   const union vxlan_addr *ip)
0092 {
0093     if (ip->sa.sa_family == AF_INET6)
0094         return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr);
0095     else
0096         return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
0097 }
0098 
0099 #else /* !CONFIG_IPV6 */
0100 
0101 static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
0102 {
0103     if (nla_len(nla) >= sizeof(struct in6_addr)) {
0104         return -EAFNOSUPPORT;
0105     } else if (nla_len(nla) >= sizeof(__be32)) {
0106         ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
0107         ip->sa.sa_family = AF_INET;
0108         return 0;
0109     } else {
0110         return -EAFNOSUPPORT;
0111     }
0112 }
0113 
0114 static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
0115                   const union vxlan_addr *ip)
0116 {
0117     return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
0118 }
0119 #endif
0120 
0121 /* Find VXLAN socket based on network namespace, address family, UDP port,
0122  * enabled unshareable flags and socket device binding (see l3mdev with
0123  * non-default VRF).
0124  */
0125 static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
0126                       __be16 port, u32 flags, int ifindex)
0127 {
0128     struct vxlan_sock *vs;
0129 
0130     flags &= VXLAN_F_RCV_FLAGS;
0131 
0132     hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
0133         if (inet_sk(vs->sock->sk)->inet_sport == port &&
0134             vxlan_get_sk_family(vs) == family &&
0135             vs->flags == flags &&
0136             vs->sock->sk->sk_bound_dev_if == ifindex)
0137             return vs;
0138     }
0139     return NULL;
0140 }
0141 
0142 static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs,
0143                        int ifindex, __be32 vni,
0144                        struct vxlan_vni_node **vninode)
0145 {
0146     struct vxlan_vni_node *vnode;
0147     struct vxlan_dev_node *node;
0148 
0149     /* For flow based devices, map all packets to VNI 0 */
0150     if (vs->flags & VXLAN_F_COLLECT_METADATA &&
0151         !(vs->flags & VXLAN_F_VNIFILTER))
0152         vni = 0;
0153 
0154     hlist_for_each_entry_rcu(node, vni_head(vs, vni), hlist) {
0155         if (!node->vxlan)
0156             continue;
0157         vnode = NULL;
0158         if (node->vxlan->cfg.flags & VXLAN_F_VNIFILTER) {
0159             vnode = vxlan_vnifilter_lookup(node->vxlan, vni);
0160             if (!vnode)
0161                 continue;
0162         } else if (node->vxlan->default_dst.remote_vni != vni) {
0163             continue;
0164         }
0165 
0166         if (IS_ENABLED(CONFIG_IPV6)) {
0167             const struct vxlan_config *cfg = &node->vxlan->cfg;
0168 
0169             if ((cfg->flags & VXLAN_F_IPV6_LINKLOCAL) &&
0170                 cfg->remote_ifindex != ifindex)
0171                 continue;
0172         }
0173 
0174         if (vninode)
0175             *vninode = vnode;
0176         return node->vxlan;
0177     }
0178 
0179     return NULL;
0180 }
0181 
0182 /* Look up VNI in a per net namespace table */
0183 static struct vxlan_dev *vxlan_find_vni(struct net *net, int ifindex,
0184                     __be32 vni, sa_family_t family,
0185                     __be16 port, u32 flags)
0186 {
0187     struct vxlan_sock *vs;
0188 
0189     vs = vxlan_find_sock(net, family, port, flags, ifindex);
0190     if (!vs)
0191         return NULL;
0192 
0193     return vxlan_vs_find_vni(vs, ifindex, vni, NULL);
0194 }
0195 
0196 /* Fill in neighbour message in skbuff. */
0197 static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
0198               const struct vxlan_fdb *fdb,
0199               u32 portid, u32 seq, int type, unsigned int flags,
0200               const struct vxlan_rdst *rdst)
0201 {
0202     unsigned long now = jiffies;
0203     struct nda_cacheinfo ci;
0204     bool send_ip, send_eth;
0205     struct nlmsghdr *nlh;
0206     struct nexthop *nh;
0207     struct ndmsg *ndm;
0208     int nh_family;
0209     u32 nh_id;
0210 
0211     nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
0212     if (nlh == NULL)
0213         return -EMSGSIZE;
0214 
0215     ndm = nlmsg_data(nlh);
0216     memset(ndm, 0, sizeof(*ndm));
0217 
0218     send_eth = send_ip = true;
0219 
0220     rcu_read_lock();
0221     nh = rcu_dereference(fdb->nh);
0222     if (nh) {
0223         nh_family = nexthop_get_family(nh);
0224         nh_id = nh->id;
0225     }
0226     rcu_read_unlock();
0227 
0228     if (type == RTM_GETNEIGH) {
0229         if (rdst) {
0230             send_ip = !vxlan_addr_any(&rdst->remote_ip);
0231             ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
0232         } else if (nh) {
0233             ndm->ndm_family = nh_family;
0234         }
0235         send_eth = !is_zero_ether_addr(fdb->eth_addr);
0236     } else
0237         ndm->ndm_family = AF_BRIDGE;
0238     ndm->ndm_state = fdb->state;
0239     ndm->ndm_ifindex = vxlan->dev->ifindex;
0240     ndm->ndm_flags = fdb->flags;
0241     if (rdst && rdst->offloaded)
0242         ndm->ndm_flags |= NTF_OFFLOADED;
0243     ndm->ndm_type = RTN_UNICAST;
0244 
0245     if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
0246         nla_put_s32(skb, NDA_LINK_NETNSID,
0247             peernet2id(dev_net(vxlan->dev), vxlan->net)))
0248         goto nla_put_failure;
0249 
0250     if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
0251         goto nla_put_failure;
0252     if (nh) {
0253         if (nla_put_u32(skb, NDA_NH_ID, nh_id))
0254             goto nla_put_failure;
0255     } else if (rdst) {
0256         if (send_ip && vxlan_nla_put_addr(skb, NDA_DST,
0257                           &rdst->remote_ip))
0258             goto nla_put_failure;
0259 
0260         if (rdst->remote_port &&
0261             rdst->remote_port != vxlan->cfg.dst_port &&
0262             nla_put_be16(skb, NDA_PORT, rdst->remote_port))
0263             goto nla_put_failure;
0264         if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
0265             nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
0266             goto nla_put_failure;
0267         if (rdst->remote_ifindex &&
0268             nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
0269             goto nla_put_failure;
0270     }
0271 
0272     if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->vni &&
0273         nla_put_u32(skb, NDA_SRC_VNI,
0274             be32_to_cpu(fdb->vni)))
0275         goto nla_put_failure;
0276 
0277     ci.ndm_used  = jiffies_to_clock_t(now - fdb->used);
0278     ci.ndm_confirmed = 0;
0279     ci.ndm_updated   = jiffies_to_clock_t(now - fdb->updated);
0280     ci.ndm_refcnt    = 0;
0281 
0282     if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
0283         goto nla_put_failure;
0284 
0285     nlmsg_end(skb, nlh);
0286     return 0;
0287 
0288 nla_put_failure:
0289     nlmsg_cancel(skb, nlh);
0290     return -EMSGSIZE;
0291 }
0292 
0293 static inline size_t vxlan_nlmsg_size(void)
0294 {
0295     return NLMSG_ALIGN(sizeof(struct ndmsg))
0296         + nla_total_size(ETH_ALEN) /* NDA_LLADDR */
0297         + nla_total_size(sizeof(struct in6_addr)) /* NDA_DST */
0298         + nla_total_size(sizeof(__be16)) /* NDA_PORT */
0299         + nla_total_size(sizeof(__be32)) /* NDA_VNI */
0300         + nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */
0301         + nla_total_size(sizeof(__s32)) /* NDA_LINK_NETNSID */
0302         + nla_total_size(sizeof(struct nda_cacheinfo));
0303 }
0304 
0305 static void __vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
0306                    struct vxlan_rdst *rd, int type)
0307 {
0308     struct net *net = dev_net(vxlan->dev);
0309     struct sk_buff *skb;
0310     int err = -ENOBUFS;
0311 
0312     skb = nlmsg_new(vxlan_nlmsg_size(), GFP_ATOMIC);
0313     if (skb == NULL)
0314         goto errout;
0315 
0316     err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, rd);
0317     if (err < 0) {
0318         /* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */
0319         WARN_ON(err == -EMSGSIZE);
0320         kfree_skb(skb);
0321         goto errout;
0322     }
0323 
0324     rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
0325     return;
0326 errout:
0327     if (err < 0)
0328         rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
0329 }
0330 
0331 static void vxlan_fdb_switchdev_notifier_info(const struct vxlan_dev *vxlan,
0332                 const struct vxlan_fdb *fdb,
0333                 const struct vxlan_rdst *rd,
0334                 struct netlink_ext_ack *extack,
0335                 struct switchdev_notifier_vxlan_fdb_info *fdb_info)
0336 {
0337     fdb_info->info.dev = vxlan->dev;
0338     fdb_info->info.extack = extack;
0339     fdb_info->remote_ip = rd->remote_ip;
0340     fdb_info->remote_port = rd->remote_port;
0341     fdb_info->remote_vni = rd->remote_vni;
0342     fdb_info->remote_ifindex = rd->remote_ifindex;
0343     memcpy(fdb_info->eth_addr, fdb->eth_addr, ETH_ALEN);
0344     fdb_info->vni = fdb->vni;
0345     fdb_info->offloaded = rd->offloaded;
0346     fdb_info->added_by_user = fdb->flags & NTF_VXLAN_ADDED_BY_USER;
0347 }
0348 
0349 static int vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan,
0350                           struct vxlan_fdb *fdb,
0351                           struct vxlan_rdst *rd,
0352                           bool adding,
0353                           struct netlink_ext_ack *extack)
0354 {
0355     struct switchdev_notifier_vxlan_fdb_info info;
0356     enum switchdev_notifier_type notifier_type;
0357     int ret;
0358 
0359     if (WARN_ON(!rd))
0360         return 0;
0361 
0362     notifier_type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE
0363                    : SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE;
0364     vxlan_fdb_switchdev_notifier_info(vxlan, fdb, rd, NULL, &info);
0365     ret = call_switchdev_notifiers(notifier_type, vxlan->dev,
0366                        &info.info, extack);
0367     return notifier_to_errno(ret);
0368 }
0369 
0370 static int vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
0371                 struct vxlan_rdst *rd, int type, bool swdev_notify,
0372                 struct netlink_ext_ack *extack)
0373 {
0374     int err;
0375 
0376     if (swdev_notify && rd) {
0377         switch (type) {
0378         case RTM_NEWNEIGH:
0379             err = vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
0380                                  true, extack);
0381             if (err)
0382                 return err;
0383             break;
0384         case RTM_DELNEIGH:
0385             vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
0386                                false, extack);
0387             break;
0388         }
0389     }
0390 
0391     __vxlan_fdb_notify(vxlan, fdb, rd, type);
0392     return 0;
0393 }
0394 
0395 static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
0396 {
0397     struct vxlan_dev *vxlan = netdev_priv(dev);
0398     struct vxlan_fdb f = {
0399         .state = NUD_STALE,
0400     };
0401     struct vxlan_rdst remote = {
0402         .remote_ip = *ipa, /* goes to NDA_DST */
0403         .remote_vni = cpu_to_be32(VXLAN_N_VID),
0404     };
0405 
0406     vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
0407 }
0408 
0409 static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
0410 {
0411     struct vxlan_fdb f = {
0412         .state = NUD_STALE,
0413     };
0414     struct vxlan_rdst remote = { };
0415 
0416     memcpy(f.eth_addr, eth_addr, ETH_ALEN);
0417 
0418     vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
0419 }
0420 
0421 /* Hash Ethernet address */
0422 static u32 eth_hash(const unsigned char *addr)
0423 {
0424     u64 value = get_unaligned((u64 *)addr);
0425 
0426     /* only want 6 bytes */
0427 #ifdef __BIG_ENDIAN
0428     value >>= 16;
0429 #else
0430     value <<= 16;
0431 #endif
0432     return hash_64(value, FDB_HASH_BITS);
0433 }
0434 
0435 u32 eth_vni_hash(const unsigned char *addr, __be32 vni)
0436 {
0437     /* use 1 byte of OUI and 3 bytes of NIC */
0438     u32 key = get_unaligned((u32 *)(addr + 2));
0439 
0440     return jhash_2words(key, vni, vxlan_salt) & (FDB_HASH_SIZE - 1);
0441 }
0442 
0443 u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni)
0444 {
0445     if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)
0446         return eth_vni_hash(mac, vni);
0447     else
0448         return eth_hash(mac);
0449 }
0450 
0451 /* Hash chain to use given mac address */
0452 static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
0453                         const u8 *mac, __be32 vni)
0454 {
0455     return &vxlan->fdb_head[fdb_head_index(vxlan, mac, vni)];
0456 }
0457 
0458 /* Look up Ethernet address in forwarding table */
0459 static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan,
0460                       const u8 *mac, __be32 vni)
0461 {
0462     struct hlist_head *head = vxlan_fdb_head(vxlan, mac, vni);
0463     struct vxlan_fdb *f;
0464 
0465     hlist_for_each_entry_rcu(f, head, hlist) {
0466         if (ether_addr_equal(mac, f->eth_addr)) {
0467             if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) {
0468                 if (vni == f->vni)
0469                     return f;
0470             } else {
0471                 return f;
0472             }
0473         }
0474     }
0475 
0476     return NULL;
0477 }
0478 
0479 static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
0480                     const u8 *mac, __be32 vni)
0481 {
0482     struct vxlan_fdb *f;
0483 
0484     f = __vxlan_find_mac(vxlan, mac, vni);
0485     if (f && f->used != jiffies)
0486         f->used = jiffies;
0487 
0488     return f;
0489 }
0490 
0491 /* caller should hold vxlan->hash_lock */
0492 static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
0493                           union vxlan_addr *ip, __be16 port,
0494                           __be32 vni, __u32 ifindex)
0495 {
0496     struct vxlan_rdst *rd;
0497 
0498     list_for_each_entry(rd, &f->remotes, list) {
0499         if (vxlan_addr_equal(&rd->remote_ip, ip) &&
0500             rd->remote_port == port &&
0501             rd->remote_vni == vni &&
0502             rd->remote_ifindex == ifindex)
0503             return rd;
0504     }
0505 
0506     return NULL;
0507 }
0508 
0509 int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
0510               struct switchdev_notifier_vxlan_fdb_info *fdb_info)
0511 {
0512     struct vxlan_dev *vxlan = netdev_priv(dev);
0513     u8 eth_addr[ETH_ALEN + 2] = { 0 };
0514     struct vxlan_rdst *rdst;
0515     struct vxlan_fdb *f;
0516     int rc = 0;
0517 
0518     if (is_multicast_ether_addr(mac) ||
0519         is_zero_ether_addr(mac))
0520         return -EINVAL;
0521 
0522     ether_addr_copy(eth_addr, mac);
0523 
0524     rcu_read_lock();
0525 
0526     f = __vxlan_find_mac(vxlan, eth_addr, vni);
0527     if (!f) {
0528         rc = -ENOENT;
0529         goto out;
0530     }
0531 
0532     rdst = first_remote_rcu(f);
0533     vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, NULL, fdb_info);
0534 
0535 out:
0536     rcu_read_unlock();
0537     return rc;
0538 }
0539 EXPORT_SYMBOL_GPL(vxlan_fdb_find_uc);
0540 
0541 static int vxlan_fdb_notify_one(struct notifier_block *nb,
0542                 const struct vxlan_dev *vxlan,
0543                 const struct vxlan_fdb *f,
0544                 const struct vxlan_rdst *rdst,
0545                 struct netlink_ext_ack *extack)
0546 {
0547     struct switchdev_notifier_vxlan_fdb_info fdb_info;
0548     int rc;
0549 
0550     vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, extack, &fdb_info);
0551     rc = nb->notifier_call(nb, SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
0552                    &fdb_info);
0553     return notifier_to_errno(rc);
0554 }
0555 
0556 int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
0557              struct notifier_block *nb,
0558              struct netlink_ext_ack *extack)
0559 {
0560     struct vxlan_dev *vxlan;
0561     struct vxlan_rdst *rdst;
0562     struct vxlan_fdb *f;
0563     unsigned int h;
0564     int rc = 0;
0565 
0566     if (!netif_is_vxlan(dev))
0567         return -EINVAL;
0568     vxlan = netdev_priv(dev);
0569 
0570     for (h = 0; h < FDB_HASH_SIZE; ++h) {
0571         spin_lock_bh(&vxlan->hash_lock[h]);
0572         hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist) {
0573             if (f->vni == vni) {
0574                 list_for_each_entry(rdst, &f->remotes, list) {
0575                     rc = vxlan_fdb_notify_one(nb, vxlan,
0576                                   f, rdst,
0577                                   extack);
0578                     if (rc)
0579                         goto unlock;
0580                 }
0581             }
0582         }
0583         spin_unlock_bh(&vxlan->hash_lock[h]);
0584     }
0585     return 0;
0586 
0587 unlock:
0588     spin_unlock_bh(&vxlan->hash_lock[h]);
0589     return rc;
0590 }
0591 EXPORT_SYMBOL_GPL(vxlan_fdb_replay);
0592 
0593 void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni)
0594 {
0595     struct vxlan_dev *vxlan;
0596     struct vxlan_rdst *rdst;
0597     struct vxlan_fdb *f;
0598     unsigned int h;
0599 
0600     if (!netif_is_vxlan(dev))
0601         return;
0602     vxlan = netdev_priv(dev);
0603 
0604     for (h = 0; h < FDB_HASH_SIZE; ++h) {
0605         spin_lock_bh(&vxlan->hash_lock[h]);
0606         hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist)
0607             if (f->vni == vni)
0608                 list_for_each_entry(rdst, &f->remotes, list)
0609                     rdst->offloaded = false;
0610         spin_unlock_bh(&vxlan->hash_lock[h]);
0611     }
0612 
0613 }
0614 EXPORT_SYMBOL_GPL(vxlan_fdb_clear_offload);
0615 
0616 /* Replace destination of unicast mac */
0617 static int vxlan_fdb_replace(struct vxlan_fdb *f,
0618                  union vxlan_addr *ip, __be16 port, __be32 vni,
0619                  __u32 ifindex, struct vxlan_rdst *oldrd)
0620 {
0621     struct vxlan_rdst *rd;
0622 
0623     rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
0624     if (rd)
0625         return 0;
0626 
0627     rd = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list);
0628     if (!rd)
0629         return 0;
0630 
0631     *oldrd = *rd;
0632     dst_cache_reset(&rd->dst_cache);
0633     rd->remote_ip = *ip;
0634     rd->remote_port = port;
0635     rd->remote_vni = vni;
0636     rd->remote_ifindex = ifindex;
0637     rd->offloaded = false;
0638     return 1;
0639 }
0640 
0641 /* Add/update destinations for multicast */
0642 static int vxlan_fdb_append(struct vxlan_fdb *f,
0643                 union vxlan_addr *ip, __be16 port, __be32 vni,
0644                 __u32 ifindex, struct vxlan_rdst **rdp)
0645 {
0646     struct vxlan_rdst *rd;
0647 
0648     rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
0649     if (rd)
0650         return 0;
0651 
0652     rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
0653     if (rd == NULL)
0654         return -ENOMEM;
0655 
0656     if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) {
0657         kfree(rd);
0658         return -ENOMEM;
0659     }
0660 
0661     rd->remote_ip = *ip;
0662     rd->remote_port = port;
0663     rd->offloaded = false;
0664     rd->remote_vni = vni;
0665     rd->remote_ifindex = ifindex;
0666 
0667     list_add_tail_rcu(&rd->list, &f->remotes);
0668 
0669     *rdp = rd;
0670     return 1;
0671 }
0672 
0673 static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
0674                       unsigned int off,
0675                       struct vxlanhdr *vh, size_t hdrlen,
0676                       __be32 vni_field,
0677                       struct gro_remcsum *grc,
0678                       bool nopartial)
0679 {
0680     size_t start, offset;
0681 
0682     if (skb->remcsum_offload)
0683         return vh;
0684 
0685     if (!NAPI_GRO_CB(skb)->csum_valid)
0686         return NULL;
0687 
0688     start = vxlan_rco_start(vni_field);
0689     offset = start + vxlan_rco_offset(vni_field);
0690 
0691     vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen,
0692                      start, offset, grc, nopartial);
0693 
0694     skb->remcsum_offload = 1;
0695 
0696     return vh;
0697 }
0698 
0699 static struct sk_buff *vxlan_gro_receive(struct sock *sk,
0700                      struct list_head *head,
0701                      struct sk_buff *skb)
0702 {
0703     struct sk_buff *pp = NULL;
0704     struct sk_buff *p;
0705     struct vxlanhdr *vh, *vh2;
0706     unsigned int hlen, off_vx;
0707     int flush = 1;
0708     struct vxlan_sock *vs = rcu_dereference_sk_user_data(sk);
0709     __be32 flags;
0710     struct gro_remcsum grc;
0711 
0712     skb_gro_remcsum_init(&grc);
0713 
0714     off_vx = skb_gro_offset(skb);
0715     hlen = off_vx + sizeof(*vh);
0716     vh   = skb_gro_header_fast(skb, off_vx);
0717     if (skb_gro_header_hard(skb, hlen)) {
0718         vh = skb_gro_header_slow(skb, hlen, off_vx);
0719         if (unlikely(!vh))
0720             goto out;
0721     }
0722 
0723     skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
0724 
0725     flags = vh->vx_flags;
0726 
0727     if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
0728         vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
0729                        vh->vx_vni, &grc,
0730                        !!(vs->flags &
0731                       VXLAN_F_REMCSUM_NOPARTIAL));
0732 
0733         if (!vh)
0734             goto out;
0735     }
0736 
0737     skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
0738 
0739     list_for_each_entry(p, head, list) {
0740         if (!NAPI_GRO_CB(p)->same_flow)
0741             continue;
0742 
0743         vh2 = (struct vxlanhdr *)(p->data + off_vx);
0744         if (vh->vx_flags != vh2->vx_flags ||
0745             vh->vx_vni != vh2->vx_vni) {
0746             NAPI_GRO_CB(p)->same_flow = 0;
0747             continue;
0748         }
0749     }
0750 
0751     pp = call_gro_receive(eth_gro_receive, head, skb);
0752     flush = 0;
0753 
0754 out:
0755     skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
0756 
0757     return pp;
0758 }
0759 
0760 static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
0761 {
0762     /* Sets 'skb->inner_mac_header' since we are always called with
0763      * 'skb->encapsulation' set.
0764      */
0765     return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
0766 }
0767 
0768 static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, const u8 *mac,
0769                      __u16 state, __be32 src_vni,
0770                      __u16 ndm_flags)
0771 {
0772     struct vxlan_fdb *f;
0773 
0774     f = kmalloc(sizeof(*f), GFP_ATOMIC);
0775     if (!f)
0776         return NULL;
0777     f->state = state;
0778     f->flags = ndm_flags;
0779     f->updated = f->used = jiffies;
0780     f->vni = src_vni;
0781     f->nh = NULL;
0782     RCU_INIT_POINTER(f->vdev, vxlan);
0783     INIT_LIST_HEAD(&f->nh_list);
0784     INIT_LIST_HEAD(&f->remotes);
0785     memcpy(f->eth_addr, mac, ETH_ALEN);
0786 
0787     return f;
0788 }
0789 
0790 static void vxlan_fdb_insert(struct vxlan_dev *vxlan, const u8 *mac,
0791                  __be32 src_vni, struct vxlan_fdb *f)
0792 {
0793     ++vxlan->addrcnt;
0794     hlist_add_head_rcu(&f->hlist,
0795                vxlan_fdb_head(vxlan, mac, src_vni));
0796 }
0797 
0798 static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
0799                    u32 nhid, struct netlink_ext_ack *extack)
0800 {
0801     struct nexthop *old_nh = rtnl_dereference(fdb->nh);
0802     struct nexthop *nh;
0803     int err = -EINVAL;
0804 
0805     if (old_nh && old_nh->id == nhid)
0806         return 0;
0807 
0808     nh = nexthop_find_by_id(vxlan->net, nhid);
0809     if (!nh) {
0810         NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
0811         goto err_inval;
0812     }
0813 
0814     if (!nexthop_get(nh)) {
0815         NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
0816         nh = NULL;
0817         goto err_inval;
0818     }
0819     if (!nexthop_is_fdb(nh)) {
0820         NL_SET_ERR_MSG(extack, "Nexthop is not a fdb nexthop");
0821         goto err_inval;
0822     }
0823 
0824     if (!nexthop_is_multipath(nh)) {
0825         NL_SET_ERR_MSG(extack, "Nexthop is not a multipath group");
0826         goto err_inval;
0827     }
0828 
0829     /* check nexthop group family */
0830     switch (vxlan->default_dst.remote_ip.sa.sa_family) {
0831     case AF_INET:
0832         if (!nexthop_has_v4(nh)) {
0833             err = -EAFNOSUPPORT;
0834             NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
0835             goto err_inval;
0836         }
0837         break;
0838     case AF_INET6:
0839         if (nexthop_has_v4(nh)) {
0840             err = -EAFNOSUPPORT;
0841             NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
0842             goto err_inval;
0843         }
0844     }
0845 
0846     if (old_nh) {
0847         list_del_rcu(&fdb->nh_list);
0848         nexthop_put(old_nh);
0849     }
0850     rcu_assign_pointer(fdb->nh, nh);
0851     list_add_tail_rcu(&fdb->nh_list, &nh->fdb_list);
0852     return 1;
0853 
0854 err_inval:
0855     if (nh)
0856         nexthop_put(nh);
0857     return err;
0858 }
0859 
0860 int vxlan_fdb_create(struct vxlan_dev *vxlan,
0861              const u8 *mac, union vxlan_addr *ip,
0862              __u16 state, __be16 port, __be32 src_vni,
0863              __be32 vni, __u32 ifindex, __u16 ndm_flags,
0864              u32 nhid, struct vxlan_fdb **fdb,
0865              struct netlink_ext_ack *extack)
0866 {
0867     struct vxlan_rdst *rd = NULL;
0868     struct vxlan_fdb *f;
0869     int rc;
0870 
0871     if (vxlan->cfg.addrmax &&
0872         vxlan->addrcnt >= vxlan->cfg.addrmax)
0873         return -ENOSPC;
0874 
0875     netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
0876     f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
0877     if (!f)
0878         return -ENOMEM;
0879 
0880     if (nhid)
0881         rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
0882     else
0883         rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
0884     if (rc < 0)
0885         goto errout;
0886 
0887     *fdb = f;
0888 
0889     return 0;
0890 
0891 errout:
0892     kfree(f);
0893     return rc;
0894 }
0895 
0896 static void __vxlan_fdb_free(struct vxlan_fdb *f)
0897 {
0898     struct vxlan_rdst *rd, *nd;
0899     struct nexthop *nh;
0900 
0901     nh = rcu_dereference_raw(f->nh);
0902     if (nh) {
0903         rcu_assign_pointer(f->nh, NULL);
0904         rcu_assign_pointer(f->vdev, NULL);
0905         nexthop_put(nh);
0906     }
0907 
0908     list_for_each_entry_safe(rd, nd, &f->remotes, list) {
0909         dst_cache_destroy(&rd->dst_cache);
0910         kfree(rd);
0911     }
0912     kfree(f);
0913 }
0914 
0915 static void vxlan_fdb_free(struct rcu_head *head)
0916 {
0917     struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
0918 
0919     __vxlan_fdb_free(f);
0920 }
0921 
0922 static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
0923                   bool do_notify, bool swdev_notify)
0924 {
0925     struct vxlan_rdst *rd;
0926 
0927     netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr);
0928 
0929     --vxlan->addrcnt;
0930     if (do_notify) {
0931         if (rcu_access_pointer(f->nh))
0932             vxlan_fdb_notify(vxlan, f, NULL, RTM_DELNEIGH,
0933                      swdev_notify, NULL);
0934         else
0935             list_for_each_entry(rd, &f->remotes, list)
0936                 vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH,
0937                          swdev_notify, NULL);
0938     }
0939 
0940     hlist_del_rcu(&f->hlist);
0941     list_del_rcu(&f->nh_list);
0942     call_rcu(&f->rcu, vxlan_fdb_free);
0943 }
0944 
0945 static void vxlan_dst_free(struct rcu_head *head)
0946 {
0947     struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
0948 
0949     dst_cache_destroy(&rd->dst_cache);
0950     kfree(rd);
0951 }
0952 
0953 static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
0954                      union vxlan_addr *ip,
0955                      __u16 state, __u16 flags,
0956                      __be16 port, __be32 vni,
0957                      __u32 ifindex, __u16 ndm_flags,
0958                      struct vxlan_fdb *f, u32 nhid,
0959                      bool swdev_notify,
0960                      struct netlink_ext_ack *extack)
0961 {
0962     __u16 fdb_flags = (ndm_flags & ~NTF_USE);
0963     struct vxlan_rdst *rd = NULL;
0964     struct vxlan_rdst oldrd;
0965     int notify = 0;
0966     int rc = 0;
0967     int err;
0968 
0969     if (nhid && !rcu_access_pointer(f->nh)) {
0970         NL_SET_ERR_MSG(extack,
0971                    "Cannot replace an existing non nexthop fdb with a nexthop");
0972         return -EOPNOTSUPP;
0973     }
0974 
0975     if (nhid && (flags & NLM_F_APPEND)) {
0976         NL_SET_ERR_MSG(extack,
0977                    "Cannot append to a nexthop fdb");
0978         return -EOPNOTSUPP;
0979     }
0980 
0981     /* Do not allow an externally learned entry to take over an entry added
0982      * by the user.
0983      */
0984     if (!(fdb_flags & NTF_EXT_LEARNED) ||
0985         !(f->flags & NTF_VXLAN_ADDED_BY_USER)) {
0986         if (f->state != state) {
0987             f->state = state;
0988             f->updated = jiffies;
0989             notify = 1;
0990         }
0991         if (f->flags != fdb_flags) {
0992             f->flags = fdb_flags;
0993             f->updated = jiffies;
0994             notify = 1;
0995         }
0996     }
0997 
0998     if ((flags & NLM_F_REPLACE)) {
0999         /* Only change unicasts */
1000         if (!(is_multicast_ether_addr(f->eth_addr) ||
1001               is_zero_ether_addr(f->eth_addr))) {
1002             if (nhid) {
1003                 rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
1004                 if (rc < 0)
1005                     return rc;
1006             } else {
1007                 rc = vxlan_fdb_replace(f, ip, port, vni,
1008                                ifindex, &oldrd);
1009             }
1010             notify |= rc;
1011         } else {
1012             NL_SET_ERR_MSG(extack, "Cannot replace non-unicast fdb entries");
1013             return -EOPNOTSUPP;
1014         }
1015     }
1016     if ((flags & NLM_F_APPEND) &&
1017         (is_multicast_ether_addr(f->eth_addr) ||
1018          is_zero_ether_addr(f->eth_addr))) {
1019         rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
1020 
1021         if (rc < 0)
1022             return rc;
1023         notify |= rc;
1024     }
1025 
1026     if (ndm_flags & NTF_USE)
1027         f->used = jiffies;
1028 
1029     if (notify) {
1030         if (rd == NULL)
1031             rd = first_remote_rtnl(f);
1032 
1033         err = vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH,
1034                        swdev_notify, extack);
1035         if (err)
1036             goto err_notify;
1037     }
1038 
1039     return 0;
1040 
1041 err_notify:
1042     if (nhid)
1043         return err;
1044     if ((flags & NLM_F_REPLACE) && rc)
1045         *rd = oldrd;
1046     else if ((flags & NLM_F_APPEND) && rc) {
1047         list_del_rcu(&rd->list);
1048         call_rcu(&rd->rcu, vxlan_dst_free);
1049     }
1050     return err;
1051 }
1052 
1053 static int vxlan_fdb_update_create(struct vxlan_dev *vxlan,
1054                    const u8 *mac, union vxlan_addr *ip,
1055                    __u16 state, __u16 flags,
1056                    __be16 port, __be32 src_vni, __be32 vni,
1057                    __u32 ifindex, __u16 ndm_flags, u32 nhid,
1058                    bool swdev_notify,
1059                    struct netlink_ext_ack *extack)
1060 {
1061     __u16 fdb_flags = (ndm_flags & ~NTF_USE);
1062     struct vxlan_fdb *f;
1063     int rc;
1064 
1065     /* Disallow replace to add a multicast entry */
1066     if ((flags & NLM_F_REPLACE) &&
1067         (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
1068         return -EOPNOTSUPP;
1069 
1070     netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
1071     rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
1072                   vni, ifindex, fdb_flags, nhid, &f, extack);
1073     if (rc < 0)
1074         return rc;
1075 
1076     vxlan_fdb_insert(vxlan, mac, src_vni, f);
1077     rc = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH,
1078                   swdev_notify, extack);
1079     if (rc)
1080         goto err_notify;
1081 
1082     return 0;
1083 
1084 err_notify:
1085     vxlan_fdb_destroy(vxlan, f, false, false);
1086     return rc;
1087 }
1088 
1089 /* Add new entry to forwarding table -- assumes lock held */
1090 int vxlan_fdb_update(struct vxlan_dev *vxlan,
1091              const u8 *mac, union vxlan_addr *ip,
1092              __u16 state, __u16 flags,
1093              __be16 port, __be32 src_vni, __be32 vni,
1094              __u32 ifindex, __u16 ndm_flags, u32 nhid,
1095              bool swdev_notify,
1096              struct netlink_ext_ack *extack)
1097 {
1098     struct vxlan_fdb *f;
1099 
1100     f = __vxlan_find_mac(vxlan, mac, src_vni);
1101     if (f) {
1102         if (flags & NLM_F_EXCL) {
1103             netdev_dbg(vxlan->dev,
1104                    "lost race to create %pM\n", mac);
1105             return -EEXIST;
1106         }
1107 
1108         return vxlan_fdb_update_existing(vxlan, ip, state, flags, port,
1109                          vni, ifindex, ndm_flags, f,
1110                          nhid, swdev_notify, extack);
1111     } else {
1112         if (!(flags & NLM_F_CREATE))
1113             return -ENOENT;
1114 
1115         return vxlan_fdb_update_create(vxlan, mac, ip, state, flags,
1116                            port, src_vni, vni, ifindex,
1117                            ndm_flags, nhid, swdev_notify,
1118                            extack);
1119     }
1120 }
1121 
1122 static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
1123                   struct vxlan_rdst *rd, bool swdev_notify)
1124 {
1125     list_del_rcu(&rd->list);
1126     vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, swdev_notify, NULL);
1127     call_rcu(&rd->rcu, vxlan_dst_free);
1128 }
1129 
1130 static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
1131                union vxlan_addr *ip, __be16 *port, __be32 *src_vni,
1132                __be32 *vni, u32 *ifindex, u32 *nhid,
1133                struct netlink_ext_ack *extack)
1134 {
1135     struct net *net = dev_net(vxlan->dev);
1136     int err;
1137 
1138     if (tb[NDA_NH_ID] &&
1139         (tb[NDA_DST] || tb[NDA_VNI] || tb[NDA_IFINDEX] || tb[NDA_PORT])) {
1140         NL_SET_ERR_MSG(extack, "DST, VNI, ifindex and port are mutually exclusive with NH_ID");
1141         return -EINVAL;
1142     }
1143 
1144     if (tb[NDA_DST]) {
1145         err = vxlan_nla_get_addr(ip, tb[NDA_DST]);
1146         if (err) {
1147             NL_SET_ERR_MSG(extack, "Unsupported address family");
1148             return err;
1149         }
1150     } else {
1151         union vxlan_addr *remote = &vxlan->default_dst.remote_ip;
1152 
1153         if (remote->sa.sa_family == AF_INET) {
1154             ip->sin.sin_addr.s_addr = htonl(INADDR_ANY);
1155             ip->sa.sa_family = AF_INET;
1156 #if IS_ENABLED(CONFIG_IPV6)
1157         } else {
1158             ip->sin6.sin6_addr = in6addr_any;
1159             ip->sa.sa_family = AF_INET6;
1160 #endif
1161         }
1162     }
1163 
1164     if (tb[NDA_PORT]) {
1165         if (nla_len(tb[NDA_PORT]) != sizeof(__be16)) {
1166             NL_SET_ERR_MSG(extack, "Invalid vxlan port");
1167             return -EINVAL;
1168         }
1169         *port = nla_get_be16(tb[NDA_PORT]);
1170     } else {
1171         *port = vxlan->cfg.dst_port;
1172     }
1173 
1174     if (tb[NDA_VNI]) {
1175         if (nla_len(tb[NDA_VNI]) != sizeof(u32)) {
1176             NL_SET_ERR_MSG(extack, "Invalid vni");
1177             return -EINVAL;
1178         }
1179         *vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI]));
1180     } else {
1181         *vni = vxlan->default_dst.remote_vni;
1182     }
1183 
1184     if (tb[NDA_SRC_VNI]) {
1185         if (nla_len(tb[NDA_SRC_VNI]) != sizeof(u32)) {
1186             NL_SET_ERR_MSG(extack, "Invalid src vni");
1187             return -EINVAL;
1188         }
1189         *src_vni = cpu_to_be32(nla_get_u32(tb[NDA_SRC_VNI]));
1190     } else {
1191         *src_vni = vxlan->default_dst.remote_vni;
1192     }
1193 
1194     if (tb[NDA_IFINDEX]) {
1195         struct net_device *tdev;
1196 
1197         if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32)) {
1198             NL_SET_ERR_MSG(extack, "Invalid ifindex");
1199             return -EINVAL;
1200         }
1201         *ifindex = nla_get_u32(tb[NDA_IFINDEX]);
1202         tdev = __dev_get_by_index(net, *ifindex);
1203         if (!tdev) {
1204             NL_SET_ERR_MSG(extack, "Device not found");
1205             return -EADDRNOTAVAIL;
1206         }
1207     } else {
1208         *ifindex = 0;
1209     }
1210 
1211     if (tb[NDA_NH_ID])
1212         *nhid = nla_get_u32(tb[NDA_NH_ID]);
1213     else
1214         *nhid = 0;
1215 
1216     return 0;
1217 }
1218 
1219 /* Add static entry (via netlink) */
1220 static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
1221              struct net_device *dev,
1222              const unsigned char *addr, u16 vid, u16 flags,
1223              struct netlink_ext_ack *extack)
1224 {
1225     struct vxlan_dev *vxlan = netdev_priv(dev);
1226     /* struct net *net = dev_net(vxlan->dev); */
1227     union vxlan_addr ip;
1228     __be16 port;
1229     __be32 src_vni, vni;
1230     u32 ifindex, nhid;
1231     u32 hash_index;
1232     int err;
1233 
1234     if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
1235         pr_info("RTM_NEWNEIGH with invalid state %#x\n",
1236             ndm->ndm_state);
1237         return -EINVAL;
1238     }
1239 
1240     if (!tb || (!tb[NDA_DST] && !tb[NDA_NH_ID]))
1241         return -EINVAL;
1242 
1243     err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
1244                   &nhid, extack);
1245     if (err)
1246         return err;
1247 
1248     if (vxlan->default_dst.remote_ip.sa.sa_family != ip.sa.sa_family)
1249         return -EAFNOSUPPORT;
1250 
1251     hash_index = fdb_head_index(vxlan, addr, src_vni);
1252     spin_lock_bh(&vxlan->hash_lock[hash_index]);
1253     err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
1254                    port, src_vni, vni, ifindex,
1255                    ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER,
1256                    nhid, true, extack);
1257     spin_unlock_bh(&vxlan->hash_lock[hash_index]);
1258 
1259     return err;
1260 }
1261 
1262 int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
1263                const unsigned char *addr, union vxlan_addr ip,
1264                __be16 port, __be32 src_vni, __be32 vni,
1265                u32 ifindex, bool swdev_notify)
1266 {
1267     struct vxlan_rdst *rd = NULL;
1268     struct vxlan_fdb *f;
1269     int err = -ENOENT;
1270 
1271     f = vxlan_find_mac(vxlan, addr, src_vni);
1272     if (!f)
1273         return err;
1274 
1275     if (!vxlan_addr_any(&ip)) {
1276         rd = vxlan_fdb_find_rdst(f, &ip, port, vni, ifindex);
1277         if (!rd)
1278             goto out;
1279     }
1280 
1281     /* remove a destination if it's not the only one on the list,
1282      * otherwise destroy the fdb entry
1283      */
1284     if (rd && !list_is_singular(&f->remotes)) {
1285         vxlan_fdb_dst_destroy(vxlan, f, rd, swdev_notify);
1286         goto out;
1287     }
1288 
1289     vxlan_fdb_destroy(vxlan, f, true, swdev_notify);
1290 
1291 out:
1292     return 0;
1293 }
1294 
1295 /* Delete entry (via netlink) */
1296 static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
1297                 struct net_device *dev,
1298                 const unsigned char *addr, u16 vid,
1299                 struct netlink_ext_ack *extack)
1300 {
1301     struct vxlan_dev *vxlan = netdev_priv(dev);
1302     union vxlan_addr ip;
1303     __be32 src_vni, vni;
1304     u32 ifindex, nhid;
1305     u32 hash_index;
1306     __be16 port;
1307     int err;
1308 
1309     err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
1310                   &nhid, extack);
1311     if (err)
1312         return err;
1313 
1314     hash_index = fdb_head_index(vxlan, addr, src_vni);
1315     spin_lock_bh(&vxlan->hash_lock[hash_index]);
1316     err = __vxlan_fdb_delete(vxlan, addr, ip, port, src_vni, vni, ifindex,
1317                  true);
1318     spin_unlock_bh(&vxlan->hash_lock[hash_index]);
1319 
1320     return err;
1321 }
1322 
1323 /* Dump forwarding table */
1324 static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
1325               struct net_device *dev,
1326               struct net_device *filter_dev, int *idx)
1327 {
1328     struct vxlan_dev *vxlan = netdev_priv(dev);
1329     unsigned int h;
1330     int err = 0;
1331 
1332     for (h = 0; h < FDB_HASH_SIZE; ++h) {
1333         struct vxlan_fdb *f;
1334 
1335         rcu_read_lock();
1336         hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
1337             struct vxlan_rdst *rd;
1338 
1339             if (rcu_access_pointer(f->nh)) {
1340                 if (*idx < cb->args[2])
1341                     goto skip_nh;
1342                 err = vxlan_fdb_info(skb, vxlan, f,
1343                              NETLINK_CB(cb->skb).portid,
1344                              cb->nlh->nlmsg_seq,
1345                              RTM_NEWNEIGH,
1346                              NLM_F_MULTI, NULL);
1347                 if (err < 0) {
1348                     rcu_read_unlock();
1349                     goto out;
1350                 }
1351 skip_nh:
1352                 *idx += 1;
1353                 continue;
1354             }
1355 
1356             list_for_each_entry_rcu(rd, &f->remotes, list) {
1357                 if (*idx < cb->args[2])
1358                     goto skip;
1359 
1360                 err = vxlan_fdb_info(skb, vxlan, f,
1361                              NETLINK_CB(cb->skb).portid,
1362                              cb->nlh->nlmsg_seq,
1363                              RTM_NEWNEIGH,
1364                              NLM_F_MULTI, rd);
1365                 if (err < 0) {
1366                     rcu_read_unlock();
1367                     goto out;
1368                 }
1369 skip:
1370                 *idx += 1;
1371             }
1372         }
1373         rcu_read_unlock();
1374     }
1375 out:
1376     return err;
1377 }
1378 
1379 static int vxlan_fdb_get(struct sk_buff *skb,
1380              struct nlattr *tb[],
1381              struct net_device *dev,
1382              const unsigned char *addr,
1383              u16 vid, u32 portid, u32 seq,
1384              struct netlink_ext_ack *extack)
1385 {
1386     struct vxlan_dev *vxlan = netdev_priv(dev);
1387     struct vxlan_fdb *f;
1388     __be32 vni;
1389     int err;
1390 
1391     if (tb[NDA_VNI])
1392         vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI]));
1393     else
1394         vni = vxlan->default_dst.remote_vni;
1395 
1396     rcu_read_lock();
1397 
1398     f = __vxlan_find_mac(vxlan, addr, vni);
1399     if (!f) {
1400         NL_SET_ERR_MSG(extack, "Fdb entry not found");
1401         err = -ENOENT;
1402         goto errout;
1403     }
1404 
1405     err = vxlan_fdb_info(skb, vxlan, f, portid, seq,
1406                  RTM_NEWNEIGH, 0, first_remote_rcu(f));
1407 errout:
1408     rcu_read_unlock();
1409     return err;
1410 }
1411 
1412 /* Watch incoming packets to learn mapping between Ethernet address
1413  * and Tunnel endpoint.
1414  * Return true if packet is bogus and should be dropped.
1415  */
1416 static bool vxlan_snoop(struct net_device *dev,
1417             union vxlan_addr *src_ip, const u8 *src_mac,
1418             u32 src_ifindex, __be32 vni)
1419 {
1420     struct vxlan_dev *vxlan = netdev_priv(dev);
1421     struct vxlan_fdb *f;
1422     u32 ifindex = 0;
1423 
1424 #if IS_ENABLED(CONFIG_IPV6)
1425     if (src_ip->sa.sa_family == AF_INET6 &&
1426         (ipv6_addr_type(&src_ip->sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL))
1427         ifindex = src_ifindex;
1428 #endif
1429 
1430     f = vxlan_find_mac(vxlan, src_mac, vni);
1431     if (likely(f)) {
1432         struct vxlan_rdst *rdst = first_remote_rcu(f);
1433 
1434         if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
1435                rdst->remote_ifindex == ifindex))
1436             return false;
1437 
1438         /* Don't migrate static entries, drop packets */
1439         if (f->state & (NUD_PERMANENT | NUD_NOARP))
1440             return true;
1441 
1442         /* Don't override an fdb with nexthop with a learnt entry */
1443         if (rcu_access_pointer(f->nh))
1444             return true;
1445 
1446         if (net_ratelimit())
1447             netdev_info(dev,
1448                     "%pM migrated from %pIS to %pIS\n",
1449                     src_mac, &rdst->remote_ip.sa, &src_ip->sa);
1450 
1451         rdst->remote_ip = *src_ip;
1452         f->updated = jiffies;
1453         vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
1454     } else {
1455         u32 hash_index = fdb_head_index(vxlan, src_mac, vni);
1456 
1457         /* learned new entry */
1458         spin_lock(&vxlan->hash_lock[hash_index]);
1459 
1460         /* close off race between vxlan_flush and incoming packets */
1461         if (netif_running(dev))
1462             vxlan_fdb_update(vxlan, src_mac, src_ip,
1463                      NUD_REACHABLE,
1464                      NLM_F_EXCL|NLM_F_CREATE,
1465                      vxlan->cfg.dst_port,
1466                      vni,
1467                      vxlan->default_dst.remote_vni,
1468                      ifindex, NTF_SELF, 0, true, NULL);
1469         spin_unlock(&vxlan->hash_lock[hash_index]);
1470     }
1471 
1472     return false;
1473 }
1474 
1475 static bool __vxlan_sock_release_prep(struct vxlan_sock *vs)
1476 {
1477     struct vxlan_net *vn;
1478 
1479     if (!vs)
1480         return false;
1481     if (!refcount_dec_and_test(&vs->refcnt))
1482         return false;
1483 
1484     vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
1485     spin_lock(&vn->sock_lock);
1486     hlist_del_rcu(&vs->hlist);
1487     udp_tunnel_notify_del_rx_port(vs->sock,
1488                       (vs->flags & VXLAN_F_GPE) ?
1489                       UDP_TUNNEL_TYPE_VXLAN_GPE :
1490                       UDP_TUNNEL_TYPE_VXLAN);
1491     spin_unlock(&vn->sock_lock);
1492 
1493     return true;
1494 }
1495 
1496 static void vxlan_sock_release(struct vxlan_dev *vxlan)
1497 {
1498     struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
1499 #if IS_ENABLED(CONFIG_IPV6)
1500     struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
1501 
1502     RCU_INIT_POINTER(vxlan->vn6_sock, NULL);
1503 #endif
1504 
1505     RCU_INIT_POINTER(vxlan->vn4_sock, NULL);
1506     synchronize_net();
1507 
1508     if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
1509         vxlan_vs_del_vnigrp(vxlan);
1510     else
1511         vxlan_vs_del_dev(vxlan);
1512 
1513     if (__vxlan_sock_release_prep(sock4)) {
1514         udp_tunnel_sock_release(sock4->sock);
1515         kfree(sock4);
1516     }
1517 
1518 #if IS_ENABLED(CONFIG_IPV6)
1519     if (__vxlan_sock_release_prep(sock6)) {
1520         udp_tunnel_sock_release(sock6->sock);
1521         kfree(sock6);
1522     }
1523 #endif
1524 }
1525 
1526 static bool vxlan_remcsum(struct vxlanhdr *unparsed,
1527               struct sk_buff *skb, u32 vxflags)
1528 {
1529     size_t start, offset;
1530 
1531     if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload)
1532         goto out;
1533 
1534     start = vxlan_rco_start(unparsed->vx_vni);
1535     offset = start + vxlan_rco_offset(unparsed->vx_vni);
1536 
1537     if (!pskb_may_pull(skb, offset + sizeof(u16)))
1538         return false;
1539 
1540     skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset,
1541                 !!(vxflags & VXLAN_F_REMCSUM_NOPARTIAL));
1542 out:
1543     unparsed->vx_flags &= ~VXLAN_HF_RCO;
1544     unparsed->vx_vni &= VXLAN_VNI_MASK;
1545     return true;
1546 }
1547 
1548 static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
1549                 struct sk_buff *skb, u32 vxflags,
1550                 struct vxlan_metadata *md)
1551 {
1552     struct vxlanhdr_gbp *gbp = (struct vxlanhdr_gbp *)unparsed;
1553     struct metadata_dst *tun_dst;
1554 
1555     if (!(unparsed->vx_flags & VXLAN_HF_GBP))
1556         goto out;
1557 
1558     md->gbp = ntohs(gbp->policy_id);
1559 
1560     tun_dst = (struct metadata_dst *)skb_dst(skb);
1561     if (tun_dst) {
1562         tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
1563         tun_dst->u.tun_info.options_len = sizeof(*md);
1564     }
1565     if (gbp->dont_learn)
1566         md->gbp |= VXLAN_GBP_DONT_LEARN;
1567 
1568     if (gbp->policy_applied)
1569         md->gbp |= VXLAN_GBP_POLICY_APPLIED;
1570 
1571     /* In flow-based mode, GBP is carried in dst_metadata */
1572     if (!(vxflags & VXLAN_F_COLLECT_METADATA))
1573         skb->mark = md->gbp;
1574 out:
1575     unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
1576 }
1577 
1578 static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
1579                 __be16 *protocol,
1580                 struct sk_buff *skb, u32 vxflags)
1581 {
1582     struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
1583 
1584     /* Need to have Next Protocol set for interfaces in GPE mode. */
1585     if (!gpe->np_applied)
1586         return false;
1587     /* "The initial version is 0. If a receiver does not support the
1588      * version indicated it MUST drop the packet.
1589      */
1590     if (gpe->version != 0)
1591         return false;
1592     /* "When the O bit is set to 1, the packet is an OAM packet and OAM
1593      * processing MUST occur." However, we don't implement OAM
1594      * processing, thus drop the packet.
1595      */
1596     if (gpe->oam_flag)
1597         return false;
1598 
1599     *protocol = tun_p_to_eth_p(gpe->next_protocol);
1600     if (!*protocol)
1601         return false;
1602 
1603     unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
1604     return true;
1605 }
1606 
1607 static bool vxlan_set_mac(struct vxlan_dev *vxlan,
1608               struct vxlan_sock *vs,
1609               struct sk_buff *skb, __be32 vni)
1610 {
1611     union vxlan_addr saddr;
1612     u32 ifindex = skb->dev->ifindex;
1613 
1614     skb_reset_mac_header(skb);
1615     skb->protocol = eth_type_trans(skb, vxlan->dev);
1616     skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
1617 
1618     /* Ignore packet loops (and multicast echo) */
1619     if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
1620         return false;
1621 
1622     /* Get address from the outer IP header */
1623     if (vxlan_get_sk_family(vs) == AF_INET) {
1624         saddr.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
1625         saddr.sa.sa_family = AF_INET;
1626 #if IS_ENABLED(CONFIG_IPV6)
1627     } else {
1628         saddr.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
1629         saddr.sa.sa_family = AF_INET6;
1630 #endif
1631     }
1632 
1633     if ((vxlan->cfg.flags & VXLAN_F_LEARN) &&
1634         vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, ifindex, vni))
1635         return false;
1636 
1637     return true;
1638 }
1639 
1640 static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph,
1641                   struct sk_buff *skb)
1642 {
1643     int err = 0;
1644 
1645     if (vxlan_get_sk_family(vs) == AF_INET)
1646         err = IP_ECN_decapsulate(oiph, skb);
1647 #if IS_ENABLED(CONFIG_IPV6)
1648     else
1649         err = IP6_ECN_decapsulate(oiph, skb);
1650 #endif
1651 
1652     if (unlikely(err) && log_ecn_error) {
1653         if (vxlan_get_sk_family(vs) == AF_INET)
1654             net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
1655                          &((struct iphdr *)oiph)->saddr,
1656                          ((struct iphdr *)oiph)->tos);
1657         else
1658             net_info_ratelimited("non-ECT from %pI6\n",
1659                          &((struct ipv6hdr *)oiph)->saddr);
1660     }
1661     return err <= 1;
1662 }
1663 
1664 /* Callback from net/ipv4/udp.c to receive packets */
1665 static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
1666 {
1667     struct vxlan_vni_node *vninode = NULL;
1668     struct vxlan_dev *vxlan;
1669     struct vxlan_sock *vs;
1670     struct vxlanhdr unparsed;
1671     struct vxlan_metadata _md;
1672     struct vxlan_metadata *md = &_md;
1673     __be16 protocol = htons(ETH_P_TEB);
1674     bool raw_proto = false;
1675     void *oiph;
1676     __be32 vni = 0;
1677 
1678     /* Need UDP and VXLAN header to be present */
1679     if (!pskb_may_pull(skb, VXLAN_HLEN))
1680         goto drop;
1681 
1682     unparsed = *vxlan_hdr(skb);
1683     /* VNI flag always required to be set */
1684     if (!(unparsed.vx_flags & VXLAN_HF_VNI)) {
1685         netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
1686                ntohl(vxlan_hdr(skb)->vx_flags),
1687                ntohl(vxlan_hdr(skb)->vx_vni));
1688         /* Return non vxlan pkt */
1689         goto drop;
1690     }
1691     unparsed.vx_flags &= ~VXLAN_HF_VNI;
1692     unparsed.vx_vni &= ~VXLAN_VNI_MASK;
1693 
1694     vs = rcu_dereference_sk_user_data(sk);
1695     if (!vs)
1696         goto drop;
1697 
1698     vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
1699 
1700     vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni, &vninode);
1701     if (!vxlan)
1702         goto drop;
1703 
1704     /* For backwards compatibility, only allow reserved fields to be
1705      * used by VXLAN extensions if explicitly requested.
1706      */
1707     if (vs->flags & VXLAN_F_GPE) {
1708         if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags))
1709             goto drop;
1710         raw_proto = true;
1711     }
1712 
1713     if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto,
1714                    !net_eq(vxlan->net, dev_net(vxlan->dev))))
1715         goto drop;
1716 
1717     if (vs->flags & VXLAN_F_REMCSUM_RX)
1718         if (unlikely(!vxlan_remcsum(&unparsed, skb, vs->flags)))
1719             goto drop;
1720 
1721     if (vxlan_collect_metadata(vs)) {
1722         struct metadata_dst *tun_dst;
1723 
1724         tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY,
1725                      key32_to_tunnel_id(vni), sizeof(*md));
1726 
1727         if (!tun_dst)
1728             goto drop;
1729 
1730         md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
1731 
1732         skb_dst_set(skb, (struct dst_entry *)tun_dst);
1733     } else {
1734         memset(md, 0, sizeof(*md));
1735     }
1736 
1737     if (vs->flags & VXLAN_F_GBP)
1738         vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md);
1739     /* Note that GBP and GPE can never be active together. This is
1740      * ensured in vxlan_dev_configure.
1741      */
1742 
1743     if (unparsed.vx_flags || unparsed.vx_vni) {
1744         /* If there are any unprocessed flags remaining treat
1745          * this as a malformed packet. This behavior diverges from
1746          * VXLAN RFC (RFC7348) which stipulates that bits in reserved
1747          * in reserved fields are to be ignored. The approach here
1748          * maintains compatibility with previous stack code, and also
1749          * is more robust and provides a little more security in
1750          * adding extensions to VXLAN.
1751          */
1752         goto drop;
1753     }
1754 
1755     if (!raw_proto) {
1756         if (!vxlan_set_mac(vxlan, vs, skb, vni))
1757             goto drop;
1758     } else {
1759         skb_reset_mac_header(skb);
1760         skb->dev = vxlan->dev;
1761         skb->pkt_type = PACKET_HOST;
1762     }
1763 
1764     oiph = skb_network_header(skb);
1765     skb_reset_network_header(skb);
1766 
1767     if (!vxlan_ecn_decapsulate(vs, oiph, skb)) {
1768         ++vxlan->dev->stats.rx_frame_errors;
1769         ++vxlan->dev->stats.rx_errors;
1770         vxlan_vnifilter_count(vxlan, vni, vninode,
1771                       VXLAN_VNI_STATS_RX_ERRORS, 0);
1772         goto drop;
1773     }
1774 
1775     rcu_read_lock();
1776 
1777     if (unlikely(!(vxlan->dev->flags & IFF_UP))) {
1778         rcu_read_unlock();
1779         dev_core_stats_rx_dropped_inc(vxlan->dev);
1780         vxlan_vnifilter_count(vxlan, vni, vninode,
1781                       VXLAN_VNI_STATS_RX_DROPS, 0);
1782         goto drop;
1783     }
1784 
1785     dev_sw_netstats_rx_add(vxlan->dev, skb->len);
1786     vxlan_vnifilter_count(vxlan, vni, vninode, VXLAN_VNI_STATS_RX, skb->len);
1787     gro_cells_receive(&vxlan->gro_cells, skb);
1788 
1789     rcu_read_unlock();
1790 
1791     return 0;
1792 
1793 drop:
1794     /* Consume bad packet */
1795     kfree_skb(skb);
1796     return 0;
1797 }
1798 
1799 /* Callback from net/ipv{4,6}/udp.c to check that we have a VNI for errors */
1800 static int vxlan_err_lookup(struct sock *sk, struct sk_buff *skb)
1801 {
1802     struct vxlan_dev *vxlan;
1803     struct vxlan_sock *vs;
1804     struct vxlanhdr *hdr;
1805     __be32 vni;
1806 
1807     if (!pskb_may_pull(skb, skb_transport_offset(skb) + VXLAN_HLEN))
1808         return -EINVAL;
1809 
1810     hdr = vxlan_hdr(skb);
1811 
1812     if (!(hdr->vx_flags & VXLAN_HF_VNI))
1813         return -EINVAL;
1814 
1815     vs = rcu_dereference_sk_user_data(sk);
1816     if (!vs)
1817         return -ENOENT;
1818 
1819     vni = vxlan_vni(hdr->vx_vni);
1820     vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni, NULL);
1821     if (!vxlan)
1822         return -ENOENT;
1823 
1824     return 0;
1825 }
1826 
1827 static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
1828 {
1829     struct vxlan_dev *vxlan = netdev_priv(dev);
1830     struct arphdr *parp;
1831     u8 *arpptr, *sha;
1832     __be32 sip, tip;
1833     struct neighbour *n;
1834 
1835     if (dev->flags & IFF_NOARP)
1836         goto out;
1837 
1838     if (!pskb_may_pull(skb, arp_hdr_len(dev))) {
1839         dev->stats.tx_dropped++;
1840         goto out;
1841     }
1842     parp = arp_hdr(skb);
1843 
1844     if ((parp->ar_hrd != htons(ARPHRD_ETHER) &&
1845          parp->ar_hrd != htons(ARPHRD_IEEE802)) ||
1846         parp->ar_pro != htons(ETH_P_IP) ||
1847         parp->ar_op != htons(ARPOP_REQUEST) ||
1848         parp->ar_hln != dev->addr_len ||
1849         parp->ar_pln != 4)
1850         goto out;
1851     arpptr = (u8 *)parp + sizeof(struct arphdr);
1852     sha = arpptr;
1853     arpptr += dev->addr_len;    /* sha */
1854     memcpy(&sip, arpptr, sizeof(sip));
1855     arpptr += sizeof(sip);
1856     arpptr += dev->addr_len;    /* tha */
1857     memcpy(&tip, arpptr, sizeof(tip));
1858 
1859     if (ipv4_is_loopback(tip) ||
1860         ipv4_is_multicast(tip))
1861         goto out;
1862 
1863     n = neigh_lookup(&arp_tbl, &tip, dev);
1864 
1865     if (n) {
1866         struct vxlan_fdb *f;
1867         struct sk_buff  *reply;
1868 
1869         if (!(n->nud_state & NUD_CONNECTED)) {
1870             neigh_release(n);
1871             goto out;
1872         }
1873 
1874         f = vxlan_find_mac(vxlan, n->ha, vni);
1875         if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
1876             /* bridge-local neighbor */
1877             neigh_release(n);
1878             goto out;
1879         }
1880 
1881         reply = arp_create(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
1882                 n->ha, sha);
1883 
1884         neigh_release(n);
1885 
1886         if (reply == NULL)
1887             goto out;
1888 
1889         skb_reset_mac_header(reply);
1890         __skb_pull(reply, skb_network_offset(reply));
1891         reply->ip_summed = CHECKSUM_UNNECESSARY;
1892         reply->pkt_type = PACKET_HOST;
1893 
1894         if (netif_rx(reply) == NET_RX_DROP) {
1895             dev->stats.rx_dropped++;
1896             vxlan_vnifilter_count(vxlan, vni, NULL,
1897                           VXLAN_VNI_STATS_RX_DROPS, 0);
1898         }
1899 
1900     } else if (vxlan->cfg.flags & VXLAN_F_L3MISS) {
1901         union vxlan_addr ipa = {
1902             .sin.sin_addr.s_addr = tip,
1903             .sin.sin_family = AF_INET,
1904         };
1905 
1906         vxlan_ip_miss(dev, &ipa);
1907     }
1908 out:
1909     consume_skb(skb);
1910     return NETDEV_TX_OK;
1911 }
1912 
1913 #if IS_ENABLED(CONFIG_IPV6)
1914 static struct sk_buff *vxlan_na_create(struct sk_buff *request,
1915     struct neighbour *n, bool isrouter)
1916 {
1917     struct net_device *dev = request->dev;
1918     struct sk_buff *reply;
1919     struct nd_msg *ns, *na;
1920     struct ipv6hdr *pip6;
1921     u8 *daddr;
1922     int na_olen = 8; /* opt hdr + ETH_ALEN for target */
1923     int ns_olen;
1924     int i, len;
1925 
1926     if (dev == NULL || !pskb_may_pull(request, request->len))
1927         return NULL;
1928 
1929     len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) +
1930         sizeof(*na) + na_olen + dev->needed_tailroom;
1931     reply = alloc_skb(len, GFP_ATOMIC);
1932     if (reply == NULL)
1933         return NULL;
1934 
1935     reply->protocol = htons(ETH_P_IPV6);
1936     reply->dev = dev;
1937     skb_reserve(reply, LL_RESERVED_SPACE(request->dev));
1938     skb_push(reply, sizeof(struct ethhdr));
1939     skb_reset_mac_header(reply);
1940 
1941     ns = (struct nd_msg *)(ipv6_hdr(request) + 1);
1942 
1943     daddr = eth_hdr(request)->h_source;
1944     ns_olen = request->len - skb_network_offset(request) -
1945         sizeof(struct ipv6hdr) - sizeof(*ns);
1946     for (i = 0; i < ns_olen-1; i += (ns->opt[i+1]<<3)) {
1947         if (!ns->opt[i + 1]) {
1948             kfree_skb(reply);
1949             return NULL;
1950         }
1951         if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
1952             daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
1953             break;
1954         }
1955     }
1956 
1957     /* Ethernet header */
1958     ether_addr_copy(eth_hdr(reply)->h_dest, daddr);
1959     ether_addr_copy(eth_hdr(reply)->h_source, n->ha);
1960     eth_hdr(reply)->h_proto = htons(ETH_P_IPV6);
1961     reply->protocol = htons(ETH_P_IPV6);
1962 
1963     skb_pull(reply, sizeof(struct ethhdr));
1964     skb_reset_network_header(reply);
1965     skb_put(reply, sizeof(struct ipv6hdr));
1966 
1967     /* IPv6 header */
1968 
1969     pip6 = ipv6_hdr(reply);
1970     memset(pip6, 0, sizeof(struct ipv6hdr));
1971     pip6->version = 6;
1972     pip6->priority = ipv6_hdr(request)->priority;
1973     pip6->nexthdr = IPPROTO_ICMPV6;
1974     pip6->hop_limit = 255;
1975     pip6->daddr = ipv6_hdr(request)->saddr;
1976     pip6->saddr = *(struct in6_addr *)n->primary_key;
1977 
1978     skb_pull(reply, sizeof(struct ipv6hdr));
1979     skb_reset_transport_header(reply);
1980 
1981     /* Neighbor Advertisement */
1982     na = skb_put_zero(reply, sizeof(*na) + na_olen);
1983     na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
1984     na->icmph.icmp6_router = isrouter;
1985     na->icmph.icmp6_override = 1;
1986     na->icmph.icmp6_solicited = 1;
1987     na->target = ns->target;
1988     ether_addr_copy(&na->opt[2], n->ha);
1989     na->opt[0] = ND_OPT_TARGET_LL_ADDR;
1990     na->opt[1] = na_olen >> 3;
1991 
1992     na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr,
1993         &pip6->daddr, sizeof(*na)+na_olen, IPPROTO_ICMPV6,
1994         csum_partial(na, sizeof(*na)+na_olen, 0));
1995 
1996     pip6->payload_len = htons(sizeof(*na)+na_olen);
1997 
1998     skb_push(reply, sizeof(struct ipv6hdr));
1999 
2000     reply->ip_summed = CHECKSUM_UNNECESSARY;
2001 
2002     return reply;
2003 }
2004 
2005 static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
2006 {
2007     struct vxlan_dev *vxlan = netdev_priv(dev);
2008     const struct in6_addr *daddr;
2009     const struct ipv6hdr *iphdr;
2010     struct inet6_dev *in6_dev;
2011     struct neighbour *n;
2012     struct nd_msg *msg;
2013 
2014     rcu_read_lock();
2015     in6_dev = __in6_dev_get(dev);
2016     if (!in6_dev)
2017         goto out;
2018 
2019     iphdr = ipv6_hdr(skb);
2020     daddr = &iphdr->daddr;
2021     msg = (struct nd_msg *)(iphdr + 1);
2022 
2023     if (ipv6_addr_loopback(daddr) ||
2024         ipv6_addr_is_multicast(&msg->target))
2025         goto out;
2026 
2027     n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, dev);
2028 
2029     if (n) {
2030         struct vxlan_fdb *f;
2031         struct sk_buff *reply;
2032 
2033         if (!(n->nud_state & NUD_CONNECTED)) {
2034             neigh_release(n);
2035             goto out;
2036         }
2037 
2038         f = vxlan_find_mac(vxlan, n->ha, vni);
2039         if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
2040             /* bridge-local neighbor */
2041             neigh_release(n);
2042             goto out;
2043         }
2044 
2045         reply = vxlan_na_create(skb, n,
2046                     !!(f ? f->flags & NTF_ROUTER : 0));
2047 
2048         neigh_release(n);
2049 
2050         if (reply == NULL)
2051             goto out;
2052 
2053         if (netif_rx(reply) == NET_RX_DROP) {
2054             dev->stats.rx_dropped++;
2055             vxlan_vnifilter_count(vxlan, vni, NULL,
2056                           VXLAN_VNI_STATS_RX_DROPS, 0);
2057         }
2058     } else if (vxlan->cfg.flags & VXLAN_F_L3MISS) {
2059         union vxlan_addr ipa = {
2060             .sin6.sin6_addr = msg->target,
2061             .sin6.sin6_family = AF_INET6,
2062         };
2063 
2064         vxlan_ip_miss(dev, &ipa);
2065     }
2066 
2067 out:
2068     rcu_read_unlock();
2069     consume_skb(skb);
2070     return NETDEV_TX_OK;
2071 }
2072 #endif
2073 
2074 static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
2075 {
2076     struct vxlan_dev *vxlan = netdev_priv(dev);
2077     struct neighbour *n;
2078 
2079     if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
2080         return false;
2081 
2082     n = NULL;
2083     switch (ntohs(eth_hdr(skb)->h_proto)) {
2084     case ETH_P_IP:
2085     {
2086         struct iphdr *pip;
2087 
2088         if (!pskb_may_pull(skb, sizeof(struct iphdr)))
2089             return false;
2090         pip = ip_hdr(skb);
2091         n = neigh_lookup(&arp_tbl, &pip->daddr, dev);
2092         if (!n && (vxlan->cfg.flags & VXLAN_F_L3MISS)) {
2093             union vxlan_addr ipa = {
2094                 .sin.sin_addr.s_addr = pip->daddr,
2095                 .sin.sin_family = AF_INET,
2096             };
2097 
2098             vxlan_ip_miss(dev, &ipa);
2099             return false;
2100         }
2101 
2102         break;
2103     }
2104 #if IS_ENABLED(CONFIG_IPV6)
2105     case ETH_P_IPV6:
2106     {
2107         struct ipv6hdr *pip6;
2108 
2109         if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
2110             return false;
2111         pip6 = ipv6_hdr(skb);
2112         n = neigh_lookup(ipv6_stub->nd_tbl, &pip6->daddr, dev);
2113         if (!n && (vxlan->cfg.flags & VXLAN_F_L3MISS)) {
2114             union vxlan_addr ipa = {
2115                 .sin6.sin6_addr = pip6->daddr,
2116                 .sin6.sin6_family = AF_INET6,
2117             };
2118 
2119             vxlan_ip_miss(dev, &ipa);
2120             return false;
2121         }
2122 
2123         break;
2124     }
2125 #endif
2126     default:
2127         return false;
2128     }
2129 
2130     if (n) {
2131         bool diff;
2132 
2133         diff = !ether_addr_equal(eth_hdr(skb)->h_dest, n->ha);
2134         if (diff) {
2135             memcpy(eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
2136                 dev->addr_len);
2137             memcpy(eth_hdr(skb)->h_dest, n->ha, dev->addr_len);
2138         }
2139         neigh_release(n);
2140         return diff;
2141     }
2142 
2143     return false;
2144 }
2145 
2146 static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
2147                 struct vxlan_metadata *md)
2148 {
2149     struct vxlanhdr_gbp *gbp;
2150 
2151     if (!md->gbp)
2152         return;
2153 
2154     gbp = (struct vxlanhdr_gbp *)vxh;
2155     vxh->vx_flags |= VXLAN_HF_GBP;
2156 
2157     if (md->gbp & VXLAN_GBP_DONT_LEARN)
2158         gbp->dont_learn = 1;
2159 
2160     if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
2161         gbp->policy_applied = 1;
2162 
2163     gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
2164 }
2165 
2166 static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags,
2167                    __be16 protocol)
2168 {
2169     struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh;
2170 
2171     gpe->np_applied = 1;
2172     gpe->next_protocol = tun_p_from_eth_p(protocol);
2173     if (!gpe->next_protocol)
2174         return -EPFNOSUPPORT;
2175     return 0;
2176 }
2177 
2178 static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
2179                int iphdr_len, __be32 vni,
2180                struct vxlan_metadata *md, u32 vxflags,
2181                bool udp_sum)
2182 {
2183     struct vxlanhdr *vxh;
2184     int min_headroom;
2185     int err;
2186     int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
2187     __be16 inner_protocol = htons(ETH_P_TEB);
2188 
2189     if ((vxflags & VXLAN_F_REMCSUM_TX) &&
2190         skb->ip_summed == CHECKSUM_PARTIAL) {
2191         int csum_start = skb_checksum_start_offset(skb);
2192 
2193         if (csum_start <= VXLAN_MAX_REMCSUM_START &&
2194             !(csum_start & VXLAN_RCO_SHIFT_MASK) &&
2195             (skb->csum_offset == offsetof(struct udphdr, check) ||
2196              skb->csum_offset == offsetof(struct tcphdr, check)))
2197             type |= SKB_GSO_TUNNEL_REMCSUM;
2198     }
2199 
2200     min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
2201             + VXLAN_HLEN + iphdr_len;
2202 
2203     /* Need space for new headers (invalidates iph ptr) */
2204     err = skb_cow_head(skb, min_headroom);
2205     if (unlikely(err))
2206         return err;
2207 
2208     err = iptunnel_handle_offloads(skb, type);
2209     if (err)
2210         return err;
2211 
2212     vxh = __skb_push(skb, sizeof(*vxh));
2213     vxh->vx_flags = VXLAN_HF_VNI;
2214     vxh->vx_vni = vxlan_vni_field(vni);
2215 
2216     if (type & SKB_GSO_TUNNEL_REMCSUM) {
2217         unsigned int start;
2218 
2219         start = skb_checksum_start_offset(skb) - sizeof(struct vxlanhdr);
2220         vxh->vx_vni |= vxlan_compute_rco(start, skb->csum_offset);
2221         vxh->vx_flags |= VXLAN_HF_RCO;
2222 
2223         if (!skb_is_gso(skb)) {
2224             skb->ip_summed = CHECKSUM_NONE;
2225             skb->encapsulation = 0;
2226         }
2227     }
2228 
2229     if (vxflags & VXLAN_F_GBP)
2230         vxlan_build_gbp_hdr(vxh, vxflags, md);
2231     if (vxflags & VXLAN_F_GPE) {
2232         err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol);
2233         if (err < 0)
2234             return err;
2235         inner_protocol = skb->protocol;
2236     }
2237 
2238     skb_set_inner_protocol(skb, inner_protocol);
2239     return 0;
2240 }
2241 
2242 static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device *dev,
2243                       struct vxlan_sock *sock4,
2244                       struct sk_buff *skb, int oif, u8 tos,
2245                       __be32 daddr, __be32 *saddr, __be16 dport, __be16 sport,
2246                       __u8 flow_flags, struct dst_cache *dst_cache,
2247                       const struct ip_tunnel_info *info)
2248 {
2249     bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
2250     struct rtable *rt = NULL;
2251     struct flowi4 fl4;
2252 
2253     if (!sock4)
2254         return ERR_PTR(-EIO);
2255 
2256     if (tos && !info)
2257         use_cache = false;
2258     if (use_cache) {
2259         rt = dst_cache_get_ip4(dst_cache, saddr);
2260         if (rt)
2261             return rt;
2262     }
2263 
2264     memset(&fl4, 0, sizeof(fl4));
2265     fl4.flowi4_oif = oif;
2266     fl4.flowi4_tos = RT_TOS(tos);
2267     fl4.flowi4_mark = skb->mark;
2268     fl4.flowi4_proto = IPPROTO_UDP;
2269     fl4.daddr = daddr;
2270     fl4.saddr = *saddr;
2271     fl4.fl4_dport = dport;
2272     fl4.fl4_sport = sport;
2273     fl4.flowi4_flags = flow_flags;
2274 
2275     rt = ip_route_output_key(vxlan->net, &fl4);
2276     if (!IS_ERR(rt)) {
2277         if (rt->dst.dev == dev) {
2278             netdev_dbg(dev, "circular route to %pI4\n", &daddr);
2279             ip_rt_put(rt);
2280             return ERR_PTR(-ELOOP);
2281         }
2282 
2283         *saddr = fl4.saddr;
2284         if (use_cache)
2285             dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
2286     } else {
2287         netdev_dbg(dev, "no route to %pI4\n", &daddr);
2288         return ERR_PTR(-ENETUNREACH);
2289     }
2290     return rt;
2291 }
2292 
2293 #if IS_ENABLED(CONFIG_IPV6)
2294 static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
2295                       struct net_device *dev,
2296                       struct vxlan_sock *sock6,
2297                       struct sk_buff *skb, int oif, u8 tos,
2298                       __be32 label,
2299                       const struct in6_addr *daddr,
2300                       struct in6_addr *saddr,
2301                       __be16 dport, __be16 sport,
2302                       struct dst_cache *dst_cache,
2303                       const struct ip_tunnel_info *info)
2304 {
2305     bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
2306     struct dst_entry *ndst;
2307     struct flowi6 fl6;
2308 
2309     if (!sock6)
2310         return ERR_PTR(-EIO);
2311 
2312     if (tos && !info)
2313         use_cache = false;
2314     if (use_cache) {
2315         ndst = dst_cache_get_ip6(dst_cache, saddr);
2316         if (ndst)
2317             return ndst;
2318     }
2319 
2320     memset(&fl6, 0, sizeof(fl6));
2321     fl6.flowi6_oif = oif;
2322     fl6.daddr = *daddr;
2323     fl6.saddr = *saddr;
2324     fl6.flowlabel = ip6_make_flowinfo(tos, label);
2325     fl6.flowi6_mark = skb->mark;
2326     fl6.flowi6_proto = IPPROTO_UDP;
2327     fl6.fl6_dport = dport;
2328     fl6.fl6_sport = sport;
2329 
2330     ndst = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk,
2331                            &fl6, NULL);
2332     if (IS_ERR(ndst)) {
2333         netdev_dbg(dev, "no route to %pI6\n", daddr);
2334         return ERR_PTR(-ENETUNREACH);
2335     }
2336 
2337     if (unlikely(ndst->dev == dev)) {
2338         netdev_dbg(dev, "circular route to %pI6\n", daddr);
2339         dst_release(ndst);
2340         return ERR_PTR(-ELOOP);
2341     }
2342 
2343     *saddr = fl6.saddr;
2344     if (use_cache)
2345         dst_cache_set_ip6(dst_cache, ndst, saddr);
2346     return ndst;
2347 }
2348 #endif
2349 
2350 /* Bypass encapsulation if the destination is local */
2351 static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
2352                    struct vxlan_dev *dst_vxlan, __be32 vni,
2353                    bool snoop)
2354 {
2355     struct pcpu_sw_netstats *tx_stats, *rx_stats;
2356     union vxlan_addr loopback;
2357     union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip;
2358     struct net_device *dev;
2359     int len = skb->len;
2360 
2361     tx_stats = this_cpu_ptr(src_vxlan->dev->tstats);
2362     rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats);
2363     skb->pkt_type = PACKET_HOST;
2364     skb->encapsulation = 0;
2365     skb->dev = dst_vxlan->dev;
2366     __skb_pull(skb, skb_network_offset(skb));
2367 
2368     if (remote_ip->sa.sa_family == AF_INET) {
2369         loopback.sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
2370         loopback.sa.sa_family =  AF_INET;
2371 #if IS_ENABLED(CONFIG_IPV6)
2372     } else {
2373         loopback.sin6.sin6_addr = in6addr_loopback;
2374         loopback.sa.sa_family =  AF_INET6;
2375 #endif
2376     }
2377 
2378     rcu_read_lock();
2379     dev = skb->dev;
2380     if (unlikely(!(dev->flags & IFF_UP))) {
2381         kfree_skb(skb);
2382         goto drop;
2383     }
2384 
2385     if ((dst_vxlan->cfg.flags & VXLAN_F_LEARN) && snoop)
2386         vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni);
2387 
2388     u64_stats_update_begin(&tx_stats->syncp);
2389     u64_stats_inc(&tx_stats->tx_packets);
2390     u64_stats_add(&tx_stats->tx_bytes, len);
2391     u64_stats_update_end(&tx_stats->syncp);
2392     vxlan_vnifilter_count(src_vxlan, vni, NULL, VXLAN_VNI_STATS_TX, len);
2393 
2394     if (__netif_rx(skb) == NET_RX_SUCCESS) {
2395         u64_stats_update_begin(&rx_stats->syncp);
2396         u64_stats_inc(&rx_stats->rx_packets);
2397         u64_stats_add(&rx_stats->rx_bytes, len);
2398         u64_stats_update_end(&rx_stats->syncp);
2399         vxlan_vnifilter_count(dst_vxlan, vni, NULL, VXLAN_VNI_STATS_RX,
2400                       len);
2401     } else {
2402 drop:
2403         dev->stats.rx_dropped++;
2404         vxlan_vnifilter_count(dst_vxlan, vni, NULL,
2405                       VXLAN_VNI_STATS_RX_DROPS, 0);
2406     }
2407     rcu_read_unlock();
2408 }
2409 
2410 static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
2411                  struct vxlan_dev *vxlan,
2412                  union vxlan_addr *daddr,
2413                  __be16 dst_port, int dst_ifindex, __be32 vni,
2414                  struct dst_entry *dst,
2415                  u32 rt_flags)
2416 {
2417 #if IS_ENABLED(CONFIG_IPV6)
2418     /* IPv6 rt-flags are checked against RTF_LOCAL, but the value of
2419      * RTF_LOCAL is equal to RTCF_LOCAL. So to keep code simple
2420      * we can use RTCF_LOCAL which works for ipv4 and ipv6 route entry.
2421      */
2422     BUILD_BUG_ON(RTCF_LOCAL != RTF_LOCAL);
2423 #endif
2424     /* Bypass encapsulation if the destination is local */
2425     if (rt_flags & RTCF_LOCAL &&
2426         !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
2427         struct vxlan_dev *dst_vxlan;
2428 
2429         dst_release(dst);
2430         dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni,
2431                        daddr->sa.sa_family, dst_port,
2432                        vxlan->cfg.flags);
2433         if (!dst_vxlan) {
2434             dev->stats.tx_errors++;
2435             vxlan_vnifilter_count(vxlan, vni, NULL,
2436                           VXLAN_VNI_STATS_TX_ERRORS, 0);
2437             kfree_skb(skb);
2438 
2439             return -ENOENT;
2440         }
2441         vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
2442         return 1;
2443     }
2444 
2445     return 0;
2446 }
2447 
2448 static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
2449                __be32 default_vni, struct vxlan_rdst *rdst,
2450                bool did_rsc)
2451 {
2452     struct dst_cache *dst_cache;
2453     struct ip_tunnel_info *info;
2454     struct vxlan_dev *vxlan = netdev_priv(dev);
2455     const struct iphdr *old_iph = ip_hdr(skb);
2456     union vxlan_addr *dst;
2457     union vxlan_addr remote_ip, local_ip;
2458     struct vxlan_metadata _md;
2459     struct vxlan_metadata *md = &_md;
2460     unsigned int pkt_len = skb->len;
2461     __be16 src_port = 0, dst_port;
2462     struct dst_entry *ndst = NULL;
2463     __u8 tos, ttl, flow_flags = 0;
2464     int ifindex;
2465     int err;
2466     u32 flags = vxlan->cfg.flags;
2467     bool udp_sum = false;
2468     bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev));
2469     __be32 vni = 0;
2470 #if IS_ENABLED(CONFIG_IPV6)
2471     __be32 label;
2472 #endif
2473 
2474     info = skb_tunnel_info(skb);
2475 
2476     if (rdst) {
2477         dst = &rdst->remote_ip;
2478         if (vxlan_addr_any(dst)) {
2479             if (did_rsc) {
2480                 /* short-circuited back to local bridge */
2481                 vxlan_encap_bypass(skb, vxlan, vxlan,
2482                            default_vni, true);
2483                 return;
2484             }
2485             goto drop;
2486         }
2487 
2488         dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
2489         vni = (rdst->remote_vni) ? : default_vni;
2490         ifindex = rdst->remote_ifindex;
2491         local_ip = vxlan->cfg.saddr;
2492         dst_cache = &rdst->dst_cache;
2493         md->gbp = skb->mark;
2494         if (flags & VXLAN_F_TTL_INHERIT) {
2495             ttl = ip_tunnel_get_ttl(old_iph, skb);
2496         } else {
2497             ttl = vxlan->cfg.ttl;
2498             if (!ttl && vxlan_addr_multicast(dst))
2499                 ttl = 1;
2500         }
2501 
2502         tos = vxlan->cfg.tos;
2503         if (tos == 1)
2504             tos = ip_tunnel_get_dsfield(old_iph, skb);
2505 
2506         if (dst->sa.sa_family == AF_INET)
2507             udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
2508         else
2509             udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
2510 #if IS_ENABLED(CONFIG_IPV6)
2511         label = vxlan->cfg.label;
2512 #endif
2513     } else {
2514         if (!info) {
2515             WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
2516                   dev->name);
2517             goto drop;
2518         }
2519         remote_ip.sa.sa_family = ip_tunnel_info_af(info);
2520         if (remote_ip.sa.sa_family == AF_INET) {
2521             remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
2522             local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src;
2523         } else {
2524             remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
2525             local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
2526         }
2527         dst = &remote_ip;
2528         dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
2529         flow_flags = info->key.flow_flags;
2530         vni = tunnel_id_to_key32(info->key.tun_id);
2531         ifindex = 0;
2532         dst_cache = &info->dst_cache;
2533         if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
2534             if (info->options_len < sizeof(*md))
2535                 goto drop;
2536             md = ip_tunnel_info_opts(info);
2537         }
2538         ttl = info->key.ttl;
2539         tos = info->key.tos;
2540 #if IS_ENABLED(CONFIG_IPV6)
2541         label = info->key.label;
2542 #endif
2543         udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
2544     }
2545     src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
2546                      vxlan->cfg.port_max, true);
2547 
2548     rcu_read_lock();
2549     if (dst->sa.sa_family == AF_INET) {
2550         struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
2551         struct rtable *rt;
2552         __be16 df = 0;
2553 
2554         if (!ifindex)
2555             ifindex = sock4->sock->sk->sk_bound_dev_if;
2556 
2557         rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,
2558                      dst->sin.sin_addr.s_addr,
2559                      &local_ip.sin.sin_addr.s_addr,
2560                      dst_port, src_port, flow_flags,
2561                      dst_cache, info);
2562         if (IS_ERR(rt)) {
2563             err = PTR_ERR(rt);
2564             goto tx_error;
2565         }
2566 
2567         if (!info) {
2568             /* Bypass encapsulation if the destination is local */
2569             err = encap_bypass_if_local(skb, dev, vxlan, dst,
2570                             dst_port, ifindex, vni,
2571                             &rt->dst, rt->rt_flags);
2572             if (err)
2573                 goto out_unlock;
2574 
2575             if (vxlan->cfg.df == VXLAN_DF_SET) {
2576                 df = htons(IP_DF);
2577             } else if (vxlan->cfg.df == VXLAN_DF_INHERIT) {
2578                 struct ethhdr *eth = eth_hdr(skb);
2579 
2580                 if (ntohs(eth->h_proto) == ETH_P_IPV6 ||
2581                     (ntohs(eth->h_proto) == ETH_P_IP &&
2582                      old_iph->frag_off & htons(IP_DF)))
2583                     df = htons(IP_DF);
2584             }
2585         } else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
2586             df = htons(IP_DF);
2587         }
2588 
2589         ndst = &rt->dst;
2590         err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM,
2591                         netif_is_any_bridge_port(dev));
2592         if (err < 0) {
2593             goto tx_error;
2594         } else if (err) {
2595             if (info) {
2596                 struct ip_tunnel_info *unclone;
2597                 struct in_addr src, dst;
2598 
2599                 unclone = skb_tunnel_info_unclone(skb);
2600                 if (unlikely(!unclone))
2601                     goto tx_error;
2602 
2603                 src = remote_ip.sin.sin_addr;
2604                 dst = local_ip.sin.sin_addr;
2605                 unclone->key.u.ipv4.src = src.s_addr;
2606                 unclone->key.u.ipv4.dst = dst.s_addr;
2607             }
2608             vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
2609             dst_release(ndst);
2610             goto out_unlock;
2611         }
2612 
2613         tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
2614         ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
2615         err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
2616                       vni, md, flags, udp_sum);
2617         if (err < 0)
2618             goto tx_error;
2619 
2620         udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, local_ip.sin.sin_addr.s_addr,
2621                     dst->sin.sin_addr.s_addr, tos, ttl, df,
2622                     src_port, dst_port, xnet, !udp_sum);
2623 #if IS_ENABLED(CONFIG_IPV6)
2624     } else {
2625         struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
2626 
2627         if (!ifindex)
2628             ifindex = sock6->sock->sk->sk_bound_dev_if;
2629 
2630         ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos,
2631                     label, &dst->sin6.sin6_addr,
2632                     &local_ip.sin6.sin6_addr,
2633                     dst_port, src_port,
2634                     dst_cache, info);
2635         if (IS_ERR(ndst)) {
2636             err = PTR_ERR(ndst);
2637             ndst = NULL;
2638             goto tx_error;
2639         }
2640 
2641         if (!info) {
2642             u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
2643 
2644             err = encap_bypass_if_local(skb, dev, vxlan, dst,
2645                             dst_port, ifindex, vni,
2646                             ndst, rt6i_flags);
2647             if (err)
2648                 goto out_unlock;
2649         }
2650 
2651         err = skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM,
2652                         netif_is_any_bridge_port(dev));
2653         if (err < 0) {
2654             goto tx_error;
2655         } else if (err) {
2656             if (info) {
2657                 struct ip_tunnel_info *unclone;
2658                 struct in6_addr src, dst;
2659 
2660                 unclone = skb_tunnel_info_unclone(skb);
2661                 if (unlikely(!unclone))
2662                     goto tx_error;
2663 
2664                 src = remote_ip.sin6.sin6_addr;
2665                 dst = local_ip.sin6.sin6_addr;
2666                 unclone->key.u.ipv6.src = src;
2667                 unclone->key.u.ipv6.dst = dst;
2668             }
2669 
2670             vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
2671             dst_release(ndst);
2672             goto out_unlock;
2673         }
2674 
2675         tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
2676         ttl = ttl ? : ip6_dst_hoplimit(ndst);
2677         skb_scrub_packet(skb, xnet);
2678         err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
2679                       vni, md, flags, udp_sum);
2680         if (err < 0)
2681             goto tx_error;
2682 
2683         udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev,
2684                      &local_ip.sin6.sin6_addr,
2685                      &dst->sin6.sin6_addr, tos, ttl,
2686                      label, src_port, dst_port, !udp_sum);
2687 #endif
2688     }
2689     vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX, pkt_len);
2690 out_unlock:
2691     rcu_read_unlock();
2692     return;
2693 
2694 drop:
2695     dev->stats.tx_dropped++;
2696     vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0);
2697     dev_kfree_skb(skb);
2698     return;
2699 
2700 tx_error:
2701     rcu_read_unlock();
2702     if (err == -ELOOP)
2703         dev->stats.collisions++;
2704     else if (err == -ENETUNREACH)
2705         dev->stats.tx_carrier_errors++;
2706     dst_release(ndst);
2707     dev->stats.tx_errors++;
2708     vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_ERRORS, 0);
2709     kfree_skb(skb);
2710 }
2711 
2712 static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev,
2713               struct vxlan_fdb *f, __be32 vni, bool did_rsc)
2714 {
2715     struct vxlan_rdst nh_rdst;
2716     struct nexthop *nh;
2717     bool do_xmit;
2718     u32 hash;
2719 
2720     memset(&nh_rdst, 0, sizeof(struct vxlan_rdst));
2721     hash = skb_get_hash(skb);
2722 
2723     rcu_read_lock();
2724     nh = rcu_dereference(f->nh);
2725     if (!nh) {
2726         rcu_read_unlock();
2727         goto drop;
2728     }
2729     do_xmit = vxlan_fdb_nh_path_select(nh, hash, &nh_rdst);
2730     rcu_read_unlock();
2731 
2732     if (likely(do_xmit))
2733         vxlan_xmit_one(skb, dev, vni, &nh_rdst, did_rsc);
2734     else
2735         goto drop;
2736 
2737     return;
2738 
2739 drop:
2740     dev->stats.tx_dropped++;
2741     vxlan_vnifilter_count(netdev_priv(dev), vni, NULL,
2742                   VXLAN_VNI_STATS_TX_DROPS, 0);
2743     dev_kfree_skb(skb);
2744 }
2745 
2746 /* Transmit local packets over Vxlan
2747  *
2748  * Outer IP header inherits ECN and DF from inner header.
2749  * Outer UDP destination is the VXLAN assigned port.
2750  *           source port is based on hash of flow
2751  */
2752 static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
2753 {
2754     struct vxlan_dev *vxlan = netdev_priv(dev);
2755     struct vxlan_rdst *rdst, *fdst = NULL;
2756     const struct ip_tunnel_info *info;
2757     bool did_rsc = false;
2758     struct vxlan_fdb *f;
2759     struct ethhdr *eth;
2760     __be32 vni = 0;
2761 
2762     info = skb_tunnel_info(skb);
2763 
2764     skb_reset_mac_header(skb);
2765 
2766     if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) {
2767         if (info && info->mode & IP_TUNNEL_INFO_BRIDGE &&
2768             info->mode & IP_TUNNEL_INFO_TX) {
2769             vni = tunnel_id_to_key32(info->key.tun_id);
2770         } else {
2771             if (info && info->mode & IP_TUNNEL_INFO_TX)
2772                 vxlan_xmit_one(skb, dev, vni, NULL, false);
2773             else
2774                 kfree_skb(skb);
2775             return NETDEV_TX_OK;
2776         }
2777     }
2778 
2779     if (vxlan->cfg.flags & VXLAN_F_PROXY) {
2780         eth = eth_hdr(skb);
2781         if (ntohs(eth->h_proto) == ETH_P_ARP)
2782             return arp_reduce(dev, skb, vni);
2783 #if IS_ENABLED(CONFIG_IPV6)
2784         else if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
2785              pskb_may_pull(skb, sizeof(struct ipv6hdr) +
2786                         sizeof(struct nd_msg)) &&
2787              ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
2788             struct nd_msg *m = (struct nd_msg *)(ipv6_hdr(skb) + 1);
2789 
2790             if (m->icmph.icmp6_code == 0 &&
2791                 m->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
2792                 return neigh_reduce(dev, skb, vni);
2793         }
2794 #endif
2795     }
2796 
2797     eth = eth_hdr(skb);
2798     f = vxlan_find_mac(vxlan, eth->h_dest, vni);
2799     did_rsc = false;
2800 
2801     if (f && (f->flags & NTF_ROUTER) && (vxlan->cfg.flags & VXLAN_F_RSC) &&
2802         (ntohs(eth->h_proto) == ETH_P_IP ||
2803          ntohs(eth->h_proto) == ETH_P_IPV6)) {
2804         did_rsc = route_shortcircuit(dev, skb);
2805         if (did_rsc)
2806             f = vxlan_find_mac(vxlan, eth->h_dest, vni);
2807     }
2808 
2809     if (f == NULL) {
2810         f = vxlan_find_mac(vxlan, all_zeros_mac, vni);
2811         if (f == NULL) {
2812             if ((vxlan->cfg.flags & VXLAN_F_L2MISS) &&
2813                 !is_multicast_ether_addr(eth->h_dest))
2814                 vxlan_fdb_miss(vxlan, eth->h_dest);
2815 
2816             dev->stats.tx_dropped++;
2817             vxlan_vnifilter_count(vxlan, vni, NULL,
2818                           VXLAN_VNI_STATS_TX_DROPS, 0);
2819             kfree_skb(skb);
2820             return NETDEV_TX_OK;
2821         }
2822     }
2823 
2824     if (rcu_access_pointer(f->nh)) {
2825         vxlan_xmit_nh(skb, dev, f,
2826                   (vni ? : vxlan->default_dst.remote_vni), did_rsc);
2827     } else {
2828         list_for_each_entry_rcu(rdst, &f->remotes, list) {
2829             struct sk_buff *skb1;
2830 
2831             if (!fdst) {
2832                 fdst = rdst;
2833                 continue;
2834             }
2835             skb1 = skb_clone(skb, GFP_ATOMIC);
2836             if (skb1)
2837                 vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
2838         }
2839         if (fdst)
2840             vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
2841         else
2842             kfree_skb(skb);
2843     }
2844 
2845     return NETDEV_TX_OK;
2846 }
2847 
2848 /* Walk the forwarding table and purge stale entries */
2849 static void vxlan_cleanup(struct timer_list *t)
2850 {
2851     struct vxlan_dev *vxlan = from_timer(vxlan, t, age_timer);
2852     unsigned long next_timer = jiffies + FDB_AGE_INTERVAL;
2853     unsigned int h;
2854 
2855     if (!netif_running(vxlan->dev))
2856         return;
2857 
2858     for (h = 0; h < FDB_HASH_SIZE; ++h) {
2859         struct hlist_node *p, *n;
2860 
2861         spin_lock(&vxlan->hash_lock[h]);
2862         hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
2863             struct vxlan_fdb *f
2864                 = container_of(p, struct vxlan_fdb, hlist);
2865             unsigned long timeout;
2866 
2867             if (f->state & (NUD_PERMANENT | NUD_NOARP))
2868                 continue;
2869 
2870             if (f->flags & NTF_EXT_LEARNED)
2871                 continue;
2872 
2873             timeout = f->used + vxlan->cfg.age_interval * HZ;
2874             if (time_before_eq(timeout, jiffies)) {
2875                 netdev_dbg(vxlan->dev,
2876                        "garbage collect %pM\n",
2877                        f->eth_addr);
2878                 f->state = NUD_STALE;
2879                 vxlan_fdb_destroy(vxlan, f, true, true);
2880             } else if (time_before(timeout, next_timer))
2881                 next_timer = timeout;
2882         }
2883         spin_unlock(&vxlan->hash_lock[h]);
2884     }
2885 
2886     mod_timer(&vxlan->age_timer, next_timer);
2887 }
2888 
2889 static void vxlan_vs_del_dev(struct vxlan_dev *vxlan)
2890 {
2891     struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
2892 
2893     spin_lock(&vn->sock_lock);
2894     hlist_del_init_rcu(&vxlan->hlist4.hlist);
2895 #if IS_ENABLED(CONFIG_IPV6)
2896     hlist_del_init_rcu(&vxlan->hlist6.hlist);
2897 #endif
2898     spin_unlock(&vn->sock_lock);
2899 }
2900 
2901 static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan,
2902                  struct vxlan_dev_node *node)
2903 {
2904     struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
2905     __be32 vni = vxlan->default_dst.remote_vni;
2906 
2907     node->vxlan = vxlan;
2908     spin_lock(&vn->sock_lock);
2909     hlist_add_head_rcu(&node->hlist, vni_head(vs, vni));
2910     spin_unlock(&vn->sock_lock);
2911 }
2912 
2913 /* Setup stats when device is created */
2914 static int vxlan_init(struct net_device *dev)
2915 {
2916     struct vxlan_dev *vxlan = netdev_priv(dev);
2917     int err;
2918 
2919     if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
2920         vxlan_vnigroup_init(vxlan);
2921 
2922     dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
2923     if (!dev->tstats)
2924         return -ENOMEM;
2925 
2926     err = gro_cells_init(&vxlan->gro_cells, dev);
2927     if (err) {
2928         free_percpu(dev->tstats);
2929         return err;
2930     }
2931 
2932     return 0;
2933 }
2934 
2935 static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
2936 {
2937     struct vxlan_fdb *f;
2938     u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, vni);
2939 
2940     spin_lock_bh(&vxlan->hash_lock[hash_index]);
2941     f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
2942     if (f)
2943         vxlan_fdb_destroy(vxlan, f, true, true);
2944     spin_unlock_bh(&vxlan->hash_lock[hash_index]);
2945 }
2946 
2947 static void vxlan_uninit(struct net_device *dev)
2948 {
2949     struct vxlan_dev *vxlan = netdev_priv(dev);
2950 
2951     if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
2952         vxlan_vnigroup_uninit(vxlan);
2953 
2954     gro_cells_destroy(&vxlan->gro_cells);
2955 
2956     vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
2957 
2958     free_percpu(dev->tstats);
2959 }
2960 
2961 /* Start ageing timer and join group when device is brought up */
2962 static int vxlan_open(struct net_device *dev)
2963 {
2964     struct vxlan_dev *vxlan = netdev_priv(dev);
2965     int ret;
2966 
2967     ret = vxlan_sock_add(vxlan);
2968     if (ret < 0)
2969         return ret;
2970 
2971     ret = vxlan_multicast_join(vxlan);
2972     if (ret) {
2973         vxlan_sock_release(vxlan);
2974         return ret;
2975     }
2976 
2977     if (vxlan->cfg.age_interval)
2978         mod_timer(&vxlan->age_timer, jiffies + FDB_AGE_INTERVAL);
2979 
2980     return ret;
2981 }
2982 
2983 /* Purge the forwarding table */
2984 static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
2985 {
2986     unsigned int h;
2987 
2988     for (h = 0; h < FDB_HASH_SIZE; ++h) {
2989         struct hlist_node *p, *n;
2990 
2991         spin_lock_bh(&vxlan->hash_lock[h]);
2992         hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
2993             struct vxlan_fdb *f
2994                 = container_of(p, struct vxlan_fdb, hlist);
2995             if (!do_all && (f->state & (NUD_PERMANENT | NUD_NOARP)))
2996                 continue;
2997             /* the all_zeros_mac entry is deleted at vxlan_uninit */
2998             if (is_zero_ether_addr(f->eth_addr) &&
2999                 f->vni == vxlan->cfg.vni)
3000                 continue;
3001             vxlan_fdb_destroy(vxlan, f, true, true);
3002         }
3003         spin_unlock_bh(&vxlan->hash_lock[h]);
3004     }
3005 }
3006 
3007 /* Cleanup timer and forwarding table on shutdown */
3008 static int vxlan_stop(struct net_device *dev)
3009 {
3010     struct vxlan_dev *vxlan = netdev_priv(dev);
3011 
3012     vxlan_multicast_leave(vxlan);
3013 
3014     del_timer_sync(&vxlan->age_timer);
3015 
3016     vxlan_flush(vxlan, false);
3017     vxlan_sock_release(vxlan);
3018 
3019     return 0;
3020 }
3021 
/* Stub, nothing needs to be done.
 *
 * Installed as .ndo_set_rx_mode in vxlan_netdev_ether_ops; the body is
 * intentionally empty and @dev is unused.
 */
static void vxlan_set_multicast_list(struct net_device *dev)
{
}
3026 
3027 static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
3028 {
3029     struct vxlan_dev *vxlan = netdev_priv(dev);
3030     struct vxlan_rdst *dst = &vxlan->default_dst;
3031     struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
3032                              dst->remote_ifindex);
3033     bool use_ipv6 = !!(vxlan->cfg.flags & VXLAN_F_IPV6);
3034 
3035     /* This check is different than dev->max_mtu, because it looks at
3036      * the lowerdev->mtu, rather than the static dev->max_mtu
3037      */
3038     if (lowerdev) {
3039         int max_mtu = lowerdev->mtu -
3040                   (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
3041         if (new_mtu > max_mtu)
3042             return -EINVAL;
3043     }
3044 
3045     dev->mtu = new_mtu;
3046     return 0;
3047 }
3048 
3049 static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
3050 {
3051     struct vxlan_dev *vxlan = netdev_priv(dev);
3052     struct ip_tunnel_info *info = skb_tunnel_info(skb);
3053     __be16 sport, dport;
3054 
3055     sport = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
3056                   vxlan->cfg.port_max, true);
3057     dport = info->key.tp_dst ? : vxlan->cfg.dst_port;
3058 
3059     if (ip_tunnel_info_af(info) == AF_INET) {
3060         struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
3061         struct rtable *rt;
3062 
3063         rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos,
3064                      info->key.u.ipv4.dst,
3065                      &info->key.u.ipv4.src, dport, sport,
3066                      info->key.flow_flags, &info->dst_cache,
3067                      info);
3068         if (IS_ERR(rt))
3069             return PTR_ERR(rt);
3070         ip_rt_put(rt);
3071     } else {
3072 #if IS_ENABLED(CONFIG_IPV6)
3073         struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
3074         struct dst_entry *ndst;
3075 
3076         ndst = vxlan6_get_route(vxlan, dev, sock6, skb, 0, info->key.tos,
3077                     info->key.label, &info->key.u.ipv6.dst,
3078                     &info->key.u.ipv6.src, dport, sport,
3079                     &info->dst_cache, info);
3080         if (IS_ERR(ndst))
3081             return PTR_ERR(ndst);
3082         dst_release(ndst);
3083 #else /* !CONFIG_IPV6 */
3084         return -EPFNOSUPPORT;
3085 #endif
3086     }
3087     info->key.tp_src = sport;
3088     info->key.tp_dst = dport;
3089     return 0;
3090 }
3091 
3092 static const struct net_device_ops vxlan_netdev_ether_ops = {
3093     .ndo_init       = vxlan_init,
3094     .ndo_uninit     = vxlan_uninit,
3095     .ndo_open       = vxlan_open,
3096     .ndo_stop       = vxlan_stop,
3097     .ndo_start_xmit     = vxlan_xmit,
3098     .ndo_get_stats64    = dev_get_tstats64,
3099     .ndo_set_rx_mode    = vxlan_set_multicast_list,
3100     .ndo_change_mtu     = vxlan_change_mtu,
3101     .ndo_validate_addr  = eth_validate_addr,
3102     .ndo_set_mac_address    = eth_mac_addr,
3103     .ndo_fdb_add        = vxlan_fdb_add,
3104     .ndo_fdb_del        = vxlan_fdb_delete,
3105     .ndo_fdb_dump       = vxlan_fdb_dump,
3106     .ndo_fdb_get        = vxlan_fdb_get,
3107     .ndo_fill_metadata_dst  = vxlan_fill_metadata_dst,
3108 };
3109 
3110 static const struct net_device_ops vxlan_netdev_raw_ops = {
3111     .ndo_init       = vxlan_init,
3112     .ndo_uninit     = vxlan_uninit,
3113     .ndo_open       = vxlan_open,
3114     .ndo_stop       = vxlan_stop,
3115     .ndo_start_xmit     = vxlan_xmit,
3116     .ndo_get_stats64    = dev_get_tstats64,
3117     .ndo_change_mtu     = vxlan_change_mtu,
3118     .ndo_fill_metadata_dst  = vxlan_fill_metadata_dst,
3119 };
3120 
3121 /* Info for udev, that this is a virtual tunnel endpoint */
3122 static struct device_type vxlan_type = {
3123     .name = "vxlan",
3124 };
3125 
3126 /* Calls the ndo_udp_tunnel_add of the caller in order to
3127  * supply the listening VXLAN udp ports. Callers are expected
3128  * to implement the ndo_udp_tunnel_add.
3129  */
3130 static void vxlan_offload_rx_ports(struct net_device *dev, bool push)
3131 {
3132     struct vxlan_sock *vs;
3133     struct net *net = dev_net(dev);
3134     struct vxlan_net *vn = net_generic(net, vxlan_net_id);
3135     unsigned int i;
3136 
3137     spin_lock(&vn->sock_lock);
3138     for (i = 0; i < PORT_HASH_SIZE; ++i) {
3139         hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
3140             unsigned short type;
3141 
3142             if (vs->flags & VXLAN_F_GPE)
3143                 type = UDP_TUNNEL_TYPE_VXLAN_GPE;
3144             else
3145                 type = UDP_TUNNEL_TYPE_VXLAN;
3146 
3147             if (push)
3148                 udp_tunnel_push_rx_port(dev, vs->sock, type);
3149             else
3150                 udp_tunnel_drop_rx_port(dev, vs->sock, type);
3151         }
3152     }
3153     spin_unlock(&vn->sock_lock);
3154 }
3155 
3156 /* Initialize the device structure. */
3157 static void vxlan_setup(struct net_device *dev)
3158 {
3159     struct vxlan_dev *vxlan = netdev_priv(dev);
3160     unsigned int h;
3161 
3162     eth_hw_addr_random(dev);
3163     ether_setup(dev);
3164 
3165     dev->needs_free_netdev = true;
3166     SET_NETDEV_DEVTYPE(dev, &vxlan_type);
3167 
3168     dev->features   |= NETIF_F_LLTX;
3169     dev->features   |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
3170     dev->features   |= NETIF_F_RXCSUM;
3171     dev->features   |= NETIF_F_GSO_SOFTWARE;
3172 
3173     dev->vlan_features = dev->features;
3174     dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
3175     dev->hw_features |= NETIF_F_RXCSUM;
3176     dev->hw_features |= NETIF_F_GSO_SOFTWARE;
3177     netif_keep_dst(dev);
3178     dev->priv_flags |= IFF_NO_QUEUE | IFF_CHANGE_PROTO_DOWN;
3179 
3180     /* MTU range: 68 - 65535 */
3181     dev->min_mtu = ETH_MIN_MTU;
3182     dev->max_mtu = ETH_MAX_MTU;
3183 
3184     INIT_LIST_HEAD(&vxlan->next);
3185 
3186     timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE);
3187 
3188     vxlan->dev = dev;
3189 
3190     for (h = 0; h < FDB_HASH_SIZE; ++h) {
3191         spin_lock_init(&vxlan->hash_lock[h]);
3192         INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
3193     }
3194 }
3195 
3196 static void vxlan_ether_setup(struct net_device *dev)
3197 {
3198     dev->priv_flags &= ~IFF_TX_SKB_SHARING;
3199     dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
3200     dev->netdev_ops = &vxlan_netdev_ether_ops;
3201 }
3202 
3203 static void vxlan_raw_setup(struct net_device *dev)
3204 {
3205     dev->header_ops = NULL;
3206     dev->type = ARPHRD_NONE;
3207     dev->hard_header_len = 0;
3208     dev->addr_len = 0;
3209     dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
3210     dev->netdev_ops = &vxlan_netdev_raw_ops;
3211 }
3212 
3213 static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
3214     [IFLA_VXLAN_ID]     = { .type = NLA_U32 },
3215     [IFLA_VXLAN_GROUP]  = { .len = sizeof_field(struct iphdr, daddr) },
3216     [IFLA_VXLAN_GROUP6] = { .len = sizeof(struct in6_addr) },
3217     [IFLA_VXLAN_LINK]   = { .type = NLA_U32 },
3218     [IFLA_VXLAN_LOCAL]  = { .len = sizeof_field(struct iphdr, saddr) },
3219     [IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) },
3220     [IFLA_VXLAN_TOS]    = { .type = NLA_U8 },
3221     [IFLA_VXLAN_TTL]    = { .type = NLA_U8 },
3222     [IFLA_VXLAN_LABEL]  = { .type = NLA_U32 },
3223     [IFLA_VXLAN_LEARNING]   = { .type = NLA_U8 },
3224     [IFLA_VXLAN_AGEING] = { .type = NLA_U32 },
3225     [IFLA_VXLAN_LIMIT]  = { .type = NLA_U32 },
3226     [IFLA_VXLAN_PORT_RANGE] = { .len  = sizeof(struct ifla_vxlan_port_range) },
3227     [IFLA_VXLAN_PROXY]  = { .type = NLA_U8 },
3228     [IFLA_VXLAN_RSC]    = { .type = NLA_U8 },
3229     [IFLA_VXLAN_L2MISS] = { .type = NLA_U8 },
3230     [IFLA_VXLAN_L3MISS] = { .type = NLA_U8 },
3231     [IFLA_VXLAN_COLLECT_METADATA]   = { .type = NLA_U8 },
3232     [IFLA_VXLAN_PORT]   = { .type = NLA_U16 },
3233     [IFLA_VXLAN_UDP_CSUM]   = { .type = NLA_U8 },
3234     [IFLA_VXLAN_UDP_ZERO_CSUM6_TX]  = { .type = NLA_U8 },
3235     [IFLA_VXLAN_UDP_ZERO_CSUM6_RX]  = { .type = NLA_U8 },
3236     [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 },
3237     [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
3238     [IFLA_VXLAN_GBP]    = { .type = NLA_FLAG, },
3239     [IFLA_VXLAN_GPE]    = { .type = NLA_FLAG, },
3240     [IFLA_VXLAN_REMCSUM_NOPARTIAL]  = { .type = NLA_FLAG },
3241     [IFLA_VXLAN_TTL_INHERIT]    = { .type = NLA_FLAG },
3242     [IFLA_VXLAN_DF]     = { .type = NLA_U8 },
3243     [IFLA_VXLAN_VNIFILTER]  = { .type = NLA_U8 },
3244 };
3245 
3246 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
3247               struct netlink_ext_ack *extack)
3248 {
3249     if (tb[IFLA_ADDRESS]) {
3250         if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
3251             NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
3252                         "Provided link layer address is not Ethernet");
3253             return -EINVAL;
3254         }
3255 
3256         if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
3257             NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
3258                         "Provided Ethernet address is not unicast");
3259             return -EADDRNOTAVAIL;
3260         }
3261     }
3262 
3263     if (tb[IFLA_MTU]) {
3264         u32 mtu = nla_get_u32(tb[IFLA_MTU]);
3265 
3266         if (mtu < ETH_MIN_MTU || mtu > ETH_MAX_MTU) {
3267             NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
3268                         "MTU must be between 68 and 65535");
3269             return -EINVAL;
3270         }
3271     }
3272 
3273     if (!data) {
3274         NL_SET_ERR_MSG(extack,
3275                    "Required attributes not provided to perform the operation");
3276         return -EINVAL;
3277     }
3278 
3279     if (data[IFLA_VXLAN_ID]) {
3280         u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);
3281 
3282         if (id >= VXLAN_N_VID) {
3283             NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_ID],
3284                         "VXLAN ID must be lower than 16777216");
3285             return -ERANGE;
3286         }
3287     }
3288 
3289     if (data[IFLA_VXLAN_PORT_RANGE]) {
3290         const struct ifla_vxlan_port_range *p
3291             = nla_data(data[IFLA_VXLAN_PORT_RANGE]);
3292 
3293         if (ntohs(p->high) < ntohs(p->low)) {
3294             NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_PORT_RANGE],
3295                         "Invalid source port range");
3296             return -EINVAL;
3297         }
3298     }
3299 
3300     if (data[IFLA_VXLAN_DF]) {
3301         enum ifla_vxlan_df df = nla_get_u8(data[IFLA_VXLAN_DF]);
3302 
3303         if (df < 0 || df > VXLAN_DF_MAX) {
3304             NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_DF],
3305                         "Invalid DF attribute");
3306             return -EINVAL;
3307         }
3308     }
3309 
3310     return 0;
3311 }
3312 
3313 static void vxlan_get_drvinfo(struct net_device *netdev,
3314                   struct ethtool_drvinfo *drvinfo)
3315 {
3316     strlcpy(drvinfo->version, VXLAN_VERSION, sizeof(drvinfo->version));
3317     strlcpy(drvinfo->driver, "vxlan", sizeof(drvinfo->driver));
3318 }
3319 
3320 static int vxlan_get_link_ksettings(struct net_device *dev,
3321                     struct ethtool_link_ksettings *cmd)
3322 {
3323     struct vxlan_dev *vxlan = netdev_priv(dev);
3324     struct vxlan_rdst *dst = &vxlan->default_dst;
3325     struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
3326                              dst->remote_ifindex);
3327 
3328     if (!lowerdev) {
3329         cmd->base.duplex = DUPLEX_UNKNOWN;
3330         cmd->base.port = PORT_OTHER;
3331         cmd->base.speed = SPEED_UNKNOWN;
3332 
3333         return 0;
3334     }
3335 
3336     return __ethtool_get_link_ksettings(lowerdev, cmd);
3337 }
3338 
3339 static const struct ethtool_ops vxlan_ethtool_ops = {
3340     .get_drvinfo        = vxlan_get_drvinfo,
3341     .get_link       = ethtool_op_get_link,
3342     .get_link_ksettings = vxlan_get_link_ksettings,
3343 };
3344 
3345 static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
3346                     __be16 port, u32 flags, int ifindex)
3347 {
3348     struct socket *sock;
3349     struct udp_port_cfg udp_conf;
3350     int err;
3351 
3352     memset(&udp_conf, 0, sizeof(udp_conf));
3353 
3354     if (ipv6) {
3355         udp_conf.family = AF_INET6;
3356         udp_conf.use_udp6_rx_checksums =
3357             !(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
3358         udp_conf.ipv6_v6only = 1;
3359     } else {
3360         udp_conf.family = AF_INET;
3361     }
3362 
3363     udp_conf.local_udp_port = port;
3364     udp_conf.bind_ifindex = ifindex;
3365 
3366     /* Open UDP socket */
3367     err = udp_sock_create(net, &udp_conf, &sock);
3368     if (err < 0)
3369         return ERR_PTR(err);
3370 
3371     udp_allow_gso(sock->sk);
3372     return sock;
3373 }
3374 
3375 /* Create new listen socket if needed */
3376 static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
3377                           __be16 port, u32 flags,
3378                           int ifindex)
3379 {
3380     struct vxlan_net *vn = net_generic(net, vxlan_net_id);
3381     struct vxlan_sock *vs;
3382     struct socket *sock;
3383     unsigned int h;
3384     struct udp_tunnel_sock_cfg tunnel_cfg;
3385 
3386     vs = kzalloc(sizeof(*vs), GFP_KERNEL);
3387     if (!vs)
3388         return ERR_PTR(-ENOMEM);
3389 
3390     for (h = 0; h < VNI_HASH_SIZE; ++h)
3391         INIT_HLIST_HEAD(&vs->vni_list[h]);
3392 
3393     sock = vxlan_create_sock(net, ipv6, port, flags, ifindex);
3394     if (IS_ERR(sock)) {
3395         kfree(vs);
3396         return ERR_CAST(sock);
3397     }
3398 
3399     vs->sock = sock;
3400     refcount_set(&vs->refcnt, 1);
3401     vs->flags = (flags & VXLAN_F_RCV_FLAGS);
3402 
3403     spin_lock(&vn->sock_lock);
3404     hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
3405     udp_tunnel_notify_add_rx_port(sock,
3406                       (vs->flags & VXLAN_F_GPE) ?
3407                       UDP_TUNNEL_TYPE_VXLAN_GPE :
3408                       UDP_TUNNEL_TYPE_VXLAN);
3409     spin_unlock(&vn->sock_lock);
3410 
3411     /* Mark socket as an encapsulation socket. */
3412     memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
3413     tunnel_cfg.sk_user_data = vs;
3414     tunnel_cfg.encap_type = 1;
3415     tunnel_cfg.encap_rcv = vxlan_rcv;
3416     tunnel_cfg.encap_err_lookup = vxlan_err_lookup;
3417     tunnel_cfg.encap_destroy = NULL;
3418     tunnel_cfg.gro_receive = vxlan_gro_receive;
3419     tunnel_cfg.gro_complete = vxlan_gro_complete;
3420 
3421     setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
3422 
3423     return vs;
3424 }
3425 
3426 static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
3427 {
3428     struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
3429     bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA;
3430     struct vxlan_sock *vs = NULL;
3431     struct vxlan_dev_node *node;
3432     int l3mdev_index = 0;
3433 
3434     if (vxlan->cfg.remote_ifindex)
3435         l3mdev_index = l3mdev_master_upper_ifindex_by_index(
3436             vxlan->net, vxlan->cfg.remote_ifindex);
3437 
3438     if (!vxlan->cfg.no_share) {
3439         spin_lock(&vn->sock_lock);
3440         vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
3441                      vxlan->cfg.dst_port, vxlan->cfg.flags,
3442                      l3mdev_index);
3443         if (vs && !refcount_inc_not_zero(&vs->refcnt)) {
3444             spin_unlock(&vn->sock_lock);
3445             return -EBUSY;
3446         }
3447         spin_unlock(&vn->sock_lock);
3448     }
3449     if (!vs)
3450         vs = vxlan_socket_create(vxlan->net, ipv6,
3451                      vxlan->cfg.dst_port, vxlan->cfg.flags,
3452                      l3mdev_index);
3453     if (IS_ERR(vs))
3454         return PTR_ERR(vs);
3455 #if IS_ENABLED(CONFIG_IPV6)
3456     if (ipv6) {
3457         rcu_assign_pointer(vxlan->vn6_sock, vs);
3458         node = &vxlan->hlist6;
3459     } else
3460 #endif
3461     {
3462         rcu_assign_pointer(vxlan->vn4_sock, vs);
3463         node = &vxlan->hlist4;
3464     }
3465 
3466     if (metadata && (vxlan->cfg.flags & VXLAN_F_VNIFILTER))
3467         vxlan_vs_add_vnigrp(vxlan, vs, ipv6);
3468     else
3469         vxlan_vs_add_dev(vs, vxlan, node);
3470 
3471     return 0;
3472 }
3473 
3474 static int vxlan_sock_add(struct vxlan_dev *vxlan)
3475 {
3476     bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA;
3477     bool ipv6 = vxlan->cfg.flags & VXLAN_F_IPV6 || metadata;
3478     bool ipv4 = !ipv6 || metadata;
3479     int ret = 0;
3480 
3481     RCU_INIT_POINTER(vxlan->vn4_sock, NULL);
3482 #if IS_ENABLED(CONFIG_IPV6)
3483     RCU_INIT_POINTER(vxlan->vn6_sock, NULL);
3484     if (ipv6) {
3485         ret = __vxlan_sock_add(vxlan, true);
3486         if (ret < 0 && ret != -EAFNOSUPPORT)
3487             ipv4 = false;
3488     }
3489 #endif
3490     if (ipv4)
3491         ret = __vxlan_sock_add(vxlan, false);
3492     if (ret < 0)
3493         vxlan_sock_release(vxlan);
3494     return ret;
3495 }
3496 
3497 int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan,
3498              struct vxlan_config *conf, __be32 vni)
3499 {
3500     struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
3501     struct vxlan_dev *tmp;
3502 
3503     list_for_each_entry(tmp, &vn->vxlan_list, next) {
3504         if (tmp == vxlan)
3505             continue;
3506         if (tmp->cfg.flags & VXLAN_F_VNIFILTER) {
3507             if (!vxlan_vnifilter_lookup(tmp, vni))
3508                 continue;
3509         } else if (tmp->cfg.vni != vni) {
3510             continue;
3511         }
3512         if (tmp->cfg.dst_port != conf->dst_port)
3513             continue;
3514         if ((tmp->cfg.flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)) !=
3515             (conf->flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)))
3516             continue;
3517 
3518         if ((conf->flags & VXLAN_F_IPV6_LINKLOCAL) &&
3519             tmp->cfg.remote_ifindex != conf->remote_ifindex)
3520             continue;
3521 
3522         return -EEXIST;
3523     }
3524 
3525     return 0;
3526 }
3527 
/* Validate a VXLAN configuration and fill in its defaults.
 *
 * @src_net: namespace in which the lower device and VNI clashes are looked up
 * @conf:    configuration to check; modified in place (address families,
 *           VXLAN_F_IPV6 / VXLAN_F_IPV6_LINKLOCAL flags, default destination
 *           port and default ageing interval are filled in)
 * @lower:   set to the lower device named by conf->remote_ifindex, or to
 *           NULL when no interface is configured
 * @old:     device being (re)configured; excluded from the VNI clash check
 * @extack:  extended ack that receives the error message
 *
 * Returns 0 when the configuration is acceptable, otherwise a negative
 * errno. Note that @conf may already have been partially updated when an
 * error is returned.
 */
static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
				 struct net_device **lower,
				 struct vxlan_dev *old,
				 struct netlink_ext_ack *extack)
{
	bool use_ipv6 = false;

	if (conf->flags & VXLAN_F_GPE) {
		/* For now, allow GPE only together with
		 * COLLECT_METADATA. This can be relaxed later; in such
		 * case, the other side of the PtP link will have to be
		 * provided.
		 */
		if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) ||
		    !(conf->flags & VXLAN_F_COLLECT_METADATA)) {
			NL_SET_ERR_MSG(extack,
				       "VXLAN GPE does not support this combination of attributes");
			return -EINVAL;
		}
	}

	/* Fill in whichever address family was left unspecified so that the
	 * local and remote addresses can be compared below.
	 */
	if (!conf->remote_ip.sa.sa_family && !conf->saddr.sa.sa_family) {
		/* Unless IPv6 is explicitly requested, assume IPv4 */
		conf->remote_ip.sa.sa_family = AF_INET;
		conf->saddr.sa.sa_family = AF_INET;
	} else if (!conf->remote_ip.sa.sa_family) {
		conf->remote_ip.sa.sa_family = conf->saddr.sa.sa_family;
	} else if (!conf->saddr.sa.sa_family) {
		conf->saddr.sa.sa_family = conf->remote_ip.sa.sa_family;
	}

	if (conf->saddr.sa.sa_family != conf->remote_ip.sa.sa_family) {
		NL_SET_ERR_MSG(extack,
			       "Local and remote address must be from the same family");
		return -EINVAL;
	}

	if (vxlan_addr_multicast(&conf->saddr)) {
		NL_SET_ERR_MSG(extack, "Local address cannot be multicast");
		return -EINVAL;
	}

	if (conf->saddr.sa.sa_family == AF_INET6) {
		if (!IS_ENABLED(CONFIG_IPV6)) {
			NL_SET_ERR_MSG(extack,
				       "IPv6 support not enabled in the kernel");
			return -EPFNOSUPPORT;
		}
		use_ipv6 = true;
		conf->flags |= VXLAN_F_IPV6;

		/* The address scope checks are skipped for metadata devices */
		if (!(conf->flags & VXLAN_F_COLLECT_METADATA)) {
			int local_type =
				ipv6_addr_type(&conf->saddr.sin6.sin6_addr);
			int remote_type =
				ipv6_addr_type(&conf->remote_ip.sin6.sin6_addr);

			if (local_type & IPV6_ADDR_LINKLOCAL) {
				/* A link-local local address needs a
				 * link-local or unspecified remote.
				 */
				if (!(remote_type & IPV6_ADDR_LINKLOCAL) &&
				    (remote_type != IPV6_ADDR_ANY)) {
					NL_SET_ERR_MSG(extack,
						       "Invalid combination of local and remote address scopes");
					return -EINVAL;
				}

				conf->flags |= VXLAN_F_IPV6_LINKLOCAL;
			} else {
				/* A unicast link-local remote is rejected
				 * when the local address is not link-local.
				 */
				if (remote_type ==
				    (IPV6_ADDR_UNICAST | IPV6_ADDR_LINKLOCAL)) {
					NL_SET_ERR_MSG(extack,
						       "Invalid combination of local and remote address scopes");
					return -EINVAL;
				}

				conf->flags &= ~VXLAN_F_IPV6_LINKLOCAL;
			}
		}
	}

	if (conf->label && !use_ipv6) {
		NL_SET_ERR_MSG(extack,
			       "Label attribute only applies to IPv6 VXLAN devices");
		return -EINVAL;
	}

	if (conf->remote_ifindex) {
		struct net_device *lowerdev;

		lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
		if (!lowerdev) {
			NL_SET_ERR_MSG(extack,
				       "Invalid local interface, device not found");
			return -ENODEV;
		}

#if IS_ENABLED(CONFIG_IPV6)
		if (use_ipv6) {
			struct inet6_dev *idev = __in6_dev_get(lowerdev);

			if (idev && idev->cnf.disable_ipv6) {
				NL_SET_ERR_MSG(extack,
					       "IPv6 support disabled by administrator");
				return -EPERM;
			}
		}
#endif

		*lower = lowerdev;
	} else {
		/* Without a bound interface, multicast remotes and
		 * link-local addresses cannot be used.
		 */
		if (vxlan_addr_multicast(&conf->remote_ip)) {
			NL_SET_ERR_MSG(extack,
				       "Local interface required for multicast remote destination");

			return -EINVAL;
		}

#if IS_ENABLED(CONFIG_IPV6)
		if (conf->flags & VXLAN_F_IPV6_LINKLOCAL) {
			NL_SET_ERR_MSG(extack,
				       "Local interface required for link-local local/remote addresses");
			return -EINVAL;
		}
#endif

		*lower = NULL;
	}

	/* Default destination port: the IANA GPE port for GPE devices,
	 * otherwise the udp_port module parameter.
	 */
	if (!conf->dst_port) {
		if (conf->flags & VXLAN_F_GPE)
			conf->dst_port = htons(IANA_VXLAN_GPE_UDP_PORT);
		else
			conf->dst_port = htons(vxlan_port);
	}

	if (!conf->age_interval)
		conf->age_interval = FDB_AGE_DEFAULT;

	/* Must come last: the clash check compares the final port and flags */
	if (vxlan_vni_in_use(src_net, old, conf, conf->vni)) {
		NL_SET_ERR_MSG(extack,
			       "A VXLAN device with the specified VNI already exists");
		return -EEXIST;
	}

	return 0;
}
3673 
3674 static void vxlan_config_apply(struct net_device *dev,
3675                    struct vxlan_config *conf,
3676                    struct net_device *lowerdev,
3677                    struct net *src_net,
3678                    bool changelink)
3679 {
3680     struct vxlan_dev *vxlan = netdev_priv(dev);
3681     struct vxlan_rdst *dst = &vxlan->default_dst;
3682     unsigned short needed_headroom = ETH_HLEN;
3683     bool use_ipv6 = !!(conf->flags & VXLAN_F_IPV6);
3684     int max_mtu = ETH_MAX_MTU;
3685 
3686     if (!changelink) {
3687         if (conf->flags & VXLAN_F_GPE)
3688             vxlan_raw_setup(dev);
3689         else
3690             vxlan_ether_setup(dev);
3691 
3692         if (conf->mtu)
3693             dev->mtu = conf->mtu;
3694 
3695         vxlan->net = src_net;
3696     }
3697 
3698     dst->remote_vni = conf->vni;
3699 
3700     memcpy(&dst->remote_ip, &conf->remote_ip, sizeof(conf->remote_ip));
3701 
3702     if (lowerdev) {
3703         dst->remote_ifindex = conf->remote_ifindex;
3704 
3705         netif_inherit_tso_max(dev, lowerdev);
3706 
3707         needed_headroom = lowerdev->hard_header_len;
3708         needed_headroom += lowerdev->needed_headroom;
3709 
3710         dev->needed_tailroom = lowerdev->needed_tailroom;
3711 
3712         max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
3713                        VXLAN_HEADROOM);
3714         if (max_mtu < ETH_MIN_MTU)
3715             max_mtu = ETH_MIN_MTU;
3716 
3717         if (!changelink && !conf->mtu)
3718             dev->mtu = max_mtu;
3719     }
3720 
3721     if (dev->mtu > max_mtu)
3722         dev->mtu = max_mtu;
3723 
3724     if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
3725         needed_headroom += VXLAN6_HEADROOM;
3726     else
3727         needed_headroom += VXLAN_HEADROOM;
3728     dev->needed_headroom = needed_headroom;
3729 
3730     memcpy(&vxlan->cfg, conf, sizeof(*conf));
3731 }
3732 
3733 static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
3734                    struct vxlan_config *conf, bool changelink,
3735                    struct netlink_ext_ack *extack)
3736 {
3737     struct vxlan_dev *vxlan = netdev_priv(dev);
3738     struct net_device *lowerdev;
3739     int ret;
3740 
3741     ret = vxlan_config_validate(src_net, conf, &lowerdev, vxlan, extack);
3742     if (ret)
3743         return ret;
3744 
3745     vxlan_config_apply(dev, conf, lowerdev, src_net, changelink);
3746 
3747     return 0;
3748 }
3749 
3750 static int __vxlan_dev_create(struct net *net, struct net_device *dev,
3751                   struct vxlan_config *conf,
3752                   struct netlink_ext_ack *extack)
3753 {
3754     struct vxlan_net *vn = net_generic(net, vxlan_net_id);
3755     struct vxlan_dev *vxlan = netdev_priv(dev);
3756     struct net_device *remote_dev = NULL;
3757     struct vxlan_fdb *f = NULL;
3758     bool unregister = false;
3759     struct vxlan_rdst *dst;
3760     int err;
3761 
3762     dst = &vxlan->default_dst;
3763     err = vxlan_dev_configure(net, dev, conf, false, extack);
3764     if (err)
3765         return err;
3766 
3767     dev->ethtool_ops = &vxlan_ethtool_ops;
3768 
3769     /* create an fdb entry for a valid default destination */
3770     if (!vxlan_addr_any(&dst->remote_ip)) {
3771         err = vxlan_fdb_create(vxlan, all_zeros_mac,
3772                        &dst->remote_ip,
3773                        NUD_REACHABLE | NUD_PERMANENT,
3774                        vxlan->cfg.dst_port,
3775                        dst->remote_vni,
3776                        dst->remote_vni,
3777                        dst->remote_ifindex,
3778                        NTF_SELF, 0, &f, extack);
3779         if (err)
3780             return err;
3781     }
3782 
3783     err = register_netdevice(dev);
3784     if (err)
3785         goto errout;
3786     unregister = true;
3787 
3788     if (dst->remote_ifindex) {
3789         remote_dev = __dev_get_by_index(net, dst->remote_ifindex);
3790         if (!remote_dev) {
3791             err = -ENODEV;
3792             goto errout;
3793         }
3794 
3795         err = netdev_upper_dev_link(remote_dev, dev, extack);
3796         if (err)
3797             goto errout;
3798     }
3799 
3800     err = rtnl_configure_link(dev, NULL);
3801     if (err < 0)
3802         goto unlink;
3803 
3804     if (f) {
3805         vxlan_fdb_insert(vxlan, all_zeros_mac, dst->remote_vni, f);
3806 
3807         /* notify default fdb entry */
3808         err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f),
3809                        RTM_NEWNEIGH, true, extack);
3810         if (err) {
3811             vxlan_fdb_destroy(vxlan, f, false, false);
3812             if (remote_dev)
3813                 netdev_upper_dev_unlink(remote_dev, dev);
3814             goto unregister;
3815         }
3816     }
3817 
3818     list_add(&vxlan->next, &vn->vxlan_list);
3819     if (remote_dev)
3820         dst->remote_dev = remote_dev;
3821     return 0;
3822 unlink:
3823     if (remote_dev)
3824         netdev_upper_dev_unlink(remote_dev, dev);
3825 errout:
3826     /* unregister_netdevice() destroys the default FDB entry with deletion
3827      * notification. But the addition notification was not sent yet, so
3828      * destroy the entry by hand here.
3829      */
3830     if (f)
3831         __vxlan_fdb_free(f);
3832 unregister:
3833     if (unregister)
3834         unregister_netdevice(dev);
3835     return err;
3836 }
3837 
3838 /* Set/clear flags based on attribute */
3839 static int vxlan_nl2flag(struct vxlan_config *conf, struct nlattr *tb[],
3840               int attrtype, unsigned long mask, bool changelink,
3841               bool changelink_supported,
3842               struct netlink_ext_ack *extack)
3843 {
3844     unsigned long flags;
3845 
3846     if (!tb[attrtype])
3847         return 0;
3848 
3849     if (changelink && !changelink_supported) {
3850         vxlan_flag_attr_error(attrtype, extack);
3851         return -EOPNOTSUPP;
3852     }
3853 
3854     if (vxlan_policy[attrtype].type == NLA_FLAG)
3855         flags = conf->flags | mask;
3856     else if (nla_get_u8(tb[attrtype]))
3857         flags = conf->flags | mask;
3858     else
3859         flags = conf->flags & ~mask;
3860 
3861     conf->flags = flags;
3862 
3863     return 0;
3864 }
3865 
3866 static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
3867              struct net_device *dev, struct vxlan_config *conf,
3868              bool changelink, struct netlink_ext_ack *extack)
3869 {
3870     struct vxlan_dev *vxlan = netdev_priv(dev);
3871     int err = 0;
3872 
3873     memset(conf, 0, sizeof(*conf));
3874 
3875     /* if changelink operation, start with old existing cfg */
3876     if (changelink)
3877         memcpy(conf, &vxlan->cfg, sizeof(*conf));
3878 
3879     if (data[IFLA_VXLAN_ID]) {
3880         __be32 vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
3881 
3882         if (changelink && (vni != conf->vni)) {
3883             NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_ID], "Cannot change VNI");
3884             return -EOPNOTSUPP;
3885         }
3886         conf->vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
3887     }
3888 
3889     if (data[IFLA_VXLAN_GROUP]) {
3890         if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) {
3891             NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP], "New group address family does not match old group");
3892             return -EOPNOTSUPP;
3893         }
3894 
3895         conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
3896         conf->remote_ip.sa.sa_family = AF_INET;
3897     } else if (data[IFLA_VXLAN_GROUP6]) {
3898         if (!IS_ENABLED(CONFIG_IPV6)) {
3899             NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP6], "IPv6 support not enabled in the kernel");
3900             return -EPFNOSUPPORT;
3901         }
3902 
3903         if (changelink && (conf->remote_ip.sa.sa_family != AF_INET6)) {
3904             NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP6], "New group address family does not match old group");
3905             return -EOPNOTSUPP;
3906         }
3907 
3908         conf->remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);
3909         conf->remote_ip.sa.sa_family = AF_INET6;
3910     }
3911 
3912     if (data[IFLA_VXLAN_LOCAL]) {
3913         if (changelink && (conf->saddr.sa.sa_family != AF_INET)) {
3914             NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL], "New local address family does not match old");
3915             return -EOPNOTSUPP;
3916         }
3917 
3918         conf->saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
3919         conf->saddr.sa.sa_family = AF_INET;
3920     } else if (data[IFLA_VXLAN_LOCAL6]) {
3921         if (!IS_ENABLED(CONFIG_IPV6)) {
3922             NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL6], "IPv6 support not enabled in the kernel");
3923             return -EPFNOSUPPORT;
3924         }
3925 
3926         if (changelink && (conf->saddr.sa.sa_family != AF_INET6)) {
3927             NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL6], "New local address family does not match old");
3928             return -EOPNOTSUPP;
3929         }
3930 
3931         /* TODO: respect scope id */
3932         conf->saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);
3933         conf->saddr.sa.sa_family = AF_INET6;
3934     }
3935 
3936     if (data[IFLA_VXLAN_LINK])
3937         conf->remote_ifindex = nla_get_u32(data[IFLA_VXLAN_LINK]);
3938 
3939     if (data[IFLA_VXLAN_TOS])
3940         conf->tos  = nla_get_u8(data[IFLA_VXLAN_TOS]);
3941 
3942     if (data[IFLA_VXLAN_TTL])
3943         conf->ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
3944 
3945     if (data[IFLA_VXLAN_TTL_INHERIT]) {
3946         err = vxlan_nl2flag(conf, data, IFLA_VXLAN_TTL_INHERIT,
3947                     VXLAN_F_TTL_INHERIT, changelink, false,
3948                     extack);
3949         if (err)
3950             return err;
3951 
3952     }
3953 
3954     if (data[IFLA_VXLAN_LABEL])
3955         conf->label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
3956                  IPV6_FLOWLABEL_MASK;
3957 
3958     if (data[IFLA_VXLAN_LEARNING]) {
3959         err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LEARNING,
3960                     VXLAN_F_LEARN, changelink, true,
3961                     extack);
3962         if (err)
3963             return err;
3964     } else if (!changelink) {
3965         /* default to learn on a new device */
3966         conf->flags |= VXLAN_F_LEARN;
3967     }
3968 
3969     if (data[IFLA_VXLAN_AGEING])
3970         conf->age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]);
3971 
3972     if (data[IFLA_VXLAN_PROXY]) {
3973         err = vxlan_nl2flag(conf, data, IFLA_VXLAN_PROXY,
3974                     VXLAN_F_PROXY, changelink, false,
3975                     extack);
3976         if (err)
3977             return err;
3978     }
3979 
3980     if (data[IFLA_VXLAN_RSC]) {
3981         err = vxlan_nl2flag(conf, data, IFLA_VXLAN_RSC,
3982                     VXLAN_F_RSC, changelink, false,
3983                     extack);
3984         if (err)
3985             return err;
3986     }
3987 
3988     if (data[IFLA_VXLAN_L2MISS]) {
3989         err = vxlan_nl2flag(conf, data, IFLA_VXLAN_L2MISS,
3990                     VXLAN_F_L2MISS, changelink, false,
3991                     extack);
3992         if (err)
3993             return err;
3994     }
3995 
3996     if (data[IFLA_VXLAN_L3MISS]) {
3997         err = vxlan_nl2flag(conf, data, IFLA_VXLAN_L3MISS,
3998                     VXLAN_F_L3MISS, changelink, false,
3999                     extack);
4000         if (err)
4001             return err;
4002     }
4003 
4004     if (data[IFLA_VXLAN_LIMIT]) {
4005         if (changelink) {
4006             NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LIMIT],
4007                         "Cannot change limit");
4008             return -EOPNOTSUPP;
4009         }
4010         conf->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]);
4011     }
4012 
4013     if (data[IFLA_VXLAN_COLLECT_METADATA]) {
4014         err = vxlan_nl2flag(conf, data, IFLA_VXLAN_COLLECT_METADATA,
4015                     VXLAN_F_COLLECT_METADATA, changelink, false,
4016                     extack);
4017         if (err)
4018             return err;
4019     }
4020 
4021     if (data[IFLA_VXLAN_PORT_RANGE]) {
4022         if (!changelink) {
4023             const struct ifla_vxlan_port_range *p
4024                 = nla_data(data[IFLA_VXLAN_PORT_RANGE]);
4025             conf->port_min = ntohs(p->low);
4026             conf->port_max = ntohs(p->high);
4027         } else {
4028             NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT_RANGE],
4029                         "Cannot change port range");
4030             return -EOPNOTSUPP;
4031         }
4032     }
4033 
4034     if (data[IFLA_VXLAN_PORT]) {
4035         if (changelink) {
4036             NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT],
4037                         "Cannot change port");
4038             return -EOPNOTSUPP;
4039         }
4040         conf->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
4041     }
4042 
4043     if (data[IFLA_VXLAN_UDP_CSUM]) {
4044         if (changelink) {
4045             NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_UDP_CSUM],
4046                         "Cannot change UDP_CSUM flag");
4047             return -EOPNOTSUPP;
4048         }
4049         if (!nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
4050             conf->flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
4051     }
4052 
4053     if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]) {
4054         err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
4055                     VXLAN_F_UDP_ZERO_CSUM6_TX, changelink,
4056                     false, extack);
4057         if (err)
4058             return err;
4059     }
4060 
4061     if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]) {
4062         err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
4063                     VXLAN_F_UDP_ZERO_CSUM6_RX, changelink,
4064                     false, extack);
4065         if (err)
4066             return err;
4067     }
4068 
4069     if (data[IFLA_VXLAN_REMCSUM_TX]) {
4070         err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_TX,
4071                     VXLAN_F_REMCSUM_TX, changelink, false,
4072                     extack);
4073         if (err)
4074             return err;
4075     }
4076 
4077     if (data[IFLA_VXLAN_REMCSUM_RX]) {
4078         err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_RX,
4079                     VXLAN_F_REMCSUM_RX, changelink, false,
4080                     extack);
4081         if (err)
4082             return err;
4083     }
4084 
4085     if (data[IFLA_VXLAN_GBP]) {
4086         err = vxlan_nl2flag(conf, data, IFLA_VXLAN_GBP,
4087                     VXLAN_F_GBP, changelink, false, extack);
4088         if (err)
4089             return err;
4090     }
4091 
4092     if (data[IFLA_VXLAN_GPE]) {
4093         err = vxlan_nl2flag(conf, data, IFLA_VXLAN_GPE,
4094                     VXLAN_F_GPE, changelink, false,
4095                     extack);
4096         if (err)
4097             return err;
4098     }
4099 
4100     if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) {
4101         err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_NOPARTIAL,
4102                     VXLAN_F_REMCSUM_NOPARTIAL, changelink,
4103                     false, extack);
4104         if (err)
4105             return err;
4106     }
4107 
4108     if (tb[IFLA_MTU]) {
4109         if (changelink) {
4110             NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
4111                         "Cannot change mtu");
4112             return -EOPNOTSUPP;
4113         }
4114         conf->mtu = nla_get_u32(tb[IFLA_MTU]);
4115     }
4116 
4117     if (data[IFLA_VXLAN_DF])
4118         conf->df = nla_get_u8(data[IFLA_VXLAN_DF]);
4119 
4120     if (data[IFLA_VXLAN_VNIFILTER]) {
4121         err = vxlan_nl2flag(conf, data, IFLA_VXLAN_VNIFILTER,
4122                     VXLAN_F_VNIFILTER, changelink, false,
4123                     extack);
4124         if (err)
4125             return err;
4126 
4127         if ((conf->flags & VXLAN_F_VNIFILTER) &&
4128             !(conf->flags & VXLAN_F_COLLECT_METADATA)) {
4129             NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_VNIFILTER],
4130                         "vxlan vnifilter only valid in collect metadata mode");
4131             return -EINVAL;
4132         }
4133     }
4134 
4135     return 0;
4136 }
4137 
4138 static int vxlan_newlink(struct net *src_net, struct net_device *dev,
4139              struct nlattr *tb[], struct nlattr *data[],
4140              struct netlink_ext_ack *extack)
4141 {
4142     struct vxlan_config conf;
4143     int err;
4144 
4145     err = vxlan_nl2conf(tb, data, dev, &conf, false, extack);
4146     if (err)
4147         return err;
4148 
4149     return __vxlan_dev_create(src_net, dev, &conf, extack);
4150 }
4151 
4152 static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
4153                 struct nlattr *data[],
4154                 struct netlink_ext_ack *extack)
4155 {
4156     struct vxlan_dev *vxlan = netdev_priv(dev);
4157     struct net_device *lowerdev;
4158     struct vxlan_config conf;
4159     struct vxlan_rdst *dst;
4160     int err;
4161 
4162     dst = &vxlan->default_dst;
4163     err = vxlan_nl2conf(tb, data, dev, &conf, true, extack);
4164     if (err)
4165         return err;
4166 
4167     err = vxlan_config_validate(vxlan->net, &conf, &lowerdev,
4168                     vxlan, extack);
4169     if (err)
4170         return err;
4171 
4172     if (dst->remote_dev == lowerdev)
4173         lowerdev = NULL;
4174 
4175     err = netdev_adjacent_change_prepare(dst->remote_dev, lowerdev, dev,
4176                          extack);
4177     if (err)
4178         return err;
4179 
4180     /* handle default dst entry */
4181     if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) {
4182         u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni);
4183 
4184         spin_lock_bh(&vxlan->hash_lock[hash_index]);
4185         if (!vxlan_addr_any(&conf.remote_ip)) {
4186             err = vxlan_fdb_update(vxlan, all_zeros_mac,
4187                            &conf.remote_ip,
4188                            NUD_REACHABLE | NUD_PERMANENT,
4189                            NLM_F_APPEND | NLM_F_CREATE,
4190                            vxlan->cfg.dst_port,
4191                            conf.vni, conf.vni,
4192                            conf.remote_ifindex,
4193                            NTF_SELF, 0, true, extack);
4194             if (err) {
4195                 spin_unlock_bh(&vxlan->hash_lock[hash_index]);
4196                 netdev_adjacent_change_abort(dst->remote_dev,
4197                                  lowerdev, dev);
4198                 return err;
4199             }
4200         }
4201         if (!vxlan_addr_any(&dst->remote_ip))
4202             __vxlan_fdb_delete(vxlan, all_zeros_mac,
4203                        dst->remote_ip,
4204                        vxlan->cfg.dst_port,
4205                        dst->remote_vni,
4206                        dst->remote_vni,
4207                        dst->remote_ifindex,
4208                        true);
4209         spin_unlock_bh(&vxlan->hash_lock[hash_index]);
4210 
4211         /* If vni filtering device, also update fdb entries of
4212          * all vnis that were using default remote ip
4213          */
4214         if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) {
4215             err = vxlan_vnilist_update_group(vxlan, &dst->remote_ip,
4216                              &conf.remote_ip, extack);
4217             if (err) {
4218                 netdev_adjacent_change_abort(dst->remote_dev,
4219                                  lowerdev, dev);
4220                 return err;
4221             }
4222         }
4223     }
4224 
4225     if (conf.age_interval != vxlan->cfg.age_interval)
4226         mod_timer(&vxlan->age_timer, jiffies);
4227 
4228     netdev_adjacent_change_commit(dst->remote_dev, lowerdev, dev);
4229     if (lowerdev && lowerdev != dst->remote_dev)
4230         dst->remote_dev = lowerdev;
4231     vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true);
4232     return 0;
4233 }
4234 
4235 static void vxlan_dellink(struct net_device *dev, struct list_head *head)
4236 {
4237     struct vxlan_dev *vxlan = netdev_priv(dev);
4238 
4239     vxlan_flush(vxlan, true);
4240 
4241     list_del(&vxlan->next);
4242     unregister_netdevice_queue(dev, head);
4243     if (vxlan->default_dst.remote_dev)
4244         netdev_upper_dev_unlink(vxlan->default_dst.remote_dev, dev);
4245 }
4246 
4247 static size_t vxlan_get_size(const struct net_device *dev)
4248 {
4249 
4250     return nla_total_size(sizeof(__u32)) +  /* IFLA_VXLAN_ID */
4251         nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_GROUP{6} */
4252         nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */
4253         nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */
4254         nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TTL */
4255         nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TTL_INHERIT */
4256         nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TOS */
4257         nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_DF */
4258         nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
4259         nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_LEARNING */
4260         nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_PROXY */
4261         nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_RSC */
4262         nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_L2MISS */
4263         nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_L3MISS */
4264         nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_COLLECT_METADATA */
4265         nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */
4266         nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */
4267         nla_total_size(sizeof(struct ifla_vxlan_port_range)) +
4268         nla_total_size(sizeof(__be16)) + /* IFLA_VXLAN_PORT */
4269         nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */
4270         nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */
4271         nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
4272         nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
4273         nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
4274         0;
4275 }
4276 
4277 static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
4278 {
4279     const struct vxlan_dev *vxlan = netdev_priv(dev);
4280     const struct vxlan_rdst *dst = &vxlan->default_dst;
4281     struct ifla_vxlan_port_range ports = {
4282         .low =  htons(vxlan->cfg.port_min),
4283         .high = htons(vxlan->cfg.port_max),
4284     };
4285 
4286     if (nla_put_u32(skb, IFLA_VXLAN_ID, be32_to_cpu(dst->remote_vni)))
4287         goto nla_put_failure;
4288 
4289     if (!vxlan_addr_any(&dst->remote_ip)) {
4290         if (dst->remote_ip.sa.sa_family == AF_INET) {
4291             if (nla_put_in_addr(skb, IFLA_VXLAN_GROUP,
4292                         dst->remote_ip.sin.sin_addr.s_addr))
4293                 goto nla_put_failure;
4294 #if IS_ENABLED(CONFIG_IPV6)
4295         } else {
4296             if (nla_put_in6_addr(skb, IFLA_VXLAN_GROUP6,
4297                          &dst->remote_ip.sin6.sin6_addr))
4298                 goto nla_put_failure;
4299 #endif
4300         }
4301     }
4302 
4303     if (dst->remote_ifindex && nla_put_u32(skb, IFLA_VXLAN_LINK, dst->remote_ifindex))
4304         goto nla_put_failure;
4305 
4306     if (!vxlan_addr_any(&vxlan->cfg.saddr)) {
4307         if (vxlan->cfg.saddr.sa.sa_family == AF_INET) {
4308             if (nla_put_in_addr(skb, IFLA_VXLAN_LOCAL,
4309                         vxlan->cfg.saddr.sin.sin_addr.s_addr))
4310                 goto nla_put_failure;
4311 #if IS_ENABLED(CONFIG_IPV6)
4312         } else {
4313             if (nla_put_in6_addr(skb, IFLA_VXLAN_LOCAL6,
4314                          &vxlan->cfg.saddr.sin6.sin6_addr))
4315                 goto nla_put_failure;
4316 #endif
4317         }
4318     }
4319 
4320     if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) ||
4321         nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT,
4322                !!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) ||
4323         nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
4324         nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) ||
4325         nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
4326         nla_put_u8(skb, IFLA_VXLAN_LEARNING,
4327                !!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
4328         nla_put_u8(skb, IFLA_VXLAN_PROXY,
4329                !!(vxlan->cfg.flags & VXLAN_F_PROXY)) ||
4330         nla_put_u8(skb, IFLA_VXLAN_RSC,
4331                !!(vxlan->cfg.flags & VXLAN_F_RSC)) ||
4332         nla_put_u8(skb, IFLA_VXLAN_L2MISS,
4333                !!(vxlan->cfg.flags & VXLAN_F_L2MISS)) ||
4334         nla_put_u8(skb, IFLA_VXLAN_L3MISS,
4335                !!(vxlan->cfg.flags & VXLAN_F_L3MISS)) ||
4336         nla_put_u8(skb, IFLA_VXLAN_COLLECT_METADATA,
4337                !!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)) ||
4338         nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->cfg.age_interval) ||
4339         nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) ||
4340         nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) ||
4341         nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
4342                !(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
4343         nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
4344                !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
4345         nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
4346                !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
4347         nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
4348                !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||
4349         nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
4350                !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))
4351         goto nla_put_failure;
4352 
4353     if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
4354         goto nla_put_failure;
4355 
4356     if (vxlan->cfg.flags & VXLAN_F_GBP &&
4357         nla_put_flag(skb, IFLA_VXLAN_GBP))
4358         goto nla_put_failure;
4359 
4360     if (vxlan->cfg.flags & VXLAN_F_GPE &&
4361         nla_put_flag(skb, IFLA_VXLAN_GPE))
4362         goto nla_put_failure;
4363 
4364     if (vxlan->cfg.flags & VXLAN_F_REMCSUM_NOPARTIAL &&
4365         nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
4366         goto nla_put_failure;
4367 
4368     if (vxlan->cfg.flags & VXLAN_F_VNIFILTER &&
4369         nla_put_u8(skb, IFLA_VXLAN_VNIFILTER,
4370                !!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)))
4371         goto nla_put_failure;
4372 
4373     return 0;
4374 
4375 nla_put_failure:
4376     return -EMSGSIZE;
4377 }
4378 
4379 static struct net *vxlan_get_link_net(const struct net_device *dev)
4380 {
4381     struct vxlan_dev *vxlan = netdev_priv(dev);
4382 
4383     return vxlan->net;
4384 }
4385 
4386 static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
4387     .kind       = "vxlan",
4388     .maxtype    = IFLA_VXLAN_MAX,
4389     .policy     = vxlan_policy,
4390     .priv_size  = sizeof(struct vxlan_dev),
4391     .setup      = vxlan_setup,
4392     .validate   = vxlan_validate,
4393     .newlink    = vxlan_newlink,
4394     .changelink = vxlan_changelink,
4395     .dellink    = vxlan_dellink,
4396     .get_size   = vxlan_get_size,
4397     .fill_info  = vxlan_fill_info,
4398     .get_link_net   = vxlan_get_link_net,
4399 };
4400 
4401 struct net_device *vxlan_dev_create(struct net *net, const char *name,
4402                     u8 name_assign_type,
4403                     struct vxlan_config *conf)
4404 {
4405     struct nlattr *tb[IFLA_MAX + 1];
4406     struct net_device *dev;
4407     int err;
4408 
4409     memset(&tb, 0, sizeof(tb));
4410 
4411     dev = rtnl_create_link(net, name, name_assign_type,
4412                    &vxlan_link_ops, tb, NULL);
4413     if (IS_ERR(dev))
4414         return dev;
4415 
4416     err = __vxlan_dev_create(net, dev, conf, NULL);
4417     if (err < 0) {
4418         free_netdev(dev);
4419         return ERR_PTR(err);
4420     }
4421 
4422     err = rtnl_configure_link(dev, NULL);
4423     if (err < 0) {
4424         LIST_HEAD(list_kill);
4425 
4426         vxlan_dellink(dev, &list_kill);
4427         unregister_netdevice_many(&list_kill);
4428         return ERR_PTR(err);
4429     }
4430 
4431     return dev;
4432 }
4433 EXPORT_SYMBOL_GPL(vxlan_dev_create);
4434 
4435 static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn,
4436                          struct net_device *dev)
4437 {
4438     struct vxlan_dev *vxlan, *next;
4439     LIST_HEAD(list_kill);
4440 
4441     list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
4442         struct vxlan_rdst *dst = &vxlan->default_dst;
4443 
4444         /* In case we created vxlan device with carrier
4445          * and we loose the carrier due to module unload
4446          * we also need to remove vxlan device. In other
4447          * cases, it's not necessary and remote_ifindex
4448          * is 0 here, so no matches.
4449          */
4450         if (dst->remote_ifindex == dev->ifindex)
4451             vxlan_dellink(vxlan->dev, &list_kill);
4452     }
4453 
4454     unregister_netdevice_many(&list_kill);
4455 }
4456 
4457 static int vxlan_netdevice_event(struct notifier_block *unused,
4458                  unsigned long event, void *ptr)
4459 {
4460     struct net_device *dev = netdev_notifier_info_to_dev(ptr);
4461     struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
4462 
4463     if (event == NETDEV_UNREGISTER)
4464         vxlan_handle_lowerdev_unregister(vn, dev);
4465     else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
4466         vxlan_offload_rx_ports(dev, true);
4467     else if (event == NETDEV_UDP_TUNNEL_DROP_INFO)
4468         vxlan_offload_rx_ports(dev, false);
4469 
4470     return NOTIFY_DONE;
4471 }
4472 
4473 static struct notifier_block vxlan_notifier_block __read_mostly = {
4474     .notifier_call = vxlan_netdevice_event,
4475 };
4476 
4477 static void
4478 vxlan_fdb_offloaded_set(struct net_device *dev,
4479             struct switchdev_notifier_vxlan_fdb_info *fdb_info)
4480 {
4481     struct vxlan_dev *vxlan = netdev_priv(dev);
4482     struct vxlan_rdst *rdst;
4483     struct vxlan_fdb *f;
4484     u32 hash_index;
4485 
4486     hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
4487 
4488     spin_lock_bh(&vxlan->hash_lock[hash_index]);
4489 
4490     f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
4491     if (!f)
4492         goto out;
4493 
4494     rdst = vxlan_fdb_find_rdst(f, &fdb_info->remote_ip,
4495                    fdb_info->remote_port,
4496                    fdb_info->remote_vni,
4497                    fdb_info->remote_ifindex);
4498     if (!rdst)
4499         goto out;
4500 
4501     rdst->offloaded = fdb_info->offloaded;
4502 
4503 out:
4504     spin_unlock_bh(&vxlan->hash_lock[hash_index]);
4505 }
4506 
4507 static int
4508 vxlan_fdb_external_learn_add(struct net_device *dev,
4509                  struct switchdev_notifier_vxlan_fdb_info *fdb_info)
4510 {
4511     struct vxlan_dev *vxlan = netdev_priv(dev);
4512     struct netlink_ext_ack *extack;
4513     u32 hash_index;
4514     int err;
4515 
4516     hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
4517     extack = switchdev_notifier_info_to_extack(&fdb_info->info);
4518 
4519     spin_lock_bh(&vxlan->hash_lock[hash_index]);
4520     err = vxlan_fdb_update(vxlan, fdb_info->eth_addr, &fdb_info->remote_ip,
4521                    NUD_REACHABLE,
4522                    NLM_F_CREATE | NLM_F_REPLACE,
4523                    fdb_info->remote_port,
4524                    fdb_info->vni,
4525                    fdb_info->remote_vni,
4526                    fdb_info->remote_ifindex,
4527                    NTF_USE | NTF_SELF | NTF_EXT_LEARNED,
4528                    0, false, extack);
4529     spin_unlock_bh(&vxlan->hash_lock[hash_index]);
4530 
4531     return err;
4532 }
4533 
4534 static int
4535 vxlan_fdb_external_learn_del(struct net_device *dev,
4536                  struct switchdev_notifier_vxlan_fdb_info *fdb_info)
4537 {
4538     struct vxlan_dev *vxlan = netdev_priv(dev);
4539     struct vxlan_fdb *f;
4540     u32 hash_index;
4541     int err = 0;
4542 
4543     hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
4544     spin_lock_bh(&vxlan->hash_lock[hash_index]);
4545 
4546     f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
4547     if (!f)
4548         err = -ENOENT;
4549     else if (f->flags & NTF_EXT_LEARNED)
4550         err = __vxlan_fdb_delete(vxlan, fdb_info->eth_addr,
4551                      fdb_info->remote_ip,
4552                      fdb_info->remote_port,
4553                      fdb_info->vni,
4554                      fdb_info->remote_vni,
4555                      fdb_info->remote_ifindex,
4556                      false);
4557 
4558     spin_unlock_bh(&vxlan->hash_lock[hash_index]);
4559 
4560     return err;
4561 }
4562 
4563 static int vxlan_switchdev_event(struct notifier_block *unused,
4564                  unsigned long event, void *ptr)
4565 {
4566     struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
4567     struct switchdev_notifier_vxlan_fdb_info *fdb_info;
4568     int err = 0;
4569 
4570     switch (event) {
4571     case SWITCHDEV_VXLAN_FDB_OFFLOADED:
4572         vxlan_fdb_offloaded_set(dev, ptr);
4573         break;
4574     case SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE:
4575         fdb_info = ptr;
4576         err = vxlan_fdb_external_learn_add(dev, fdb_info);
4577         if (err) {
4578             err = notifier_from_errno(err);
4579             break;
4580         }
4581         fdb_info->offloaded = true;
4582         vxlan_fdb_offloaded_set(dev, fdb_info);
4583         break;
4584     case SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE:
4585         fdb_info = ptr;
4586         err = vxlan_fdb_external_learn_del(dev, fdb_info);
4587         if (err) {
4588             err = notifier_from_errno(err);
4589             break;
4590         }
4591         fdb_info->offloaded = false;
4592         vxlan_fdb_offloaded_set(dev, fdb_info);
4593         break;
4594     }
4595 
4596     return err;
4597 }
4598 
4599 static struct notifier_block vxlan_switchdev_notifier_block __read_mostly = {
4600     .notifier_call = vxlan_switchdev_event,
4601 };
4602 
4603 static void vxlan_fdb_nh_flush(struct nexthop *nh)
4604 {
4605     struct vxlan_fdb *fdb;
4606     struct vxlan_dev *vxlan;
4607     u32 hash_index;
4608 
4609     rcu_read_lock();
4610     list_for_each_entry_rcu(fdb, &nh->fdb_list, nh_list) {
4611         vxlan = rcu_dereference(fdb->vdev);
4612         WARN_ON(!vxlan);
4613         hash_index = fdb_head_index(vxlan, fdb->eth_addr,
4614                         vxlan->default_dst.remote_vni);
4615         spin_lock_bh(&vxlan->hash_lock[hash_index]);
4616         if (!hlist_unhashed(&fdb->hlist))
4617             vxlan_fdb_destroy(vxlan, fdb, false, false);
4618         spin_unlock_bh(&vxlan->hash_lock[hash_index]);
4619     }
4620     rcu_read_unlock();
4621 }
4622 
4623 static int vxlan_nexthop_event(struct notifier_block *nb,
4624                    unsigned long event, void *ptr)
4625 {
4626     struct nh_notifier_info *info = ptr;
4627     struct nexthop *nh;
4628 
4629     if (event != NEXTHOP_EVENT_DEL)
4630         return NOTIFY_DONE;
4631 
4632     nh = nexthop_find_by_id(info->net, info->id);
4633     if (!nh)
4634         return NOTIFY_DONE;
4635 
4636     vxlan_fdb_nh_flush(nh);
4637 
4638     return NOTIFY_DONE;
4639 }
4640 
4641 static __net_init int vxlan_init_net(struct net *net)
4642 {
4643     struct vxlan_net *vn = net_generic(net, vxlan_net_id);
4644     unsigned int h;
4645 
4646     INIT_LIST_HEAD(&vn->vxlan_list);
4647     spin_lock_init(&vn->sock_lock);
4648     vn->nexthop_notifier_block.notifier_call = vxlan_nexthop_event;
4649 
4650     for (h = 0; h < PORT_HASH_SIZE; ++h)
4651         INIT_HLIST_HEAD(&vn->sock_list[h]);
4652 
4653     return register_nexthop_notifier(net, &vn->nexthop_notifier_block,
4654                      NULL);
4655 }
4656 
4657 static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
4658 {
4659     struct vxlan_net *vn = net_generic(net, vxlan_net_id);
4660     struct vxlan_dev *vxlan, *next;
4661     struct net_device *dev, *aux;
4662 
4663     for_each_netdev_safe(net, dev, aux)
4664         if (dev->rtnl_link_ops == &vxlan_link_ops)
4665             unregister_netdevice_queue(dev, head);
4666 
4667     list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
4668         /* If vxlan->dev is in the same netns, it has already been added
4669          * to the list by the previous loop.
4670          */
4671         if (!net_eq(dev_net(vxlan->dev), net))
4672             unregister_netdevice_queue(vxlan->dev, head);
4673     }
4674 
4675 }
4676 
4677 static void __net_exit vxlan_exit_batch_net(struct list_head *net_list)
4678 {
4679     struct net *net;
4680     LIST_HEAD(list);
4681     unsigned int h;
4682 
4683     list_for_each_entry(net, net_list, exit_list) {
4684         struct vxlan_net *vn = net_generic(net, vxlan_net_id);
4685 
4686         unregister_nexthop_notifier(net, &vn->nexthop_notifier_block);
4687     }
4688     rtnl_lock();
4689     list_for_each_entry(net, net_list, exit_list)
4690         vxlan_destroy_tunnels(net, &list);
4691 
4692     unregister_netdevice_many(&list);
4693     rtnl_unlock();
4694 
4695     list_for_each_entry(net, net_list, exit_list) {
4696         struct vxlan_net *vn = net_generic(net, vxlan_net_id);
4697 
4698         for (h = 0; h < PORT_HASH_SIZE; ++h)
4699             WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h]));
4700     }
4701 }
4702 
4703 static struct pernet_operations vxlan_net_ops = {
4704     .init = vxlan_init_net,
4705     .exit_batch = vxlan_exit_batch_net,
4706     .id   = &vxlan_net_id,
4707     .size = sizeof(struct vxlan_net),
4708 };
4709 
4710 static int __init vxlan_init_module(void)
4711 {
4712     int rc;
4713 
4714     get_random_bytes(&vxlan_salt, sizeof(vxlan_salt));
4715 
4716     rc = register_pernet_subsys(&vxlan_net_ops);
4717     if (rc)
4718         goto out1;
4719 
4720     rc = register_netdevice_notifier(&vxlan_notifier_block);
4721     if (rc)
4722         goto out2;
4723 
4724     rc = register_switchdev_notifier(&vxlan_switchdev_notifier_block);
4725     if (rc)
4726         goto out3;
4727 
4728     rc = rtnl_link_register(&vxlan_link_ops);
4729     if (rc)
4730         goto out4;
4731 
4732     vxlan_vnifilter_init();
4733 
4734     return 0;
4735 out4:
4736     unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
4737 out3:
4738     unregister_netdevice_notifier(&vxlan_notifier_block);
4739 out2:
4740     unregister_pernet_subsys(&vxlan_net_ops);
4741 out1:
4742     return rc;
4743 }
4744 late_initcall(vxlan_init_module);
4745 
4746 static void __exit vxlan_cleanup_module(void)
4747 {
4748     vxlan_vnifilter_uninit();
4749     rtnl_link_unregister(&vxlan_link_ops);
4750     unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
4751     unregister_netdevice_notifier(&vxlan_notifier_block);
4752     unregister_pernet_subsys(&vxlan_net_ops);
4753     /* rcu_barrier() is called by netns */
4754 }
4755 module_exit(vxlan_cleanup_module);
4756 
4757 MODULE_LICENSE("GPL");
4758 MODULE_VERSION(VXLAN_VERSION);
4759 MODULE_AUTHOR("Stephen Hemminger <stephen@networkplumber.org>");
4760 MODULE_DESCRIPTION("Driver for VXLAN encapsulated traffic");
4761 MODULE_ALIAS_RTNL_LINK("vxlan");