Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  *  IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT)
0004  *  Linux INET6 implementation
0005  *
0006  *  Authors:
0007  *  Pedro Roque     <roque@di.fc.ul.pt>
0008  *  Alexey Kuznetsov    <kuznet@ms2.inr.ac.ru>
0009  *
0010  *  Changes:
0011  * Roger Venning <r.venning@telstra.com>:   6to4 support
0012  * Nate Thompson <nate@thebog.net>:     6to4 support
0013  * Fred Templin <fred.l.templin@boeing.com>:    isatap support
0014  */
0015 
0016 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0017 
0018 #include <linux/module.h>
0019 #include <linux/capability.h>
0020 #include <linux/errno.h>
0021 #include <linux/types.h>
0022 #include <linux/socket.h>
0023 #include <linux/sockios.h>
0024 #include <linux/net.h>
0025 #include <linux/in6.h>
0026 #include <linux/netdevice.h>
0027 #include <linux/if_arp.h>
0028 #include <linux/icmp.h>
0029 #include <linux/slab.h>
0030 #include <linux/uaccess.h>
0031 #include <linux/init.h>
0032 #include <linux/netfilter_ipv4.h>
0033 #include <linux/if_ether.h>
0034 
0035 #include <net/sock.h>
0036 #include <net/snmp.h>
0037 
0038 #include <net/ipv6.h>
0039 #include <net/protocol.h>
0040 #include <net/transp_v6.h>
0041 #include <net/ip6_fib.h>
0042 #include <net/ip6_route.h>
0043 #include <net/ndisc.h>
0044 #include <net/addrconf.h>
0045 #include <net/ip.h>
0046 #include <net/udp.h>
0047 #include <net/icmp.h>
0048 #include <net/ip_tunnels.h>
0049 #include <net/inet_ecn.h>
0050 #include <net/xfrm.h>
0051 #include <net/dsfield.h>
0052 #include <net/net_namespace.h>
0053 #include <net/netns/generic.h>
0054 
0055 /*
0056    This version of net/ipv6/sit.c is cloned from net/ipv4/ip_gre.c
0057 
0058    For comments look at net/ipv4/ip_gre.c --ANK
0059  */
0060 
/* Number of buckets in each of the hashed tunnel tables of struct sit_net. */
0061 #define IP6_SIT_HASH_SIZE  16
/*
 * Fold a 32-bit IPv4 address into a bucket index: XOR the value with
 * itself shifted right by 4 and keep the low 4 bits, giving 0..15.
 * Note that @addr is not parenthesised in the expansion, so pass a plain
 * identifier rather than an expression.
 */
0062 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
0063 
/*
 * Module parameter (mode 0644).  When true, ipip6_rcv() emits a
 * rate-limited message for packets whose ECN decapsulation reports an
 * error; the flag is also handed to ip_tunnel_rcv() by sit_tunnel_rcv().
 */
0064 static bool log_ecn_error = true;
0065 module_param(log_ecn_error, bool, 0644);
0066 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
0067 
/* Forward declarations; the definitions follow later in the file. */
0068 static int ipip6_tunnel_init(struct net_device *dev);
0069 static void ipip6_tunnel_setup(struct net_device *dev);
0070 static void ipip6_dev_free(struct net_device *dev);
0071 static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
0072               __be32 *v4dst);
0073 static struct rtnl_link_ops sit_link_ops __read_mostly;
0074 
/* Key passed to net_generic() to fetch the struct sit_net of a namespace. */
0075 static unsigned int sit_net_id __read_mostly;
/*
 * Per-network-namespace SIT state: the tunnel lookup tables and the
 * fallback device.
 */
0076 struct sit_net {
    /* Tunnels with both remote and local set; indexed by HASH(remote) ^ HASH(local). */
0077     struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE];
    /* Tunnels searched by remote address only; indexed by HASH(remote). */
0078     struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE];
    /* Tunnels searched by local address only; indexed by HASH(local). */
0079     struct ip_tunnel __rcu *tunnels_l[IP6_SIT_HASH_SIZE];
    /* Single wildcard slot, tried last by ipip6_tunnel_lookup(). */
0080     struct ip_tunnel __rcu *tunnels_wc[1];
    /*
     * Table selector used by __ipip6_bucket(): index bit 1 is set when a
     * remote address is given, bit 0 when a local address is given.
     * NOTE(review): presumably initialised elsewhere to point at the four
     * arrays above - confirm against the netns init code.
     */
0081     struct ip_tunnel __rcu **tunnels[4];
0082 
    /* The namespace's fallback tunnel device. */
0083     struct net_device *fb_tunnel_dev;
0084 };
0085 
0086 static inline struct sit_net *dev_to_sit_net(struct net_device *dev)
0087 {
0088     struct ip_tunnel *t = netdev_priv(dev);
0089 
0090     return net_generic(t->net, sit_net_id);
0091 }
0092 
0093 /*
0094  * Must be invoked with rcu_read_lock
0095  */
0096 static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
0097                          struct net_device *dev,
0098                          __be32 remote, __be32 local,
0099                          int sifindex)
0100 {
0101     unsigned int h0 = HASH(remote);
0102     unsigned int h1 = HASH(local);
0103     struct ip_tunnel *t;
0104     struct sit_net *sitn = net_generic(net, sit_net_id);
0105     int ifindex = dev ? dev->ifindex : 0;
0106 
0107     for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
0108         if (local == t->parms.iph.saddr &&
0109             remote == t->parms.iph.daddr &&
0110             (!dev || !t->parms.link || ifindex == t->parms.link ||
0111              sifindex == t->parms.link) &&
0112             (t->dev->flags & IFF_UP))
0113             return t;
0114     }
0115     for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
0116         if (remote == t->parms.iph.daddr &&
0117             (!dev || !t->parms.link || ifindex == t->parms.link ||
0118              sifindex == t->parms.link) &&
0119             (t->dev->flags & IFF_UP))
0120             return t;
0121     }
0122     for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
0123         if (local == t->parms.iph.saddr &&
0124             (!dev || !t->parms.link || ifindex == t->parms.link ||
0125              sifindex == t->parms.link) &&
0126             (t->dev->flags & IFF_UP))
0127             return t;
0128     }
0129     t = rcu_dereference(sitn->tunnels_wc[0]);
0130     if (t && (t->dev->flags & IFF_UP))
0131         return t;
0132     return NULL;
0133 }
0134 
0135 static struct ip_tunnel __rcu **__ipip6_bucket(struct sit_net *sitn,
0136         struct ip_tunnel_parm *parms)
0137 {
0138     __be32 remote = parms->iph.daddr;
0139     __be32 local = parms->iph.saddr;
0140     unsigned int h = 0;
0141     int prio = 0;
0142 
0143     if (remote) {
0144         prio |= 2;
0145         h ^= HASH(remote);
0146     }
0147     if (local) {
0148         prio |= 1;
0149         h ^= HASH(local);
0150     }
0151     return &sitn->tunnels[prio][h];
0152 }
0153 
0154 static inline struct ip_tunnel __rcu **ipip6_bucket(struct sit_net *sitn,
0155         struct ip_tunnel *t)
0156 {
0157     return __ipip6_bucket(sitn, &t->parms);
0158 }
0159 
0160 static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
0161 {
0162     struct ip_tunnel __rcu **tp;
0163     struct ip_tunnel *iter;
0164 
0165     for (tp = ipip6_bucket(sitn, t);
0166          (iter = rtnl_dereference(*tp)) != NULL;
0167          tp = &iter->next) {
0168         if (t == iter) {
0169             rcu_assign_pointer(*tp, t->next);
0170             break;
0171         }
0172     }
0173 }
0174 
0175 static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
0176 {
0177     struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t);
0178 
0179     rcu_assign_pointer(t->next, rtnl_dereference(*tp));
0180     rcu_assign_pointer(*tp, t);
0181 }
0182 
/*
 * Initialise the 6rd parameters of the tunnel on @dev.
 *
 * When a fallback device exists and @dev is not it, the fallback's 6rd
 * settings are copied.  Otherwise the defaults are installed: prefix
 * 2002::/16 with no relay prefix.  Does nothing when
 * CONFIG_IPV6_SIT_6RD is not set.
 */
static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
{
#ifdef CONFIG_IPV6_SIT_6RD
    struct ip_tunnel *t = netdev_priv(dev);
    struct net_device *fb_dev = sitn->fb_tunnel_dev;

    if (fb_dev && dev != fb_dev) {
        struct ip_tunnel *fb_tunnel = netdev_priv(fb_dev);

        memcpy(&t->ip6rd, &fb_tunnel->ip6rd, sizeof(t->ip6rd));
        return;
    }

    ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
    t->ip6rd.relay_prefix = 0;
    t->ip6rd.prefixlen = 16;
    t->ip6rd.relay_prefixlen = 0;
#endif
}
0199 
0200 static int ipip6_tunnel_create(struct net_device *dev)
0201 {
0202     struct ip_tunnel *t = netdev_priv(dev);
0203     struct net *net = dev_net(dev);
0204     struct sit_net *sitn = net_generic(net, sit_net_id);
0205     int err;
0206 
0207     __dev_addr_set(dev, &t->parms.iph.saddr, 4);
0208     memcpy(dev->broadcast, &t->parms.iph.daddr, 4);
0209 
0210     if ((__force u16)t->parms.i_flags & SIT_ISATAP)
0211         dev->priv_flags |= IFF_ISATAP;
0212 
0213     dev->rtnl_link_ops = &sit_link_ops;
0214 
0215     err = register_netdevice(dev);
0216     if (err < 0)
0217         goto out;
0218 
0219     ipip6_tunnel_clone_6rd(dev, sitn);
0220 
0221     ipip6_tunnel_link(sitn, t);
0222     return 0;
0223 
0224 out:
0225     return err;
0226 }
0227 
0228 static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
0229         struct ip_tunnel_parm *parms, int create)
0230 {
0231     __be32 remote = parms->iph.daddr;
0232     __be32 local = parms->iph.saddr;
0233     struct ip_tunnel *t, *nt;
0234     struct ip_tunnel __rcu **tp;
0235     struct net_device *dev;
0236     char name[IFNAMSIZ];
0237     struct sit_net *sitn = net_generic(net, sit_net_id);
0238 
0239     for (tp = __ipip6_bucket(sitn, parms);
0240         (t = rtnl_dereference(*tp)) != NULL;
0241          tp = &t->next) {
0242         if (local == t->parms.iph.saddr &&
0243             remote == t->parms.iph.daddr &&
0244             parms->link == t->parms.link) {
0245             if (create)
0246                 return NULL;
0247             else
0248                 return t;
0249         }
0250     }
0251     if (!create)
0252         goto failed;
0253 
0254     if (parms->name[0]) {
0255         if (!dev_valid_name(parms->name))
0256             goto failed;
0257         strlcpy(name, parms->name, IFNAMSIZ);
0258     } else {
0259         strcpy(name, "sit%d");
0260     }
0261     dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
0262                ipip6_tunnel_setup);
0263     if (!dev)
0264         return NULL;
0265 
0266     dev_net_set(dev, net);
0267 
0268     nt = netdev_priv(dev);
0269 
0270     nt->parms = *parms;
0271     if (ipip6_tunnel_create(dev) < 0)
0272         goto failed_free;
0273 
0274     if (!parms->name[0])
0275         strcpy(parms->name, dev->name);
0276 
0277     return nt;
0278 
0279 failed_free:
0280     free_netdev(dev);
0281 failed:
0282     return NULL;
0283 }
0284 
/*
 * Iterate over a PRL entry list starting at @start using rcu_dereference().
 * The cursor is a variable named `prl` that must already be declared in
 * the calling scope; it is NULL when the loop runs to completion.
 */
0285 #define for_each_prl_rcu(start)         \
0286     for (prl = rcu_dereference(start);  \
0287          prl;               \
0288          prl = rcu_dereference(prl->next))
0289 
0290 static struct ip_tunnel_prl_entry *
0291 __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
0292 {
0293     struct ip_tunnel_prl_entry *prl;
0294 
0295     for_each_prl_rcu(t->prl)
0296         if (prl->addr == addr)
0297             break;
0298     return prl;
0299 
0300 }
0301 
0302 static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __user *a)
0303 {
0304     struct ip_tunnel *t = netdev_priv(dev);
0305     struct ip_tunnel_prl kprl, *kp;
0306     struct ip_tunnel_prl_entry *prl;
0307     unsigned int cmax, c = 0, ca, len;
0308     int ret = 0;
0309 
0310     if (dev == dev_to_sit_net(dev)->fb_tunnel_dev)
0311         return -EINVAL;
0312 
0313     if (copy_from_user(&kprl, a, sizeof(kprl)))
0314         return -EFAULT;
0315     cmax = kprl.datalen / sizeof(kprl);
0316     if (cmax > 1 && kprl.addr != htonl(INADDR_ANY))
0317         cmax = 1;
0318 
0319     /* For simple GET or for root users,
0320      * we try harder to allocate.
0321      */
0322     kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
0323         kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) :
0324         NULL;
0325 
0326     ca = min(t->prl_count, cmax);
0327 
0328     if (!kp) {
0329         /* We don't try hard to allocate much memory for
0330          * non-root users.
0331          * For root users, retry allocating enough memory for
0332          * the answer.
0333          */
0334         kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC | __GFP_ACCOUNT |
0335                           __GFP_NOWARN);
0336         if (!kp) {
0337             ret = -ENOMEM;
0338             goto out;
0339         }
0340     }
0341 
0342     rcu_read_lock();
0343     for_each_prl_rcu(t->prl) {
0344         if (c >= cmax)
0345             break;
0346         if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr)
0347             continue;
0348         kp[c].addr = prl->addr;
0349         kp[c].flags = prl->flags;
0350         c++;
0351         if (kprl.addr != htonl(INADDR_ANY))
0352             break;
0353     }
0354 
0355     rcu_read_unlock();
0356 
0357     len = sizeof(*kp) * c;
0358     ret = 0;
0359     if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen))
0360         ret = -EFAULT;
0361 
0362     kfree(kp);
0363 out:
0364     return ret;
0365 }
0366 
0367 static int
0368 ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
0369 {
0370     struct ip_tunnel_prl_entry *p;
0371     int err = 0;
0372 
0373     if (a->addr == htonl(INADDR_ANY))
0374         return -EINVAL;
0375 
0376     ASSERT_RTNL();
0377 
0378     for (p = rtnl_dereference(t->prl); p; p = rtnl_dereference(p->next)) {
0379         if (p->addr == a->addr) {
0380             if (chg) {
0381                 p->flags = a->flags;
0382                 goto out;
0383             }
0384             err = -EEXIST;
0385             goto out;
0386         }
0387     }
0388 
0389     if (chg) {
0390         err = -ENXIO;
0391         goto out;
0392     }
0393 
0394     p = kzalloc(sizeof(struct ip_tunnel_prl_entry), GFP_KERNEL);
0395     if (!p) {
0396         err = -ENOBUFS;
0397         goto out;
0398     }
0399 
0400     p->next = t->prl;
0401     p->addr = a->addr;
0402     p->flags = a->flags;
0403     t->prl_count++;
0404     rcu_assign_pointer(t->prl, p);
0405 out:
0406     return err;
0407 }
0408 
0409 static void prl_list_destroy_rcu(struct rcu_head *head)
0410 {
0411     struct ip_tunnel_prl_entry *p, *n;
0412 
0413     p = container_of(head, struct ip_tunnel_prl_entry, rcu_head);
0414     do {
0415         n = rcu_dereference_protected(p->next, 1);
0416         kfree(p);
0417         p = n;
0418     } while (p);
0419 }
0420 
0421 static int
0422 ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
0423 {
0424     struct ip_tunnel_prl_entry *x;
0425     struct ip_tunnel_prl_entry __rcu **p;
0426     int err = 0;
0427 
0428     ASSERT_RTNL();
0429 
0430     if (a && a->addr != htonl(INADDR_ANY)) {
0431         for (p = &t->prl;
0432              (x = rtnl_dereference(*p)) != NULL;
0433              p = &x->next) {
0434             if (x->addr == a->addr) {
0435                 *p = x->next;
0436                 kfree_rcu(x, rcu_head);
0437                 t->prl_count--;
0438                 goto out;
0439             }
0440         }
0441         err = -ENXIO;
0442     } else {
0443         x = rtnl_dereference(t->prl);
0444         if (x) {
0445             t->prl_count = 0;
0446             call_rcu(&x->rcu_head, prl_list_destroy_rcu);
0447             t->prl = NULL;
0448         }
0449     }
0450 out:
0451     return err;
0452 }
0453 
0454 static int ipip6_tunnel_prl_ctl(struct net_device *dev,
0455                 struct ip_tunnel_prl __user *data, int cmd)
0456 {
0457     struct ip_tunnel *t = netdev_priv(dev);
0458     struct ip_tunnel_prl prl;
0459     int err;
0460 
0461     if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
0462         return -EPERM;
0463     if (dev == dev_to_sit_net(dev)->fb_tunnel_dev)
0464         return -EINVAL;
0465 
0466     if (copy_from_user(&prl, data, sizeof(prl)))
0467         return -EFAULT;
0468 
0469     switch (cmd) {
0470     case SIOCDELPRL:
0471         err = ipip6_tunnel_del_prl(t, &prl);
0472         break;
0473     case SIOCADDPRL:
0474     case SIOCCHGPRL:
0475         err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
0476         break;
0477     }
0478     dst_cache_reset(&t->dst_cache);
0479     netdev_state_change(dev);
0480     return err;
0481 }
0482 
0483 static int
0484 isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t)
0485 {
0486     struct ip_tunnel_prl_entry *p;
0487     int ok = 1;
0488 
0489     rcu_read_lock();
0490     p = __ipip6_tunnel_locate_prl(t, iph->saddr);
0491     if (p) {
0492         if (p->flags & PRL_DEFAULT)
0493             skb->ndisc_nodetype = NDISC_NODETYPE_DEFAULT;
0494         else
0495             skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT;
0496     } else {
0497         const struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr;
0498 
0499         if (ipv6_addr_is_isatap(addr6) &&
0500             (addr6->s6_addr32[3] == iph->saddr) &&
0501             ipv6_chk_prefix(addr6, t->dev))
0502             skb->ndisc_nodetype = NDISC_NODETYPE_HOST;
0503         else
0504             ok = 0;
0505     }
0506     rcu_read_unlock();
0507     return ok;
0508 }
0509 
0510 static void ipip6_tunnel_uninit(struct net_device *dev)
0511 {
0512     struct ip_tunnel *tunnel = netdev_priv(dev);
0513     struct sit_net *sitn = net_generic(tunnel->net, sit_net_id);
0514 
0515     if (dev == sitn->fb_tunnel_dev) {
0516         RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL);
0517     } else {
0518         ipip6_tunnel_unlink(sitn, tunnel);
0519         ipip6_tunnel_del_prl(tunnel, NULL);
0520     }
0521     dst_cache_reset(&tunnel->dst_cache);
0522     netdev_put(dev, &tunnel->dev_tracker);
0523 }
0524 
0525 static int ipip6_err(struct sk_buff *skb, u32 info)
0526 {
0527     const struct iphdr *iph = (const struct iphdr *)skb->data;
0528     const int type = icmp_hdr(skb)->type;
0529     const int code = icmp_hdr(skb)->code;
0530     unsigned int data_len = 0;
0531     struct ip_tunnel *t;
0532     int sifindex;
0533     int err;
0534 
0535     switch (type) {
0536     default:
0537     case ICMP_PARAMETERPROB:
0538         return 0;
0539 
0540     case ICMP_DEST_UNREACH:
0541         switch (code) {
0542         case ICMP_SR_FAILED:
0543             /* Impossible event. */
0544             return 0;
0545         default:
0546             /* All others are translated to HOST_UNREACH.
0547                rfc2003 contains "deep thoughts" about NET_UNREACH,
0548                I believe they are just ether pollution. --ANK
0549              */
0550             break;
0551         }
0552         break;
0553     case ICMP_TIME_EXCEEDED:
0554         if (code != ICMP_EXC_TTL)
0555             return 0;
0556         data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
0557         break;
0558     case ICMP_REDIRECT:
0559         break;
0560     }
0561 
0562     err = -ENOENT;
0563 
0564     sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
0565     t = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
0566                 iph->daddr, iph->saddr, sifindex);
0567     if (!t)
0568         goto out;
0569 
0570     if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
0571         ipv4_update_pmtu(skb, dev_net(skb->dev), info,
0572                  t->parms.link, iph->protocol);
0573         err = 0;
0574         goto out;
0575     }
0576     if (type == ICMP_REDIRECT) {
0577         ipv4_redirect(skb, dev_net(skb->dev), t->parms.link,
0578                   iph->protocol);
0579         err = 0;
0580         goto out;
0581     }
0582 
0583     err = 0;
0584     if (__in6_dev_get(skb->dev) &&
0585         !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len))
0586         goto out;
0587 
0588     if (t->parms.iph.daddr == 0)
0589         goto out;
0590 
0591     if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
0592         goto out;
0593 
0594     if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
0595         t->err_count++;
0596     else
0597         t->err_count = 1;
0598     t->err_time = jiffies;
0599 out:
0600     return err;
0601 }
0602 
0603 static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
0604                   const struct in6_addr *v6addr)
0605 {
0606     __be32 v4embed = 0;
0607     if (check_6rd(tunnel, v6addr, &v4embed) && v4addr != v4embed)
0608         return true;
0609     return false;
0610 }
0611 
0612 /* Checks if an address matches an address on the tunnel interface.
0613  * Used to detect the NAT of proto 41 packets and let them pass spoofing test.
0614  * Long story:
0615  * This function is called after we considered the packet as spoofed
0616  * in is_spoofed_6rd.
0617  * We may have a router that is doing NAT for proto 41 packets
0618  * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb
0619  * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd
0620  * function will return true, dropping the packet.
0621  * But we can still check whether it is spoofed against the IP
0622  * addresses associated with the interface.
0623  */
0624 static bool only_dnatted(const struct ip_tunnel *tunnel,
0625     const struct in6_addr *v6dst)
0626 {
0627     int prefix_len;
0628 
0629 #ifdef CONFIG_IPV6_SIT_6RD
0630     prefix_len = tunnel->ip6rd.prefixlen + 32
0631         - tunnel->ip6rd.relay_prefixlen;
0632 #else
0633     prefix_len = 48;
0634 #endif
0635     return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev);
0636 }
0637 
0638 /* Returns true if a packet is spoofed */
0639 static bool packet_is_spoofed(struct sk_buff *skb,
0640                   const struct iphdr *iph,
0641                   struct ip_tunnel *tunnel)
0642 {
0643     const struct ipv6hdr *ipv6h;
0644 
0645     if (tunnel->dev->priv_flags & IFF_ISATAP) {
0646         if (!isatap_chksrc(skb, iph, tunnel))
0647             return true;
0648 
0649         return false;
0650     }
0651 
0652     if (tunnel->dev->flags & IFF_POINTOPOINT)
0653         return false;
0654 
0655     ipv6h = ipv6_hdr(skb);
0656 
0657     if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) {
0658         net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
0659                      &iph->saddr, &ipv6h->saddr,
0660                      &iph->daddr, &ipv6h->daddr);
0661         return true;
0662     }
0663 
0664     if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr)))
0665         return false;
0666 
0667     if (only_dnatted(tunnel, &ipv6h->daddr))
0668         return false;
0669 
0670     net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
0671                  &iph->saddr, &ipv6h->saddr,
0672                  &iph->daddr, &ipv6h->daddr);
0673     return true;
0674 }
0675 
0676 static int ipip6_rcv(struct sk_buff *skb)
0677 {
0678     const struct iphdr *iph = ip_hdr(skb);
0679     struct ip_tunnel *tunnel;
0680     int sifindex;
0681     int err;
0682 
0683     sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
0684     tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
0685                      iph->saddr, iph->daddr, sifindex);
0686     if (tunnel) {
0687         if (tunnel->parms.iph.protocol != IPPROTO_IPV6 &&
0688             tunnel->parms.iph.protocol != 0)
0689             goto out;
0690 
0691         skb->mac_header = skb->network_header;
0692         skb_reset_network_header(skb);
0693         IPCB(skb)->flags = 0;
0694         skb->dev = tunnel->dev;
0695 
0696         if (packet_is_spoofed(skb, iph, tunnel)) {
0697             tunnel->dev->stats.rx_errors++;
0698             goto out;
0699         }
0700 
0701         if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6),
0702             !net_eq(tunnel->net, dev_net(tunnel->dev))))
0703             goto out;
0704 
0705         /* skb can be uncloned in iptunnel_pull_header, so
0706          * old iph is no longer valid
0707          */
0708         iph = (const struct iphdr *)skb_mac_header(skb);
0709         skb_reset_mac_header(skb);
0710 
0711         err = IP_ECN_decapsulate(iph, skb);
0712         if (unlikely(err)) {
0713             if (log_ecn_error)
0714                 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
0715                              &iph->saddr, iph->tos);
0716             if (err > 1) {
0717                 ++tunnel->dev->stats.rx_frame_errors;
0718                 ++tunnel->dev->stats.rx_errors;
0719                 goto out;
0720             }
0721         }
0722 
0723         dev_sw_netstats_rx_add(tunnel->dev, skb->len);
0724 
0725         netif_rx(skb);
0726 
0727         return 0;
0728     }
0729 
0730     /* no tunnel matched,  let upstream know, ipsec may handle it */
0731     return 1;
0732 out:
0733     kfree_skb(skb);
0734     return 0;
0735 }
0736 
/*
 * Packet info that sit_tunnel_rcv() passes to iptunnel_pull_header() and
 * ip_tunnel_rcv() for IPv4 payloads; only the inner protocol is set.
 */
0737 static const struct tnl_ptk_info ipip_tpi = {
0738     /* no tunnel info required for ipip. */
0739     .proto = htons(ETH_P_IP),
0740 };
0741 
0742 #if IS_ENABLED(CONFIG_MPLS)
/*
 * Packet info that sit_tunnel_rcv() uses for MPLS payloads; only the
 * inner protocol is set.
 */
0743 static const struct tnl_ptk_info mplsip_tpi = {
0744     /* no tunnel info required for mplsip. */
0745     .proto = htons(ETH_P_MPLS_UC),
0746 };
0747 #endif
0748 
0749 static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
0750 {
0751     const struct iphdr *iph;
0752     struct ip_tunnel *tunnel;
0753     int sifindex;
0754 
0755     sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
0756 
0757     iph = ip_hdr(skb);
0758     tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
0759                      iph->saddr, iph->daddr, sifindex);
0760     if (tunnel) {
0761         const struct tnl_ptk_info *tpi;
0762 
0763         if (tunnel->parms.iph.protocol != ipproto &&
0764             tunnel->parms.iph.protocol != 0)
0765             goto drop;
0766 
0767         if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
0768             goto drop;
0769 #if IS_ENABLED(CONFIG_MPLS)
0770         if (ipproto == IPPROTO_MPLS)
0771             tpi = &mplsip_tpi;
0772         else
0773 #endif
0774             tpi = &ipip_tpi;
0775         if (iptunnel_pull_header(skb, 0, tpi->proto, false))
0776             goto drop;
0777         skb_reset_mac_header(skb);
0778 
0779         return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
0780     }
0781 
0782     return 1;
0783 
0784 drop:
0785     kfree_skb(skb);
0786     return 0;
0787 }
0788 
0789 static int ipip_rcv(struct sk_buff *skb)
0790 {
0791     return sit_tunnel_rcv(skb, IPPROTO_IPIP);
0792 }
0793 
0794 #if IS_ENABLED(CONFIG_MPLS)
0795 static int mplsip_rcv(struct sk_buff *skb)
0796 {
0797     return sit_tunnel_rcv(skb, IPPROTO_MPLS);
0798 }
0799 #endif
0800 
0801 /*
0802  * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function
0803  * stores the embedded IPv4 address in v4dst and returns true.
0804  */
0805 static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
0806               __be32 *v4dst)
0807 {
0808 #ifdef CONFIG_IPV6_SIT_6RD
0809     if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix,
0810                   tunnel->ip6rd.prefixlen)) {
0811         unsigned int pbw0, pbi0;
0812         int pbi1;
0813         u32 d;
0814 
0815         pbw0 = tunnel->ip6rd.prefixlen >> 5;
0816         pbi0 = tunnel->ip6rd.prefixlen & 0x1f;
0817 
0818         d = tunnel->ip6rd.relay_prefixlen < 32 ?
0819             (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
0820             tunnel->ip6rd.relay_prefixlen : 0;
0821 
0822         pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen;
0823         if (pbi1 > 0)
0824             d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >>
0825                  (32 - pbi1);
0826 
0827         *v4dst = tunnel->ip6rd.relay_prefix | htonl(d);
0828         return true;
0829     }
0830 #else
0831     if (v6dst->s6_addr16[0] == htons(0x2002)) {
0832         /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
0833         memcpy(v4dst, &v6dst->s6_addr16[1], 4);
0834         return true;
0835     }
0836 #endif
0837     return false;
0838 }
0839 
0840 static inline __be32 try_6rd(struct ip_tunnel *tunnel,
0841                  const struct in6_addr *v6dst)
0842 {
0843     __be32 dst = 0;
0844     check_6rd(tunnel, v6dst, &dst);
0845     return dst;
0846 }
0847 
0848 /*
0849  *  This function assumes it is being called from dev_queue_xmit()
0850  *  and that skb is filled properly by that function.
0851  */
0852 
/*
 * Encapsulate the IPv6 packet in @skb in an IPv4 header and transmit it
 * through the tunnel on @dev.
 *
 * The outer destination is chosen in this order: for ISATAP devices the
 * low 32 bits of the IPv6 next hop; otherwise the tunnel's configured
 * remote address; if that is zero, the address embedded in a 6rd / 6to4
 * destination; and finally the low 32 bits of an IPv4-compatible next
 * hop or destination address.
 *
 * Always returns NETDEV_TX_OK.  On the error paths the skb is freed here
 * and an error counter of @dev is incremented.
 */
0853 static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
0854                      struct net_device *dev)
0855 {
0856     struct ip_tunnel *tunnel = netdev_priv(dev);
0857     const struct iphdr  *tiph = &tunnel->parms.iph;
0858     const struct ipv6hdr *iph6 = ipv6_hdr(skb);
0859     u8     tos = tunnel->parms.iph.tos;
0860     __be16 df = tiph->frag_off;
0861     struct rtable *rt;      /* Route to the other host */
0862     struct net_device *tdev;    /* Device to other host */
0863     unsigned int max_headroom;  /* The extra header space needed */
0864     __be32 dst = tiph->daddr;
0865     struct flowi4 fl4;
0866     int    mtu;
0867     const struct in6_addr *addr6;
0868     int addr_type;
0869     u8 ttl;
0870     u8 protocol = IPPROTO_IPV6;
0871     int t_hlen = tunnel->hlen + sizeof(struct iphdr);
0872 
    /* A configured TOS of 1 means: take the DS field of the inner packet. */
0873     if (tos == 1)
0874         tos = ipv6_get_dsfield(iph6);
0875 
0876     /* ISATAP (RFC4214) - must come before 6to4 */
0877     if (dev->priv_flags & IFF_ISATAP) {
0878         struct neighbour *neigh = NULL;
0879         bool do_tx_error = false;
0880 
0881         if (skb_dst(skb))
0882             neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
0883 
0884         if (!neigh) {
0885             net_dbg_ratelimited("nexthop == NULL\n");
0886             goto tx_error;
0887         }
0888 
0889         addr6 = (const struct in6_addr *)&neigh->primary_key;
0890         addr_type = ipv6_addr_type(addr6);
0891 
        /* This replaces any configured remote address. */
0892         if ((addr_type & IPV6_ADDR_UNICAST) &&
0893              ipv6_addr_is_isatap(addr6))
0894             dst = addr6->s6_addr32[3];
0895         else
0896             do_tx_error = true;
0897 
        /* Drop the neighbour reference before leaving on either path. */
0898         neigh_release(neigh);
0899         if (do_tx_error)
0900             goto tx_error;
0901     }
0902 
    /* No destination yet: try the 6rd / 6to4 embedded address. */
0903     if (!dst)
0904         dst = try_6rd(tunnel, &iph6->daddr);
0905 
    /* Last resort: an IPv4-compatible next hop or destination address. */
0906     if (!dst) {
0907         struct neighbour *neigh = NULL;
0908         bool do_tx_error = false;
0909 
0910         if (skb_dst(skb))
0911             neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
0912 
0913         if (!neigh) {
0914             net_dbg_ratelimited("nexthop == NULL\n");
0915             goto tx_error;
0916         }
0917 
0918         addr6 = (const struct in6_addr *)&neigh->primary_key;
0919         addr_type = ipv6_addr_type(addr6);
0920 
        /* Unspecified next hop: fall back to the packet's destination. */
0921         if (addr_type == IPV6_ADDR_ANY) {
0922             addr6 = &ipv6_hdr(skb)->daddr;
0923             addr_type = ipv6_addr_type(addr6);
0924         }
0925 
0926         if ((addr_type & IPV6_ADDR_COMPATv4) != 0)
0927             dst = addr6->s6_addr32[3];
0928         else
0929             do_tx_error = true;
0930 
0931         neigh_release(neigh);
0932         if (do_tx_error)
0933             goto tx_error;
0934     }
0935 
0936     flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark,
0937                RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6,
0938                0, dst, tiph->saddr, 0, 0,
0939                sock_net_uid(tunnel->net, NULL));
0940 
    /* Use the cached route if there is one, else look it up and cache it. */
0941     rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr);
0942     if (!rt) {
0943         rt = ip_route_output_flow(tunnel->net, &fl4, NULL);
0944         if (IS_ERR(rt)) {
0945             dev->stats.tx_carrier_errors++;
0946             goto tx_error_icmp;
0947         }
0948         dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr);
0949     }
0950 
    /* From here on, every error exit releases the route with ip_rt_put(). */
0951     if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
0952         ip_rt_put(rt);
0953         dev->stats.tx_carrier_errors++;
0954         goto tx_error_icmp;
0955     }
0956     tdev = rt->dst.dev;
0957 
    /* The route points back at this tunnel device; counted as a collision. */
0958     if (tdev == dev) {
0959         ip_rt_put(rt);
0960         dev->stats.collisions++;
0961         goto tx_error;
0962     }
0963 
0964     if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) {
0965         ip_rt_put(rt);
0966         goto tx_error;
0967     }
0968 
    /* MTU handling applies only when frag_off is set in the tunnel header. */
0969     if (df) {
0970         mtu = dst_mtu(&rt->dst) - t_hlen;
0971 
0972         if (mtu < IPV4_MIN_MTU) {
0973             dev->stats.collisions++;
0974             ip_rt_put(rt);
0975             goto tx_error;
0976         }
0977 
        /*
         * The path cannot carry the IPv6 minimum MTU: raise mtu to
         * that minimum and clear df for the outer header.
         */
0978         if (mtu < IPV6_MIN_MTU) {
0979             mtu = IPV6_MIN_MTU;
0980             df = 0;
0981         }
0982 
0983         if (tunnel->parms.iph.daddr)
0984             skb_dst_update_pmtu_no_confirm(skb, mtu);
0985 
0986         if (skb->len > mtu && !skb_is_gso(skb)) {
0987             icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
0988             ip_rt_put(rt);
0989             goto tx_error;
0990         }
0991     }
0992 
    /*
     * Errors recorded by ipip6_err() within IPTUNNEL_ERR_TIMEO are passed
     * on to the sender one at a time via dst_link_failure(); older ones
     * are discarded.
     */
0993     if (tunnel->err_count > 0) {
0994         if (time_before(jiffies,
0995                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
0996             tunnel->err_count--;
0997             dst_link_failure(skb);
0998         } else
0999             tunnel->err_count = 0;
1000     }
1001 
1002     /*
1003      * Okay, now see if we can stuff it in the buffer as-is.
1004      */
1005     max_headroom = LL_RESERVED_SPACE(tdev) + t_hlen;
1006 
1007     if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
1008         (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1009         struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
1010         if (!new_skb) {
1011             ip_rt_put(rt);
1012             dev->stats.tx_dropped++;
1013             kfree_skb(skb);
1014             return NETDEV_TX_OK;
1015         }
1016         if (skb->sk)
1017             skb_set_owner_w(new_skb, skb->sk);
1018         dev_kfree_skb(skb);
1019         skb = new_skb;
        /* The header now lives in the new buffer; refresh the pointer. */
1020         iph6 = ipv6_hdr(skb);
1021     }
    /* A configured TTL of 0 means: use the inner packet's hop limit. */
1022     ttl = tiph->ttl;
1023     if (ttl == 0)
1024         ttl = iph6->hop_limit;
1025     tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
1026 
1027     if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) {
1028         ip_rt_put(rt);
1029         goto tx_error;
1030     }
1031 
1032     skb_set_inner_ipproto(skb, IPPROTO_IPV6);
1033 
1034     iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
1035               df, !net_eq(tunnel->net, dev_net(dev)));
1036     return NETDEV_TX_OK;
1037 
1038 tx_error_icmp:
1039     dst_link_failure(skb);
1040 tx_error:
1041     kfree_skb(skb);
1042     dev->stats.tx_errors++;
1043     return NETDEV_TX_OK;
1044 }
1045 
1046 static netdev_tx_t sit_tunnel_xmit__(struct sk_buff *skb,
1047                      struct net_device *dev, u8 ipproto)
1048 {
1049     struct ip_tunnel *tunnel = netdev_priv(dev);
1050     const struct iphdr  *tiph = &tunnel->parms.iph;
1051 
1052     if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
1053         goto tx_error;
1054 
1055     skb_set_inner_ipproto(skb, ipproto);
1056 
1057     ip_tunnel_xmit(skb, dev, tiph, ipproto);
1058     return NETDEV_TX_OK;
1059 tx_error:
1060     kfree_skb(skb);
1061     dev->stats.tx_errors++;
1062     return NETDEV_TX_OK;
1063 }
1064 
1065 static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
1066                    struct net_device *dev)
1067 {
1068     if (!pskb_inet_may_pull(skb))
1069         goto tx_err;
1070 
1071     switch (skb->protocol) {
1072     case htons(ETH_P_IP):
1073         sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP);
1074         break;
1075     case htons(ETH_P_IPV6):
1076         ipip6_tunnel_xmit(skb, dev);
1077         break;
1078 #if IS_ENABLED(CONFIG_MPLS)
1079     case htons(ETH_P_MPLS_UC):
1080         sit_tunnel_xmit__(skb, dev, IPPROTO_MPLS);
1081         break;
1082 #endif
1083     default:
1084         goto tx_err;
1085     }
1086 
1087     return NETDEV_TX_OK;
1088 
1089 tx_err:
1090     dev->stats.tx_errors++;
1091     kfree_skb(skb);
1092     return NETDEV_TX_OK;
1093 
1094 }
1095 
1096 static void ipip6_tunnel_bind_dev(struct net_device *dev)
1097 {
1098     struct net_device *tdev = NULL;
1099     struct ip_tunnel *tunnel;
1100     const struct iphdr *iph;
1101     struct flowi4 fl4;
1102 
1103     tunnel = netdev_priv(dev);
1104     iph = &tunnel->parms.iph;
1105 
1106     if (iph->daddr) {
1107         struct rtable *rt = ip_route_output_ports(tunnel->net, &fl4,
1108                               NULL,
1109                               iph->daddr, iph->saddr,
1110                               0, 0,
1111                               IPPROTO_IPV6,
1112                               RT_TOS(iph->tos),
1113                               tunnel->parms.link);
1114 
1115         if (!IS_ERR(rt)) {
1116             tdev = rt->dst.dev;
1117             ip_rt_put(rt);
1118         }
1119         dev->flags |= IFF_POINTOPOINT;
1120     }
1121 
1122     if (!tdev && tunnel->parms.link)
1123         tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
1124 
1125     if (tdev && !netif_is_l3_master(tdev)) {
1126         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
1127 
1128         dev->mtu = tdev->mtu - t_hlen;
1129         if (dev->mtu < IPV6_MIN_MTU)
1130             dev->mtu = IPV6_MIN_MTU;
1131     }
1132 }
1133 
1134 static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
1135                 __u32 fwmark)
1136 {
1137     struct net *net = t->net;
1138     struct sit_net *sitn = net_generic(net, sit_net_id);
1139 
1140     ipip6_tunnel_unlink(sitn, t);
1141     synchronize_net();
1142     t->parms.iph.saddr = p->iph.saddr;
1143     t->parms.iph.daddr = p->iph.daddr;
1144     __dev_addr_set(t->dev, &p->iph.saddr, 4);
1145     memcpy(t->dev->broadcast, &p->iph.daddr, 4);
1146     ipip6_tunnel_link(sitn, t);
1147     t->parms.iph.ttl = p->iph.ttl;
1148     t->parms.iph.tos = p->iph.tos;
1149     t->parms.iph.frag_off = p->iph.frag_off;
1150     if (t->parms.link != p->link || t->fwmark != fwmark) {
1151         t->parms.link = p->link;
1152         t->fwmark = fwmark;
1153         ipip6_tunnel_bind_dev(t->dev);
1154     }
1155     dst_cache_reset(&t->dst_cache);
1156     netdev_state_change(t->dev);
1157 }
1158 
1159 #ifdef CONFIG_IPV6_SIT_6RD
1160 static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
1161                    struct ip_tunnel_6rd *ip6rd)
1162 {
1163     struct in6_addr prefix;
1164     __be32 relay_prefix;
1165 
1166     if (ip6rd->relay_prefixlen > 32 ||
1167         ip6rd->prefixlen + (32 - ip6rd->relay_prefixlen) > 64)
1168         return -EINVAL;
1169 
1170     ipv6_addr_prefix(&prefix, &ip6rd->prefix, ip6rd->prefixlen);
1171     if (!ipv6_addr_equal(&prefix, &ip6rd->prefix))
1172         return -EINVAL;
1173     if (ip6rd->relay_prefixlen)
1174         relay_prefix = ip6rd->relay_prefix &
1175                    htonl(0xffffffffUL <<
1176                      (32 - ip6rd->relay_prefixlen));
1177     else
1178         relay_prefix = 0;
1179     if (relay_prefix != ip6rd->relay_prefix)
1180         return -EINVAL;
1181 
1182     t->ip6rd.prefix = prefix;
1183     t->ip6rd.relay_prefix = relay_prefix;
1184     t->ip6rd.prefixlen = ip6rd->prefixlen;
1185     t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
1186     dst_cache_reset(&t->dst_cache);
1187     netdev_state_change(t->dev);
1188     return 0;
1189 }
1190 
1191 static int
1192 ipip6_tunnel_get6rd(struct net_device *dev, struct ip_tunnel_parm __user *data)
1193 {
1194     struct ip_tunnel *t = netdev_priv(dev);
1195     struct ip_tunnel_6rd ip6rd;
1196     struct ip_tunnel_parm p;
1197 
1198     if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
1199         if (copy_from_user(&p, data, sizeof(p)))
1200             return -EFAULT;
1201         t = ipip6_tunnel_locate(t->net, &p, 0);
1202     }
1203     if (!t)
1204         t = netdev_priv(dev);
1205 
1206     ip6rd.prefix = t->ip6rd.prefix;
1207     ip6rd.relay_prefix = t->ip6rd.relay_prefix;
1208     ip6rd.prefixlen = t->ip6rd.prefixlen;
1209     ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
1210     if (copy_to_user(data, &ip6rd, sizeof(ip6rd)))
1211         return -EFAULT;
1212     return 0;
1213 }
1214 
1215 static int
1216 ipip6_tunnel_6rdctl(struct net_device *dev, struct ip_tunnel_6rd __user *data,
1217             int cmd)
1218 {
1219     struct ip_tunnel *t = netdev_priv(dev);
1220     struct ip_tunnel_6rd ip6rd;
1221     int err;
1222 
1223     if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
1224         return -EPERM;
1225     if (copy_from_user(&ip6rd, data, sizeof(ip6rd)))
1226         return -EFAULT;
1227 
1228     if (cmd != SIOCDEL6RD) {
1229         err = ipip6_tunnel_update_6rd(t, &ip6rd);
1230         if (err < 0)
1231             return err;
1232     } else
1233         ipip6_tunnel_clone_6rd(dev, dev_to_sit_net(dev));
1234     return 0;
1235 }
1236 
1237 #endif /* CONFIG_IPV6_SIT_6RD */
1238 
1239 static bool ipip6_valid_ip_proto(u8 ipproto)
1240 {
1241     return ipproto == IPPROTO_IPV6 ||
1242         ipproto == IPPROTO_IPIP ||
1243 #if IS_ENABLED(CONFIG_MPLS)
1244         ipproto == IPPROTO_MPLS ||
1245 #endif
1246         ipproto == 0;
1247 }
1248 
1249 static int
1250 __ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p)
1251 {
1252     if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1253         return -EPERM;
1254 
1255     if (!ipip6_valid_ip_proto(p->iph.protocol))
1256         return -EINVAL;
1257     if (p->iph.version != 4 ||
1258         p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)))
1259         return -EINVAL;
1260 
1261     if (p->iph.ttl)
1262         p->iph.frag_off |= htons(IP_DF);
1263     return 0;
1264 }
1265 
1266 static int
1267 ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm *p)
1268 {
1269     struct ip_tunnel *t = netdev_priv(dev);
1270 
1271     if (dev == dev_to_sit_net(dev)->fb_tunnel_dev)
1272         t = ipip6_tunnel_locate(t->net, p, 0);
1273     if (!t)
1274         t = netdev_priv(dev);
1275     memcpy(p, &t->parms, sizeof(*p));
1276     return 0;
1277 }
1278 
1279 static int
1280 ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm *p)
1281 {
1282     struct ip_tunnel *t = netdev_priv(dev);
1283     int err;
1284 
1285     err = __ipip6_tunnel_ioctl_validate(t->net, p);
1286     if (err)
1287         return err;
1288 
1289     t = ipip6_tunnel_locate(t->net, p, 1);
1290     if (!t)
1291         return -ENOBUFS;
1292     return 0;
1293 }
1294 
1295 static int
1296 ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm *p)
1297 {
1298     struct ip_tunnel *t = netdev_priv(dev);
1299     int err;
1300 
1301     err = __ipip6_tunnel_ioctl_validate(t->net, p);
1302     if (err)
1303         return err;
1304 
1305     t = ipip6_tunnel_locate(t->net, p, 0);
1306     if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
1307         if (!t)
1308             return -ENOENT;
1309     } else {
1310         if (t) {
1311             if (t->dev != dev)
1312                 return -EEXIST;
1313         } else {
1314             if (((dev->flags & IFF_POINTOPOINT) && !p->iph.daddr) ||
1315                 (!(dev->flags & IFF_POINTOPOINT) && p->iph.daddr))
1316                 return -EINVAL;
1317             t = netdev_priv(dev);
1318         }
1319 
1320         ipip6_tunnel_update(t, p, t->fwmark);
1321     }
1322 
1323     return 0;
1324 }
1325 
1326 static int
1327 ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm *p)
1328 {
1329     struct ip_tunnel *t = netdev_priv(dev);
1330 
1331     if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
1332         return -EPERM;
1333 
1334     if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
1335         t = ipip6_tunnel_locate(t->net, p, 0);
1336         if (!t)
1337             return -ENOENT;
1338         if (t == netdev_priv(dev_to_sit_net(dev)->fb_tunnel_dev))
1339             return -EPERM;
1340         dev = t->dev;
1341     }
1342     unregister_netdevice(dev);
1343     return 0;
1344 }
1345 
1346 static int
1347 ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
1348 {
1349     switch (cmd) {
1350     case SIOCGETTUNNEL:
1351         return ipip6_tunnel_get(dev, p);
1352     case SIOCADDTUNNEL:
1353         return ipip6_tunnel_add(dev, p);
1354     case SIOCCHGTUNNEL:
1355         return ipip6_tunnel_change(dev, p);
1356     case SIOCDELTUNNEL:
1357         return ipip6_tunnel_del(dev, p);
1358     default:
1359         return -EINVAL;
1360     }
1361 }
1362 
1363 static int
1364 ipip6_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
1365                 void __user *data, int cmd)
1366 {
1367     switch (cmd) {
1368     case SIOCGETTUNNEL:
1369     case SIOCADDTUNNEL:
1370     case SIOCCHGTUNNEL:
1371     case SIOCDELTUNNEL:
1372         return ip_tunnel_siocdevprivate(dev, ifr, data, cmd);
1373     case SIOCGETPRL:
1374         return ipip6_tunnel_get_prl(dev, data);
1375     case SIOCADDPRL:
1376     case SIOCDELPRL:
1377     case SIOCCHGPRL:
1378         return ipip6_tunnel_prl_ctl(dev, data, cmd);
1379 #ifdef CONFIG_IPV6_SIT_6RD
1380     case SIOCGET6RD:
1381         return ipip6_tunnel_get6rd(dev, data);
1382     case SIOCADD6RD:
1383     case SIOCCHG6RD:
1384     case SIOCDEL6RD:
1385         return ipip6_tunnel_6rdctl(dev, data, cmd);
1386 #endif
1387     default:
1388         return -EINVAL;
1389     }
1390 }
1391 
1392 static const struct net_device_ops ipip6_netdev_ops = {
1393     .ndo_init   = ipip6_tunnel_init,
1394     .ndo_uninit = ipip6_tunnel_uninit,
1395     .ndo_start_xmit = sit_tunnel_xmit,
1396     .ndo_siocdevprivate = ipip6_tunnel_siocdevprivate,
1397     .ndo_get_stats64 = dev_get_tstats64,
1398     .ndo_get_iflink = ip_tunnel_get_iflink,
1399     .ndo_tunnel_ctl = ipip6_tunnel_ctl,
1400 };
1401 
1402 static void ipip6_dev_free(struct net_device *dev)
1403 {
1404     struct ip_tunnel *tunnel = netdev_priv(dev);
1405 
1406     dst_cache_destroy(&tunnel->dst_cache);
1407     free_percpu(dev->tstats);
1408 }
1409 
/* Feature flags set in both features and hw_features of SIT devices. */
#define SIT_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \
	 NETIF_F_GSO_SOFTWARE | NETIF_F_HW_CSUM)
1415 
1416 static void ipip6_tunnel_setup(struct net_device *dev)
1417 {
1418     struct ip_tunnel *tunnel = netdev_priv(dev);
1419     int t_hlen = tunnel->hlen + sizeof(struct iphdr);
1420 
1421     dev->netdev_ops     = &ipip6_netdev_ops;
1422     dev->header_ops     = &ip_tunnel_header_ops;
1423     dev->needs_free_netdev  = true;
1424     dev->priv_destructor    = ipip6_dev_free;
1425 
1426     dev->type       = ARPHRD_SIT;
1427     dev->mtu        = ETH_DATA_LEN - t_hlen;
1428     dev->min_mtu        = IPV6_MIN_MTU;
1429     dev->max_mtu        = IP6_MAX_MTU - t_hlen;
1430     dev->flags      = IFF_NOARP;
1431     netif_keep_dst(dev);
1432     dev->addr_len       = 4;
1433     dev->features       |= NETIF_F_LLTX;
1434     dev->features       |= SIT_FEATURES;
1435     dev->hw_features    |= SIT_FEATURES;
1436 }
1437 
1438 static int ipip6_tunnel_init(struct net_device *dev)
1439 {
1440     struct ip_tunnel *tunnel = netdev_priv(dev);
1441     int err;
1442 
1443     tunnel->dev = dev;
1444     tunnel->net = dev_net(dev);
1445     strcpy(tunnel->parms.name, dev->name);
1446 
1447     ipip6_tunnel_bind_dev(dev);
1448     dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1449     if (!dev->tstats)
1450         return -ENOMEM;
1451 
1452     err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1453     if (err) {
1454         free_percpu(dev->tstats);
1455         dev->tstats = NULL;
1456         return err;
1457     }
1458     netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL);
1459     return 0;
1460 }
1461 
1462 static void __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1463 {
1464     struct ip_tunnel *tunnel = netdev_priv(dev);
1465     struct iphdr *iph = &tunnel->parms.iph;
1466     struct net *net = dev_net(dev);
1467     struct sit_net *sitn = net_generic(net, sit_net_id);
1468 
1469     iph->version        = 4;
1470     iph->protocol       = IPPROTO_IPV6;
1471     iph->ihl        = 5;
1472     iph->ttl        = 64;
1473 
1474     rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
1475 }
1476 
1477 static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[],
1478               struct netlink_ext_ack *extack)
1479 {
1480     u8 proto;
1481 
1482     if (!data || !data[IFLA_IPTUN_PROTO])
1483         return 0;
1484 
1485     proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1486     if (!ipip6_valid_ip_proto(proto))
1487         return -EINVAL;
1488 
1489     return 0;
1490 }
1491 
1492 static void ipip6_netlink_parms(struct nlattr *data[],
1493                 struct ip_tunnel_parm *parms,
1494                 __u32 *fwmark)
1495 {
1496     memset(parms, 0, sizeof(*parms));
1497 
1498     parms->iph.version = 4;
1499     parms->iph.protocol = IPPROTO_IPV6;
1500     parms->iph.ihl = 5;
1501     parms->iph.ttl = 64;
1502 
1503     if (!data)
1504         return;
1505 
1506     if (data[IFLA_IPTUN_LINK])
1507         parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
1508 
1509     if (data[IFLA_IPTUN_LOCAL])
1510         parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
1511 
1512     if (data[IFLA_IPTUN_REMOTE])
1513         parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
1514 
1515     if (data[IFLA_IPTUN_TTL]) {
1516         parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
1517         if (parms->iph.ttl)
1518             parms->iph.frag_off = htons(IP_DF);
1519     }
1520 
1521     if (data[IFLA_IPTUN_TOS])
1522         parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
1523 
1524     if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
1525         parms->iph.frag_off = htons(IP_DF);
1526 
1527     if (data[IFLA_IPTUN_FLAGS])
1528         parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
1529 
1530     if (data[IFLA_IPTUN_PROTO])
1531         parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1532 
1533     if (data[IFLA_IPTUN_FWMARK])
1534         *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
1535 }
1536 
1537 /* This function returns true when ENCAP attributes are present in the nl msg */
1538 static bool ipip6_netlink_encap_parms(struct nlattr *data[],
1539                       struct ip_tunnel_encap *ipencap)
1540 {
1541     bool ret = false;
1542 
1543     memset(ipencap, 0, sizeof(*ipencap));
1544 
1545     if (!data)
1546         return ret;
1547 
1548     if (data[IFLA_IPTUN_ENCAP_TYPE]) {
1549         ret = true;
1550         ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
1551     }
1552 
1553     if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
1554         ret = true;
1555         ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
1556     }
1557 
1558     if (data[IFLA_IPTUN_ENCAP_SPORT]) {
1559         ret = true;
1560         ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
1561     }
1562 
1563     if (data[IFLA_IPTUN_ENCAP_DPORT]) {
1564         ret = true;
1565         ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
1566     }
1567 
1568     return ret;
1569 }
1570 
1571 #ifdef CONFIG_IPV6_SIT_6RD
1572 /* This function returns true when 6RD attributes are present in the nl msg */
1573 static bool ipip6_netlink_6rd_parms(struct nlattr *data[],
1574                     struct ip_tunnel_6rd *ip6rd)
1575 {
1576     bool ret = false;
1577     memset(ip6rd, 0, sizeof(*ip6rd));
1578 
1579     if (!data)
1580         return ret;
1581 
1582     if (data[IFLA_IPTUN_6RD_PREFIX]) {
1583         ret = true;
1584         ip6rd->prefix = nla_get_in6_addr(data[IFLA_IPTUN_6RD_PREFIX]);
1585     }
1586 
1587     if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) {
1588         ret = true;
1589         ip6rd->relay_prefix =
1590             nla_get_be32(data[IFLA_IPTUN_6RD_RELAY_PREFIX]);
1591     }
1592 
1593     if (data[IFLA_IPTUN_6RD_PREFIXLEN]) {
1594         ret = true;
1595         ip6rd->prefixlen = nla_get_u16(data[IFLA_IPTUN_6RD_PREFIXLEN]);
1596     }
1597 
1598     if (data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]) {
1599         ret = true;
1600         ip6rd->relay_prefixlen =
1601             nla_get_u16(data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]);
1602     }
1603 
1604     return ret;
1605 }
1606 #endif
1607 
1608 static int ipip6_newlink(struct net *src_net, struct net_device *dev,
1609              struct nlattr *tb[], struct nlattr *data[],
1610              struct netlink_ext_ack *extack)
1611 {
1612     struct net *net = dev_net(dev);
1613     struct ip_tunnel *nt;
1614     struct ip_tunnel_encap ipencap;
1615 #ifdef CONFIG_IPV6_SIT_6RD
1616     struct ip_tunnel_6rd ip6rd;
1617 #endif
1618     int err;
1619 
1620     nt = netdev_priv(dev);
1621 
1622     if (ipip6_netlink_encap_parms(data, &ipencap)) {
1623         err = ip_tunnel_encap_setup(nt, &ipencap);
1624         if (err < 0)
1625             return err;
1626     }
1627 
1628     ipip6_netlink_parms(data, &nt->parms, &nt->fwmark);
1629 
1630     if (ipip6_tunnel_locate(net, &nt->parms, 0))
1631         return -EEXIST;
1632 
1633     err = ipip6_tunnel_create(dev);
1634     if (err < 0)
1635         return err;
1636 
1637     if (tb[IFLA_MTU]) {
1638         u32 mtu = nla_get_u32(tb[IFLA_MTU]);
1639 
1640         if (mtu >= IPV6_MIN_MTU &&
1641             mtu <= IP6_MAX_MTU - dev->hard_header_len)
1642             dev->mtu = mtu;
1643     }
1644 
1645 #ifdef CONFIG_IPV6_SIT_6RD
1646     if (ipip6_netlink_6rd_parms(data, &ip6rd)) {
1647         err = ipip6_tunnel_update_6rd(nt, &ip6rd);
1648         if (err < 0)
1649             unregister_netdevice_queue(dev, NULL);
1650     }
1651 #endif
1652 
1653     return err;
1654 }
1655 
1656 static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
1657                 struct nlattr *data[],
1658                 struct netlink_ext_ack *extack)
1659 {
1660     struct ip_tunnel *t = netdev_priv(dev);
1661     struct ip_tunnel_parm p;
1662     struct ip_tunnel_encap ipencap;
1663     struct net *net = t->net;
1664     struct sit_net *sitn = net_generic(net, sit_net_id);
1665 #ifdef CONFIG_IPV6_SIT_6RD
1666     struct ip_tunnel_6rd ip6rd;
1667 #endif
1668     __u32 fwmark = t->fwmark;
1669     int err;
1670 
1671     if (dev == sitn->fb_tunnel_dev)
1672         return -EINVAL;
1673 
1674     if (ipip6_netlink_encap_parms(data, &ipencap)) {
1675         err = ip_tunnel_encap_setup(t, &ipencap);
1676         if (err < 0)
1677             return err;
1678     }
1679 
1680     ipip6_netlink_parms(data, &p, &fwmark);
1681 
1682     if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
1683         (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
1684         return -EINVAL;
1685 
1686     t = ipip6_tunnel_locate(net, &p, 0);
1687 
1688     if (t) {
1689         if (t->dev != dev)
1690             return -EEXIST;
1691     } else
1692         t = netdev_priv(dev);
1693 
1694     ipip6_tunnel_update(t, &p, fwmark);
1695 
1696 #ifdef CONFIG_IPV6_SIT_6RD
1697     if (ipip6_netlink_6rd_parms(data, &ip6rd))
1698         return ipip6_tunnel_update_6rd(t, &ip6rd);
1699 #endif
1700 
1701     return 0;
1702 }
1703 
1704 static size_t ipip6_get_size(const struct net_device *dev)
1705 {
1706     return
1707         /* IFLA_IPTUN_LINK */
1708         nla_total_size(4) +
1709         /* IFLA_IPTUN_LOCAL */
1710         nla_total_size(4) +
1711         /* IFLA_IPTUN_REMOTE */
1712         nla_total_size(4) +
1713         /* IFLA_IPTUN_TTL */
1714         nla_total_size(1) +
1715         /* IFLA_IPTUN_TOS */
1716         nla_total_size(1) +
1717         /* IFLA_IPTUN_PMTUDISC */
1718         nla_total_size(1) +
1719         /* IFLA_IPTUN_FLAGS */
1720         nla_total_size(2) +
1721         /* IFLA_IPTUN_PROTO */
1722         nla_total_size(1) +
1723 #ifdef CONFIG_IPV6_SIT_6RD
1724         /* IFLA_IPTUN_6RD_PREFIX */
1725         nla_total_size(sizeof(struct in6_addr)) +
1726         /* IFLA_IPTUN_6RD_RELAY_PREFIX */
1727         nla_total_size(4) +
1728         /* IFLA_IPTUN_6RD_PREFIXLEN */
1729         nla_total_size(2) +
1730         /* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */
1731         nla_total_size(2) +
1732 #endif
1733         /* IFLA_IPTUN_ENCAP_TYPE */
1734         nla_total_size(2) +
1735         /* IFLA_IPTUN_ENCAP_FLAGS */
1736         nla_total_size(2) +
1737         /* IFLA_IPTUN_ENCAP_SPORT */
1738         nla_total_size(2) +
1739         /* IFLA_IPTUN_ENCAP_DPORT */
1740         nla_total_size(2) +
1741         /* IFLA_IPTUN_FWMARK */
1742         nla_total_size(4) +
1743         0;
1744 }
1745 
1746 static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
1747 {
1748     struct ip_tunnel *tunnel = netdev_priv(dev);
1749     struct ip_tunnel_parm *parm = &tunnel->parms;
1750 
1751     if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
1752         nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
1753         nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
1754         nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
1755         nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
1756         nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
1757                !!(parm->iph.frag_off & htons(IP_DF))) ||
1758         nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
1759         nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags) ||
1760         nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
1761         goto nla_put_failure;
1762 
1763 #ifdef CONFIG_IPV6_SIT_6RD
1764     if (nla_put_in6_addr(skb, IFLA_IPTUN_6RD_PREFIX,
1765                  &tunnel->ip6rd.prefix) ||
1766         nla_put_in_addr(skb, IFLA_IPTUN_6RD_RELAY_PREFIX,
1767                 tunnel->ip6rd.relay_prefix) ||
1768         nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN,
1769             tunnel->ip6rd.prefixlen) ||
1770         nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN,
1771             tunnel->ip6rd.relay_prefixlen))
1772         goto nla_put_failure;
1773 #endif
1774 
1775     if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
1776             tunnel->encap.type) ||
1777         nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
1778             tunnel->encap.sport) ||
1779         nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
1780             tunnel->encap.dport) ||
1781         nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
1782             tunnel->encap.flags))
1783         goto nla_put_failure;
1784 
1785     return 0;
1786 
1787 nla_put_failure:
1788     return -EMSGSIZE;
1789 }
1790 
1791 static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
1792     [IFLA_IPTUN_LINK]       = { .type = NLA_U32 },
1793     [IFLA_IPTUN_LOCAL]      = { .type = NLA_U32 },
1794     [IFLA_IPTUN_REMOTE]     = { .type = NLA_U32 },
1795     [IFLA_IPTUN_TTL]        = { .type = NLA_U8 },
1796     [IFLA_IPTUN_TOS]        = { .type = NLA_U8 },
1797     [IFLA_IPTUN_PMTUDISC]       = { .type = NLA_U8 },
1798     [IFLA_IPTUN_FLAGS]      = { .type = NLA_U16 },
1799     [IFLA_IPTUN_PROTO]      = { .type = NLA_U8 },
1800 #ifdef CONFIG_IPV6_SIT_6RD
1801     [IFLA_IPTUN_6RD_PREFIX]     = { .len = sizeof(struct in6_addr) },
1802     [IFLA_IPTUN_6RD_RELAY_PREFIX]   = { .type = NLA_U32 },
1803     [IFLA_IPTUN_6RD_PREFIXLEN]  = { .type = NLA_U16 },
1804     [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 },
1805 #endif
1806     [IFLA_IPTUN_ENCAP_TYPE]     = { .type = NLA_U16 },
1807     [IFLA_IPTUN_ENCAP_FLAGS]    = { .type = NLA_U16 },
1808     [IFLA_IPTUN_ENCAP_SPORT]    = { .type = NLA_U16 },
1809     [IFLA_IPTUN_ENCAP_DPORT]    = { .type = NLA_U16 },
1810     [IFLA_IPTUN_FWMARK]     = { .type = NLA_U32 },
1811 };
1812 
1813 static void ipip6_dellink(struct net_device *dev, struct list_head *head)
1814 {
1815     struct net *net = dev_net(dev);
1816     struct sit_net *sitn = net_generic(net, sit_net_id);
1817 
1818     if (dev != sitn->fb_tunnel_dev)
1819         unregister_netdevice_queue(dev, head);
1820 }
1821 
1822 static struct rtnl_link_ops sit_link_ops __read_mostly = {
1823     .kind       = "sit",
1824     .maxtype    = IFLA_IPTUN_MAX,
1825     .policy     = ipip6_policy,
1826     .priv_size  = sizeof(struct ip_tunnel),
1827     .setup      = ipip6_tunnel_setup,
1828     .validate   = ipip6_validate,
1829     .newlink    = ipip6_newlink,
1830     .changelink = ipip6_changelink,
1831     .get_size   = ipip6_get_size,
1832     .fill_info  = ipip6_fill_info,
1833     .dellink    = ipip6_dellink,
1834     .get_link_net   = ip_tunnel_get_link_net,
1835 };
1836 
1837 static struct xfrm_tunnel sit_handler __read_mostly = {
1838     .handler    =   ipip6_rcv,
1839     .err_handler    =   ipip6_err,
1840     .priority   =   1,
1841 };
1842 
1843 static struct xfrm_tunnel ipip_handler __read_mostly = {
1844     .handler    =   ipip_rcv,
1845     .err_handler    =   ipip6_err,
1846     .priority   =   2,
1847 };
1848 
1849 #if IS_ENABLED(CONFIG_MPLS)
1850 static struct xfrm_tunnel mplsip_handler __read_mostly = {
1851     .handler    =   mplsip_rcv,
1852     .err_handler    =   ipip6_err,
1853     .priority   =   2,
1854 };
1855 #endif
1856 
1857 static void __net_exit sit_destroy_tunnels(struct net *net,
1858                        struct list_head *head)
1859 {
1860     struct sit_net *sitn = net_generic(net, sit_net_id);
1861     struct net_device *dev, *aux;
1862     int prio;
1863 
1864     for_each_netdev_safe(net, dev, aux)
1865         if (dev->rtnl_link_ops == &sit_link_ops)
1866             unregister_netdevice_queue(dev, head);
1867 
1868     for (prio = 0; prio < 4; prio++) {
1869         int h;
1870         for (h = 0; h < (prio ? IP6_SIT_HASH_SIZE : 1); h++) {
1871             struct ip_tunnel *t;
1872 
1873             t = rtnl_dereference(sitn->tunnels[prio][h]);
1874             while (t) {
1875                 /* If dev is in the same netns, it has already
1876                  * been added to the list by the previous loop.
1877                  */
1878                 if (!net_eq(dev_net(t->dev), net))
1879                     unregister_netdevice_queue(t->dev,
1880                                    head);
1881                 t = rtnl_dereference(t->next);
1882             }
1883         }
1884     }
1885 }
1886 
1887 static int __net_init sit_init_net(struct net *net)
1888 {
1889     struct sit_net *sitn = net_generic(net, sit_net_id);
1890     struct ip_tunnel *t;
1891     int err;
1892 
1893     sitn->tunnels[0] = sitn->tunnels_wc;
1894     sitn->tunnels[1] = sitn->tunnels_l;
1895     sitn->tunnels[2] = sitn->tunnels_r;
1896     sitn->tunnels[3] = sitn->tunnels_r_l;
1897 
1898     if (!net_has_fallback_tunnels(net))
1899         return 0;
1900 
1901     sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
1902                        NET_NAME_UNKNOWN,
1903                        ipip6_tunnel_setup);
1904     if (!sitn->fb_tunnel_dev) {
1905         err = -ENOMEM;
1906         goto err_alloc_dev;
1907     }
1908     dev_net_set(sitn->fb_tunnel_dev, net);
1909     sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops;
1910     /* FB netdevice is special: we have one, and only one per netns.
1911      * Allowing to move it to another netns is clearly unsafe.
1912      */
1913     sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1914 
1915     err = register_netdev(sitn->fb_tunnel_dev);
1916     if (err)
1917         goto err_reg_dev;
1918 
1919     ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
1920     ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
1921 
1922     t = netdev_priv(sitn->fb_tunnel_dev);
1923 
1924     strcpy(t->parms.name, sitn->fb_tunnel_dev->name);
1925     return 0;
1926 
1927 err_reg_dev:
1928     free_netdev(sitn->fb_tunnel_dev);
1929 err_alloc_dev:
1930     return err;
1931 }
1932 
1933 static void __net_exit sit_exit_batch_net(struct list_head *net_list)
1934 {
1935     LIST_HEAD(list);
1936     struct net *net;
1937 
1938     rtnl_lock();
1939     list_for_each_entry(net, net_list, exit_list)
1940         sit_destroy_tunnels(net, &list);
1941 
1942     unregister_netdevice_many(&list);
1943     rtnl_unlock();
1944 }
1945 
1946 static struct pernet_operations sit_net_ops = {
1947     .init = sit_init_net,
1948     .exit_batch = sit_exit_batch_net,
1949     .id   = &sit_net_id,
1950     .size = sizeof(struct sit_net),
1951 };
1952 
1953 static void __exit sit_cleanup(void)
1954 {
1955     rtnl_link_unregister(&sit_link_ops);
1956     xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
1957     xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1958 #if IS_ENABLED(CONFIG_MPLS)
1959     xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
1960 #endif
1961 
1962     unregister_pernet_device(&sit_net_ops);
1963     rcu_barrier(); /* Wait for completion of call_rcu()'s */
1964 }
1965 
1966 static int __init sit_init(void)
1967 {
1968     int err;
1969 
1970     pr_info("IPv6, IPv4 and MPLS over IPv4 tunneling driver\n");
1971 
1972     err = register_pernet_device(&sit_net_ops);
1973     if (err < 0)
1974         return err;
1975     err = xfrm4_tunnel_register(&sit_handler, AF_INET6);
1976     if (err < 0) {
1977         pr_info("%s: can't register ip6ip4\n", __func__);
1978         goto xfrm_tunnel_failed;
1979     }
1980     err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
1981     if (err < 0) {
1982         pr_info("%s: can't register ip4ip4\n", __func__);
1983         goto xfrm_tunnel4_failed;
1984     }
1985 #if IS_ENABLED(CONFIG_MPLS)
1986     err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
1987     if (err < 0) {
1988         pr_info("%s: can't register mplsip\n", __func__);
1989         goto xfrm_tunnel_mpls_failed;
1990     }
1991 #endif
1992     err = rtnl_link_register(&sit_link_ops);
1993     if (err < 0)
1994         goto rtnl_link_failed;
1995 
1996 out:
1997     return err;
1998 
1999 rtnl_link_failed:
2000 #if IS_ENABLED(CONFIG_MPLS)
2001     xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
2002 xfrm_tunnel_mpls_failed:
2003 #endif
2004     xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
2005 xfrm_tunnel4_failed:
2006     xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
2007 xfrm_tunnel_failed:
2008     unregister_pernet_device(&sit_net_ops);
2009     goto out;
2010 }
2011 
2012 module_init(sit_init);
2013 module_exit(sit_cleanup);
2014 MODULE_LICENSE("GPL");
2015 MODULE_ALIAS_RTNL_LINK("sit");
2016 MODULE_ALIAS_NETDEV("sit0");