// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

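/* Hash a tunnel's (key, remote address) pair into one of the
 * IP_TNL_HASH_SIZE buckets of the per-netns tunnel table.
 */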
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

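/* Match a packet's (flags, key) pair against a tunnel's input key:
 * keyed tunnels require the exact key, keyless tunnels match only
 * packets that carry no key.
 */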
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require exact key match i.e. if a key is present in packet
   it will match only keyed tunnel with the same key; if it is not
   present, it will match only keyed tunnel with zero key.

   All keysless packets, if not matched, will fall through; keyed tunnels
   must be matched precisely.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;
	struct net_device *ndev;
	unsigned int hash;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	/* First pass: exact saddr/daddr match */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Second pass: daddr matches, tunnel source is wildcard */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Remaining passes search the bucket for remote == 0 */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	/* Third pass: local address bound, or multicast destination */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Fourth pass: fully wildcarded tunnels; the key must still
	 * match unless the packet carries TUNNEL_NO_KEY.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	ndev = READ_ONCE(itn->fb_tunnel_dev);
	if (ndev && ndev->flags & IFF_UP)
		return netdev_priv(ndev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

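/* Pick the hash bucket a tunnel with these parameters belongs to:
 * multicast and wildcard destinations hash as remote == 0, and VTI
 * tunnels without TUNNEL_KEY hash with a zero i_key.
 */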
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}

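/* Find a tunnel whose parameters (addresses, link, device type and key)
 * exactly match @parms; used by the ioctl and netlink configuration
 * paths.
 */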
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

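/* Allocate and register a tunnel netdevice for @parms, deriving the
 * interface name from parms->name or from the link ops kind ("kind%d").
 * Caller must hold RTNL.
 */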
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	err = -E2BIG;
	if (parms->name[0]) {
		if (!dev_valid_name(parms->name))
			goto failed;
		strscpy(name, parms->name, IFNAMSIZ);
	} else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3))
			goto failed;
		strcpy(name, ops->kind);
		strcat(name, "%d");
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

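/* Bind the tunnel to an underlying device, found via a route lookup to
 * the tunnel destination or via parms.link, and return the MTU the
 * tunnel device should use.
 */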
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
				    iph->saddr, tunnel->parms.o_key,
				    RT_TOS(iph->tos), dev_net(dev),
				    tunnel->parms.link, tunnel->fwmark, 0, 0);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = min(tdev->mtu, IP_MAX_MTU);
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}

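/* Create a tunnel device from ioctl parameters, set its MTU limits and
 * hash it into the per-netns table; returns the new tunnel or ERR_PTR.
 */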
static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;
	int t_hlen;
	int mtu;
	int err;

	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	mtu = ip_tunnel_bind_dev(dev);
	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	nt = netdev_priv(dev);
	t_hlen = nt->hlen + sizeof(struct iphdr);
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = IP_MAX_MTU - t_hlen;
	if (dev->type == ARPHRD_ETHER)
		dev->max_mtu -= dev->hard_header_len;

	ip_tunnel_add(itn, nt);
	return nt;

err_dev_set_mtu:
	unregister_netdevice(dev);
	return ERR_PTR(err);
}

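/* Receive path common to IPv4 tunnels: validate checksum/sequence
 * flags, decapsulate ECN, update stats and hand the inner packet to
 * GRO cells.
 */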
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

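/* Register/unregister a per-type encapsulation handler (e.g. FOU/GUE)
 * in the iptun_encaps array.
 */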
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

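/* Check the packet against the path MTU, propagate the MTU to the
 * inner dst and emit ICMP "fragmentation needed" / ICMPv6 "packet too
 * big" errors as required; returns -E2BIG if the packet must be
 * dropped.
 */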
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df,
			    const struct iphdr *inner_iph,
			    int tunnel_hlen, __be32 dst, bool md)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size;
	int mtu;

	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
	pkt_size = skb->len - tunnel_hlen;
	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;

	if (df) {
		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
	} else {
		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
	}

	if (skb_valid_dst(skb))
		skb_dst_update_pmtu_no_confirm(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6;
		__be32 daddr;

		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
					   NULL;
		daddr = md ? dst : tunnel->parms.iph.daddr;

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((daddr && !ipv4_is_multicast(daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

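/* Transmit path for metadata-based (collect_md) tunnels: routing, PMTU
 * and ECN/TTL handling are driven entirely by the per-skb tunnel info.
 */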
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
			    dev_net(dev), 0, skb->mark, skb_get_hash(skb),
			    key->flow_flags);
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (headroom > dev->needed_headroom)
		dev->needed_headroom = headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}
	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);

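/* Generic transmit path for configured IPv4 tunnels. @tnl_params is
 * usually the tunnel's own outer iph; for NBMA tunnels daddr == 0 and
 * the destination is taken from the inner headers or attached metadata.
 */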
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info = NULL;
	const struct iphdr *inner_iph;
	unsigned int max_headroom;	/* The extra header space needed */
	struct rtable *rt = NULL;	/* Route to the other host */
	__be16 payload_protocol;
	bool use_cache = false;
	struct flowi4 fl4;
	bool md = false;
	bool connected;
	u8 tos, ttl;
	__be32 dst;
	__be16 df;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);
	payload_protocol = skb_protocol(skb, true);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		tun_info = skb_tunnel_info(skb);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
		    ip_tunnel_info_af(tun_info) == AF_INET &&
		    tun_info->key.u.ipv4.dst) {
			dst = tun_info->key.u.ipv4.dst;
			md = true;
			connected = true;
		} else if (payload_protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (payload_protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		if (!md)
			connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (payload_protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (payload_protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
			    tunnel->parms.o_key, RT_TOS(tos),
			    dev_net(dev), tunnel->parms.link,
			    tunnel->fwmark, skb_get_hash(skb), 0);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	if (connected && md) {
		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
		if (use_cache)
			rt = dst_cache_get_ip4(&tun_info->dst_cache,
					       &fl4.saddr);
	} else {
		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
						   &fl4.saddr) : NULL;
	}

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
		else if (!md && connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df = tnl_params->frag_off;
	if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off & htons(IP_DF));

	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (payload_protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (payload_protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

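/* Apply changed parameters to an existing tunnel: rehash it under the
 * new addresses, refresh the hardware/broadcast addresses for
 * non-Ethernet devices and rebind if the underlying link or fwmark
 * changed.
 */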
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		__dev_addr_set(dev, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}

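/* ioctl backend shared by the IPv4 tunnel drivers: get, add, change or
 * delete the tunnel described by @p. Modifications require
 * CAP_NET_ADMIN in the tunnel's user namespace.
 */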
int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true, 0);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ctl);

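/* Copy the ip_tunnel_parm ioctl argument from/to userspace around the
 * device's ndo_tunnel_ctl handler.
 */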
int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
			     void __user *data, int cmd)
{
	struct ip_tunnel_parm p;
	int err;

	if (copy_from_user(&p, data, sizeof(p)))
		return -EFAULT;
	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
	if (!err && copy_to_user(data, &p, sizeof(p)))
		return -EFAULT;
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);

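/* Validate a new MTU against the tunnel's encapsulation overhead; when
 * @strict is false, oversized requests are clamped instead of rejected.
 */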
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = IP_MAX_MTU - t_hlen;

	if (dev->type == ARPHRD_ETHER)
		max_mtu -= dev->hard_header_len;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

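/* Per-netns initialization: set up the tunnel hash table and, unless
 * fallback tunnels are disabled for this netns, create the fallback
 * device (e.g. tunl0, gre0) under RTNL.
 */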
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	itn->rtnl_link_ops = ops;
	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops || !net_has_fallback_tunnels(net)) {
		struct ip_tunnel_net *it_init_net;

		it_init_net = net_generic(&init_net, ip_tnl_net_id);
		itn->type = it_init_net->type;
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strscpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
		itn->type = itn->fb_tunnel_dev->type;
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

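/* Queue every device of these link ops registered in @net, plus every
 * tunnel in @itn that lives in another netns, for unregistration.
 */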
static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
			      struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
			   struct rtnl_link_ops *ops)
{
	struct ip_tunnel_net *itn;
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		itn = net_generic(net, id);
		ip_tunnel_destroy(net, itn, &list, ops);
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);

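/* rtnl_link "newlink" helper: register the tunnel device, honour an
 * IFLA_MTU request (clamped to the tunnel's limits) and hash the
 * tunnel into the per-netns table; only one collect_md tunnel is
 * allowed per netns.
 */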
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;
	err = register_netdevice(dev);
	if (err)
		goto err_register_netdevice;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));

		if (dev->type == ARPHRD_ETHER)
			max -= dev->hard_header_len;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
	}

	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	ip_tunnel_add(itn, nt);
	return 0;

err_dev_set_mtu:
	unregister_netdevice(dev);
err_register_netdevice:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

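/* rtnl_link "changelink" helper: reject changes to the fallback device
 * or parameters that would collide with another tunnel, then apply the
 * new parameters via ip_tunnel_update().
 */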
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

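/* ndo_init handler shared by the IPv4 tunnel drivers: allocate per-cpu
 * stats, the dst cache and GRO cells, and prefill the outer IPv4
 * header's version and ihl fields.
 */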
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->needs_free_netdev = true;
	dev->priv_destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	if (tunnel->collect_md)
		netif_keep_dst(dev);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	ip_tunnel_del(itn, netdev_priv(dev));
	if (itn->fb_tunnel_dev == dev)
		WRITE_ONCE(itn->fb_tunnel_dev, NULL);

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do least required initialization, rest of init is done in tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");