0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025 #include <linux/errno.h>
0026 #include <linux/kernel.h>
0027 #include <linux/string.h>
0028 #include <linux/socket.h>
0029 #include <linux/net.h>
0030 #include <linux/netdevice.h>
0031 #include <linux/if_arp.h>
0032 #include <linux/in6.h>
0033 #include <linux/tcp.h>
0034 #include <linux/route.h>
0035 #include <linux/module.h>
0036 #include <linux/slab.h>
0037
0038 #include <linux/bpf-cgroup.h>
0039 #include <linux/netfilter.h>
0040 #include <linux/netfilter_ipv6.h>
0041
0042 #include <net/sock.h>
0043 #include <net/snmp.h>
0044
0045 #include <net/ipv6.h>
0046 #include <net/ndisc.h>
0047 #include <net/protocol.h>
0048 #include <net/ip6_route.h>
0049 #include <net/addrconf.h>
0050 #include <net/rawv6.h>
0051 #include <net/icmp.h>
0052 #include <net/xfrm.h>
0053 #include <net/checksum.h>
0054 #include <linux/mroute6.h>
0055 #include <net/l3mdev.h>
0056 #include <net/lwtunnel.h>
0057 #include <net/ip_tunnels.h>
0058
/* Final IPv6 transmit step: resolve the link-layer neighbour for the
 * route's next hop and hand the packet to neigh_output().  Also handles
 * multicast loopback delivery, node-local multicast scoping, and
 * lwtunnel output redirection.  Consumes @skb on every path.
 * Returns 0 or a negative errno.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	const struct in6_addr *daddr, *nexthop;
	struct ipv6hdr *hdr;
	struct neighbour *neigh;
	int ret;

	/* Be paranoid, rather than too clever: make room for the device's
	 * hard header if the headroom is short.  skb_expand_head() frees
	 * the original skb on failure, so only the stats update remains.
	 */
	if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
		skb = skb_expand_head(skb, hh_len);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			return -ENOMEM;
		}
	}

	/* Re-read the header: skb_expand_head() may have reallocated. */
	hdr = ipv6_hdr(skb);
	daddr = &hdr->daddr;
	if (ipv6_addr_is_multicast(daddr)) {
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Loop back a copy to local listeners: either the
			 * multicast router socket wants unforwarded packets,
			 * or this host is itself a member of the group.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0 means the sender only wanted local
			 * delivery; do not put it on the wire.
			 */
			if (hdr->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
		/* Node-local scope never leaves the host. */
		if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* Light-weight tunnel may take over transmission entirely. */
	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
	neigh = __ipv6_neigh_lookup_noref(dev, nexthop);

	/* No cached neighbour: try to create one under the same RCU
	 * critical section before giving up.
	 */
	if (unlikely(IS_ERR_OR_NULL(neigh))) {
		if (unlikely(!neigh))
			neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
		if (IS_ERR(neigh)) {
			rcu_read_unlock_bh();
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
			return -EINVAL;
		}
	}
	sock_confirm_neigh(skb, neigh);
	ret = neigh_output(neigh, skb, false);
	rcu_read_unlock_bh();
	return ret;
}
0138
0139 static int
0140 ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
0141 struct sk_buff *skb, unsigned int mtu)
0142 {
0143 struct sk_buff *segs, *nskb;
0144 netdev_features_t features;
0145 int ret = 0;
0146
0147
0148
0149
0150
0151 features = netif_skb_features(skb);
0152 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
0153 if (IS_ERR_OR_NULL(segs)) {
0154 kfree_skb(skb);
0155 return -ENOMEM;
0156 }
0157
0158 consume_skb(skb);
0159
0160 skb_list_walk_safe(segs, segs, nskb) {
0161 int err;
0162
0163 skb_mark_not_on_list(segs);
0164 err = ip6_fragment(net, sk, segs, ip6_finish_output2);
0165 if (err && ret == 0)
0166 ret = err;
0167 }
0168
0169 return ret;
0170 }
0171
0172 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
0173 {
0174 unsigned int mtu;
0175
0176 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
0177
0178 if (skb_dst(skb)->xfrm) {
0179 IP6CB(skb)->flags |= IP6SKB_REROUTED;
0180 return dst_output(net, sk, skb);
0181 }
0182 #endif
0183
0184 mtu = ip6_skb_dst_mtu(skb);
0185 if (skb_is_gso(skb) &&
0186 !(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
0187 !skb_gso_validate_network_len(skb, mtu))
0188 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
0189
0190 if ((skb->len > mtu && !skb_is_gso(skb)) ||
0191 dst_allfrag(skb_dst(skb)) ||
0192 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
0193 return ip6_fragment(net, sk, skb, ip6_finish_output2);
0194 else
0195 return ip6_finish_output2(net, sk, skb);
0196 }
0197
0198 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
0199 {
0200 int ret;
0201
0202 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
0203 switch (ret) {
0204 case NET_XMIT_SUCCESS:
0205 case NET_XMIT_CN:
0206 return __ip6_finish_output(net, sk, skb) ? : ret;
0207 default:
0208 kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
0209 return ret;
0210 }
0211 }
0212
/* dst_output() entry point for IPv6.  Runs the NF_INET_POST_ROUTING
 * hook (skipped when xfrm already rerouted the packet) before
 * ip6_finish_output().  Consumes @skb.
 */
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	/* Silently discard when IPv6 is administratively disabled on the
	 * egress interface.
	 */
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);
0233
0234 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
0235 {
0236 if (!np->autoflowlabel_set)
0237 return ip6_default_np_autolabel(net);
0238 else
0239 return np->autoflowlabel;
0240 }
0241
0242
0243
0244
0245
0246
0247
/* Transmit a packet for a connected socket (TCP, SCTP, DCCP, ...):
 * push extension headers and the IPv6 header onto @skb, then send it
 * through NF_INET_LOCAL_OUT / dst_output().
 * Returns 0 on success, -EMSGSIZE when the packet exceeds the path MTU
 * and may not be fragmented, or -ENOBUFS on headroom allocation failure.
 * Consumes @skb on every path.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);
	struct hop_jumbo_hdr *hop_jumbo;
	int hoplen = sizeof(*hop_jumbo);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	/* Worst-case headroom: IPv6 header + jumbo HBH option + link layer
	 * + any extension headers we are about to push.
	 */
	head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	/* skb_expand_head() frees the skb on failure. */
	if (unlikely(head_room > skb_headroom(skb))) {
		skb = skb_expand_head(skb, head_room);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			return -ENOBUFS;
		}
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		/* Fragmentable options first (pushed closest to payload)... */
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		/* ...then non-fragmentable ones; a routing header here may
		 * rewrite first_hop.
		 */
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	/* Payload larger than 64k: insert a hop-by-hop jumbo payload
	 * option (IPV6_TLV_JUMBO) and mark the skb as a fake jumbogram so
	 * the output path skips the GSO length validation.
	 */
	if (unlikely(seg_len > IPV6_MAXPLEN)) {
		hop_jumbo = skb_push(skb, hoplen);

		hop_jumbo->nexthdr = proto;
		hop_jumbo->hdrlen = 0;
		hop_jumbo->tlv_type = IPV6_TLV_JUMBO;
		hop_jumbo->tlv_len = 4;
		hop_jumbo->jumbo_payload_len = htonl(seg_len + hoplen);

		proto = IPPROTO_HOPOPTS;
		/* Jumbograms carry payload_len == 0 in the fixed header. */
		seg_len = 0;
		IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO;
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* Fill in the IPv6 header: hop limit from the socket when set,
	 * otherwise from the route.
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);

		/* The L3 master device may consume or redirect the skb. */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* Hooks are only run for the original packet, not clones. */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dev,
			       dst_output);
	}

	skb->dev = dev;

	/* Too big and DF semantics apply: tell the socket and drop. */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
0359
/* Deliver a Router-Alert packet to every registered IPV6_ROUTER_ALERT
 * raw socket whose selector matches @sel.  Each matching socket but the
 * last receives a clone; the last gets the original @skb.
 * Returns 1 when @skb was delivered (and thus consumed), 0 otherwise.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		/* Match selector and (if bound) the receiving interface. */
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			/* Honour per-socket netns isolation of RA delivery. */
			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			/* Deliver a clone to the previous match; keep the
			 * original for the final one.
			 */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
0394
/* Decide what to do with a packet whose destination matches a proxy-NDP
 * entry.  Returns 1 when the packet is an NDP message that must be
 * processed locally, -1 when it must be dropped (link-local destination
 * behind the proxy), and 0 when normal forwarding should continue.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Walk past any extension headers to find the transport header. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Need at least the ICMPv6 type byte in linear data. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* NDP messages aimed at a proxied address are
			 * handled by this host, not forwarded.
			 */
			return 1;
		default:
			break;
		}
	}

	/* A link-local destination cannot legitimately be forwarded;
	 * signal link failure and have the caller drop the packet.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
0446
/* NF_INET_FORWARD continuation: account the forwarded packet and push
 * it to the output path via dst_output().
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	/* Hardware already forwarded this packet on our behalf; just
	 * release the software copy.
	 */
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	/* Forwarded packets must not carry a stale receive timestamp. */
	skb_clear_tstamp(skb);
	return dst_output(net, sk, skb);
}
0465
0466 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
0467 {
0468 if (skb->len <= mtu)
0469 return false;
0470
0471
0472 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
0473 return true;
0474
0475 if (skb->ignore_df)
0476 return false;
0477
0478 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
0479 return false;
0480
0481 return true;
0482 }
0483
/* Forward an IPv6 packet received for a non-local destination.
 * Performs all RFC-mandated checks (forwarding enabled, hop limit,
 * source address sanity, path MTU) plus Router-Alert, proxy-NDP and
 * xfrm policy handling, then decrements the hop limit and runs the
 * NF_INET_FORWARD hook.  Consumes @skb on every path.
 * Returns 0 on success or a negative errno.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	struct inet6_dev *idev;
	SKB_DR(reason);
	u32 mtu;

	/* idev of the *incoming* interface, for input-side stats. */
	idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* Only packets unicast to us at L2 may be forwarded. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	/* A socket-owned skb here would be a local-delivery mixup. */
	if (unlikely(skb->sk))
		goto drop;

	/* LRO-merged packets must not be forwarded (checksums/lengths
	 * are no longer per-wire-packet).
	 */
	if (skb_warn_if_lro(skb))
		goto drop;

	if (!net->ipv6.devconf_all->disable_policy &&
	    (!idev || !idev->cnf.disable_policy) &&
	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/* Router Alert option: hand the packet to any registered
	 * RA-listening raw sockets; a return of 1 means it was consumed.
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/* Hop limit exhausted: send Time Exceeded back to the source. */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
		return -ETIMEDOUT;
	}

	/* Proxy NDP: destinations we proxy for may need local handling. */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0) {
			/* Account the hop before local input processing. */
			hdr->hop_limit--;
			return ip6_input(skb);
		} else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		SKB_DR_SET(reason, XFRM_POLICY);
		goto drop;
	}
	/* xfrm6_route_forward() may have swapped the dst. */
	dst = skb_dst(skb);

	/* Packet leaves through the same interface it arrived on (and
	 * carries no source route / IPsec state): the sender could reach
	 * the next hop directly, so advise it with an NDP Redirect.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/* Redirect target: the gateway if the route has one,
		 * otherwise the final destination itself.
		 */
		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Rate-limit redirects per destination (1/s). */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* Refuse packets with illegal source addresses. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_maybe_forward(dst, true);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Packet Too Big must carry the outgoing device/MTU. */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
		return -EMSGSIZE;
	}

	/* We will modify the header; make it private and roomy enough
	 * for the outgoing device's hard header.
	 */
	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have copied the header. */
	hdr = ipv6_hdr(skb);

	/* Mandatory hop-limit decrement. */
	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
	SKB_DR_SET(reason, IP_INADDRERRORS);
drop:
	kfree_skb_reason(skb, reason);
	return -EINVAL;
}
0647
/* Copy per-packet metadata from the original skb to a freshly built
 * fragment so the fragment is routed, classified and accounted exactly
 * like its parent.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	/* Replace any dst on the target with a reference to the parent's. */
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	/* Conntrack, skb extensions and LSM security mark travel along. */
	nf_copy(to, from);
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}
0667
/* Set up fast-path fragmentation over an existing frag_list: detach the
 * list into @iter, save a copy of the unfragmentable header, and turn
 * @skb itself into the first fragment (with a fragment header inserted
 * after the copied headers).  Returns 0 or -ENOMEM.
 */
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* The header chain now ends in a fragment header. */
	*prevhdr = NEXTHDR_FRAGMENT;
	/* Keep a private copy of the unfragmentable part: it must be
	 * replayed in front of every subsequent fragment.
	 */
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	/* Take ownership of the frag list; skb becomes the first frag. */
	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	/* Open a gap of sizeof(frag_hdr) between the unfragmentable
	 * headers and the payload, then restore the saved headers.
	 */
	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	/* First fragment: offset 0, More Fragments set. */
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	/* Trim the first fragment to its own (page) data only. */
	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);
0708
/* Turn the next frag-list member (@iter->frag) into a standalone
 * fragment: prepend the saved unfragmentable headers plus a fragment
 * header, advance the running offset past the fragment just sent
 * (@skb), and copy metadata from it.
 */
void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	/* Replay the unfragmentable header saved by ip6_fraglist_init(). */
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	/* Advance by the payload length of the previous fragment. */
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	/* More Fragments unless this is the last list member. */
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);
0733
0734 void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
0735 unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
0736 u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
0737 {
0738 state->prevhdr = prevhdr;
0739 state->nexthdr = nexthdr;
0740 state->frag_id = frag_id;
0741
0742 state->hlen = hlen;
0743 state->mtu = mtu;
0744
0745 state->left = skb->len - hlen;
0746 state->ptr = hlen;
0747
0748 state->hroom = hdr_room;
0749 state->troom = needed_tailroom;
0750
0751 state->offset = 0;
0752 }
0753 EXPORT_SYMBOL(ip6_frag_init);
0754
/* Slow-path fragmentation: allocate and fill the next fragment of @skb
 * according to @state (set up by ip6_frag_init()).  Returns the new
 * fragment skb, or ERR_PTR(-ENOMEM) on allocation failure.
 */
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;

	/* Never send more than the MTU budget per fragment. */
	if (len > state->mtu)
		len = state->mtu;

	/* All fragments but the last must carry a multiple of 8 octets. */
	if (len < state->left)
		len &= ~7;

	/* Room for payload + replayed headers + fragment header + the
	 * device head/tail room recorded in the state.
	 */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/* Lay out the fragment: network header, fragment header, then
	 * the transport/payload area.
	 */
	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/* Charge the fragment to the owning socket's write budget so
	 * memory accounting stays correct.
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/* Replay the unfragmentable headers from the original packet. */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	/* Patch the copied header chain so its last next-header field
	 * now points at the fragment header.
	 */
	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/* Build the fragment header itself. */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/* Copy this fragment's slice of the payload. */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	/* More Fragments unless this consumed the remaining payload. */
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);
0830
/* Fragment @skb to fit the path MTU and pass each fragment to @output.
 * Uses the fast path (reusing an existing frag_list) when possible,
 * otherwise falls back to allocating fresh fragments.  Consumes @skb.
 * Returns 0 on success or a negative errno.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	/* Only trust the socket's IPv6 state when not on a nested
	 * (tunnel-recursive) transmit.
	 */
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	bool mono_delivery_time = skb->mono_delivery_time;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	/* Locate the insertion point for the fragment header: prevhdr
	 * points at the next-header field preceding the fragmentable part.
	 */
	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	/* Save as an offset: the header memory may move below. */
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* Oversized and DF semantics apply: report Packet Too Big. */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* Never emit fragments larger than the largest original
		 * fragment this packet was reassembled from.
		 */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	/* Honour a smaller per-socket fragment size (IPV6_MTU). */
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* Need room for headers + fragment header + at least 8 octets. */
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	/* mtu is now the per-fragment payload budget. */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	/* Fragments cannot rely on hardware checksum offload. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	/* Recompute prevhdr: skb_checksum_help() may have reallocated
	 * the header (avoids a use-after-free of the old pointer).
	 */
	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		/* Fast path only when the existing list members already
		 * line up as valid fragments (size, alignment, headroom)
		 * and nothing is shared/cloned.
		 */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry required for every member. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Shared members cannot be modified in place. */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			/* Move the socket send-buffer charge onto the
			 * fragment that will actually carry the data.
			 */
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare the next list member (if any) before we
			 * hand the current fragment to the output path.
			 */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb_set_delivery_time(skb, tstamp, mono_delivery_time);
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		/* Output failed: unsent fragments are still on the list. */
		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		/* Roll back the ownership transfer done above for every
		 * member processed before the fast path was abandoned.
		 */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/* Fragment the datagram by allocating a fresh skb per fragment
	 * and copying the relevant slice of payload into each.
	 */
	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/* Keep emitting fragments until all payload is consumed. */
	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/* Put this fragment into the sending queue. */
		skb_set_delivery_time(frag, tstamp, mono_delivery_time);
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_gso_disable(skb->sk);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
1014
1015 static inline int ip6_rt_check(const struct rt6key *rt_key,
1016 const struct in6_addr *fl_addr,
1017 const struct in6_addr *addr_cache)
1018 {
1019 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
1020 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
1021 }
1022
/* Validate a socket's cached dst against the current flow.  Returns
 * @dst when it is still usable for @fl6, or NULL (after releasing the
 * reference) when a fresh route lookup is required.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* A cached IPv4 dst (mapped-address case) is never reusable here. */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;

	/* The route is stale when the destination (or, with subtrees,
	 * the source) no longer matches what the route was looked up
	 * for, or when the flow is now bound to a different interface.
	 * The socket's daddr/saddr caches record the addresses the
	 * cached dst was originally obtained with.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
1068
/* Core route lookup for output: resolve *dst for @fl6, performing
 * source-address selection when the flow has none, and (optionally)
 * re-routing around neighbourless optimistic-DAD gateways.
 * On success *dst holds a referenced dst and 0 is returned; on failure
 * *dst is NULL and a negative errno is returned.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* No source address yet: do a first lookup on the destination
	 * only, pick a source from the resulting route, and then redo
	 * the lookup with the source filled in (the route may differ).
	 */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct fib6_info *from;
		struct rt6_info *rt;

		*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* The destination-only lookup failed; drop it so the
		 * retry below runs with the selected source address.
		 */
		if ((*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/* If the source address is optimistic (DAD not yet complete) and
	 * the next hop has no valid neighbour entry, redirect the flow to
	 * the default router instead of soliciting the neighbour with an
	 * optimistic source (RFC 4429 restriction).
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/* Re-route to the default router: look up with an
			 * unspecified destination.
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	/* A v4-mapped source with a non-mapped destination is an invalid
	 * address-family mix.
	 */
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
/**
 * ip6_dst_lookup - perform route lookup on flow
 * @net: network namespace to use
 * @sk: socket which provides route info (may be NULL)
 * @dst: pointer to dst_entry * for result
 * @fl6: flow to lookup
 *
 * This function performs a route lookup on the given flow.
 * It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
/**
 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 * @net: network namespace to use
 * @sk: socket which provides route info (may be NULL)
 * @fl6: flow to lookup
 * @final_dst: final destination address for ipsec lookup
 *
 * This function performs a route lookup on the given flow, then applies
 * any IPsec transformations via xfrm_lookup_route().
 *
 * It returns a valid dst pointer on success, or a pointer encoded
 * error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	/* Restore the real destination for the xfrm lookup (the routing
	 * step may have been done towards an intermediate hop).
	 */
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
/**
 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 * @sk: socket which provides the dst cache and route info
 * @fl6: flow to lookup
 * @final_dst: final destination address for ipsec lookup
 * @connected: whether @sk is connected or not
 *
 * This function performs a route lookup on the given flow with the
 * possibility of using the cached route in the socket if it is valid.
 * A cached route is only used for connected sockets, and the freshly
 * looked-up dst is stored back into the socket cache in that case.
 *
 * It returns a valid dst pointer on success, or a pointer encoded
 * error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	/* Reuse the cached dst when it still matches the flow. */
	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
/**
 * ip6_dst_lookup_tunnel - perform route lookup on tunnel
 * @skb: packet being encapsulated (provides the mark)
 * @dev: tunnel net device
 * @net: network namespace of tunnel socket
 * @sock: socket which provides route info
 * @saddr: filled in with the selected source address
 * @info: tunnel information (outer addresses, tos, label, dst cache)
 * @protocol: IP protocol of the outer header
 * @use_cache: whether the per-tunnel dst cache may be consulted/updated
 *
 * Return: a valid dst pointer on success; ERR_PTR(-ENETUNREACH) when no
 * route exists, or ERR_PTR(-ELOOP) when the route would loop back
 * through the tunnel device itself.
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
					struct net_device *dev,
					struct net *net,
					struct socket *sock,
					struct in6_addr *saddr,
					const struct ip_tunnel_info *info,
					u8 protocol,
					bool use_cache)
{
	struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct flowi6 fl6;
	__u8 prio;

#ifdef CONFIG_DST_CACHE
	/* Fast path: reuse the tunnel's cached dst when allowed. */
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		dst = dst_cache_get_ip6(dst_cache, saddr);
		if (dst)
			return dst;
	}
#endif
	/* Build the outer-header flow from the tunnel key. */
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = protocol;
	fl6.daddr = info->key.u.ipv6.dst;
	fl6.saddr = info->key.u.ipv6.src;
	prio = info->key.tos;
	fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label);

	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	/* Routing back through the tunnel device would recurse forever. */
	if (dst->dev == dev) {
		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
	*saddr = fl6.saddr;
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1335
1336 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1337 gfp_t gfp)
1338 {
1339 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1340 }
1341
1342 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1343 gfp_t gfp)
1344 {
1345 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1346 }
1347
1348 static void ip6_append_data_mtu(unsigned int *mtu,
1349 int *maxfraglen,
1350 unsigned int fragheaderlen,
1351 struct sk_buff *skb,
1352 struct rt6_info *rt,
1353 unsigned int orig_mtu)
1354 {
1355 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1356 if (!skb) {
1357
1358 *mtu = orig_mtu - rt->dst.header_len;
1359
1360 } else {
1361
1362
1363
1364
1365 *mtu = orig_mtu;
1366 }
1367 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1368 + fragheaderlen - sizeof(struct frag_hdr);
1369 }
1370 }
1371
/* ip6_setup_cork - initialise the cork state for a corked send
 * @sk: socket the data will be sent on
 * @cork: generic cork to fill (dst, fragsize, gso, mark, timestamps, ...)
 * @v6_cork: IPv6-specific cork (duplicated txoptions, hop limit, tclass)
 * @ipc6: per-call control parameters (options, hlimit, tclass, gso, sockc)
 * @rt: route for this send; ownership of the caller's dst reference is
 *      transferred into cork->base.dst here
 *
 * Returns 0 on success, -EINVAL if options are already corked, or -ENOBUFS
 * on allocation failure.  On failure, partially-duplicated options stay in
 * v6_cork->opt and are presumably freed by ip6_cork_release() when the
 * caller tears the cork down — NOTE(review): verify all callers do so.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *nopt, *opt = ipc6->opt;

	/* Take over the dst reference held by the caller. */
	cork->base.dst = &rt->dst;

	/*
	 * setup for corking: deep-copy the tx options so the cork keeps a
	 * private copy independent of the caller's lifetime.
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!nopt))
			return -ENOBUFS;

		nopt->tot_len = sizeof(*opt);
		nopt->opt_flen = opt->opt_flen;
		nopt->opt_nflen = opt->opt_nflen;

		nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation);
		if (opt->dst0opt && !nopt->dst0opt)
			return -ENOBUFS;

		nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation);
		if (opt->dst1opt && !nopt->dst1opt)
			return -ENOBUFS;

		nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation);
		if (opt->hopopt && !nopt->hopopt)
			return -ENOBUFS;

		nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation);
		if (opt->srcrt && !nopt->srcrt)
			return -ENOBUFS;

		/* Need to fix up lengths if any extension header is omitted? */
	}
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	/* Pick the MTU: device MTU when probing PMTU, otherwise the path MTU.
	 * For non-XFRM-tunnel dsts the path MTU of the xfrm dst path is used.
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	/* A user-set frag_size below the MTU further caps the fragment size. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}
1444
/* __ip6_append_data - core engine appending user data to a corked queue
 *
 * Copies @length bytes obtained via @getfrag into skbs queued on @queue,
 * splitting across fragment-sized skbs as dictated by the cork's MTU,
 * supporting GSO ("paged") skbs and MSG_ZEROCOPY.  @transhdrlen is nonzero
 * only for the first call of a corked sequence (transport header space).
 * Returns 0 on success or a negative errno; on error the bytes already
 * queued remain on @queue for the caller to flush.
 */
static int __ip6_append_data(struct sock *sk,
			     struct sk_buff_head *queue,
			     struct inet_cork_full *cork_full,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, size_t length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	struct inet_cork *cork = &cork_full->base;
	struct flowi6 *fl6 = &cork_full->fl.u.ip6;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	bool zc = false;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	skb = skb_peek_tail(queue);
	if (!skb) {
		/* First append of this cork: account for fragmentable option
		 * space and any extra dst (e.g. IPsec) header room.
		 */
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	/* GSO sends are limited by IP6_MAX_MTU, not the path MTU. */
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = atomic_inc_return(&sk->sk_tskey) - 1;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	/* Header bytes repeated in every fragment (v6 hdr + nfrag options). */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);

	/* Total header overhead of the assembled packet. */
	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* Reject MTUs too small to carry any fragment payload. */
	if (mtu <= fragheaderlen ||
	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
		goto emsgsize;

	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	/* Headers plus transport header must fit within one MTU. */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	/* IPV6_DONTFRAG datagram sockets: report the path MTU to the user
	 * instead of fragmenting.
	 */
	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_ICMPV6 ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* Hardware checksum offload only for plain single-MTU UDP with no
	 * extension headers and a device advertising v6/HW csum.
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if ((flags & MSG_ZEROCOPY) && length) {
		struct msghdr *msg = from;

		if (getfrag == ip_generic_getfrag && msg->msg_ubuf) {
			/* Cannot mix a caller-provided ubuf with a different
			 * one already attached to the tail skb.
			 */
			if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb))
				return -EINVAL;

			/* Zerocopy needs SG and checksum offload; otherwise
			 * leave uarg NULL and fall back to copying.
			 */
			if ((rt->dst.dev->features & NETIF_F_SG) &&
			    csummode == CHECKSUM_PARTIAL) {
				paged = true;
				zc = true;
				uarg = msg->msg_ubuf;
			}
		} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
			uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
			if (!uarg)
				return -ENOBUFS;
			extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
			if (rt->dst.dev->features & NETIF_F_SG &&
			    csummode == CHECKSUM_PARTIAL) {
				paged = true;
				zc = true;
			} else {
				/* Device can't do zerocopy: keep the uarg for
				 * completion notification but copy the data.
				 */
				uarg->zerocopy = 0;
				skb_zcopy_set(skb, uarg, &extra_uref);
			}
		}
	}

	/*
	 * Main loop: fill the tail skb up to mtu/maxfraglen, allocating a new
	 * skb whenever the current one is full.  Fragment payloads are kept
	 * 8-byte aligned, so up to 7 bytes may be moved from the previous
	 * fragment into a new one ("fraggap").
	 */
	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* How much still fits in the current skb? */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen, alloc_extra;
			unsigned int pagedlen;
alloc_new_skb:
			/* Bytes past the 8-byte fragment boundary must move
			 * from the previous skb into the new one.
			 */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* Update mtu/maxfraglen on the first allocation only. */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * Size this fragment: all remaining data plus the
			 * carried-over gap, capped at the fragment limit.
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			alloc_extra = hh_len;
			alloc_extra += dst_exthdrlen;
			alloc_extra += rt->dst.trailer_len;

			/* We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloc_extra += sizeof(struct frag_hdr);

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged &&
				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
				  !(rt->dst.dev->features & NETIF_F_SG)))
				alloclen = fraglen;
			else if (!zc) {
				/* Paged (GSO) path: linear part holds at most
				 * MAX_HEADER, the rest goes into page frags.
				 */
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			} else {
				/* Zerocopy: linear part is headers only. */
				alloclen = fragheaderlen + transhdrlen;
				pagedlen = datalen - transhdrlen;
			}
			alloclen += alloc_extra;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			fraglen = datalen + fragheaderlen;

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			/* First skb may block on sndbuf; later ones allocate
			 * non-blocking against a 2x sndbuf budget tracked via
			 * wmem_alloc_delta.
			 */
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk, alloclen,
					(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* Reserve room for hard header, frag header and any
			 * extra dst (e.g. IPsec) header space.
			 */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhanging tail of the previous
				 * fragment here and fix up both checksums.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment carries timestamp state. */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue; charge the
			 * socket write memory lazily via wmem_alloc_delta.
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			/* No SG: copy straight into the linear tailroom. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!zc) {
			/* SG, non-zerocopy: append via the socket page_frag,
			 * coalescing with the last frag when possible.
			 */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			skb_zcopy_downgrade_managed(skb);
			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			/* Zerocopy: pin the user pages directly. */
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	net_zcopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	/* Commit the deferred write-memory charge even on failure; the queued
	 * skbs still exist until the caller flushes them.
	 */
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
1819
1820 int ip6_append_data(struct sock *sk,
1821 int getfrag(void *from, char *to, int offset, int len,
1822 int odd, struct sk_buff *skb),
1823 void *from, size_t length, int transhdrlen,
1824 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1825 struct rt6_info *rt, unsigned int flags)
1826 {
1827 struct inet_sock *inet = inet_sk(sk);
1828 struct ipv6_pinfo *np = inet6_sk(sk);
1829 int exthdrlen;
1830 int err;
1831
1832 if (flags&MSG_PROBE)
1833 return 0;
1834 if (skb_queue_empty(&sk->sk_write_queue)) {
1835
1836
1837
1838 dst_hold(&rt->dst);
1839 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1840 ipc6, rt);
1841 if (err)
1842 return err;
1843
1844 inet->cork.fl.u.ip6 = *fl6;
1845 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1846 length += exthdrlen;
1847 transhdrlen += exthdrlen;
1848 } else {
1849 transhdrlen = 0;
1850 }
1851
1852 return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
1853 &np->cork, sk_page_frag(sk), getfrag,
1854 from, length, transhdrlen, flags, ipc6);
1855 }
1856 EXPORT_SYMBOL_GPL(ip6_append_data);
1857
1858 static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork)
1859 {
1860 struct dst_entry *dst = cork->base.dst;
1861
1862 cork->base.dst = NULL;
1863 cork->base.flags &= ~IPCORK_ALLFRAG;
1864 skb_dst_set(skb, dst);
1865 }
1866
1867 static void ip6_cork_release(struct inet_cork_full *cork,
1868 struct inet6_cork *v6_cork)
1869 {
1870 if (v6_cork->opt) {
1871 struct ipv6_txoptions *opt = v6_cork->opt;
1872
1873 kfree(opt->dst0opt);
1874 kfree(opt->dst1opt);
1875 kfree(opt->hopopt);
1876 kfree(opt->srcrt);
1877 kfree(opt);
1878 v6_cork->opt = NULL;
1879 }
1880
1881 if (cork->base.dst) {
1882 dst_release(cork->base.dst);
1883 cork->base.dst = NULL;
1884 cork->base.flags &= ~IPCORK_ALLFRAG;
1885 }
1886 }
1887
/* __ip6_make_skb - collapse the corked queue into one finished IPv6 packet
 *
 * Dequeues all skbs from @queue, chains the tail skbs onto the head's
 * frag_list, pushes destination/routing options and the IPv6 header, fills
 * header fields from the cork and flow, moves the cork's dst onto the skb,
 * updates SNMP counters, and releases the cork.  Returns the finished skb
 * or NULL if the queue was empty (cork left intact in that case).
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr *final_dst;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* Move skb->data back to the network header if it sits before it. */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining fragments onto the head skb's frag_list,
	 * accumulating length/truesize and detaching them from the socket.
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation when the socket ignores DF semantics. */
	skb->ignore_df = ip6_sk_ignore_df(sk);
	__skb_pull(skb, skb_network_header_len(skb));

	final_dst = &fl6->daddr;
	/* Push option headers in front of the payload; pushing non-fragment
	 * options may rewrite final_dst (routing header semantics).
	 */
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;
	skb->tstamp = cork->base.transmit_time;

	ip6_cork_steal_dst(skb, cork);
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1962
1963 int ip6_send_skb(struct sk_buff *skb)
1964 {
1965 struct net *net = sock_net(skb->sk);
1966 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1967 int err;
1968
1969 err = ip6_local_out(net, skb->sk, skb);
1970 if (err) {
1971 if (err > 0)
1972 err = net_xmit_errno(err);
1973 if (err)
1974 IP6_INC_STATS(net, rt->rt6i_idev,
1975 IPSTATS_MIB_OUTDISCARDS);
1976 }
1977
1978 return err;
1979 }
1980
/* Finalize the socket's corked write queue into one skb and transmit it.
 * An empty queue is not an error: returns 0.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1992
1993 static void __ip6_flush_pending_frames(struct sock *sk,
1994 struct sk_buff_head *queue,
1995 struct inet_cork_full *cork,
1996 struct inet6_cork *v6_cork)
1997 {
1998 struct sk_buff *skb;
1999
2000 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
2001 if (skb_dst(skb))
2002 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
2003 IPSTATS_MIB_OUTDISCARDS);
2004 kfree_skb(skb);
2005 }
2006
2007 ip6_cork_release(cork, v6_cork);
2008 }
2009
2010 void ip6_flush_pending_frames(struct sock *sk)
2011 {
2012 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
2013 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
2014 }
2015 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
2016
2017 struct sk_buff *ip6_make_skb(struct sock *sk,
2018 int getfrag(void *from, char *to, int offset,
2019 int len, int odd, struct sk_buff *skb),
2020 void *from, size_t length, int transhdrlen,
2021 struct ipcm6_cookie *ipc6, struct rt6_info *rt,
2022 unsigned int flags, struct inet_cork_full *cork)
2023 {
2024 struct inet6_cork v6_cork;
2025 struct sk_buff_head queue;
2026 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
2027 int err;
2028
2029 if (flags & MSG_PROBE) {
2030 dst_release(&rt->dst);
2031 return NULL;
2032 }
2033
2034 __skb_queue_head_init(&queue);
2035
2036 cork->base.flags = 0;
2037 cork->base.addr = 0;
2038 cork->base.opt = NULL;
2039 v6_cork.opt = NULL;
2040 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt);
2041 if (err) {
2042 ip6_cork_release(cork, &v6_cork);
2043 return ERR_PTR(err);
2044 }
2045 if (ipc6->dontfrag < 0)
2046 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
2047
2048 err = __ip6_append_data(sk, &queue, cork, &v6_cork,
2049 ¤t->task_frag, getfrag, from,
2050 length + exthdrlen, transhdrlen + exthdrlen,
2051 flags, ipc6);
2052 if (err) {
2053 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
2054 return ERR_PTR(err);
2055 }
2056
2057 return __ip6_make_skb(sk, &queue, cork, &v6_cork);
2058 }