Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  *  IPv6 fragment reassembly
0004  *  Linux INET6 implementation
0005  *
0006  *  Authors:
0007  *  Pedro Roque     <roque@di.fc.ul.pt>
0008  *
0009  *  Based on: net/ipv4/ip_fragment.c
0010  */
0011 
0012 /*
0013  *  Fixes:
0014  *  Andi Kleen  Make it work with multiple hosts.
0015  *          More RFC compliance.
0016  *
0017  *      Horst von Brand Add missing #include <linux/string.h>
0018  *  Alexey Kuznetsov    SMP races, threading, cleanup.
0019  *  Patrick McHardy     LRU queue of frag heads for evictor.
0020  *  Mitsuru KANDA @USAGI    Register inet6_protocol{}.
0021  *  David Stevens and
0022  *  YOSHIFUJI,H. @USAGI Always remove fragment header to
0023  *              calculate ICV correctly.
0024  */
0025 
0026 #define pr_fmt(fmt) "IPv6: " fmt
0027 
0028 #include <linux/errno.h>
0029 #include <linux/types.h>
0030 #include <linux/string.h>
0031 #include <linux/socket.h>
0032 #include <linux/sockios.h>
0033 #include <linux/jiffies.h>
0034 #include <linux/net.h>
0035 #include <linux/list.h>
0036 #include <linux/netdevice.h>
0037 #include <linux/in6.h>
0038 #include <linux/ipv6.h>
0039 #include <linux/icmpv6.h>
0040 #include <linux/random.h>
0041 #include <linux/jhash.h>
0042 #include <linux/skbuff.h>
0043 #include <linux/slab.h>
0044 #include <linux/export.h>
0045 #include <linux/tcp.h>
0046 #include <linux/udp.h>
0047 
0048 #include <net/sock.h>
0049 #include <net/snmp.h>
0050 
0051 #include <net/ipv6.h>
0052 #include <net/ip6_route.h>
0053 #include <net/protocol.h>
0054 #include <net/transp_v6.h>
0055 #include <net/rawv6.h>
0056 #include <net/ndisc.h>
0057 #include <net/addrconf.h>
0058 #include <net/ipv6_frag.h>
0059 #include <net/inet_ecn.h>
0060 
/* Cache name installed into ip6_frags.frags_cache_name by ipv6_frag_init(). */
static const char ip6_frag_cache_name[] = "ip6-frags";
0062 
0063 static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
0064 {
0065     return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
0066 }
0067 
0068 static struct inet_frags ip6_frags;
0069 
0070 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
0071               struct sk_buff *prev_tail, struct net_device *dev);
0072 
0073 static void ip6_frag_expire(struct timer_list *t)
0074 {
0075     struct inet_frag_queue *frag = from_timer(frag, t, timer);
0076     struct frag_queue *fq;
0077 
0078     fq = container_of(frag, struct frag_queue, q);
0079 
0080     ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
0081 }
0082 
0083 static struct frag_queue *
0084 fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
0085 {
0086     struct frag_v6_compare_key key = {
0087         .id = id,
0088         .saddr = hdr->saddr,
0089         .daddr = hdr->daddr,
0090         .user = IP6_DEFRAG_LOCAL_DELIVER,
0091         .iif = iif,
0092     };
0093     struct inet_frag_queue *q;
0094 
0095     if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
0096                         IPV6_ADDR_LINKLOCAL)))
0097         key.iif = 0;
0098 
0099     q = inet_frag_find(net->ipv6.fqdir, &key);
0100     if (!q)
0101         return NULL;
0102 
0103     return container_of(q, struct frag_queue, q);
0104 }
0105 
0106 static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
0107               struct frag_hdr *fhdr, int nhoff,
0108               u32 *prob_offset)
0109 {
0110     struct net *net = dev_net(skb_dst(skb)->dev);
0111     int offset, end, fragsize;
0112     struct sk_buff *prev_tail;
0113     struct net_device *dev;
0114     int err = -ENOENT;
0115     u8 ecn;
0116 
0117     if (fq->q.flags & INET_FRAG_COMPLETE)
0118         goto err;
0119 
0120     err = -EINVAL;
0121     offset = ntohs(fhdr->frag_off) & ~0x7;
0122     end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
0123             ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
0124 
0125     if ((unsigned int)end > IPV6_MAXPLEN) {
0126         *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
0127         /* note that if prob_offset is set, the skb is freed elsewhere,
0128          * we do not free it here.
0129          */
0130         return -1;
0131     }
0132 
0133     ecn = ip6_frag_ecn(ipv6_hdr(skb));
0134 
0135     if (skb->ip_summed == CHECKSUM_COMPLETE) {
0136         const unsigned char *nh = skb_network_header(skb);
0137         skb->csum = csum_sub(skb->csum,
0138                      csum_partial(nh, (u8 *)(fhdr + 1) - nh,
0139                           0));
0140     }
0141 
0142     /* Is this the final fragment? */
0143     if (!(fhdr->frag_off & htons(IP6_MF))) {
0144         /* If we already have some bits beyond end
0145          * or have different end, the segment is corrupted.
0146          */
0147         if (end < fq->q.len ||
0148             ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
0149             goto discard_fq;
0150         fq->q.flags |= INET_FRAG_LAST_IN;
0151         fq->q.len = end;
0152     } else {
0153         /* Check if the fragment is rounded to 8 bytes.
0154          * Required by the RFC.
0155          */
0156         if (end & 0x7) {
0157             /* RFC2460 says always send parameter problem in
0158              * this case. -DaveM
0159              */
0160             *prob_offset = offsetof(struct ipv6hdr, payload_len);
0161             return -1;
0162         }
0163         if (end > fq->q.len) {
0164             /* Some bits beyond end -> corruption. */
0165             if (fq->q.flags & INET_FRAG_LAST_IN)
0166                 goto discard_fq;
0167             fq->q.len = end;
0168         }
0169     }
0170 
0171     if (end == offset)
0172         goto discard_fq;
0173 
0174     err = -ENOMEM;
0175     /* Point into the IP datagram 'data' part. */
0176     if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
0177         goto discard_fq;
0178 
0179     err = pskb_trim_rcsum(skb, end - offset);
0180     if (err)
0181         goto discard_fq;
0182 
0183     /* Note : skb->rbnode and skb->dev share the same location. */
0184     dev = skb->dev;
0185     /* Makes sure compiler wont do silly aliasing games */
0186     barrier();
0187 
0188     prev_tail = fq->q.fragments_tail;
0189     err = inet_frag_queue_insert(&fq->q, skb, offset, end);
0190     if (err)
0191         goto insert_error;
0192 
0193     if (dev)
0194         fq->iif = dev->ifindex;
0195 
0196     fq->q.stamp = skb->tstamp;
0197     fq->q.mono_delivery_time = skb->mono_delivery_time;
0198     fq->q.meat += skb->len;
0199     fq->ecn |= ecn;
0200     add_frag_mem_limit(fq->q.fqdir, skb->truesize);
0201 
0202     fragsize = -skb_network_offset(skb) + skb->len;
0203     if (fragsize > fq->q.max_size)
0204         fq->q.max_size = fragsize;
0205 
0206     /* The first fragment.
0207      * nhoffset is obtained from the first fragment, of course.
0208      */
0209     if (offset == 0) {
0210         fq->nhoffset = nhoff;
0211         fq->q.flags |= INET_FRAG_FIRST_IN;
0212     }
0213 
0214     if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
0215         fq->q.meat == fq->q.len) {
0216         unsigned long orefdst = skb->_skb_refdst;
0217 
0218         skb->_skb_refdst = 0UL;
0219         err = ip6_frag_reasm(fq, skb, prev_tail, dev);
0220         skb->_skb_refdst = orefdst;
0221         return err;
0222     }
0223 
0224     skb_dst_drop(skb);
0225     return -EINPROGRESS;
0226 
0227 insert_error:
0228     if (err == IPFRAG_DUP) {
0229         kfree_skb(skb);
0230         return -EINVAL;
0231     }
0232     err = -EINVAL;
0233     __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
0234             IPSTATS_MIB_REASM_OVERLAPS);
0235 discard_fq:
0236     inet_frag_kill(&fq->q);
0237     __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
0238             IPSTATS_MIB_REASMFAILS);
0239 err:
0240     kfree_skb(skb);
0241     return err;
0242 }
0243 
0244 /*
0245  *  Check if this packet is complete.
0246  *
0247  *  It is called with locked fq, and caller must check that
0248  *  queue is eligible for reassembly i.e. it is not COMPLETE,
0249  *  the last and the first frames arrived and all the bits are here.
0250  */
0251 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
0252               struct sk_buff *prev_tail, struct net_device *dev)
0253 {
0254     struct net *net = fq->q.fqdir->net;
0255     unsigned int nhoff;
0256     void *reasm_data;
0257     int payload_len;
0258     u8 ecn;
0259 
0260     inet_frag_kill(&fq->q);
0261 
0262     ecn = ip_frag_ecn_table[fq->ecn];
0263     if (unlikely(ecn == 0xff))
0264         goto out_fail;
0265 
0266     reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
0267     if (!reasm_data)
0268         goto out_oom;
0269 
0270     payload_len = ((skb->data - skb_network_header(skb)) -
0271                sizeof(struct ipv6hdr) + fq->q.len -
0272                sizeof(struct frag_hdr));
0273     if (payload_len > IPV6_MAXPLEN)
0274         goto out_oversize;
0275 
0276     /* We have to remove fragment header from datagram and to relocate
0277      * header in order to calculate ICV correctly. */
0278     nhoff = fq->nhoffset;
0279     skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0];
0280     memmove(skb->head + sizeof(struct frag_hdr), skb->head,
0281         (skb->data - skb->head) - sizeof(struct frag_hdr));
0282     if (skb_mac_header_was_set(skb))
0283         skb->mac_header += sizeof(struct frag_hdr);
0284     skb->network_header += sizeof(struct frag_hdr);
0285 
0286     skb_reset_transport_header(skb);
0287 
0288     inet_frag_reasm_finish(&fq->q, skb, reasm_data, true);
0289 
0290     skb->dev = dev;
0291     ipv6_hdr(skb)->payload_len = htons(payload_len);
0292     ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
0293     IP6CB(skb)->nhoff = nhoff;
0294     IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
0295     IP6CB(skb)->frag_max_size = fq->q.max_size;
0296 
0297     /* Yes, and fold redundant checksum back. 8) */
0298     skb_postpush_rcsum(skb, skb_network_header(skb),
0299                skb_network_header_len(skb));
0300 
0301     rcu_read_lock();
0302     __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS);
0303     rcu_read_unlock();
0304     fq->q.rb_fragments = RB_ROOT;
0305     fq->q.fragments_tail = NULL;
0306     fq->q.last_run_head = NULL;
0307     return 1;
0308 
0309 out_oversize:
0310     net_dbg_ratelimited("ip6_frag_reasm: payload len = %d\n", payload_len);
0311     goto out_fail;
0312 out_oom:
0313     net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
0314 out_fail:
0315     rcu_read_lock();
0316     __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS);
0317     rcu_read_unlock();
0318     inet_frag_kill(&fq->q);
0319     return -1;
0320 }
0321 
0322 static int ipv6_frag_rcv(struct sk_buff *skb)
0323 {
0324     struct frag_hdr *fhdr;
0325     struct frag_queue *fq;
0326     const struct ipv6hdr *hdr = ipv6_hdr(skb);
0327     struct net *net = dev_net(skb_dst(skb)->dev);
0328     u8 nexthdr;
0329     int iif;
0330 
0331     if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
0332         goto fail_hdr;
0333 
0334     __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
0335 
0336     /* Jumbo payload inhibits frag. header */
0337     if (hdr->payload_len == 0)
0338         goto fail_hdr;
0339 
0340     if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
0341                  sizeof(struct frag_hdr))))
0342         goto fail_hdr;
0343 
0344     hdr = ipv6_hdr(skb);
0345     fhdr = (struct frag_hdr *)skb_transport_header(skb);
0346 
0347     if (!(fhdr->frag_off & htons(IP6_OFFSET | IP6_MF))) {
0348         /* It is not a fragmented frame */
0349         skb->transport_header += sizeof(struct frag_hdr);
0350         __IP6_INC_STATS(net,
0351                 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
0352 
0353         IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
0354         IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
0355         IP6CB(skb)->frag_max_size = ntohs(hdr->payload_len) +
0356                         sizeof(struct ipv6hdr);
0357         return 1;
0358     }
0359 
0360     /* RFC 8200, Section 4.5 Fragment Header:
0361      * If the first fragment does not include all headers through an
0362      * Upper-Layer header, then that fragment should be discarded and
0363      * an ICMP Parameter Problem, Code 3, message should be sent to
0364      * the source of the fragment, with the Pointer field set to zero.
0365      */
0366     nexthdr = hdr->nexthdr;
0367     if (ipv6frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) {
0368         __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
0369                 IPSTATS_MIB_INHDRERRORS);
0370         icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0);
0371         return -1;
0372     }
0373 
0374     iif = skb->dev ? skb->dev->ifindex : 0;
0375     fq = fq_find(net, fhdr->identification, hdr, iif);
0376     if (fq) {
0377         u32 prob_offset = 0;
0378         int ret;
0379 
0380         spin_lock(&fq->q.lock);
0381 
0382         fq->iif = iif;
0383         ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff,
0384                      &prob_offset);
0385 
0386         spin_unlock(&fq->q.lock);
0387         inet_frag_put(&fq->q);
0388         if (prob_offset) {
0389             __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
0390                     IPSTATS_MIB_INHDRERRORS);
0391             /* icmpv6_param_prob() calls kfree_skb(skb) */
0392             icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
0393         }
0394         return ret;
0395     }
0396 
0397     __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
0398     kfree_skb(skb);
0399     return -1;
0400 
0401 fail_hdr:
0402     __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
0403             IPSTATS_MIB_INHDRERRORS);
0404     icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
0405     return -1;
0406 }
0407 
0408 static const struct inet6_protocol frag_protocol = {
0409     .handler    =   ipv6_frag_rcv,
0410     .flags      =   INET6_PROTO_NOPOLICY,
0411 };
0412 
0413 #ifdef CONFIG_SYSCTL
0414 
0415 static struct ctl_table ip6_frags_ns_ctl_table[] = {
0416     {
0417         .procname   = "ip6frag_high_thresh",
0418         .maxlen     = sizeof(unsigned long),
0419         .mode       = 0644,
0420         .proc_handler   = proc_doulongvec_minmax,
0421     },
0422     {
0423         .procname   = "ip6frag_low_thresh",
0424         .maxlen     = sizeof(unsigned long),
0425         .mode       = 0644,
0426         .proc_handler   = proc_doulongvec_minmax,
0427     },
0428     {
0429         .procname   = "ip6frag_time",
0430         .maxlen     = sizeof(int),
0431         .mode       = 0644,
0432         .proc_handler   = proc_dointvec_jiffies,
0433     },
0434     { }
0435 };
0436 
0437 /* secret interval has been deprecated */
0438 static int ip6_frags_secret_interval_unused;
0439 static struct ctl_table ip6_frags_ctl_table[] = {
0440     {
0441         .procname   = "ip6frag_secret_interval",
0442         .data       = &ip6_frags_secret_interval_unused,
0443         .maxlen     = sizeof(int),
0444         .mode       = 0644,
0445         .proc_handler   = proc_dointvec_jiffies,
0446     },
0447     { }
0448 };
0449 
0450 static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
0451 {
0452     struct ctl_table *table;
0453     struct ctl_table_header *hdr;
0454 
0455     table = ip6_frags_ns_ctl_table;
0456     if (!net_eq(net, &init_net)) {
0457         table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL);
0458         if (!table)
0459             goto err_alloc;
0460 
0461     }
0462     table[0].data   = &net->ipv6.fqdir->high_thresh;
0463     table[0].extra1 = &net->ipv6.fqdir->low_thresh;
0464     table[1].data   = &net->ipv6.fqdir->low_thresh;
0465     table[1].extra2 = &net->ipv6.fqdir->high_thresh;
0466     table[2].data   = &net->ipv6.fqdir->timeout;
0467 
0468     hdr = register_net_sysctl(net, "net/ipv6", table);
0469     if (!hdr)
0470         goto err_reg;
0471 
0472     net->ipv6.sysctl.frags_hdr = hdr;
0473     return 0;
0474 
0475 err_reg:
0476     if (!net_eq(net, &init_net))
0477         kfree(table);
0478 err_alloc:
0479     return -ENOMEM;
0480 }
0481 
0482 static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net)
0483 {
0484     struct ctl_table *table;
0485 
0486     table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
0487     unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
0488     if (!net_eq(net, &init_net))
0489         kfree(table);
0490 }
0491 
0492 static struct ctl_table_header *ip6_ctl_header;
0493 
0494 static int ip6_frags_sysctl_register(void)
0495 {
0496     ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6",
0497             ip6_frags_ctl_table);
0498     return ip6_ctl_header == NULL ? -ENOMEM : 0;
0499 }
0500 
0501 static void ip6_frags_sysctl_unregister(void)
0502 {
0503     unregister_net_sysctl_table(ip6_ctl_header);
0504 }
0505 #else
/* Without CONFIG_SYSCTL the sysctl hooks below are no-ops. */

/* Nothing to register; always succeeds. */
static int ip6_frags_ns_sysctl_register(struct net *net)
{
	return 0;
}

/* Nothing to unregister. */
static void ip6_frags_ns_sysctl_unregister(struct net *net)
{
}

/* Nothing to register; always succeeds. */
static int ip6_frags_sysctl_register(void)
{
	return 0;
}

/* Nothing to unregister. */
static void ip6_frags_sysctl_unregister(void)
{
}
0523 #endif
0524 
0525 static int __net_init ipv6_frags_init_net(struct net *net)
0526 {
0527     int res;
0528 
0529     res = fqdir_init(&net->ipv6.fqdir, &ip6_frags, net);
0530     if (res < 0)
0531         return res;
0532 
0533     net->ipv6.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
0534     net->ipv6.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
0535     net->ipv6.fqdir->timeout = IPV6_FRAG_TIMEOUT;
0536 
0537     res = ip6_frags_ns_sysctl_register(net);
0538     if (res < 0)
0539         fqdir_exit(net->ipv6.fqdir);
0540     return res;
0541 }
0542 
0543 static void __net_exit ipv6_frags_pre_exit_net(struct net *net)
0544 {
0545     fqdir_pre_exit(net->ipv6.fqdir);
0546 }
0547 
0548 static void __net_exit ipv6_frags_exit_net(struct net *net)
0549 {
0550     ip6_frags_ns_sysctl_unregister(net);
0551     fqdir_exit(net->ipv6.fqdir);
0552 }
0553 
0554 static struct pernet_operations ip6_frags_ops = {
0555     .init       = ipv6_frags_init_net,
0556     .pre_exit   = ipv6_frags_pre_exit_net,
0557     .exit       = ipv6_frags_exit_net,
0558 };
0559 
0560 static const struct rhashtable_params ip6_rhash_params = {
0561     .head_offset        = offsetof(struct inet_frag_queue, node),
0562     .hashfn         = ip6frag_key_hashfn,
0563     .obj_hashfn     = ip6frag_obj_hashfn,
0564     .obj_cmpfn      = ip6frag_obj_cmpfn,
0565     .automatic_shrinking    = true,
0566 };
0567 
0568 int __init ipv6_frag_init(void)
0569 {
0570     int ret;
0571 
0572     ip6_frags.constructor = ip6frag_init;
0573     ip6_frags.destructor = NULL;
0574     ip6_frags.qsize = sizeof(struct frag_queue);
0575     ip6_frags.frag_expire = ip6_frag_expire;
0576     ip6_frags.frags_cache_name = ip6_frag_cache_name;
0577     ip6_frags.rhash_params = ip6_rhash_params;
0578     ret = inet_frags_init(&ip6_frags);
0579     if (ret)
0580         goto out;
0581 
0582     ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
0583     if (ret)
0584         goto err_protocol;
0585 
0586     ret = ip6_frags_sysctl_register();
0587     if (ret)
0588         goto err_sysctl;
0589 
0590     ret = register_pernet_subsys(&ip6_frags_ops);
0591     if (ret)
0592         goto err_pernet;
0593 
0594 out:
0595     return ret;
0596 
0597 err_pernet:
0598     ip6_frags_sysctl_unregister();
0599 err_sysctl:
0600     inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
0601 err_protocol:
0602     inet_frags_fini(&ip6_frags);
0603     goto out;
0604 }
0605 
0606 void ipv6_frag_exit(void)
0607 {
0608     ip6_frags_sysctl_unregister();
0609     unregister_pernet_subsys(&ip6_frags_ops);
0610     inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
0611     inet_frags_fini(&ip6_frags);
0612 }