0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073
0074
0075
0076
0077
0078
0079
0080
0081
0082
0083
0084
0085
0086
0087
0088
0089
0090
0091
0092
0093
0094
0095
0096
0097
0098
0099
0100
0101
0102
0103
0104
0105
0106
0107
0108
0109 #define pr_fmt(fmt) "IPv4: " fmt
0110
0111 #include <linux/module.h>
0112 #include <linux/types.h>
0113 #include <linux/kernel.h>
0114 #include <linux/string.h>
0115 #include <linux/errno.h>
0116 #include <linux/slab.h>
0117
0118 #include <linux/net.h>
0119 #include <linux/socket.h>
0120 #include <linux/sockios.h>
0121 #include <linux/in.h>
0122 #include <linux/inet.h>
0123 #include <linux/inetdevice.h>
0124 #include <linux/netdevice.h>
0125 #include <linux/etherdevice.h>
0126 #include <linux/indirect_call_wrapper.h>
0127
0128 #include <net/snmp.h>
0129 #include <net/ip.h>
0130 #include <net/protocol.h>
0131 #include <net/route.h>
0132 #include <linux/skbuff.h>
0133 #include <net/sock.h>
0134 #include <net/arp.h>
0135 #include <net/icmp.h>
0136 #include <net/raw.h>
0137 #include <net/checksum.h>
0138 #include <net/inet_ecn.h>
0139 #include <linux/netfilter_ipv4.h>
0140 #include <net/xfrm.h>
0141 #include <linux/mroute.h>
0142 #include <linux/netlink.h>
0143 #include <net/dst_metadata.h>
0144
0145
0146
0147
/* Deliver the packet to every raw socket on the IP Router Alert chain
 * (sockets that set the IP_ROUTER_ALERT option) matching the packet's
 * protocol and, if bound, its incoming device.
 *
 * Returns true if the skb was consumed (delivered or queued for
 * defragmentation); false if the caller still owns it.
 */
bool ip_call_ra_chain(struct sk_buff *skb)
{
	struct ip_ra_chain *ra;
	u8 protocol = ip_hdr(skb)->protocol;
	struct sock *last = NULL;
	struct net_device *dev = skb->dev;
	struct net *net = dev_net(dev);

	for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) {
		struct sock *sk = ra->sk;

		/* If socket is bound to an interface, only report
		 * the packet if it came from that interface.
		 */
		if (sk && inet_sk(sk)->inet_num == protocol &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == dev->ifindex)) {
			if (ip_is_fragment(ip_hdr(skb))) {
				/* ip_defrag() consumed the skb (queued or
				 * freed it), so we are done either way. */
				if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
					return true;
			}
			/* Clone for every matching socket but the last one,
			 * which receives the original skb below. */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					raw_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		raw_rcv(last, skb);
		return true;
	}
	return false;
}
0184
INDIRECT_CALLABLE_DECLARE(int udp_rcv(struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int tcp_v4_rcv(struct sk_buff *));
/* Hand the skb to the L4 handler registered for @protocol.
 * Caller must hold rcu_read_lock() (inet_protos[] and the handler are
 * RCU-protected).  The skb is always consumed: delivered, resubmitted,
 * or freed with an appropriate drop reason.
 */
void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol)
{
	const struct net_protocol *ipprot;
	int raw, ret;

resubmit:
	/* Raw sockets get a copy first; remember whether any matched so
	 * an unhandled protocol is not also reported via ICMP below. */
	raw = raw_local_deliver(skb, protocol);

	ipprot = rcu_dereference(inet_protos[protocol]);
	if (ipprot) {
		if (!ipprot->no_policy) {
			if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
				kfree_skb_reason(skb,
						 SKB_DROP_REASON_XFRM_POLICY);
				return;
			}
			nf_reset_ct(skb);
		}
		ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv,
				      skb);
		if (ret < 0) {
			/* Negative return asks us to redeliver the skb as
			 * protocol -ret (e.g. after decapsulation). */
			protocol = -ret;
			goto resubmit;
		}
		__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
	} else {
		if (!raw) {
			/* Nobody wants it: report "protocol unreachable"
			 * unless xfrm policy forbids even that. */
			if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
				__IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
				icmp_send(skb, ICMP_DEST_UNREACH,
					  ICMP_PROT_UNREACH, 0);
			}
			kfree_skb_reason(skb, SKB_DROP_REASON_IP_NOPROTO);
		} else {
			/* A raw socket took it; count as delivered. */
			__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
			consume_skb(skb);
		}
	}
}
0226
/* okfn for the NF_INET_LOCAL_IN hook: strip the IP header and pass the
 * packet up to the transport layer under RCU.  Always returns 0.
 */
static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	/* Clear any delivery-time annotation before handing the skb to
	 * the local stack. */
	skb_clear_delivery_time(skb);
	/* Advance skb->data past the IP header (including options). */
	__skb_pull(skb, skb_network_header_len(skb));

	rcu_read_lock();
	ip_protocol_deliver_rcu(net, skb, ip_hdr(skb)->protocol);
	rcu_read_unlock();

	return 0;
}
0238
0239
0240
0241
/*
 * 	Deliver IP Packets to the higher protocol layers.
 */
int ip_local_deliver(struct sk_buff *skb)
{
	/*
	 *	Reassemble IP fragments.
	 */
	struct net *net = dev_net(skb->dev);

	if (ip_is_fragment(ip_hdr(skb))) {
		/* Non-zero means the fragment was queued (or dropped);
		 * either way the skb is no longer ours. */
		if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER))
			return 0;
	}

	return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN,
		       net, NULL, skb, skb->dev, NULL,
		       ip_local_deliver_finish);
}
EXPORT_SYMBOL(ip_local_deliver);
0259
/* Parse and act on IP options of a packet whose header advertises them
 * (caller checks ihl > 5).  Returns true if the packet must be dropped,
 * false to continue processing.
 */
static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_options *opt;
	const struct iphdr *iph;

	/* Option processing may mangle the header, so unshare the data
	 * first.  Copying unconditionally is overkill (not all options
	 * mangle the packet) but the combination of IP options and a
	 * shared/cloned skb is rare, so keep it simple.
	 */
	if (skb_cow(skb, skb_headroom(skb))) {
		__IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	iph = ip_hdr(skb);
	opt = &(IPCB(skb)->opt);
	opt->optlen = iph->ihl*4 - sizeof(struct iphdr);

	if (ip_options_compile(dev_net(dev), opt, skb)) {
		__IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
		goto drop;
	}

	if (unlikely(opt->srr)) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		/* Honour source routing only if enabled on the incoming
		 * device; optionally log the refused attempt. */
		if (in_dev) {
			if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
				if (IN_DEV_LOG_MARTIANS(in_dev))
					net_info_ratelimited("source route option %pI4 -> %pI4\n",
							     &iph->saddr,
							     &iph->daddr);
				goto drop;
			}
		}

		if (ip_options_rcv_srr(skb, dev))
			goto drop;
	}

	return false;
drop:
	return true;
}
0307
0308 static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
0309 const struct sk_buff *hint)
0310 {
0311 return hint && !skb_dst(skb) && ip_hdr(hint)->daddr == iph->daddr &&
0312 ip_hdr(hint)->tos == iph->tos;
0313 }
0314
int tcp_v4_early_demux(struct sk_buff *skb);
int udp_v4_early_demux(struct sk_buff *skb);
/* Core of receive finishing: attach a route (dst) to the skb — via a
 * batching hint, early demux, or a routing lookup — then process IP
 * options and account multicast/broadcast statistics.
 *
 * Returns NET_RX_SUCCESS with a valid dst attached, or NET_RX_DROP
 * after freeing the skb with an appropriate drop reason.
 */
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
			      struct sk_buff *skb, struct net_device *dev,
			      const struct sk_buff *hint)
{
	const struct iphdr *iph = ip_hdr(skb);
	int err, drop_reason;
	struct rtable *rt;

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;

	/* Fast path: reuse the route of the previous packet in the batch
	 * when destination/TOS match (see ip_can_use_hint()). */
	if (ip_can_use_hint(skb, iph, hint)) {
		err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
					dev, hint);
		if (unlikely(err))
			goto drop_error;
	}

	if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
	    !skb_dst(skb) &&
	    !skb->sk &&
	    !ip_is_fragment(iph)) {
		switch (iph->protocol) {
		case IPPROTO_TCP:
			if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
				tcp_v4_early_demux(skb);

				/* must reload iph, skb->head might have changed */
				iph = ip_hdr(skb);
			}
			break;
		case IPPROTO_UDP:
			if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
				err = udp_v4_early_demux(skb);
				if (unlikely(err))
					goto drop_error;

				/* must reload iph, skb->head might have changed */
				iph = ip_hdr(skb);
			}
			break;
		}
	}

	/*
	 *	Initialise the virtual path cache for the packet. It describes
	 *	how the packet travels inside Linux networking.
	 */
	if (!skb_valid_dst(skb)) {
		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
					   iph->tos, dev);
		if (unlikely(err))
			goto drop_error;
	}

#ifdef CONFIG_IP_ROUTE_CLASSID
	/* Per-class traffic accounting: low byte of tclassid is the
	 * output class index, bits 16-23 the input class index. */
	if (unlikely(skb_dst(skb)->tclassid)) {
		struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
		u32 idx = skb_dst(skb)->tclassid;
		st[idx&0xFF].o_packets++;
		st[idx&0xFF].o_bytes += skb->len;
		st[(idx>>16)&0xFF].i_packets++;
		st[(idx>>16)&0xFF].i_bytes += skb->len;
	}
#endif

	if (iph->ihl > 5 && ip_rcv_options(skb, dev))
		goto drop;

	rt = skb_rtable(skb);
	if (rt->rt_type == RTN_MULTICAST) {
		__IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len);
	} else if (rt->rt_type == RTN_BROADCAST) {
		__IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
	} else if (skb->pkt_type == PACKET_BROADCAST ||
		   skb->pkt_type == PACKET_MULTICAST) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		/* The route resolved to unicast, yet the link-layer frame
		 * was broadcast/multicast: a unicast IP packet hidden in an
		 * L2 broadcast/multicast frame.  If the interface is
		 * configured to drop these (drop_unicast_in_l2_multicast),
		 * do so with a dedicated drop reason.
		 */
		if (in_dev &&
		    IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) {
			drop_reason = SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST;
			goto drop;
		}
	}

	return NET_RX_SUCCESS;

drop:
	kfree_skb_reason(skb, drop_reason);
	return NET_RX_DROP;

drop_error:
	/* -EXDEV is the reverse-path-filter failure from the route lookup. */
	if (err == -EXDEV) {
		drop_reason = SKB_DROP_REASON_IP_RPFILTER;
		__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
	}
	goto drop;
}
0429
/* okfn for the NF_INET_PRE_ROUTING hook on the single-skb path:
 * attach a route and pass the packet to its dst input handler.
 */
static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	int ret;

	/* if ingress device is enslaved to an L3 master device pass the
	 * skb to its handler for processing
	 */
	skb = l3mdev_ip_rcv(skb);
	if (!skb)
		return NET_RX_SUCCESS;

	ret = ip_rcv_finish_core(net, sk, skb, dev, NULL);
	if (ret != NET_RX_DROP)
		ret = dst_input(skb);
	return ret;
}
0447
0448
0449
0450
/* Validate the IP header of a freshly received skb: version, header
 * length, checksum and total length.  Updates SNMP counters and trims
 * link-layer padding.  Returns the (possibly reallocated) skb on
 * success, or NULL after freeing it on any error.
 */
static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
{
	const struct iphdr *iph;
	int drop_reason;
	u32 len;

	/* When the interface is in promisc. mode, drop all the crap
	 * that it receives, do not try to analyse it.
	 */
	if (skb->pkt_type == PACKET_OTHERHOST) {
		dev_core_stats_rx_otherhost_dropped_inc(skb->dev);
		drop_reason = SKB_DROP_REASON_OTHERHOST;
		goto drop;
	}

	__IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb) {
		__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
		goto out;
	}

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto inhdr_error;

	iph = ip_hdr(skb);

	/*
	 *	RFC1122: 3.2.1.2 MUST silently discard any IP frame that
	 *	fails the checksum.
	 *
	 *	Is the datagram acceptable?
	 *
	 *	1.	Length at least the size of an ip header
	 *	2.	Version of 4
	 *	3.	Checksums correctly
	 *	4.	Doesn't have a bogus length
	 */
	if (iph->ihl < 5 || iph->version != 4)
		goto inhdr_error;

	/* The ECN counters below rely on the MIB enum being laid out as
	 * NOECT + ECN field value; fail the build if that ever changes. */
	BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
	BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
	BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
	__IP_ADD_STATS(net,
		       IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
		       max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));

	if (!pskb_may_pull(skb, iph->ihl*4))
		goto inhdr_error;

	/* must reload iph, pskb_may_pull() may have changed skb->head */
	iph = ip_hdr(skb);

	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
		goto csum_error;

	len = ntohs(iph->tot_len);
	if (skb->len < len) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
		__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
		goto drop;
	} else if (len < (iph->ihl*4))
		goto inhdr_error;

	/* Our transport medium may have padded the buffer out. Now that we
	 * know it is an IP frame, trim to the true length of the frame so
	 * that skb->len holds ntohs(iph->tot_len).
	 */
	if (pskb_trim_rcsum(skb, len)) {
		__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	iph = ip_hdr(skb);
	skb->transport_header = skb->network_header + iph->ihl*4;

	/* Remove any debris in the socket control block */
	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
	IPCB(skb)->iif = skb->skb_iif;

	/* Must drop socket now because of tproxy, unless the socket was
	 * prefetched (e.g. by early demux / BPF). */
	if (!skb_sk_is_prefetched(skb))
		skb_orphan(skb);

	return skb;

csum_error:
	drop_reason = SKB_DROP_REASON_IP_CSUM;
	__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
inhdr_error:
	if (drop_reason == SKB_DROP_REASON_NOT_SPECIFIED)
		drop_reason = SKB_DROP_REASON_IP_INHDR;
	__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
drop:
	kfree_skb_reason(skb, drop_reason);
out:
	return NULL;
}
0551
0552
0553
0554
/*
 * IP receive entry point: validate the header, then run the packet
 * through the netfilter PRE_ROUTING hook on its way to ip_rcv_finish().
 */
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
	   struct net_device *orig_dev)
{
	struct net *net = dev_net(dev);

	skb = ip_rcv_core(skb, net);
	if (skb == NULL)
		return NET_RX_DROP;

	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
		       net, NULL, skb, dev, NULL,
		       ip_rcv_finish);
}
0568
0569 static void ip_sublist_rcv_finish(struct list_head *head)
0570 {
0571 struct sk_buff *skb, *next;
0572
0573 list_for_each_entry_safe(skb, next, head, list) {
0574 skb_list_del_init(skb);
0575 dst_input(skb);
0576 }
0577 }
0578
0579 static struct sk_buff *ip_extract_route_hint(const struct net *net,
0580 struct sk_buff *skb, int rt_type)
0581 {
0582 if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
0583 return NULL;
0584
0585 return skb;
0586 }
0587
/* List equivalent of ip_rcv_finish(): route every skb on @head, then
 * hand them to dst_input() in runs ("sublists") that share the same
 * dst, so each packet can reuse the previous one's route as a hint.
 */
static void ip_list_rcv_finish(struct net *net, struct sock *sk,
			       struct list_head *head)
{
	struct sk_buff *skb, *next, *hint = NULL;
	struct dst_entry *curr_dst = NULL;
	struct list_head sublist;

	INIT_LIST_HEAD(&sublist);
	list_for_each_entry_safe(skb, next, head, list) {
		struct net_device *dev = skb->dev;
		struct dst_entry *dst;

		skb_list_del_init(skb);
		/* if ingress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip_rcv(skb);
		if (!skb)
			continue;
		if (ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP)
			continue;

		dst = skb_dst(skb);
		if (curr_dst != dst) {
			/* A new dst starts a new sublist; remember this skb
			 * as the route hint for the packets that follow. */
			hint = ip_extract_route_hint(net, skb,
					       ((struct rtable *)dst)->rt_type);

			/* dispatch old sublist */
			if (!list_empty(&sublist))
				ip_sublist_rcv_finish(&sublist);
			/* start new sublist */
			INIT_LIST_HEAD(&sublist);
			curr_dst = dst;
		}
		list_add_tail(&skb->list, &sublist);
	}
	/* dispatch final sublist */
	ip_sublist_rcv_finish(&sublist);
}
0627
/* Run the netfilter PRE_ROUTING hook over a sublist of skbs that all
 * share the same device and netns, then route and deliver the
 * survivors.
 */
static void ip_sublist_rcv(struct list_head *head, struct net_device *dev,
			   struct net *net)
{
	NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
		     head, dev, NULL, ip_rcv_finish);
	ip_list_rcv_finish(net, NULL, head);
}
0635
0636
/* Receive a list of IP packets (GRO/list RX entry point).  Header-check
 * each skb, then process the list in sublists that share the same
 * (device, netns) pair so the netfilter hook can run per sublist.
 */
void ip_list_rcv(struct list_head *head, struct packet_type *pt,
		 struct net_device *orig_dev)
{
	struct net_device *curr_dev = NULL;
	struct net *curr_net = NULL;
	struct sk_buff *skb, *next;
	struct list_head sublist;

	INIT_LIST_HEAD(&sublist);
	list_for_each_entry_safe(skb, next, head, list) {
		struct net_device *dev = skb->dev;
		struct net *net = dev_net(dev);

		skb_list_del_init(skb);
		skb = ip_rcv_core(skb, net);
		if (skb == NULL)
			continue;

		if (curr_dev != dev || curr_net != net) {
			/* dispatch old sublist */
			if (!list_empty(&sublist))
				ip_sublist_rcv(&sublist, curr_dev, curr_net);
			/* start new sublist */
			INIT_LIST_HEAD(&sublist);
			curr_dev = dev;
			curr_net = net;
		}
		list_add_tail(&skb->list, &sublist);
	}

	/* dispatch final sublist */
	if (!list_empty(&sublist))
		ip_sublist_rcv(&sublist, curr_dev, curr_net);
}