0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0061
0062 #include <linux/module.h>
0063 #include <linux/types.h>
0064 #include <linux/jiffies.h>
0065 #include <linux/kernel.h>
0066 #include <linux/fcntl.h>
0067 #include <linux/socket.h>
0068 #include <linux/in.h>
0069 #include <linux/inet.h>
0070 #include <linux/inetdevice.h>
0071 #include <linux/netdevice.h>
0072 #include <linux/string.h>
0073 #include <linux/netfilter_ipv4.h>
0074 #include <linux/slab.h>
0075 #include <net/snmp.h>
0076 #include <net/ip.h>
0077 #include <net/route.h>
0078 #include <net/protocol.h>
0079 #include <net/icmp.h>
0080 #include <net/tcp.h>
0081 #include <net/udp.h>
0082 #include <net/raw.h>
0083 #include <net/ping.h>
0084 #include <linux/skbuff.h>
0085 #include <net/sock.h>
0086 #include <linux/errno.h>
0087 #include <linux/timer.h>
0088 #include <linux/init.h>
0089 #include <linux/uaccess.h>
0090 #include <net/checksum.h>
0091 #include <net/xfrm.h>
0092 #include <net/inet_common.h>
0093 #include <net/ip_fib.h>
0094 #include <net/l3mdev.h>
0095
0096
0097
0098
0099
0100 struct icmp_bxm {
0101 struct sk_buff *skb;
0102 int offset;
0103 int data_len;
0104
0105 struct {
0106 struct icmphdr icmph;
0107 __be32 times[3];
0108 } data;
0109 int head_len;
0110 struct ip_options_data replyopts;
0111 };
0112
0113
0114
0115
/*
 * Errno reported for each ICMP_DEST_UNREACH code, and whether that error
 * is flagged fatal.  The table is indexed by the unreachable code.
 */
const struct icmp_err icmp_err_convert[] = {
	[ICMP_NET_UNREACH]	= { .errno = ENETUNREACH,  .fatal = 0 },
	[ICMP_HOST_UNREACH]	= { .errno = EHOSTUNREACH, .fatal = 0 },
	[ICMP_PROT_UNREACH]	= { .errno = ENOPROTOOPT,  .fatal = 1 },
	[ICMP_PORT_UNREACH]	= { .errno = ECONNREFUSED, .fatal = 1 },
	[ICMP_FRAG_NEEDED]	= { .errno = EMSGSIZE,     .fatal = 0 },
	[ICMP_SR_FAILED]	= { .errno = EOPNOTSUPP,   .fatal = 0 },
	[ICMP_NET_UNKNOWN]	= { .errno = ENETUNREACH,  .fatal = 1 },
	[ICMP_HOST_UNKNOWN]	= { .errno = EHOSTDOWN,    .fatal = 1 },
	[ICMP_HOST_ISOLATED]	= { .errno = ENONET,       .fatal = 1 },
	[ICMP_NET_ANO]		= { .errno = ENETUNREACH,  .fatal = 1 },
	[ICMP_HOST_ANO]		= { .errno = EHOSTUNREACH, .fatal = 1 },
	[ICMP_NET_UNR_TOS]	= { .errno = ENETUNREACH,  .fatal = 0 },
	[ICMP_HOST_UNR_TOS]	= { .errno = EHOSTUNREACH, .fatal = 0 },
	[ICMP_PKT_FILTERED]	= { .errno = EHOSTUNREACH, .fatal = 1 },
	[ICMP_PREC_VIOLATION]	= { .errno = EHOSTUNREACH, .fatal = 1 },
	[ICMP_PREC_CUTOFF]	= { .errno = EHOSTUNREACH, .fatal = 1 },
};
EXPORT_SYMBOL(icmp_err_convert);
0183
0184
0185
0186
0187
0188 struct icmp_control {
0189 enum skb_drop_reason (*handler)(struct sk_buff *skb);
0190 short error;
0191 };
0192
0193 static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
0194
0195 static DEFINE_PER_CPU(struct sock *, ipv4_icmp_sk);
0196
0197
0198 static inline struct sock *icmp_xmit_lock(struct net *net)
0199 {
0200 struct sock *sk;
0201
0202 sk = this_cpu_read(ipv4_icmp_sk);
0203
0204 if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
0205
0206
0207
0208 return NULL;
0209 }
0210 sock_net_set(sk, net);
0211 return sk;
0212 }
0213
0214 static inline void icmp_xmit_unlock(struct sock *sk)
0215 {
0216 sock_net_set(sk, &init_net);
0217 spin_unlock(&sk->sk_lock.slock);
0218 }
0219
0220 int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
0221 int sysctl_icmp_msgs_burst __read_mostly = 50;
0222
0223 static struct {
0224 spinlock_t lock;
0225 u32 credit;
0226 u32 stamp;
0227 } icmp_global = {
0228 .lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock),
0229 };
0230
0231
0232
0233
0234
0235
0236
0237
0238 bool icmp_global_allow(void)
0239 {
0240 u32 credit, delta, incr = 0, now = (u32)jiffies;
0241 bool rc = false;
0242
0243
0244
0245
0246
0247 if (!READ_ONCE(icmp_global.credit)) {
0248 delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ);
0249 if (delta < HZ / 50)
0250 return false;
0251 }
0252
0253 spin_lock(&icmp_global.lock);
0254 delta = min_t(u32, now - icmp_global.stamp, HZ);
0255 if (delta >= HZ / 50) {
0256 incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
0257 if (incr)
0258 WRITE_ONCE(icmp_global.stamp, now);
0259 }
0260 credit = min_t(u32, icmp_global.credit + incr,
0261 READ_ONCE(sysctl_icmp_msgs_burst));
0262 if (credit) {
0263
0264
0265
0266 credit = max_t(int, credit - prandom_u32_max(3), 0);
0267 rc = true;
0268 }
0269 WRITE_ONCE(icmp_global.credit, credit);
0270 spin_unlock(&icmp_global.lock);
0271 return rc;
0272 }
0273 EXPORT_SYMBOL(icmp_global_allow);
0274
0275 static bool icmpv4_mask_allow(struct net *net, int type, int code)
0276 {
0277 if (type > NR_ICMP_TYPES)
0278 return true;
0279
0280
0281 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
0282 return true;
0283
0284
0285 if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask)))
0286 return true;
0287
0288 return false;
0289 }
0290
0291 static bool icmpv4_global_allow(struct net *net, int type, int code)
0292 {
0293 if (icmpv4_mask_allow(net, type, code))
0294 return true;
0295
0296 if (icmp_global_allow())
0297 return true;
0298
0299 return false;
0300 }
0301
0302
0303
0304
0305
0306 static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
0307 struct flowi4 *fl4, int type, int code)
0308 {
0309 struct dst_entry *dst = &rt->dst;
0310 struct inet_peer *peer;
0311 bool rc = true;
0312 int vif;
0313
0314 if (icmpv4_mask_allow(net, type, code))
0315 goto out;
0316
0317
0318 if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
0319 goto out;
0320
0321 vif = l3mdev_master_ifindex(dst->dev);
0322 peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
0323 rc = inet_peer_xrlim_allow(peer,
0324 READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
0325 if (peer)
0326 inet_putpeer(peer);
0327 out:
0328 return rc;
0329 }
0330
0331
0332
0333
0334 void icmp_out_count(struct net *net, unsigned char type)
0335 {
0336 ICMPMSGOUT_INC_STATS(net, type);
0337 ICMP_INC_STATS(net, ICMP_MIB_OUTMSGS);
0338 }
0339
0340
0341
0342
0343
0344 static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
0345 struct sk_buff *skb)
0346 {
0347 struct icmp_bxm *icmp_param = from;
0348 __wsum csum;
0349
0350 csum = skb_copy_and_csum_bits(icmp_param->skb,
0351 icmp_param->offset + offset,
0352 to, len);
0353
0354 skb->csum = csum_block_add(skb->csum, csum, odd);
0355 if (icmp_pointers[icmp_param->data.icmph.type].error)
0356 nf_ct_attach(skb, icmp_param->skb);
0357 return 0;
0358 }
0359
0360 static void icmp_push_reply(struct sock *sk,
0361 struct icmp_bxm *icmp_param,
0362 struct flowi4 *fl4,
0363 struct ipcm_cookie *ipc, struct rtable **rt)
0364 {
0365 struct sk_buff *skb;
0366
0367 if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
0368 icmp_param->data_len+icmp_param->head_len,
0369 icmp_param->head_len,
0370 ipc, rt, MSG_DONTWAIT) < 0) {
0371 __ICMP_INC_STATS(sock_net(sk), ICMP_MIB_OUTERRORS);
0372 ip_flush_pending_frames(sk);
0373 } else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
0374 struct icmphdr *icmph = icmp_hdr(skb);
0375 __wsum csum;
0376 struct sk_buff *skb1;
0377
0378 csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
0379 (char *)icmph,
0380 icmp_param->head_len);
0381 skb_queue_walk(&sk->sk_write_queue, skb1) {
0382 csum = csum_add(csum, skb1->csum);
0383 }
0384 icmph->checksum = csum_fold(csum);
0385 skb->ip_summed = CHECKSUM_NONE;
0386 ip_push_pending_frames(sk, fl4);
0387 }
0388 }
0389
0390
0391
0392
0393
0394 static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
0395 {
0396 struct ipcm_cookie ipc;
0397 struct rtable *rt = skb_rtable(skb);
0398 struct net *net = dev_net(rt->dst.dev);
0399 struct flowi4 fl4;
0400 struct sock *sk;
0401 struct inet_sock *inet;
0402 __be32 daddr, saddr;
0403 u32 mark = IP4_REPLY_MARK(net, skb->mark);
0404 int type = icmp_param->data.icmph.type;
0405 int code = icmp_param->data.icmph.code;
0406
0407 if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
0408 return;
0409
0410
0411 local_bh_disable();
0412
0413
0414 if (!icmpv4_global_allow(net, type, code))
0415 goto out_bh_enable;
0416
0417 sk = icmp_xmit_lock(net);
0418 if (!sk)
0419 goto out_bh_enable;
0420 inet = inet_sk(sk);
0421
0422 icmp_param->data.icmph.checksum = 0;
0423
0424 ipcm_init(&ipc);
0425 inet->tos = ip_hdr(skb)->tos;
0426 ipc.sockc.mark = mark;
0427 daddr = ipc.addr = ip_hdr(skb)->saddr;
0428 saddr = fib_compute_spec_dst(skb);
0429
0430 if (icmp_param->replyopts.opt.opt.optlen) {
0431 ipc.opt = &icmp_param->replyopts.opt;
0432 if (ipc.opt->opt.srr)
0433 daddr = icmp_param->replyopts.opt.opt.faddr;
0434 }
0435 memset(&fl4, 0, sizeof(fl4));
0436 fl4.daddr = daddr;
0437 fl4.saddr = saddr;
0438 fl4.flowi4_mark = mark;
0439 fl4.flowi4_uid = sock_net_uid(net, NULL);
0440 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
0441 fl4.flowi4_proto = IPPROTO_ICMP;
0442 fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
0443 security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
0444 rt = ip_route_output_key(net, &fl4);
0445 if (IS_ERR(rt))
0446 goto out_unlock;
0447 if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
0448 icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
0449 ip_rt_put(rt);
0450 out_unlock:
0451 icmp_xmit_unlock(sk);
0452 out_bh_enable:
0453 local_bh_enable();
0454 }
0455
0456
0457
0458
0459
0460
0461
0462 static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb)
0463 {
0464 struct net_device *route_lookup_dev = NULL;
0465
0466 if (skb->dev)
0467 route_lookup_dev = skb->dev;
0468 else if (skb_dst(skb))
0469 route_lookup_dev = skb_dst(skb)->dev;
0470 return route_lookup_dev;
0471 }
0472
0473 static struct rtable *icmp_route_lookup(struct net *net,
0474 struct flowi4 *fl4,
0475 struct sk_buff *skb_in,
0476 const struct iphdr *iph,
0477 __be32 saddr, u8 tos, u32 mark,
0478 int type, int code,
0479 struct icmp_bxm *param)
0480 {
0481 struct net_device *route_lookup_dev;
0482 struct rtable *rt, *rt2;
0483 struct flowi4 fl4_dec;
0484 int err;
0485
0486 memset(fl4, 0, sizeof(*fl4));
0487 fl4->daddr = (param->replyopts.opt.opt.srr ?
0488 param->replyopts.opt.opt.faddr : iph->saddr);
0489 fl4->saddr = saddr;
0490 fl4->flowi4_mark = mark;
0491 fl4->flowi4_uid = sock_net_uid(net, NULL);
0492 fl4->flowi4_tos = RT_TOS(tos);
0493 fl4->flowi4_proto = IPPROTO_ICMP;
0494 fl4->fl4_icmp_type = type;
0495 fl4->fl4_icmp_code = code;
0496 route_lookup_dev = icmp_get_route_lookup_dev(skb_in);
0497 fl4->flowi4_oif = l3mdev_master_ifindex(route_lookup_dev);
0498
0499 security_skb_classify_flow(skb_in, flowi4_to_flowi_common(fl4));
0500 rt = ip_route_output_key_hash(net, fl4, skb_in);
0501 if (IS_ERR(rt))
0502 return rt;
0503
0504
0505 rt2 = rt;
0506
0507 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
0508 flowi4_to_flowi(fl4), NULL, 0);
0509 if (!IS_ERR(rt)) {
0510 if (rt != rt2)
0511 return rt;
0512 } else if (PTR_ERR(rt) == -EPERM) {
0513 rt = NULL;
0514 } else
0515 return rt;
0516
0517 err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
0518 if (err)
0519 goto relookup_failed;
0520
0521 if (inet_addr_type_dev_table(net, route_lookup_dev,
0522 fl4_dec.saddr) == RTN_LOCAL) {
0523 rt2 = __ip_route_output_key(net, &fl4_dec);
0524 if (IS_ERR(rt2))
0525 err = PTR_ERR(rt2);
0526 } else {
0527 struct flowi4 fl4_2 = {};
0528 unsigned long orefdst;
0529
0530 fl4_2.daddr = fl4_dec.saddr;
0531 rt2 = ip_route_output_key(net, &fl4_2);
0532 if (IS_ERR(rt2)) {
0533 err = PTR_ERR(rt2);
0534 goto relookup_failed;
0535 }
0536
0537 orefdst = skb_in->_skb_refdst;
0538 skb_dst_set(skb_in, NULL);
0539 err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
0540 RT_TOS(tos), rt2->dst.dev);
0541
0542 dst_release(&rt2->dst);
0543 rt2 = skb_rtable(skb_in);
0544 skb_in->_skb_refdst = orefdst;
0545 }
0546
0547 if (err)
0548 goto relookup_failed;
0549
0550 rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst,
0551 flowi4_to_flowi(&fl4_dec), NULL,
0552 XFRM_LOOKUP_ICMP);
0553 if (!IS_ERR(rt2)) {
0554 dst_release(&rt->dst);
0555 memcpy(fl4, &fl4_dec, sizeof(*fl4));
0556 rt = rt2;
0557 } else if (PTR_ERR(rt2) == -EPERM) {
0558 if (rt)
0559 dst_release(&rt->dst);
0560 return rt2;
0561 } else {
0562 err = PTR_ERR(rt2);
0563 goto relookup_failed;
0564 }
0565 return rt;
0566
0567 relookup_failed:
0568 if (rt)
0569 return rt;
0570 return ERR_PTR(err);
0571 }
0572
0573
0574
0575
0576
0577
0578
0579
0580
0581
0582
0583
0584 void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
0585 const struct ip_options *opt)
0586 {
0587 struct iphdr *iph;
0588 int room;
0589 struct icmp_bxm icmp_param;
0590 struct rtable *rt = skb_rtable(skb_in);
0591 struct ipcm_cookie ipc;
0592 struct flowi4 fl4;
0593 __be32 saddr;
0594 u8 tos;
0595 u32 mark;
0596 struct net *net;
0597 struct sock *sk;
0598
0599 if (!rt)
0600 goto out;
0601
0602 if (rt->dst.dev)
0603 net = dev_net(rt->dst.dev);
0604 else if (skb_in->dev)
0605 net = dev_net(skb_in->dev);
0606 else
0607 goto out;
0608
0609
0610
0611
0612
0613
0614 iph = ip_hdr(skb_in);
0615
0616 if ((u8 *)iph < skb_in->head ||
0617 (skb_network_header(skb_in) + sizeof(*iph)) >
0618 skb_tail_pointer(skb_in))
0619 goto out;
0620
0621
0622
0623
0624 if (skb_in->pkt_type != PACKET_HOST)
0625 goto out;
0626
0627
0628
0629
0630 if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
0631 goto out;
0632
0633
0634
0635
0636
0637 if (iph->frag_off & htons(IP_OFFSET))
0638 goto out;
0639
0640
0641
0642
0643 if (icmp_pointers[type].error) {
0644
0645
0646
0647
0648 if (iph->protocol == IPPROTO_ICMP) {
0649 u8 _inner_type, *itp;
0650
0651 itp = skb_header_pointer(skb_in,
0652 skb_network_header(skb_in) +
0653 (iph->ihl << 2) +
0654 offsetof(struct icmphdr,
0655 type) -
0656 skb_in->data,
0657 sizeof(_inner_type),
0658 &_inner_type);
0659 if (!itp)
0660 goto out;
0661
0662
0663
0664
0665
0666 if (*itp > NR_ICMP_TYPES ||
0667 icmp_pointers[*itp].error)
0668 goto out;
0669 }
0670 }
0671
0672
0673 local_bh_disable();
0674
0675
0676
0677
0678
0679 if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
0680 !icmpv4_global_allow(net, type, code))
0681 goto out_bh_enable;
0682
0683 sk = icmp_xmit_lock(net);
0684 if (!sk)
0685 goto out_bh_enable;
0686
0687
0688
0689
0690
0691 saddr = iph->daddr;
0692 if (!(rt->rt_flags & RTCF_LOCAL)) {
0693 struct net_device *dev = NULL;
0694
0695 rcu_read_lock();
0696 if (rt_is_input_route(rt) &&
0697 READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr))
0698 dev = dev_get_by_index_rcu(net, inet_iif(skb_in));
0699
0700 if (dev)
0701 saddr = inet_select_addr(dev, iph->saddr,
0702 RT_SCOPE_LINK);
0703 else
0704 saddr = 0;
0705 rcu_read_unlock();
0706 }
0707
0708 tos = icmp_pointers[type].error ? (RT_TOS(iph->tos) |
0709 IPTOS_PREC_INTERNETCONTROL) :
0710 iph->tos;
0711 mark = IP4_REPLY_MARK(net, skb_in->mark);
0712
0713 if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
0714 goto out_unlock;
0715
0716
0717
0718
0719
0720
0721 icmp_param.data.icmph.type = type;
0722 icmp_param.data.icmph.code = code;
0723 icmp_param.data.icmph.un.gateway = info;
0724 icmp_param.data.icmph.checksum = 0;
0725 icmp_param.skb = skb_in;
0726 icmp_param.offset = skb_network_offset(skb_in);
0727 inet_sk(sk)->tos = tos;
0728 ipcm_init(&ipc);
0729 ipc.addr = iph->saddr;
0730 ipc.opt = &icmp_param.replyopts.opt;
0731 ipc.sockc.mark = mark;
0732
0733 rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
0734 type, code, &icmp_param);
0735 if (IS_ERR(rt))
0736 goto out_unlock;
0737
0738
0739 if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code))
0740 goto ende;
0741
0742
0743
0744 room = dst_mtu(&rt->dst);
0745 if (room > 576)
0746 room = 576;
0747 room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
0748 room -= sizeof(struct icmphdr);
0749
0750 icmp_param.data_len = skb_in->len - icmp_param.offset;
0751 if (icmp_param.data_len > room)
0752 icmp_param.data_len = room;
0753 icmp_param.head_len = sizeof(struct icmphdr);
0754
0755
0756
0757
0758
0759 if (!fl4.saddr)
0760 fl4.saddr = htonl(INADDR_DUMMY);
0761
0762 icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt);
0763 ende:
0764 ip_rt_put(rt);
0765 out_unlock:
0766 icmp_xmit_unlock(sk);
0767 out_bh_enable:
0768 local_bh_enable();
0769 out:;
0770 }
0771 EXPORT_SYMBOL(__icmp_send);
0772
0773 #if IS_ENABLED(CONFIG_NF_NAT)
0774 #include <net/netfilter/nf_conntrack.h>
0775 void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
0776 {
0777 struct sk_buff *cloned_skb = NULL;
0778 struct ip_options opts = { 0 };
0779 enum ip_conntrack_info ctinfo;
0780 struct nf_conn *ct;
0781 __be32 orig_ip;
0782
0783 ct = nf_ct_get(skb_in, &ctinfo);
0784 if (!ct || !(ct->status & IPS_SRC_NAT)) {
0785 __icmp_send(skb_in, type, code, info, &opts);
0786 return;
0787 }
0788
0789 if (skb_shared(skb_in))
0790 skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
0791
0792 if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
0793 (skb_network_header(skb_in) + sizeof(struct iphdr)) >
0794 skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
0795 skb_network_offset(skb_in) + sizeof(struct iphdr))))
0796 goto out;
0797
0798 orig_ip = ip_hdr(skb_in)->saddr;
0799 ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
0800 __icmp_send(skb_in, type, code, info, &opts);
0801 ip_hdr(skb_in)->saddr = orig_ip;
0802 out:
0803 consume_skb(cloned_skb);
0804 }
0805 EXPORT_SYMBOL(icmp_ndo_send);
0806 #endif
0807
0808 static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
0809 {
0810 const struct iphdr *iph = (const struct iphdr *)skb->data;
0811 const struct net_protocol *ipprot;
0812 int protocol = iph->protocol;
0813
0814
0815
0816
0817 if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) {
0818 __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
0819 return;
0820 }
0821
0822 raw_icmp_error(skb, protocol, info);
0823
0824 ipprot = rcu_dereference(inet_protos[protocol]);
0825 if (ipprot && ipprot->err_handler)
0826 ipprot->err_handler(skb, info);
0827 }
0828
0829 static bool icmp_tag_validation(int proto)
0830 {
0831 bool ok;
0832
0833 rcu_read_lock();
0834 ok = rcu_dereference(inet_protos[proto])->icmp_strict_tag_validation;
0835 rcu_read_unlock();
0836 return ok;
0837 }
0838
0839
0840
0841
0842
0843
0844 static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
0845 {
0846 enum skb_drop_reason reason = SKB_NOT_DROPPED_YET;
0847 const struct iphdr *iph;
0848 struct icmphdr *icmph;
0849 struct net *net;
0850 u32 info = 0;
0851
0852 net = dev_net(skb_dst(skb)->dev);
0853
0854
0855
0856
0857
0858
0859
0860 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
0861 goto out_err;
0862
0863 icmph = icmp_hdr(skb);
0864 iph = (const struct iphdr *)skb->data;
0865
0866 if (iph->ihl < 5) {
0867 reason = SKB_DROP_REASON_IP_INHDR;
0868 goto out_err;
0869 }
0870
0871 switch (icmph->type) {
0872 case ICMP_DEST_UNREACH:
0873 switch (icmph->code & 15) {
0874 case ICMP_NET_UNREACH:
0875 case ICMP_HOST_UNREACH:
0876 case ICMP_PROT_UNREACH:
0877 case ICMP_PORT_UNREACH:
0878 break;
0879 case ICMP_FRAG_NEEDED:
0880
0881
0882
0883
0884 switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) {
0885 default:
0886 net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n",
0887 &iph->daddr);
0888 break;
0889 case 2:
0890 goto out;
0891 case 3:
0892 if (!icmp_tag_validation(iph->protocol))
0893 goto out;
0894 fallthrough;
0895 case 0:
0896 info = ntohs(icmph->un.frag.mtu);
0897 }
0898 break;
0899 case ICMP_SR_FAILED:
0900 net_dbg_ratelimited("%pI4: Source Route Failed\n",
0901 &iph->daddr);
0902 break;
0903 default:
0904 break;
0905 }
0906 if (icmph->code > NR_ICMP_UNREACH)
0907 goto out;
0908 break;
0909 case ICMP_PARAMETERPROB:
0910 info = ntohl(icmph->un.gateway) >> 24;
0911 break;
0912 case ICMP_TIME_EXCEEDED:
0913 __ICMP_INC_STATS(net, ICMP_MIB_INTIMEEXCDS);
0914 if (icmph->code == ICMP_EXC_FRAGTIME)
0915 goto out;
0916 break;
0917 }
0918
0919
0920
0921
0922
0923
0924
0925
0926
0927
0928
0929
0930
0931
0932
0933
0934
0935
0936
0937 if (!READ_ONCE(net->ipv4.sysctl_icmp_ignore_bogus_error_responses) &&
0938 inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) {
0939 net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
0940 &ip_hdr(skb)->saddr,
0941 icmph->type, icmph->code,
0942 &iph->daddr, skb->dev->name);
0943 goto out;
0944 }
0945
0946 icmp_socket_deliver(skb, info);
0947
0948 out:
0949 return reason;
0950 out_err:
0951 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
0952 return reason ?: SKB_DROP_REASON_NOT_SPECIFIED;
0953 }
0954
0955
0956
0957
0958
0959
0960 static enum skb_drop_reason icmp_redirect(struct sk_buff *skb)
0961 {
0962 if (skb->len < sizeof(struct iphdr)) {
0963 __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
0964 return SKB_DROP_REASON_PKT_TOO_SMALL;
0965 }
0966
0967 if (!pskb_may_pull(skb, sizeof(struct iphdr))) {
0968
0969 return SKB_DROP_REASON_NOMEM;
0970 }
0971
0972 icmp_socket_deliver(skb, ntohl(icmp_hdr(skb)->un.gateway));
0973 return SKB_NOT_DROPPED_YET;
0974 }
0975
0976
0977
0978
0979
0980
0981
0982
0983
0984
0985
0986
0987
0988
0989
0990 static enum skb_drop_reason icmp_echo(struct sk_buff *skb)
0991 {
0992 struct icmp_bxm icmp_param;
0993 struct net *net;
0994
0995 net = dev_net(skb_dst(skb)->dev);
0996
0997 if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all))
0998 return SKB_NOT_DROPPED_YET;
0999
1000 icmp_param.data.icmph = *icmp_hdr(skb);
1001 icmp_param.skb = skb;
1002 icmp_param.offset = 0;
1003 icmp_param.data_len = skb->len;
1004 icmp_param.head_len = sizeof(struct icmphdr);
1005
1006 if (icmp_param.data.icmph.type == ICMP_ECHO)
1007 icmp_param.data.icmph.type = ICMP_ECHOREPLY;
1008 else if (!icmp_build_probe(skb, &icmp_param.data.icmph))
1009 return SKB_NOT_DROPPED_YET;
1010
1011 icmp_reply(&icmp_param, skb);
1012 return SKB_NOT_DROPPED_YET;
1013 }
1014
1015
1016
1017
1018
1019
1020
1021
1022 bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
1023 {
1024 struct icmp_ext_hdr *ext_hdr, _ext_hdr;
1025 struct icmp_ext_echo_iio *iio, _iio;
1026 struct net *net = dev_net(skb->dev);
1027 struct net_device *dev;
1028 char buff[IFNAMSIZ];
1029 u16 ident_len;
1030 u8 status;
1031
1032 if (!READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
1033 return false;
1034
1035
1036
1037
1038 if (!(ntohs(icmphdr->un.echo.sequence) & 1))
1039 return false;
1040
1041 icmphdr->un.echo.sequence &= htons(0xFF00);
1042 if (icmphdr->type == ICMP_EXT_ECHO)
1043 icmphdr->type = ICMP_EXT_ECHOREPLY;
1044 else
1045 icmphdr->type = ICMPV6_EXT_ECHO_REPLY;
1046 ext_hdr = skb_header_pointer(skb, 0, sizeof(_ext_hdr), &_ext_hdr);
1047
1048
1049
1050 iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr), &_iio);
1051 if (!ext_hdr || !iio)
1052 goto send_mal_query;
1053 if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr) ||
1054 ntohs(iio->extobj_hdr.length) > sizeof(_iio))
1055 goto send_mal_query;
1056 ident_len = ntohs(iio->extobj_hdr.length) - sizeof(iio->extobj_hdr);
1057 iio = skb_header_pointer(skb, sizeof(_ext_hdr),
1058 sizeof(iio->extobj_hdr) + ident_len, &_iio);
1059 if (!iio)
1060 goto send_mal_query;
1061
1062 status = 0;
1063 dev = NULL;
1064 switch (iio->extobj_hdr.class_type) {
1065 case ICMP_EXT_ECHO_CTYPE_NAME:
1066 if (ident_len >= IFNAMSIZ)
1067 goto send_mal_query;
1068 memset(buff, 0, sizeof(buff));
1069 memcpy(buff, &iio->ident.name, ident_len);
1070 dev = dev_get_by_name(net, buff);
1071 break;
1072 case ICMP_EXT_ECHO_CTYPE_INDEX:
1073 if (ident_len != sizeof(iio->ident.ifindex))
1074 goto send_mal_query;
1075 dev = dev_get_by_index(net, ntohl(iio->ident.ifindex));
1076 break;
1077 case ICMP_EXT_ECHO_CTYPE_ADDR:
1078 if (ident_len < sizeof(iio->ident.addr.ctype3_hdr) ||
1079 ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
1080 iio->ident.addr.ctype3_hdr.addrlen)
1081 goto send_mal_query;
1082 switch (ntohs(iio->ident.addr.ctype3_hdr.afi)) {
1083 case ICMP_AFI_IP:
1084 if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in_addr))
1085 goto send_mal_query;
1086 dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr);
1087 break;
1088 #if IS_ENABLED(CONFIG_IPV6)
1089 case ICMP_AFI_IP6:
1090 if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in6_addr))
1091 goto send_mal_query;
1092 dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev);
1093 dev_hold(dev);
1094 break;
1095 #endif
1096 default:
1097 goto send_mal_query;
1098 }
1099 break;
1100 default:
1101 goto send_mal_query;
1102 }
1103 if (!dev) {
1104 icmphdr->code = ICMP_EXT_CODE_NO_IF;
1105 return true;
1106 }
1107
1108 if (dev->flags & IFF_UP)
1109 status |= ICMP_EXT_ECHOREPLY_ACTIVE;
1110 if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list)
1111 status |= ICMP_EXT_ECHOREPLY_IPV4;
1112 if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list))
1113 status |= ICMP_EXT_ECHOREPLY_IPV6;
1114 dev_put(dev);
1115 icmphdr->un.echo.sequence |= htons(status);
1116 return true;
1117 send_mal_query:
1118 icmphdr->code = ICMP_EXT_CODE_MAL_QUERY;
1119 return true;
1120 }
1121 EXPORT_SYMBOL_GPL(icmp_build_probe);
1122
1123
1124
1125
1126
1127
1128
1129
1130 static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb)
1131 {
1132 struct icmp_bxm icmp_param;
1133
1134
1135
1136 if (skb->len < 4)
1137 goto out_err;
1138
1139
1140
1141
1142 icmp_param.data.times[1] = inet_current_timestamp();
1143 icmp_param.data.times[2] = icmp_param.data.times[1];
1144
1145 BUG_ON(skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4));
1146
1147 icmp_param.data.icmph = *icmp_hdr(skb);
1148 icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
1149 icmp_param.data.icmph.code = 0;
1150 icmp_param.skb = skb;
1151 icmp_param.offset = 0;
1152 icmp_param.data_len = 0;
1153 icmp_param.head_len = sizeof(struct icmphdr) + 12;
1154 icmp_reply(&icmp_param, skb);
1155 return SKB_NOT_DROPPED_YET;
1156
1157 out_err:
1158 __ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
1159 return SKB_DROP_REASON_PKT_TOO_SMALL;
1160 }
1161
1162 static enum skb_drop_reason icmp_discard(struct sk_buff *skb)
1163 {
1164
1165 return SKB_NOT_DROPPED_YET;
1166 }
1167
1168
1169
1170
1171 int icmp_rcv(struct sk_buff *skb)
1172 {
1173 enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
1174 struct rtable *rt = skb_rtable(skb);
1175 struct net *net = dev_net(rt->dst.dev);
1176 struct icmphdr *icmph;
1177
1178 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1179 struct sec_path *sp = skb_sec_path(skb);
1180 int nh;
1181
1182 if (!(sp && sp->xvec[sp->len - 1]->props.flags &
1183 XFRM_STATE_ICMP)) {
1184 reason = SKB_DROP_REASON_XFRM_POLICY;
1185 goto drop;
1186 }
1187
1188 if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr)))
1189 goto drop;
1190
1191 nh = skb_network_offset(skb);
1192 skb_set_network_header(skb, sizeof(*icmph));
1193
1194 if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN,
1195 skb)) {
1196 reason = SKB_DROP_REASON_XFRM_POLICY;
1197 goto drop;
1198 }
1199
1200 skb_set_network_header(skb, nh);
1201 }
1202
1203 __ICMP_INC_STATS(net, ICMP_MIB_INMSGS);
1204
1205 if (skb_checksum_simple_validate(skb))
1206 goto csum_error;
1207
1208 if (!pskb_pull(skb, sizeof(*icmph)))
1209 goto error;
1210
1211 icmph = icmp_hdr(skb);
1212
1213 ICMPMSGIN_INC_STATS(net, icmph->type);
1214
1215
1216 if (icmph->type == ICMP_EXT_ECHO) {
1217
1218
1219
1220 reason = icmp_echo(skb);
1221 goto reason_check;
1222 }
1223
1224 if (icmph->type == ICMP_EXT_ECHOREPLY) {
1225 reason = ping_rcv(skb);
1226 goto reason_check;
1227 }
1228
1229
1230
1231
1232
1233
1234
1235 if (icmph->type > NR_ICMP_TYPES) {
1236 reason = SKB_DROP_REASON_UNHANDLED_PROTO;
1237 goto error;
1238 }
1239
1240
1241
1242
1243
1244 if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
1245
1246
1247
1248
1249
1250
1251 if ((icmph->type == ICMP_ECHO ||
1252 icmph->type == ICMP_TIMESTAMP) &&
1253 READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_broadcasts)) {
1254 reason = SKB_DROP_REASON_INVALID_PROTO;
1255 goto error;
1256 }
1257 if (icmph->type != ICMP_ECHO &&
1258 icmph->type != ICMP_TIMESTAMP &&
1259 icmph->type != ICMP_ADDRESS &&
1260 icmph->type != ICMP_ADDRESSREPLY) {
1261 reason = SKB_DROP_REASON_INVALID_PROTO;
1262 goto error;
1263 }
1264 }
1265
1266 reason = icmp_pointers[icmph->type].handler(skb);
1267 reason_check:
1268 if (!reason) {
1269 consume_skb(skb);
1270 return NET_RX_SUCCESS;
1271 }
1272
1273 drop:
1274 kfree_skb_reason(skb, reason);
1275 return NET_RX_DROP;
1276 csum_error:
1277 reason = SKB_DROP_REASON_ICMP_CSUM;
1278 __ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS);
1279 error:
1280 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
1281 goto drop;
1282 }
1283
1284 static bool ip_icmp_error_rfc4884_validate(const struct sk_buff *skb, int off)
1285 {
1286 struct icmp_extobj_hdr *objh, _objh;
1287 struct icmp_ext_hdr *exth, _exth;
1288 u16 olen;
1289
1290 exth = skb_header_pointer(skb, off, sizeof(_exth), &_exth);
1291 if (!exth)
1292 return false;
1293 if (exth->version != 2)
1294 return true;
1295
1296 if (exth->checksum &&
1297 csum_fold(skb_checksum(skb, off, skb->len - off, 0)))
1298 return false;
1299
1300 off += sizeof(_exth);
1301 while (off < skb->len) {
1302 objh = skb_header_pointer(skb, off, sizeof(_objh), &_objh);
1303 if (!objh)
1304 return false;
1305
1306 olen = ntohs(objh->length);
1307 if (olen < sizeof(_objh))
1308 return false;
1309
1310 off += olen;
1311 if (off > skb->len)
1312 return false;
1313 }
1314
1315 return true;
1316 }
1317
1318 void ip_icmp_error_rfc4884(const struct sk_buff *skb,
1319 struct sock_ee_data_rfc4884 *out,
1320 int thlen, int off)
1321 {
1322 int hlen;
1323
1324
1325 hlen = -skb_transport_offset(skb) - thlen;
1326
1327
1328 if (off < 128 || off < hlen)
1329 return;
1330
1331
1332 off -= hlen;
1333 if (off + sizeof(struct icmp_ext_hdr) > skb->len)
1334 return;
1335
1336 out->len = off;
1337
1338 if (!ip_icmp_error_rfc4884_validate(skb, off))
1339 out->flags |= SO_EE_RFC4884_FLAG_INVALID;
1340 }
1341 EXPORT_SYMBOL_GPL(ip_icmp_error_rfc4884);
1342
1343 int icmp_err(struct sk_buff *skb, u32 info)
1344 {
1345 struct iphdr *iph = (struct iphdr *)skb->data;
1346 int offset = iph->ihl<<2;
1347 struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset);
1348 int type = icmp_hdr(skb)->type;
1349 int code = icmp_hdr(skb)->code;
1350 struct net *net = dev_net(skb->dev);
1351
1352
1353
1354
1355
1356 if (icmph->type != ICMP_ECHOREPLY) {
1357 ping_err(skb, offset, info);
1358 return 0;
1359 }
1360
1361 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
1362 ipv4_update_pmtu(skb, net, info, 0, IPPROTO_ICMP);
1363 else if (type == ICMP_REDIRECT)
1364 ipv4_redirect(skb, net, 0, IPPROTO_ICMP);
1365
1366 return 0;
1367 }
1368
1369
1370
1371
1372 static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
1373 [ICMP_ECHOREPLY] = {
1374 .handler = ping_rcv,
1375 },
1376 [1] = {
1377 .handler = icmp_discard,
1378 .error = 1,
1379 },
1380 [2] = {
1381 .handler = icmp_discard,
1382 .error = 1,
1383 },
1384 [ICMP_DEST_UNREACH] = {
1385 .handler = icmp_unreach,
1386 .error = 1,
1387 },
1388 [ICMP_SOURCE_QUENCH] = {
1389 .handler = icmp_unreach,
1390 .error = 1,
1391 },
1392 [ICMP_REDIRECT] = {
1393 .handler = icmp_redirect,
1394 .error = 1,
1395 },
1396 [6] = {
1397 .handler = icmp_discard,
1398 .error = 1,
1399 },
1400 [7] = {
1401 .handler = icmp_discard,
1402 .error = 1,
1403 },
1404 [ICMP_ECHO] = {
1405 .handler = icmp_echo,
1406 },
1407 [9] = {
1408 .handler = icmp_discard,
1409 .error = 1,
1410 },
1411 [10] = {
1412 .handler = icmp_discard,
1413 .error = 1,
1414 },
1415 [ICMP_TIME_EXCEEDED] = {
1416 .handler = icmp_unreach,
1417 .error = 1,
1418 },
1419 [ICMP_PARAMETERPROB] = {
1420 .handler = icmp_unreach,
1421 .error = 1,
1422 },
1423 [ICMP_TIMESTAMP] = {
1424 .handler = icmp_timestamp,
1425 },
1426 [ICMP_TIMESTAMPREPLY] = {
1427 .handler = icmp_discard,
1428 },
1429 [ICMP_INFO_REQUEST] = {
1430 .handler = icmp_discard,
1431 },
1432 [ICMP_INFO_REPLY] = {
1433 .handler = icmp_discard,
1434 },
1435 [ICMP_ADDRESS] = {
1436 .handler = icmp_discard,
1437 },
1438 [ICMP_ADDRESSREPLY] = {
1439 .handler = icmp_discard,
1440 },
1441 };
1442
1443 static int __net_init icmp_sk_init(struct net *net)
1444 {
1445
1446 net->ipv4.sysctl_icmp_echo_ignore_all = 0;
1447 net->ipv4.sysctl_icmp_echo_enable_probe = 0;
1448 net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1;
1449
1450
1451 net->ipv4.sysctl_icmp_ignore_bogus_error_responses = 1;
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465 net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
1466 net->ipv4.sysctl_icmp_ratemask = 0x1818;
1467 net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
1468
1469 return 0;
1470 }
1471
1472 static struct pernet_operations __net_initdata icmp_sk_ops = {
1473 .init = icmp_sk_init,
1474 };
1475
1476 int __init icmp_init(void)
1477 {
1478 int err, i;
1479
1480 for_each_possible_cpu(i) {
1481 struct sock *sk;
1482
1483 err = inet_ctl_sock_create(&sk, PF_INET,
1484 SOCK_RAW, IPPROTO_ICMP, &init_net);
1485 if (err < 0)
1486 return err;
1487
1488 per_cpu(ipv4_icmp_sk, i) = sk;
1489
1490
1491
1492
1493 sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1494
1495
1496
1497
1498 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1499 inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
1500 }
1501 return register_pernet_subsys(&icmp_sk_ops);
1502 }