Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * INET     An implementation of the TCP/IP protocol suite for the LINUX
0004  *      operating system.  INET is implemented using the  BSD Socket
0005  *      interface as the means of communication with the user level.
0006  *
0007  *      RAW - implementation of IP "raw" sockets.
0008  *
0009  * Authors: Ross Biro
0010  *      Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
0011  *
0012  * Fixes:
0013  *      Alan Cox    :   verify_area() fixed up
0014  *      Alan Cox    :   ICMP error handling
0015  *      Alan Cox    :   EMSGSIZE if you send too big a packet
0016  *      Alan Cox    :   Now uses generic datagrams and shared
0017  *                  skbuff library. No more peek crashes,
0018  *                  no more backlogs
0019  *      Alan Cox    :   Checks sk->broadcast.
0020  *      Alan Cox    :   Uses skb_free_datagram/skb_copy_datagram
0021  *      Alan Cox    :   Raw passes ip options too
0022  *      Alan Cox    :   Setsocketopt added
0023  *      Alan Cox    :   Fixed error return for broadcasts
0024  *      Alan Cox    :   Removed wake_up calls
0025  *      Alan Cox    :   Use ttl/tos
0026  *      Alan Cox    :   Cleaned up old debugging
0027  *      Alan Cox    :   Use new kernel side addresses
0028  *  Arnt Gulbrandsen    :   Fixed MSG_DONTROUTE in raw sockets.
0029  *      Alan Cox    :   BSD style RAW socket demultiplexing.
0030  *      Alan Cox    :   Beginnings of mrouted support.
0031  *      Alan Cox    :   Added IP_HDRINCL option.
0032  *      Alan Cox    :   Skip broadcast check if BSDism set.
0033  *      David S. Miller :   New socket lookup architecture.
0034  */
0035 
0036 #include <linux/types.h>
0037 #include <linux/atomic.h>
0038 #include <asm/byteorder.h>
0039 #include <asm/current.h>
0040 #include <linux/uaccess.h>
0041 #include <asm/ioctls.h>
0042 #include <linux/stddef.h>
0043 #include <linux/slab.h>
0044 #include <linux/errno.h>
0045 #include <linux/kernel.h>
0046 #include <linux/export.h>
0047 #include <linux/spinlock.h>
0048 #include <linux/sockios.h>
0049 #include <linux/socket.h>
0050 #include <linux/in.h>
0051 #include <linux/mroute.h>
0052 #include <linux/netdevice.h>
0053 #include <linux/in_route.h>
0054 #include <linux/route.h>
0055 #include <linux/skbuff.h>
0056 #include <linux/igmp.h>
0057 #include <net/net_namespace.h>
0058 #include <net/dst.h>
0059 #include <net/sock.h>
0060 #include <linux/ip.h>
0061 #include <linux/net.h>
0062 #include <net/ip.h>
0063 #include <net/icmp.h>
0064 #include <net/udp.h>
0065 #include <net/raw.h>
0066 #include <net/snmp.h>
0067 #include <net/tcp_states.h>
0068 #include <net/inet_common.h>
0069 #include <net/checksum.h>
0070 #include <net/xfrm.h>
0071 #include <linux/rtnetlink.h>
0072 #include <linux/proc_fs.h>
0073 #include <linux/seq_file.h>
0074 #include <linux/netfilter.h>
0075 #include <linux/netfilter_ipv4.h>
0076 #include <linux/compat.h>
0077 #include <linux/uio.h>
0078 
0079 struct raw_frag_vec {
0080     struct msghdr *msg;
0081     union {
0082         struct icmphdr icmph;
0083         char c[1];
0084     } hdr;
0085     int hlen;
0086 };
0087 
/*
 * Hash table holding the IPv4 raw sockets.  It is indexed in this file by
 * (protocol & (RAW_HTABLE_SIZE - 1)), installed as raw_prot.h.raw_hash and
 * handed to the "raw" proc entry as its private data.
 */
0088 struct raw_hashinfo raw_v4_hashinfo;
0089 EXPORT_SYMBOL_GPL(raw_v4_hashinfo);
0090 
0091 int raw_hash_sk(struct sock *sk)
0092 {
0093     struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
0094     struct hlist_nulls_head *hlist;
0095 
0096     hlist = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];
0097 
0098     spin_lock(&h->lock);
0099     __sk_nulls_add_node_rcu(sk, hlist);
0100     sock_set_flag(sk, SOCK_RCU_FREE);
0101     spin_unlock(&h->lock);
0102     sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
0103 
0104     return 0;
0105 }
0106 EXPORT_SYMBOL_GPL(raw_hash_sk);
0107 
0108 void raw_unhash_sk(struct sock *sk)
0109 {
0110     struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
0111 
0112     spin_lock(&h->lock);
0113     if (__sk_nulls_del_node_init_rcu(sk))
0114         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
0115     spin_unlock(&h->lock);
0116 }
0117 EXPORT_SYMBOL_GPL(raw_unhash_sk);
0118 
0119 bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num,
0120           __be32 raddr, __be32 laddr, int dif, int sdif)
0121 {
0122     struct inet_sock *inet = inet_sk(sk);
0123 
0124     if (net_eq(sock_net(sk), net) && inet->inet_num == num  &&
0125         !(inet->inet_daddr && inet->inet_daddr != raddr)    &&
0126         !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
0127         raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
0128         return true;
0129     return false;
0130 }
0131 EXPORT_SYMBOL_GPL(raw_v4_match);
0132 
0133 /*
0134  *  0 - deliver
0135  *  1 - block
0136  */
0137 static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
0138 {
0139     struct icmphdr _hdr;
0140     const struct icmphdr *hdr;
0141 
0142     hdr = skb_header_pointer(skb, skb_transport_offset(skb),
0143                  sizeof(_hdr), &_hdr);
0144     if (!hdr)
0145         return 1;
0146 
0147     if (hdr->type < 32) {
0148         __u32 data = raw_sk(sk)->filter.data;
0149 
0150         return ((1U << hdr->type) & data) != 0;
0151     }
0152 
0153     /* Do not block unknown ICMP types */
0154     return 0;
0155 }
0156 
0157 /* IP input processing comes here for RAW socket delivery.
0158  * Caller owns SKB, so we must make clones.
0159  *
0160  * RFC 1122: SHOULD pass TOS value up to the transport layer.
0161  * -> It does. And not only TOS, but all IP header.
0162  */
0163 static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
0164 {
0165     struct net *net = dev_net(skb->dev);
0166     struct hlist_nulls_head *hlist;
0167     struct hlist_nulls_node *hnode;
0168     int sdif = inet_sdif(skb);
0169     int dif = inet_iif(skb);
0170     int delivered = 0;
0171     struct sock *sk;
0172 
0173     hlist = &raw_v4_hashinfo.ht[hash];
0174     rcu_read_lock();
0175     sk_nulls_for_each(sk, hnode, hlist) {
0176         if (!raw_v4_match(net, sk, iph->protocol,
0177                   iph->saddr, iph->daddr, dif, sdif))
0178             continue;
0179         delivered = 1;
0180         if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) &&
0181             ip_mc_sf_allow(sk, iph->daddr, iph->saddr,
0182                    skb->dev->ifindex, sdif)) {
0183             struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
0184 
0185             /* Not releasing hash table! */
0186             if (clone)
0187                 raw_rcv(sk, clone);
0188         }
0189     }
0190     rcu_read_unlock();
0191     return delivered;
0192 }
0193 
0194 int raw_local_deliver(struct sk_buff *skb, int protocol)
0195 {
0196     int hash = protocol & (RAW_HTABLE_SIZE - 1);
0197 
0198     return raw_v4_input(skb, ip_hdr(skb), hash);
0199 }
0200 
0201 static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
0202 {
0203     struct inet_sock *inet = inet_sk(sk);
0204     const int type = icmp_hdr(skb)->type;
0205     const int code = icmp_hdr(skb)->code;
0206     int err = 0;
0207     int harderr = 0;
0208 
0209     if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
0210         ipv4_sk_update_pmtu(skb, sk, info);
0211     else if (type == ICMP_REDIRECT) {
0212         ipv4_sk_redirect(skb, sk);
0213         return;
0214     }
0215 
0216     /* Report error on raw socket, if:
0217        1. User requested ip_recverr.
0218        2. Socket is connected (otherwise the error indication
0219           is useless without ip_recverr and error is hard.
0220      */
0221     if (!inet->recverr && sk->sk_state != TCP_ESTABLISHED)
0222         return;
0223 
0224     switch (type) {
0225     default:
0226     case ICMP_TIME_EXCEEDED:
0227         err = EHOSTUNREACH;
0228         break;
0229     case ICMP_SOURCE_QUENCH:
0230         return;
0231     case ICMP_PARAMETERPROB:
0232         err = EPROTO;
0233         harderr = 1;
0234         break;
0235     case ICMP_DEST_UNREACH:
0236         err = EHOSTUNREACH;
0237         if (code > NR_ICMP_UNREACH)
0238             break;
0239         if (code == ICMP_FRAG_NEEDED) {
0240             harderr = inet->pmtudisc != IP_PMTUDISC_DONT;
0241             err = EMSGSIZE;
0242         } else {
0243             err = icmp_err_convert[code].errno;
0244             harderr = icmp_err_convert[code].fatal;
0245         }
0246     }
0247 
0248     if (inet->recverr) {
0249         const struct iphdr *iph = (const struct iphdr *)skb->data;
0250         u8 *payload = skb->data + (iph->ihl << 2);
0251 
0252         if (inet->hdrincl)
0253             payload = skb->data;
0254         ip_icmp_error(sk, skb, err, 0, info, payload);
0255     }
0256 
0257     if (inet->recverr || harderr) {
0258         sk->sk_err = err;
0259         sk_error_report(sk);
0260     }
0261 }
0262 
0263 void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
0264 {
0265     struct net *net = dev_net(skb->dev);
0266     struct hlist_nulls_head *hlist;
0267     struct hlist_nulls_node *hnode;
0268     int dif = skb->dev->ifindex;
0269     int sdif = inet_sdif(skb);
0270     const struct iphdr *iph;
0271     struct sock *sk;
0272     int hash;
0273 
0274     hash = protocol & (RAW_HTABLE_SIZE - 1);
0275     hlist = &raw_v4_hashinfo.ht[hash];
0276 
0277     rcu_read_lock();
0278     sk_nulls_for_each(sk, hnode, hlist) {
0279         iph = (const struct iphdr *)skb->data;
0280         if (!raw_v4_match(net, sk, iph->protocol,
0281                   iph->daddr, iph->saddr, dif, sdif))
0282             continue;
0283         raw_err(sk, skb, info);
0284     }
0285     rcu_read_unlock();
0286 }
0287 
0288 static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
0289 {
0290     /* Charge it to the socket. */
0291 
0292     ipv4_pktinfo_prepare(sk, skb);
0293     if (sock_queue_rcv_skb(sk, skb) < 0) {
0294         kfree_skb(skb);
0295         return NET_RX_DROP;
0296     }
0297 
0298     return NET_RX_SUCCESS;
0299 }
0300 
0301 int raw_rcv(struct sock *sk, struct sk_buff *skb)
0302 {
0303     if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
0304         atomic_inc(&sk->sk_drops);
0305         kfree_skb(skb);
0306         return NET_RX_DROP;
0307     }
0308     nf_reset_ct(skb);
0309 
0310     skb_push(skb, skb->data - skb_network_header(skb));
0311 
0312     raw_rcv_skb(sk, skb);
0313     return 0;
0314 }
0315 
/*
 * Transmit a datagram whose IP header was built by the caller (hdrincl).
 *
 * @sk:     sending socket
 * @fl4:    flow used for the route lookup; supplies the fallback source
 *          address and the destination reported on EMSGSIZE
 * @msg:    user message holding the complete packet, IP header first
 * @length: total packet length in bytes
 * @rtp:    in/out route pointer; set to NULL once the route has been
 *          attached to the skb
 * @flags:  msg_flags of the send call (MSG_PROBE, MSG_DONTWAIT, MSG_CONFIRM
 *          are examined here)
 * @sockc:  socket cookie providing mark, transmit time and timestamp flags
 *
 * Returns 0 on success or when MSG_PROBE is set (nothing is sent then),
 * -EMSGSIZE when @length exceeds the output device MTU, -EINVAL when the
 * packet is shorter than an IP header or its ihl points past @length,
 * -EFAULT when the copy from @msg fails, or the error produced by the
 * allocation or the output path.  -ENOBUFS is turned into 0 unless the
 * socket has recverr set.
 */
0316 static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
0317                struct msghdr *msg, size_t length,
0318                struct rtable **rtp, unsigned int flags,
0319                const struct sockcm_cookie *sockc)
0320 {
0321     struct inet_sock *inet = inet_sk(sk);
0322     struct net *net = sock_net(sk);
0323     struct iphdr *iph;
0324     struct sk_buff *skb;
0325     unsigned int iphlen;
0326     int err;
0327     struct rtable *rt = *rtp;
0328     int hlen, tlen;
0329 
0330     if (length > rt->dst.dev->mtu) {
0331         ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
0332                    rt->dst.dev->mtu);
0333         return -EMSGSIZE;
0334     }
0335     if (length < sizeof(struct iphdr))
0336         return -EINVAL;
0337 
    /* A probe only validates the length; no skb is built or sent. */
0338     if (flags&MSG_PROBE)
0339         goto out;
0340 
0341     hlen = LL_RESERVED_SPACE(rt->dst.dev);
0342     tlen = rt->dst.dev->needed_tailroom;
    /* NOTE(review): the extra 15 bytes look like alignment slack - confirm. */
0343     skb = sock_alloc_send_skb(sk,
0344                   length + hlen + tlen + 15,
0345                   flags & MSG_DONTWAIT, &err);
0346     if (!skb)
0347         goto error;
0348     skb_reserve(skb, hlen);
0349 
0350     skb->priority = sk->sk_priority;
0351     skb->mark = sockc->mark;
0352     skb->tstamp = sockc->transmit_time;
    /*
     * The route is attached to the skb and the caller's pointer is
     * cleared; raw_sendmsg() later passes that pointer to ip_rt_put().
     */
0353     skb_dst_set(skb, &rt->dst);
0354     *rtp = NULL;
0355 
0356     skb_reset_network_header(skb);
0357     iph = ip_hdr(skb);
0358     skb_put(skb, length);
0359 
0360     skb->ip_summed = CHECKSUM_NONE;
0361 
0362     skb_setup_tx_timestamp(skb, sockc->tsflags);
0363 
0364     if (flags & MSG_CONFIRM)
0365         skb_set_dst_pending_confirm(skb, 1);
0366 
0367     skb->transport_header = skb->network_header;
0368     err = -EFAULT;
0369     if (memcpy_from_msg(iph, msg, length))
0370         goto error_free;
0371 
0372     iphlen = iph->ihl * 4;
0373 
0374     /*
0375      * We don't want to modify the ip header, but we do need to
0376      * be sure that it won't cause problems later along the network
0377      * stack.  Specifically we want to make sure that iph->ihl is a
0378      * sane value.  If ihl points beyond the length of the buffer passed
0379      * in, reject the frame as invalid
0380      */
0381     err = -EINVAL;
0382     if (iphlen > length)
0383         goto error_free;
0384 
    /*
     * Only when ihl covers at least a basic header are the header fields
     * below fixed up: source address and id are filled in if zero,
     * tot_len is overwritten and the checksum recomputed.
     */
0385     if (iphlen >= sizeof(*iph)) {
0386         if (!iph->saddr)
0387             iph->saddr = fl4->saddr;
0388         iph->check   = 0;
0389         iph->tot_len = htons(length);
0390         if (!iph->id)
0391             ip_select_ident(net, skb, NULL);
0392 
0393         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
0394         skb->transport_header += iphlen;
0395         if (iph->protocol == IPPROTO_ICMP &&
0396             length >= iphlen + sizeof(struct icmphdr))
0397             icmp_out_count(net, ((struct icmphdr *)
0398                 skb_transport_header(skb))->type);
0399     }
0400 
0401     err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
0402               net, sk, skb, NULL, rt->dst.dev,
0403               dst_output);
0404     if (err > 0)
0405         err = net_xmit_errno(err);
0406     if (err)
0407         goto error;
0408 out:
0409     return 0;
0410 
    /* error_free: the skb is still ours and must be released here. */
0411 error_free:
0412     kfree_skb(skb);
    /* error: count the discard; -ENOBUFS is hidden unless recverr is set. */
0413 error:
0414     IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
0415     if (err == -ENOBUFS && !inet->recverr)
0416         err = 0;
0417     return err;
0418 }
0419 
0420 static int raw_probe_proto_opt(struct raw_frag_vec *rfv, struct flowi4 *fl4)
0421 {
0422     int err;
0423 
0424     if (fl4->flowi4_proto != IPPROTO_ICMP)
0425         return 0;
0426 
0427     /* We only need the first two bytes. */
0428     rfv->hlen = 2;
0429 
0430     err = memcpy_from_msg(rfv->hdr.c, rfv->msg, rfv->hlen);
0431     if (err)
0432         return err;
0433 
0434     fl4->fl4_icmp_type = rfv->hdr.icmph.type;
0435     fl4->fl4_icmp_code = rfv->hdr.icmph.code;
0436 
0437     return 0;
0438 }
0439 
0440 static int raw_getfrag(void *from, char *to, int offset, int len, int odd,
0441                struct sk_buff *skb)
0442 {
0443     struct raw_frag_vec *rfv = from;
0444 
0445     if (offset < rfv->hlen) {
0446         int copy = min(rfv->hlen - offset, len);
0447 
0448         if (skb->ip_summed == CHECKSUM_PARTIAL)
0449             memcpy(to, rfv->hdr.c + offset, copy);
0450         else
0451             skb->csum = csum_block_add(
0452                 skb->csum,
0453                 csum_partial_copy_nocheck(rfv->hdr.c + offset,
0454                               to, copy),
0455                 odd);
0456 
0457         odd = 0;
0458         offset += copy;
0459         to += copy;
0460         len -= copy;
0461 
0462         if (!len)
0463             return 0;
0464     }
0465 
0466     offset -= rfv->hlen;
0467 
0468     return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb);
0469 }
0470 
/*
 * sendmsg() handler for IPv4 raw sockets.
 *
 * Flow, as implemented below:
 *  - len above 0xFFFF is rejected with -EMSGSIZE, MSG_OOB with -EOPNOTSUPP;
 *  - the destination comes from msg_name when one is supplied (-EINVAL if
 *    it is too short, -EAFNOSUPPORT for a non-zero family other than
 *    AF_INET; family 0 is accepted after a one-time log message), otherwise
 *    from the connected peer (-EDESTADDRREQ unless the socket state is
 *    TCP_ESTABLISHED);
 *  - control messages are applied through ip_cmsg_send(); when they carry
 *    no IP options a copy of the socket's own options is used instead;
 *  - IP options combined with hdrincl give -EINVAL;
 *  - a route is looked up; a broadcast route without SOCK_BROADCAST gives
 *    -EACCES;
 *  - with hdrincl the user-built packet goes to raw_send_hdrinc(), otherwise
 *    the data is queued with ip_append_data()/raw_getfrag() under the socket
 *    lock and pushed unless MSG_MORE is set.
 *
 * Returns @len unless err is negative, in which case err is returned.
 */
0471 static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
0472 {
0473     struct inet_sock *inet = inet_sk(sk);
0474     struct net *net = sock_net(sk);
0475     struct ipcm_cookie ipc;
0476     struct rtable *rt = NULL;
0477     struct flowi4 fl4;
0478     int free = 0;
0479     __be32 daddr;
0480     __be32 saddr;
0481     u8  tos;
0482     int err;
0483     struct ip_options_data opt_copy;
0484     struct raw_frag_vec rfv;
0485     int hdrincl;
0486 
0487     err = -EMSGSIZE;
0488     if (len > 0xFFFF)
0489         goto out;
0490 
0491     /* hdrincl should be READ_ONCE(inet->hdrincl)
0492      * but READ_ONCE() doesn't work with bit fields.
0493      * Doing this indirectly yields the same result.
0494      */
0495     hdrincl = inet->hdrincl;
0496     hdrincl = READ_ONCE(hdrincl);
0497     /*
0498      *  Check the flags.
0499      */
0500 
0501     err = -EOPNOTSUPP;
0502     if (msg->msg_flags & MSG_OOB)   /* Mirror BSD error message */
0503         goto out;               /* compatibility */
0504 
0505     /*
0506      *  Get and verify the address.
0507      */
0508 
0509     if (msg->msg_namelen) {
0510         DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
0511         err = -EINVAL;
0512         if (msg->msg_namelen < sizeof(*usin))
0513             goto out;
0514         if (usin->sin_family != AF_INET) {
0515             pr_info_once("%s: %s forgot to set AF_INET. Fix it!\n",
0516                      __func__, current->comm);
0517             err = -EAFNOSUPPORT;
0518             if (usin->sin_family)
0519                 goto out;
0520         }
0521         daddr = usin->sin_addr.s_addr;
0522         /* ANK: I did not forget to get protocol from port field.
0523          * I just do not know, who uses this weirdness.
0524          * IP_HDRINCL is much more convenient.
0525          */
0526     } else {
0527         err = -EDESTADDRREQ;
0528         if (sk->sk_state != TCP_ESTABLISHED)
0529             goto out;
0530         daddr = inet->inet_daddr;
0531     }
0532 
0533     ipcm_init_sk(&ipc, inet);
0534 
0535     if (msg->msg_controllen) {
0536         err = ip_cmsg_send(sk, msg, &ipc, false);
0537         if (unlikely(err)) {
0538             kfree(ipc.opt);
0539             goto out;
0540         }
        /* Options that came from control data are freed at "done". */
0541         if (ipc.opt)
0542             free = 1;
0543     }
0544 
0545     saddr = ipc.addr;
0546     ipc.addr = daddr;
0547 
0548     if (!ipc.opt) {
0549         struct ip_options_rcu *inet_opt;
0550 
0551         rcu_read_lock();
0552         inet_opt = rcu_dereference(inet->inet_opt);
0553         if (inet_opt) {
0554             memcpy(&opt_copy, inet_opt,
0555                    sizeof(*inet_opt) + inet_opt->opt.optlen);
0556             ipc.opt = &opt_copy.opt;
0557         }
0558         rcu_read_unlock();
0559     }
0560 
0561     if (ipc.opt) {
0562         err = -EINVAL;
0563         /* Linux does not mangle headers on raw sockets,
0564          * so that IP options + IP_HDRINCL is non-sense.
0565          */
0566         if (hdrincl)
0567             goto done;
0568         if (ipc.opt->opt.srr) {
0569             if (!daddr)
0570                 goto done;
0571             daddr = ipc.opt->opt.faddr;
0572         }
0573     }
0574     tos = get_rtconn_flags(&ipc, sk);
0575     if (msg->msg_flags & MSG_DONTROUTE)
0576         tos |= RTO_ONLINK;
0577 
0578     if (ipv4_is_multicast(daddr)) {
0579         if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
0580             ipc.oif = inet->mc_index;
0581         if (!saddr)
0582             saddr = inet->mc_addr;
0583     } else if (!ipc.oif) {
0584         ipc.oif = inet->uc_index;
0585     } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
0586         /* oif is set, packet is to local broadcast
0587          * and uc_index is set. oif is most likely set
0588          * by sk_bound_dev_if. If uc_index != oif check if the
0589          * oif is an L3 master and uc_index is an L3 slave.
0590          * If so, we want to allow the send using the uc_index.
0591          */
0592         if (ipc.oif != inet->uc_index &&
0593             ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
0594                                   inet->uc_index)) {
0595             ipc.oif = inet->uc_index;
0596         }
0597     }
0598 
    /* With hdrincl the flow is keyed on IPPROTO_RAW, not sk_protocol. */
0599     flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
0600                RT_SCOPE_UNIVERSE,
0601                hdrincl ? IPPROTO_RAW : sk->sk_protocol,
0602                inet_sk_flowi_flags(sk) |
0603                 (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
0604                daddr, saddr, 0, 0, sk->sk_uid);
0605 
    /* rfv is only initialised, and only used later, when !hdrincl. */
0606     if (!hdrincl) {
0607         rfv.msg = msg;
0608         rfv.hlen = 0;
0609 
0610         err = raw_probe_proto_opt(&rfv, &fl4);
0611         if (err)
0612             goto done;
0613     }
0614 
0615     security_sk_classify_flow(sk, flowi4_to_flowi_common(&fl4));
0616     rt = ip_route_output_flow(net, &fl4, sk);
0617     if (IS_ERR(rt)) {
0618         err = PTR_ERR(rt);
0619         rt = NULL;
0620         goto done;
0621     }
0622 
0623     err = -EACCES;
0624     if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST))
0625         goto done;
0626 
0627     if (msg->msg_flags & MSG_CONFIRM)
0628         goto do_confirm;
0629 back_from_confirm:
0630 
0631     if (hdrincl)
0632         err = raw_send_hdrinc(sk, &fl4, msg, len,
0633                       &rt, msg->msg_flags, &ipc.sockc);
0634 
0635      else {
0636         if (!ipc.addr)
0637             ipc.addr = fl4.daddr;
0638         lock_sock(sk);
0639         err = ip_append_data(sk, &fl4, raw_getfrag,
0640                      &rfv, len, 0,
0641                      &ipc, &rt, msg->msg_flags);
0642         if (err)
0643             ip_flush_pending_frames(sk);
0644         else if (!(msg->msg_flags & MSG_MORE)) {
0645             err = ip_push_pending_frames(sk, &fl4);
0646             if (err == -ENOBUFS && !inet->recverr)
0647                 err = 0;
0648         }
0649         release_sock(sk);
0650     }
0651 done:
0652     if (free)
0653         kfree(ipc.opt);
0654     ip_rt_put(rt);
0655 
0656 out:
0657     if (err < 0)
0658         return err;
0659     return len;
0660 
    /*
     * MSG_CONFIRM handling: with MSG_PROBE the neighbour is confirmed; a
     * zero-length probe then finishes with err = 0 without sending, every
     * other case jumps back into the normal send path.
     */
0661 do_confirm:
0662     if (msg->msg_flags & MSG_PROBE)
0663         dst_confirm_neigh(&rt->dst, &fl4.daddr);
0664     if (!(msg->msg_flags & MSG_PROBE) || len)
0665         goto back_from_confirm;
0666     err = 0;
0667     goto done;
0668 }
0669 
0670 static void raw_close(struct sock *sk, long timeout)
0671 {
0672     /*
0673      * Raw sockets may have direct kernel references. Kill them.
0674      */
0675     ip_ra_control(sk, 0, NULL);
0676 
0677     sk_common_release(sk);
0678 }
0679 
/*
 * destroy() handler: discard any frames still pending on @sk, holding the
 * socket lock while doing so.
 */
static void raw_destroy(struct sock *sk)
{
	lock_sock(sk);

	ip_flush_pending_frames(sk);

	release_sock(sk);
}
0686 
0687 /* This gets rid of all the nasties in af_inet. -DaveM */
0688 static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
0689 {
0690     struct inet_sock *inet = inet_sk(sk);
0691     struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
0692     struct net *net = sock_net(sk);
0693     u32 tb_id = RT_TABLE_LOCAL;
0694     int ret = -EINVAL;
0695     int chk_addr_ret;
0696 
0697     lock_sock(sk);
0698     if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
0699         goto out;
0700 
0701     if (sk->sk_bound_dev_if)
0702         tb_id = l3mdev_fib_table_by_index(net,
0703                           sk->sk_bound_dev_if) ? : tb_id;
0704 
0705     chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
0706 
0707     ret = -EADDRNOTAVAIL;
0708     if (!inet_addr_valid_or_nonlocal(net, inet, addr->sin_addr.s_addr,
0709                      chk_addr_ret))
0710         goto out;
0711 
0712     inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
0713     if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
0714         inet->inet_saddr = 0;  /* Use device */
0715     sk_dst_reset(sk);
0716     ret = 0;
0717 out:
0718     release_sock(sk);
0719     return ret;
0720 }
0721 
0722 /*
0723  *  This should be easy, if there is something there
0724  *  we return it, otherwise we block.
0725  */
0726 
0727 static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
0728                int flags, int *addr_len)
0729 {
0730     struct inet_sock *inet = inet_sk(sk);
0731     size_t copied = 0;
0732     int err = -EOPNOTSUPP;
0733     DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
0734     struct sk_buff *skb;
0735 
0736     if (flags & MSG_OOB)
0737         goto out;
0738 
0739     if (flags & MSG_ERRQUEUE) {
0740         err = ip_recv_error(sk, msg, len, addr_len);
0741         goto out;
0742     }
0743 
0744     skb = skb_recv_datagram(sk, flags, &err);
0745     if (!skb)
0746         goto out;
0747 
0748     copied = skb->len;
0749     if (len < copied) {
0750         msg->msg_flags |= MSG_TRUNC;
0751         copied = len;
0752     }
0753 
0754     err = skb_copy_datagram_msg(skb, 0, msg, copied);
0755     if (err)
0756         goto done;
0757 
0758     sock_recv_cmsgs(msg, sk, skb);
0759 
0760     /* Copy the address. */
0761     if (sin) {
0762         sin->sin_family = AF_INET;
0763         sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
0764         sin->sin_port = 0;
0765         memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
0766         *addr_len = sizeof(*sin);
0767     }
0768     if (inet->cmsg_flags)
0769         ip_cmsg_recv(msg, skb);
0770     if (flags & MSG_TRUNC)
0771         copied = skb->len;
0772 done:
0773     skb_free_datagram(sk, skb);
0774 out:
0775     if (err)
0776         return err;
0777     return copied;
0778 }
0779 
0780 static int raw_sk_init(struct sock *sk)
0781 {
0782     struct raw_sock *rp = raw_sk(sk);
0783 
0784     if (inet_sk(sk)->inet_num == IPPROTO_ICMP)
0785         memset(&rp->filter, 0, sizeof(rp->filter));
0786     return 0;
0787 }
0788 
0789 static int raw_seticmpfilter(struct sock *sk, sockptr_t optval, int optlen)
0790 {
0791     if (optlen > sizeof(struct icmp_filter))
0792         optlen = sizeof(struct icmp_filter);
0793     if (copy_from_sockptr(&raw_sk(sk)->filter, optval, optlen))
0794         return -EFAULT;
0795     return 0;
0796 }
0797 
0798 static int raw_geticmpfilter(struct sock *sk, char __user *optval, int __user *optlen)
0799 {
0800     int len, ret = -EFAULT;
0801 
0802     if (get_user(len, optlen))
0803         goto out;
0804     ret = -EINVAL;
0805     if (len < 0)
0806         goto out;
0807     if (len > sizeof(struct icmp_filter))
0808         len = sizeof(struct icmp_filter);
0809     ret = -EFAULT;
0810     if (put_user(len, optlen) ||
0811         copy_to_user(optval, &raw_sk(sk)->filter, len))
0812         goto out;
0813     ret = 0;
0814 out:    return ret;
0815 }
0816 
0817 static int do_raw_setsockopt(struct sock *sk, int level, int optname,
0818                  sockptr_t optval, unsigned int optlen)
0819 {
0820     if (optname == ICMP_FILTER) {
0821         if (inet_sk(sk)->inet_num != IPPROTO_ICMP)
0822             return -EOPNOTSUPP;
0823         else
0824             return raw_seticmpfilter(sk, optval, optlen);
0825     }
0826     return -ENOPROTOOPT;
0827 }
0828 
0829 static int raw_setsockopt(struct sock *sk, int level, int optname,
0830               sockptr_t optval, unsigned int optlen)
0831 {
0832     if (level != SOL_RAW)
0833         return ip_setsockopt(sk, level, optname, optval, optlen);
0834     return do_raw_setsockopt(sk, level, optname, optval, optlen);
0835 }
0836 
0837 static int do_raw_getsockopt(struct sock *sk, int level, int optname,
0838               char __user *optval, int __user *optlen)
0839 {
0840     if (optname == ICMP_FILTER) {
0841         if (inet_sk(sk)->inet_num != IPPROTO_ICMP)
0842             return -EOPNOTSUPP;
0843         else
0844             return raw_geticmpfilter(sk, optval, optlen);
0845     }
0846     return -ENOPROTOOPT;
0847 }
0848 
0849 static int raw_getsockopt(struct sock *sk, int level, int optname,
0850               char __user *optval, int __user *optlen)
0851 {
0852     if (level != SOL_RAW)
0853         return ip_getsockopt(sk, level, optname, optval, optlen);
0854     return do_raw_getsockopt(sk, level, optname, optval, optlen);
0855 }
0856 
0857 static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
0858 {
0859     switch (cmd) {
0860     case SIOCOUTQ: {
0861         int amount = sk_wmem_alloc_get(sk);
0862 
0863         return put_user(amount, (int __user *)arg);
0864     }
0865     case SIOCINQ: {
0866         struct sk_buff *skb;
0867         int amount = 0;
0868 
0869         spin_lock_bh(&sk->sk_receive_queue.lock);
0870         skb = skb_peek(&sk->sk_receive_queue);
0871         if (skb)
0872             amount = skb->len;
0873         spin_unlock_bh(&sk->sk_receive_queue.lock);
0874         return put_user(amount, (int __user *)arg);
0875     }
0876 
0877     default:
0878 #ifdef CONFIG_IP_MROUTE
0879         return ipmr_ioctl(sk, cmd, (void __user *)arg);
0880 #else
0881         return -ENOIOCTLCMD;
0882 #endif
0883     }
0884 }
0885 
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl() handler.  SIOCOUTQ and SIOCINQ are answered with
 * -ENOIOCTLCMD here; everything else goes to ipmr_compat_ioctl() when
 * CONFIG_IP_MROUTE is enabled and is otherwise answered with -ENOIOCTLCMD.
 */
static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
{
	if (cmd == SIOCOUTQ || cmd == SIOCINQ)
		return -ENOIOCTLCMD;

#ifdef CONFIG_IP_MROUTE
	return ipmr_compat_ioctl(sk, cmd, compat_ptr(arg));
#else
	return -ENOIOCTLCMD;
#endif
}
#endif
0902 
0903 int raw_abort(struct sock *sk, int err)
0904 {
0905     lock_sock(sk);
0906 
0907     sk->sk_err = err;
0908     sk_error_report(sk);
0909     __udp_disconnect(sk, 0);
0910 
0911     release_sock(sk);
0912 
0913     return 0;
0914 }
0915 EXPORT_SYMBOL_GPL(raw_abort);
0916 
0917 struct proto raw_prot = {
0918     .name          = "RAW",
0919     .owner         = THIS_MODULE,
0920     .close         = raw_close,
0921     .destroy       = raw_destroy,
0922     .connect       = ip4_datagram_connect,
0923     .disconnect    = __udp_disconnect,
0924     .ioctl         = raw_ioctl,
0925     .init          = raw_sk_init,
0926     .setsockopt    = raw_setsockopt,
0927     .getsockopt    = raw_getsockopt,
0928     .sendmsg       = raw_sendmsg,
0929     .recvmsg       = raw_recvmsg,
0930     .bind          = raw_bind,
0931     .backlog_rcv       = raw_rcv_skb,
0932     .release_cb    = ip4_datagram_release_cb,
0933     .hash          = raw_hash_sk,
0934     .unhash        = raw_unhash_sk,
0935     .obj_size      = sizeof(struct raw_sock),
0936     .useroffset    = offsetof(struct raw_sock, filter),
0937     .usersize      = sizeof_field(struct raw_sock, filter),
0938     .h.raw_hash    = &raw_v4_hashinfo,
0939 #ifdef CONFIG_COMPAT
0940     .compat_ioctl      = compat_raw_ioctl,
0941 #endif
0942     .diag_destroy      = raw_abort,
0943 };
0944 
0945 #ifdef CONFIG_PROC_FS
0946 static struct sock *raw_get_first(struct seq_file *seq, int bucket)
0947 {
0948     struct raw_hashinfo *h = pde_data(file_inode(seq->file));
0949     struct raw_iter_state *state = raw_seq_private(seq);
0950     struct hlist_nulls_head *hlist;
0951     struct hlist_nulls_node *hnode;
0952     struct sock *sk;
0953 
0954     for (state->bucket = bucket; state->bucket < RAW_HTABLE_SIZE;
0955             ++state->bucket) {
0956         hlist = &h->ht[state->bucket];
0957         sk_nulls_for_each(sk, hnode, hlist) {
0958             if (sock_net(sk) == seq_file_net(seq))
0959                 return sk;
0960         }
0961     }
0962     return NULL;
0963 }
0964 
0965 static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
0966 {
0967     struct raw_iter_state *state = raw_seq_private(seq);
0968 
0969     do {
0970         sk = sk_nulls_next(sk);
0971     } while (sk && sock_net(sk) != seq_file_net(seq));
0972 
0973     if (!sk)
0974         return raw_get_first(seq, state->bucket + 1);
0975     return sk;
0976 }
0977 
0978 static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
0979 {
0980     struct sock *sk = raw_get_first(seq, 0);
0981 
0982     if (sk)
0983         while (pos && (sk = raw_get_next(seq, sk)) != NULL)
0984             --pos;
0985     return pos ? NULL : sk;
0986 }
0987 
0988 void *raw_seq_start(struct seq_file *seq, loff_t *pos)
0989     __acquires(RCU)
0990 {
0991     rcu_read_lock();
0992     return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
0993 }
0994 EXPORT_SYMBOL_GPL(raw_seq_start);
0995 
0996 void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
0997 {
0998     struct sock *sk;
0999 
1000     if (v == SEQ_START_TOKEN)
1001         sk = raw_get_first(seq, 0);
1002     else
1003         sk = raw_get_next(seq, v);
1004     ++*pos;
1005     return sk;
1006 }
1007 EXPORT_SYMBOL_GPL(raw_seq_next);
1008 
1009 void raw_seq_stop(struct seq_file *seq, void *v)
1010     __releases(RCU)
1011 {
1012     rcu_read_unlock();
1013 }
1014 EXPORT_SYMBOL_GPL(raw_seq_stop);
1015 
1016 static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
1017 {
1018     struct inet_sock *inet = inet_sk(sp);
1019     __be32 dest = inet->inet_daddr,
1020            src = inet->inet_rcv_saddr;
1021     __u16 destp = 0,
1022           srcp  = inet->inet_num;
1023 
1024     seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
1025         " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
1026         i, src, srcp, dest, destp, sp->sk_state,
1027         sk_wmem_alloc_get(sp),
1028         sk_rmem_alloc_get(sp),
1029         0, 0L, 0,
1030         from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1031         0, sock_i_ino(sp),
1032         refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
1033 }
1034 
1035 static int raw_seq_show(struct seq_file *seq, void *v)
1036 {
1037     if (v == SEQ_START_TOKEN)
1038         seq_printf(seq, "  sl  local_address rem_address   st tx_queue "
1039                 "rx_queue tr tm->when retrnsmt   uid  timeout "
1040                 "inode ref pointer drops\n");
1041     else
1042         raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
1043     return 0;
1044 }
1045 
1046 static const struct seq_operations raw_seq_ops = {
1047     .start = raw_seq_start,
1048     .next  = raw_seq_next,
1049     .stop  = raw_seq_stop,
1050     .show  = raw_seq_show,
1051 };
1052 
1053 static __net_init int raw_init_net(struct net *net)
1054 {
1055     if (!proc_create_net_data("raw", 0444, net->proc_net, &raw_seq_ops,
1056             sizeof(struct raw_iter_state), &raw_v4_hashinfo))
1057         return -ENOMEM;
1058 
1059     return 0;
1060 }
1061 
1062 static __net_exit void raw_exit_net(struct net *net)
1063 {
1064     remove_proc_entry("raw", net->proc_net);
1065 }
1066 
1067 static __net_initdata struct pernet_operations raw_net_ops = {
1068     .init = raw_init_net,
1069     .exit = raw_exit_net,
1070 };
1071 
1072 int __init raw_proc_init(void)
1073 {
1074 
1075     return register_pernet_subsys(&raw_net_ops);
1076 }
1077 
1078 void __init raw_proc_exit(void)
1079 {
1080     unregister_pernet_subsys(&raw_net_ops);
1081 }
1082 #endif /* CONFIG_PROC_FS */
1083 
/*
 * Set the namespace's raw sysctl default: sysctl_raw_l3mdev_accept is
 * enabled (1).  Does nothing unless CONFIG_NET_L3_MASTER_DEV is set.
 */
static void raw_sysctl_init_net(struct net *net)
{
#ifdef CONFIG_NET_L3_MASTER_DEV
	net->ipv4.sysctl_raw_l3mdev_accept = 1;
#endif
}
1090 
1091 static int __net_init raw_sysctl_init(struct net *net)
1092 {
1093     raw_sysctl_init_net(net);
1094     return 0;
1095 }
1096 
1097 static struct pernet_operations __net_initdata raw_sysctl_ops = {
1098     .init   = raw_sysctl_init,
1099 };
1100 
1101 void __init raw_init(void)
1102 {
1103     raw_sysctl_init_net(&init_net);
1104     if (register_pernet_subsys(&raw_sysctl_ops))
1105         panic("RAW: failed to init sysctl parameters.\n");
1106 }