Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * INET     An implementation of the TCP/IP protocol suite for the LINUX
0004  *      operating system.  INET is implemented using the  BSD Socket
0005  *      interface as the means of communication with the user level.
0006  *
0007  *      The IP to API glue.
0008  *
0009  * Authors: see ip.c
0010  *
0011  * Fixes:
0012  *      Many        :   Split from ip.c , see ip.c for history.
0013  *      Martin Mares    :   TOS setting fixed.
0014  *      Alan Cox    :   Fixed a couple of oopses in Martin's
0015  *                  TOS tweaks.
0016  *      Mike McLagan    :   Routing by source
0017  */
0018 
0019 #include <linux/module.h>
0020 #include <linux/types.h>
0021 #include <linux/mm.h>
0022 #include <linux/skbuff.h>
0023 #include <linux/ip.h>
0024 #include <linux/icmp.h>
0025 #include <linux/inetdevice.h>
0026 #include <linux/netdevice.h>
0027 #include <linux/slab.h>
0028 #include <net/sock.h>
0029 #include <net/ip.h>
0030 #include <net/icmp.h>
0031 #include <net/tcp_states.h>
0032 #include <linux/udp.h>
0033 #include <linux/igmp.h>
0034 #include <linux/netfilter.h>
0035 #include <linux/route.h>
0036 #include <linux/mroute.h>
0037 #include <net/inet_ecn.h>
0038 #include <net/route.h>
0039 #include <net/xfrm.h>
0040 #include <net/compat.h>
0041 #include <net/checksum.h>
0042 #if IS_ENABLED(CONFIG_IPV6)
0043 #include <net/transp_v6.h>
0044 #endif
0045 #include <net/ip_fib.h>
0046 
0047 #include <linux/errqueue.h>
0048 #include <linux/uaccess.h>
0049 
0050 #include <linux/bpfilter.h>
0051 
0052 /*
0053  *  SOL_IP control messages.
0054  */
0055 
0056 static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
0057 {
0058     struct in_pktinfo info = *PKTINFO_SKB_CB(skb);
0059 
0060     info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
0061 
0062     put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
0063 }
0064 
0065 static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
0066 {
0067     int ttl = ip_hdr(skb)->ttl;
0068     put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
0069 }
0070 
0071 static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
0072 {
0073     put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
0074 }
0075 
0076 static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
0077 {
0078     if (IPCB(skb)->opt.optlen == 0)
0079         return;
0080 
0081     put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
0082          ip_hdr(skb) + 1);
0083 }
0084 
0085 
0086 static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg,
0087                  struct sk_buff *skb)
0088 {
0089     unsigned char optbuf[sizeof(struct ip_options) + 40];
0090     struct ip_options *opt = (struct ip_options *)optbuf;
0091 
0092     if (IPCB(skb)->opt.optlen == 0)
0093         return;
0094 
0095     if (ip_options_echo(net, opt, skb)) {
0096         msg->msg_flags |= MSG_CTRUNC;
0097         return;
0098     }
0099     ip_options_undo(opt);
0100 
0101     put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
0102 }
0103 
0104 static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb)
0105 {
0106     int val;
0107 
0108     if (IPCB(skb)->frag_max_size == 0)
0109         return;
0110 
0111     val = IPCB(skb)->frag_max_size;
0112     put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val);
0113 }
0114 
0115 static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
0116                   int tlen, int offset)
0117 {
0118     __wsum csum = skb->csum;
0119 
0120     if (skb->ip_summed != CHECKSUM_COMPLETE)
0121         return;
0122 
0123     if (offset != 0) {
0124         int tend_off = skb_transport_offset(skb) + tlen;
0125         csum = csum_sub(csum, skb_checksum(skb, tend_off, offset, 0));
0126     }
0127 
0128     put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
0129 }
0130 
0131 static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
0132 {
0133     char *secdata;
0134     u32 seclen, secid;
0135     int err;
0136 
0137     err = security_socket_getpeersec_dgram(NULL, skb, &secid);
0138     if (err)
0139         return;
0140 
0141     err = security_secid_to_secctx(secid, &secdata, &seclen);
0142     if (err)
0143         return;
0144 
0145     put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata);
0146     security_release_secctx(secdata, seclen);
0147 }
0148 
0149 static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
0150 {
0151     __be16 _ports[2], *ports;
0152     struct sockaddr_in sin;
0153 
0154     /* All current transport protocols have the port numbers in the
0155      * first four bytes of the transport header and this function is
0156      * written with this assumption in mind.
0157      */
0158     ports = skb_header_pointer(skb, skb_transport_offset(skb),
0159                    sizeof(_ports), &_ports);
0160     if (!ports)
0161         return;
0162 
0163     sin.sin_family = AF_INET;
0164     sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
0165     sin.sin_port = ports[1];
0166     memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
0167 
0168     put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
0169 }
0170 
0171 void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
0172              struct sk_buff *skb, int tlen, int offset)
0173 {
0174     struct inet_sock *inet = inet_sk(sk);
0175     unsigned int flags = inet->cmsg_flags;
0176 
0177     /* Ordered by supposed usage frequency */
0178     if (flags & IP_CMSG_PKTINFO) {
0179         ip_cmsg_recv_pktinfo(msg, skb);
0180 
0181         flags &= ~IP_CMSG_PKTINFO;
0182         if (!flags)
0183             return;
0184     }
0185 
0186     if (flags & IP_CMSG_TTL) {
0187         ip_cmsg_recv_ttl(msg, skb);
0188 
0189         flags &= ~IP_CMSG_TTL;
0190         if (!flags)
0191             return;
0192     }
0193 
0194     if (flags & IP_CMSG_TOS) {
0195         ip_cmsg_recv_tos(msg, skb);
0196 
0197         flags &= ~IP_CMSG_TOS;
0198         if (!flags)
0199             return;
0200     }
0201 
0202     if (flags & IP_CMSG_RECVOPTS) {
0203         ip_cmsg_recv_opts(msg, skb);
0204 
0205         flags &= ~IP_CMSG_RECVOPTS;
0206         if (!flags)
0207             return;
0208     }
0209 
0210     if (flags & IP_CMSG_RETOPTS) {
0211         ip_cmsg_recv_retopts(sock_net(sk), msg, skb);
0212 
0213         flags &= ~IP_CMSG_RETOPTS;
0214         if (!flags)
0215             return;
0216     }
0217 
0218     if (flags & IP_CMSG_PASSSEC) {
0219         ip_cmsg_recv_security(msg, skb);
0220 
0221         flags &= ~IP_CMSG_PASSSEC;
0222         if (!flags)
0223             return;
0224     }
0225 
0226     if (flags & IP_CMSG_ORIGDSTADDR) {
0227         ip_cmsg_recv_dstaddr(msg, skb);
0228 
0229         flags &= ~IP_CMSG_ORIGDSTADDR;
0230         if (!flags)
0231             return;
0232     }
0233 
0234     if (flags & IP_CMSG_CHECKSUM)
0235         ip_cmsg_recv_checksum(msg, skb, tlen, offset);
0236 
0237     if (flags & IP_CMSG_RECVFRAGSIZE)
0238         ip_cmsg_recv_fragsize(msg, skb);
0239 }
0240 EXPORT_SYMBOL(ip_cmsg_recv_offset);
0241 
0242 int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
0243          bool allow_ipv6)
0244 {
0245     int err, val;
0246     struct cmsghdr *cmsg;
0247     struct net *net = sock_net(sk);
0248 
0249     for_each_cmsghdr(cmsg, msg) {
0250         if (!CMSG_OK(msg, cmsg))
0251             return -EINVAL;
0252 #if IS_ENABLED(CONFIG_IPV6)
0253         if (allow_ipv6 &&
0254             cmsg->cmsg_level == SOL_IPV6 &&
0255             cmsg->cmsg_type == IPV6_PKTINFO) {
0256             struct in6_pktinfo *src_info;
0257 
0258             if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info)))
0259                 return -EINVAL;
0260             src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
0261             if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
0262                 return -EINVAL;
0263             if (src_info->ipi6_ifindex)
0264                 ipc->oif = src_info->ipi6_ifindex;
0265             ipc->addr = src_info->ipi6_addr.s6_addr32[3];
0266             continue;
0267         }
0268 #endif
0269         if (cmsg->cmsg_level == SOL_SOCKET) {
0270             err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc);
0271             if (err)
0272                 return err;
0273             continue;
0274         }
0275 
0276         if (cmsg->cmsg_level != SOL_IP)
0277             continue;
0278         switch (cmsg->cmsg_type) {
0279         case IP_RETOPTS:
0280             err = cmsg->cmsg_len - sizeof(struct cmsghdr);
0281 
0282             /* Our caller is responsible for freeing ipc->opt */
0283             err = ip_options_get(net, &ipc->opt,
0284                          KERNEL_SOCKPTR(CMSG_DATA(cmsg)),
0285                          err < 40 ? err : 40);
0286             if (err)
0287                 return err;
0288             break;
0289         case IP_PKTINFO:
0290         {
0291             struct in_pktinfo *info;
0292             if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
0293                 return -EINVAL;
0294             info = (struct in_pktinfo *)CMSG_DATA(cmsg);
0295             if (info->ipi_ifindex)
0296                 ipc->oif = info->ipi_ifindex;
0297             ipc->addr = info->ipi_spec_dst.s_addr;
0298             break;
0299         }
0300         case IP_TTL:
0301             if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
0302                 return -EINVAL;
0303             val = *(int *)CMSG_DATA(cmsg);
0304             if (val < 1 || val > 255)
0305                 return -EINVAL;
0306             ipc->ttl = val;
0307             break;
0308         case IP_TOS:
0309             if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
0310                 val = *(int *)CMSG_DATA(cmsg);
0311             else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
0312                 val = *(u8 *)CMSG_DATA(cmsg);
0313             else
0314                 return -EINVAL;
0315             if (val < 0 || val > 255)
0316                 return -EINVAL;
0317             ipc->tos = val;
0318             ipc->priority = rt_tos2priority(ipc->tos);
0319             break;
0320 
0321         default:
0322             return -EINVAL;
0323         }
0324     }
0325     return 0;
0326 }
0327 
0328 static void ip_ra_destroy_rcu(struct rcu_head *head)
0329 {
0330     struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
0331 
0332     sock_put(ra->saved_sk);
0333     kfree(ra);
0334 }
0335 
0336 int ip_ra_control(struct sock *sk, unsigned char on,
0337           void (*destructor)(struct sock *))
0338 {
0339     struct ip_ra_chain *ra, *new_ra;
0340     struct ip_ra_chain __rcu **rap;
0341     struct net *net = sock_net(sk);
0342 
0343     if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
0344         return -EINVAL;
0345 
0346     new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
0347     if (on && !new_ra)
0348         return -ENOMEM;
0349 
0350     mutex_lock(&net->ipv4.ra_mutex);
0351     for (rap = &net->ipv4.ra_chain;
0352          (ra = rcu_dereference_protected(*rap,
0353             lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
0354          rap = &ra->next) {
0355         if (ra->sk == sk) {
0356             if (on) {
0357                 mutex_unlock(&net->ipv4.ra_mutex);
0358                 kfree(new_ra);
0359                 return -EADDRINUSE;
0360             }
0361             /* dont let ip_call_ra_chain() use sk again */
0362             ra->sk = NULL;
0363             RCU_INIT_POINTER(*rap, ra->next);
0364             mutex_unlock(&net->ipv4.ra_mutex);
0365 
0366             if (ra->destructor)
0367                 ra->destructor(sk);
0368             /*
0369              * Delay sock_put(sk) and kfree(ra) after one rcu grace
0370              * period. This guarantee ip_call_ra_chain() dont need
0371              * to mess with socket refcounts.
0372              */
0373             ra->saved_sk = sk;
0374             call_rcu(&ra->rcu, ip_ra_destroy_rcu);
0375             return 0;
0376         }
0377     }
0378     if (!new_ra) {
0379         mutex_unlock(&net->ipv4.ra_mutex);
0380         return -ENOBUFS;
0381     }
0382     new_ra->sk = sk;
0383     new_ra->destructor = destructor;
0384 
0385     RCU_INIT_POINTER(new_ra->next, ra);
0386     rcu_assign_pointer(*rap, new_ra);
0387     sock_hold(sk);
0388     mutex_unlock(&net->ipv4.ra_mutex);
0389 
0390     return 0;
0391 }
0392 
0393 static void ipv4_icmp_error_rfc4884(const struct sk_buff *skb,
0394                     struct sock_ee_data_rfc4884 *out)
0395 {
0396     switch (icmp_hdr(skb)->type) {
0397     case ICMP_DEST_UNREACH:
0398     case ICMP_TIME_EXCEEDED:
0399     case ICMP_PARAMETERPROB:
0400         ip_icmp_error_rfc4884(skb, out, sizeof(struct icmphdr),
0401                       icmp_hdr(skb)->un.reserved[1] * 4);
0402     }
0403 }
0404 
0405 void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
0406            __be16 port, u32 info, u8 *payload)
0407 {
0408     struct sock_exterr_skb *serr;
0409 
0410     skb = skb_clone(skb, GFP_ATOMIC);
0411     if (!skb)
0412         return;
0413 
0414     serr = SKB_EXT_ERR(skb);
0415     serr->ee.ee_errno = err;
0416     serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
0417     serr->ee.ee_type = icmp_hdr(skb)->type;
0418     serr->ee.ee_code = icmp_hdr(skb)->code;
0419     serr->ee.ee_pad = 0;
0420     serr->ee.ee_info = info;
0421     serr->ee.ee_data = 0;
0422     serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
0423                    skb_network_header(skb);
0424     serr->port = port;
0425 
0426     if (skb_pull(skb, payload - skb->data)) {
0427         if (inet_sk(sk)->recverr_rfc4884)
0428             ipv4_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);
0429 
0430         skb_reset_transport_header(skb);
0431         if (sock_queue_err_skb(sk, skb) == 0)
0432             return;
0433     }
0434     kfree_skb(skb);
0435 }
0436 
0437 void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
0438 {
0439     struct inet_sock *inet = inet_sk(sk);
0440     struct sock_exterr_skb *serr;
0441     struct iphdr *iph;
0442     struct sk_buff *skb;
0443 
0444     if (!inet->recverr)
0445         return;
0446 
0447     skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
0448     if (!skb)
0449         return;
0450 
0451     skb_put(skb, sizeof(struct iphdr));
0452     skb_reset_network_header(skb);
0453     iph = ip_hdr(skb);
0454     iph->daddr = daddr;
0455 
0456     serr = SKB_EXT_ERR(skb);
0457     serr->ee.ee_errno = err;
0458     serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
0459     serr->ee.ee_type = 0;
0460     serr->ee.ee_code = 0;
0461     serr->ee.ee_pad = 0;
0462     serr->ee.ee_info = info;
0463     serr->ee.ee_data = 0;
0464     serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
0465     serr->port = port;
0466 
0467     __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
0468     skb_reset_transport_header(skb);
0469 
0470     if (sock_queue_err_skb(sk, skb))
0471         kfree_skb(skb);
0472 }
0473 
0474 /* For some errors we have valid addr_offset even with zero payload and
0475  * zero port. Also, addr_offset should be supported if port is set.
0476  */
0477 static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr)
0478 {
0479     return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
0480            serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port;
0481 }
0482 
0483 /* IPv4 supports cmsg on all imcp errors and some timestamps
0484  *
0485  * Timestamp code paths do not initialize the fields expected by cmsg:
0486  * the PKTINFO fields in skb->cb[]. Fill those in here.
0487  */
0488 static bool ipv4_datagram_support_cmsg(const struct sock *sk,
0489                        struct sk_buff *skb,
0490                        int ee_origin)
0491 {
0492     struct in_pktinfo *info;
0493 
0494     if (ee_origin == SO_EE_ORIGIN_ICMP)
0495         return true;
0496 
0497     if (ee_origin == SO_EE_ORIGIN_LOCAL)
0498         return false;
0499 
0500     /* Support IP_PKTINFO on tstamp packets if requested, to correlate
0501      * timestamp with egress dev. Not possible for packets without iif
0502      * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
0503      */
0504     info = PKTINFO_SKB_CB(skb);
0505     if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
0506         !info->ipi_ifindex)
0507         return false;
0508 
0509     info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
0510     return true;
0511 }
0512 
0513 /*
0514  *  Handle MSG_ERRQUEUE
0515  */
0516 int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
0517 {
0518     struct sock_exterr_skb *serr;
0519     struct sk_buff *skb;
0520     DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
0521     struct {
0522         struct sock_extended_err ee;
0523         struct sockaddr_in   offender;
0524     } errhdr;
0525     int err;
0526     int copied;
0527 
0528     err = -EAGAIN;
0529     skb = sock_dequeue_err_skb(sk);
0530     if (!skb)
0531         goto out;
0532 
0533     copied = skb->len;
0534     if (copied > len) {
0535         msg->msg_flags |= MSG_TRUNC;
0536         copied = len;
0537     }
0538     err = skb_copy_datagram_msg(skb, 0, msg, copied);
0539     if (unlikely(err)) {
0540         kfree_skb(skb);
0541         return err;
0542     }
0543     sock_recv_timestamp(msg, sk, skb);
0544 
0545     serr = SKB_EXT_ERR(skb);
0546 
0547     if (sin && ipv4_datagram_support_addr(serr)) {
0548         sin->sin_family = AF_INET;
0549         sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
0550                            serr->addr_offset);
0551         sin->sin_port = serr->port;
0552         memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
0553         *addr_len = sizeof(*sin);
0554     }
0555 
0556     memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
0557     sin = &errhdr.offender;
0558     memset(sin, 0, sizeof(*sin));
0559 
0560     if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
0561         sin->sin_family = AF_INET;
0562         sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
0563         if (inet_sk(sk)->cmsg_flags)
0564             ip_cmsg_recv(msg, skb);
0565     }
0566 
0567     put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr);
0568 
0569     /* Now we could try to dump offended packet options */
0570 
0571     msg->msg_flags |= MSG_ERRQUEUE;
0572     err = copied;
0573 
0574     consume_skb(skb);
0575 out:
0576     return err;
0577 }
0578 
0579 void __ip_sock_set_tos(struct sock *sk, int val)
0580 {
0581     if (sk->sk_type == SOCK_STREAM) {
0582         val &= ~INET_ECN_MASK;
0583         val |= inet_sk(sk)->tos & INET_ECN_MASK;
0584     }
0585     if (inet_sk(sk)->tos != val) {
0586         inet_sk(sk)->tos = val;
0587         sk->sk_priority = rt_tos2priority(val);
0588         sk_dst_reset(sk);
0589     }
0590 }
0591 
/* Locked wrapper: run __ip_sock_set_tos(@sk, @val) with the socket lock
 * held.
 */
void ip_sock_set_tos(struct sock *sk, int val)
{
	lock_sock(sk);

	__ip_sock_set_tos(sk, val);

	release_sock(sk);
}
EXPORT_SYMBOL(ip_sock_set_tos);
0599 
0600 void ip_sock_set_freebind(struct sock *sk)
0601 {
0602     lock_sock(sk);
0603     inet_sk(sk)->freebind = true;
0604     release_sock(sk);
0605 }
0606 EXPORT_SYMBOL(ip_sock_set_freebind);
0607 
0608 void ip_sock_set_recverr(struct sock *sk)
0609 {
0610     lock_sock(sk);
0611     inet_sk(sk)->recverr = true;
0612     release_sock(sk);
0613 }
0614 EXPORT_SYMBOL(ip_sock_set_recverr);
0615 
0616 int ip_sock_set_mtu_discover(struct sock *sk, int val)
0617 {
0618     if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
0619         return -EINVAL;
0620     lock_sock(sk);
0621     inet_sk(sk)->pmtudisc = val;
0622     release_sock(sk);
0623     return 0;
0624 }
0625 EXPORT_SYMBOL(ip_sock_set_mtu_discover);
0626 
0627 void ip_sock_set_pktinfo(struct sock *sk)
0628 {
0629     lock_sock(sk);
0630     inet_sk(sk)->cmsg_flags |= IP_CMSG_PKTINFO;
0631     release_sock(sk);
0632 }
0633 EXPORT_SYMBOL(ip_sock_set_pktinfo);
0634 
0635 /*
0636  *  Socket option code for IP. This is the end of the line after any
0637  *  TCP,UDP etc options on an IP socket.
0638  */
/* Return true for the multicast membership / source-filter options
 * listed below, false for every other @optname. The caller uses the
 * result to decide whether to take rtnl_lock().
 */
static bool setsockopt_needs_rtnl(int optname)
{
	switch (optname) {
	/* IP_* membership and source filtering */
	case IP_ADD_MEMBERSHIP:
	case IP_DROP_MEMBERSHIP:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	case IP_BLOCK_SOURCE:
	case IP_UNBLOCK_SOURCE:
	case IP_MSFILTER:
	/* MCAST_* equivalents */
	case MCAST_JOIN_GROUP:
	case MCAST_LEAVE_GROUP:
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
	case MCAST_MSFILTER:
		return true;
	default:
		return false;
	}
}
0660 
0661 static int set_mcast_msfilter(struct sock *sk, int ifindex,
0662                   int numsrc, int fmode,
0663                   struct sockaddr_storage *group,
0664                   struct sockaddr_storage *list)
0665 {
0666     struct ip_msfilter *msf;
0667     struct sockaddr_in *psin;
0668     int err, i;
0669 
0670     msf = kmalloc(IP_MSFILTER_SIZE(numsrc), GFP_KERNEL);
0671     if (!msf)
0672         return -ENOBUFS;
0673 
0674     psin = (struct sockaddr_in *)group;
0675     if (psin->sin_family != AF_INET)
0676         goto Eaddrnotavail;
0677     msf->imsf_multiaddr = psin->sin_addr.s_addr;
0678     msf->imsf_interface = 0;
0679     msf->imsf_fmode = fmode;
0680     msf->imsf_numsrc = numsrc;
0681     for (i = 0; i < numsrc; ++i) {
0682         psin = (struct sockaddr_in *)&list[i];
0683 
0684         if (psin->sin_family != AF_INET)
0685             goto Eaddrnotavail;
0686         msf->imsf_slist_flex[i] = psin->sin_addr.s_addr;
0687     }
0688     err = ip_mc_msfilter(sk, msf, ifindex);
0689     kfree(msf);
0690     return err;
0691 
0692 Eaddrnotavail:
0693     kfree(msf);
0694     return -EADDRNOTAVAIL;
0695 }
0696 
0697 static int copy_group_source_from_sockptr(struct group_source_req *greqs,
0698         sockptr_t optval, int optlen)
0699 {
0700     if (in_compat_syscall()) {
0701         struct compat_group_source_req gr32;
0702 
0703         if (optlen != sizeof(gr32))
0704             return -EINVAL;
0705         if (copy_from_sockptr(&gr32, optval, sizeof(gr32)))
0706             return -EFAULT;
0707         greqs->gsr_interface = gr32.gsr_interface;
0708         greqs->gsr_group = gr32.gsr_group;
0709         greqs->gsr_source = gr32.gsr_source;
0710     } else {
0711         if (optlen != sizeof(*greqs))
0712             return -EINVAL;
0713         if (copy_from_sockptr(greqs, optval, sizeof(*greqs)))
0714             return -EFAULT;
0715     }
0716 
0717     return 0;
0718 }
0719 
0720 static int do_mcast_group_source(struct sock *sk, int optname,
0721         sockptr_t optval, int optlen)
0722 {
0723     struct group_source_req greqs;
0724     struct ip_mreq_source mreqs;
0725     struct sockaddr_in *psin;
0726     int omode, add, err;
0727 
0728     err = copy_group_source_from_sockptr(&greqs, optval, optlen);
0729     if (err)
0730         return err;
0731 
0732     if (greqs.gsr_group.ss_family != AF_INET ||
0733         greqs.gsr_source.ss_family != AF_INET)
0734         return -EADDRNOTAVAIL;
0735 
0736     psin = (struct sockaddr_in *)&greqs.gsr_group;
0737     mreqs.imr_multiaddr = psin->sin_addr.s_addr;
0738     psin = (struct sockaddr_in *)&greqs.gsr_source;
0739     mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
0740     mreqs.imr_interface = 0; /* use index for mc_source */
0741 
0742     if (optname == MCAST_BLOCK_SOURCE) {
0743         omode = MCAST_EXCLUDE;
0744         add = 1;
0745     } else if (optname == MCAST_UNBLOCK_SOURCE) {
0746         omode = MCAST_EXCLUDE;
0747         add = 0;
0748     } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
0749         struct ip_mreqn mreq;
0750 
0751         psin = (struct sockaddr_in *)&greqs.gsr_group;
0752         mreq.imr_multiaddr = psin->sin_addr;
0753         mreq.imr_address.s_addr = 0;
0754         mreq.imr_ifindex = greqs.gsr_interface;
0755         err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
0756         if (err && err != -EADDRINUSE)
0757             return err;
0758         greqs.gsr_interface = mreq.imr_ifindex;
0759         omode = MCAST_INCLUDE;
0760         add = 1;
0761     } else /* MCAST_LEAVE_SOURCE_GROUP */ {
0762         omode = MCAST_INCLUDE;
0763         add = 0;
0764     }
0765     return ip_mc_source(add, omode, sk, &mreqs, greqs.gsr_interface);
0766 }
0767 
0768 static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
0769 {
0770     struct group_filter *gsf = NULL;
0771     int err;
0772 
0773     if (optlen < GROUP_FILTER_SIZE(0))
0774         return -EINVAL;
0775     if (optlen > READ_ONCE(sysctl_optmem_max))
0776         return -ENOBUFS;
0777 
0778     gsf = memdup_sockptr(optval, optlen);
0779     if (IS_ERR(gsf))
0780         return PTR_ERR(gsf);
0781 
0782     /* numsrc >= (4G-140)/128 overflow in 32 bits */
0783     err = -ENOBUFS;
0784     if (gsf->gf_numsrc >= 0x1ffffff ||
0785         gsf->gf_numsrc > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
0786         goto out_free_gsf;
0787 
0788     err = -EINVAL;
0789     if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen)
0790         goto out_free_gsf;
0791 
0792     err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc,
0793                  gsf->gf_fmode, &gsf->gf_group,
0794                  gsf->gf_slist_flex);
0795 out_free_gsf:
0796     kfree(gsf);
0797     return err;
0798 }
0799 
0800 static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
0801         int optlen)
0802 {
0803     const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
0804     struct compat_group_filter *gf32;
0805     unsigned int n;
0806     void *p;
0807     int err;
0808 
0809     if (optlen < size0)
0810         return -EINVAL;
0811     if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
0812         return -ENOBUFS;
0813 
0814     p = kmalloc(optlen + 4, GFP_KERNEL);
0815     if (!p)
0816         return -ENOMEM;
0817     gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
0818 
0819     err = -EFAULT;
0820     if (copy_from_sockptr(gf32, optval, optlen))
0821         goto out_free_gsf;
0822 
0823     /* numsrc >= (4G-140)/128 overflow in 32 bits */
0824     n = gf32->gf_numsrc;
0825     err = -ENOBUFS;
0826     if (n >= 0x1ffffff)
0827         goto out_free_gsf;
0828 
0829     err = -EINVAL;
0830     if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
0831         goto out_free_gsf;
0832 
0833     /* numsrc >= (4G-140)/128 overflow in 32 bits */
0834     err = -ENOBUFS;
0835     if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
0836         goto out_free_gsf;
0837     err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
0838                  &gf32->gf_group, gf32->gf_slist_flex);
0839 out_free_gsf:
0840     kfree(p);
0841     return err;
0842 }
0843 
0844 static int ip_mcast_join_leave(struct sock *sk, int optname,
0845         sockptr_t optval, int optlen)
0846 {
0847     struct ip_mreqn mreq = { };
0848     struct sockaddr_in *psin;
0849     struct group_req greq;
0850 
0851     if (optlen < sizeof(struct group_req))
0852         return -EINVAL;
0853     if (copy_from_sockptr(&greq, optval, sizeof(greq)))
0854         return -EFAULT;
0855 
0856     psin = (struct sockaddr_in *)&greq.gr_group;
0857     if (psin->sin_family != AF_INET)
0858         return -EINVAL;
0859     mreq.imr_multiaddr = psin->sin_addr;
0860     mreq.imr_ifindex = greq.gr_interface;
0861     if (optname == MCAST_JOIN_GROUP)
0862         return ip_mc_join_group(sk, &mreq);
0863     return ip_mc_leave_group(sk, &mreq);
0864 }
0865 
0866 static int compat_ip_mcast_join_leave(struct sock *sk, int optname,
0867         sockptr_t optval, int optlen)
0868 {
0869     struct compat_group_req greq;
0870     struct ip_mreqn mreq = { };
0871     struct sockaddr_in *psin;
0872 
0873     if (optlen < sizeof(struct compat_group_req))
0874         return -EINVAL;
0875     if (copy_from_sockptr(&greq, optval, sizeof(greq)))
0876         return -EFAULT;
0877 
0878     psin = (struct sockaddr_in *)&greq.gr_group;
0879     if (psin->sin_family != AF_INET)
0880         return -EINVAL;
0881     mreq.imr_multiaddr = psin->sin_addr;
0882     mreq.imr_ifindex = greq.gr_interface;
0883 
0884     if (optname == MCAST_JOIN_GROUP)
0885         return ip_mc_join_group(sk, &mreq);
0886     return ip_mc_leave_group(sk, &mreq);
0887 }
0888 
/* Static key ip4_min_ttl, defined with an initial state of false.
 * NOTE(review): presumably switched on once IP_MINTTL is in use; its
 * users are outside this view -- confirm.
 */
DEFINE_STATIC_KEY_FALSE(ip4_min_ttl);
0890 
0891 static int do_ip_setsockopt(struct sock *sk, int level, int optname,
0892         sockptr_t optval, unsigned int optlen)
0893 {
0894     struct inet_sock *inet = inet_sk(sk);
0895     struct net *net = sock_net(sk);
0896     int val = 0, err;
0897     bool needs_rtnl = setsockopt_needs_rtnl(optname);
0898 
0899     switch (optname) {
0900     case IP_PKTINFO:
0901     case IP_RECVTTL:
0902     case IP_RECVOPTS:
0903     case IP_RECVTOS:
0904     case IP_RETOPTS:
0905     case IP_TOS:
0906     case IP_TTL:
0907     case IP_HDRINCL:
0908     case IP_MTU_DISCOVER:
0909     case IP_RECVERR:
0910     case IP_ROUTER_ALERT:
0911     case IP_FREEBIND:
0912     case IP_PASSSEC:
0913     case IP_TRANSPARENT:
0914     case IP_MINTTL:
0915     case IP_NODEFRAG:
0916     case IP_BIND_ADDRESS_NO_PORT:
0917     case IP_UNICAST_IF:
0918     case IP_MULTICAST_TTL:
0919     case IP_MULTICAST_ALL:
0920     case IP_MULTICAST_LOOP:
0921     case IP_RECVORIGDSTADDR:
0922     case IP_CHECKSUM:
0923     case IP_RECVFRAGSIZE:
0924     case IP_RECVERR_RFC4884:
0925         if (optlen >= sizeof(int)) {
0926             if (copy_from_sockptr(&val, optval, sizeof(val)))
0927                 return -EFAULT;
0928         } else if (optlen >= sizeof(char)) {
0929             unsigned char ucval;
0930 
0931             if (copy_from_sockptr(&ucval, optval, sizeof(ucval)))
0932                 return -EFAULT;
0933             val = (int) ucval;
0934         }
0935     }
0936 
0937     /* If optlen==0, it is equivalent to val == 0 */
0938 
0939     if (optname == IP_ROUTER_ALERT)
0940         return ip_ra_control(sk, val ? 1 : 0, NULL);
0941     if (ip_mroute_opt(optname))
0942         return ip_mroute_setsockopt(sk, optname, optval, optlen);
0943 
0944     err = 0;
0945     if (needs_rtnl)
0946         rtnl_lock();
0947     lock_sock(sk);
0948 
0949     switch (optname) {
0950     case IP_OPTIONS:
0951     {
0952         struct ip_options_rcu *old, *opt = NULL;
0953 
0954         if (optlen > 40)
0955             goto e_inval;
0956         err = ip_options_get(sock_net(sk), &opt, optval, optlen);
0957         if (err)
0958             break;
0959         old = rcu_dereference_protected(inet->inet_opt,
0960                         lockdep_sock_is_held(sk));
0961         if (inet->is_icsk) {
0962             struct inet_connection_sock *icsk = inet_csk(sk);
0963 #if IS_ENABLED(CONFIG_IPV6)
0964             if (sk->sk_family == PF_INET ||
0965                 (!((1 << sk->sk_state) &
0966                    (TCPF_LISTEN | TCPF_CLOSE)) &&
0967                  inet->inet_daddr != LOOPBACK4_IPV6)) {
0968 #endif
0969                 if (old)
0970                     icsk->icsk_ext_hdr_len -= old->opt.optlen;
0971                 if (opt)
0972                     icsk->icsk_ext_hdr_len += opt->opt.optlen;
0973                 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
0974 #if IS_ENABLED(CONFIG_IPV6)
0975             }
0976 #endif
0977         }
0978         rcu_assign_pointer(inet->inet_opt, opt);
0979         if (old)
0980             kfree_rcu(old, rcu);
0981         break;
0982     }
0983     case IP_PKTINFO:
0984         if (val)
0985             inet->cmsg_flags |= IP_CMSG_PKTINFO;
0986         else
0987             inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
0988         break;
0989     case IP_RECVTTL:
0990         if (val)
0991             inet->cmsg_flags |=  IP_CMSG_TTL;
0992         else
0993             inet->cmsg_flags &= ~IP_CMSG_TTL;
0994         break;
0995     case IP_RECVTOS:
0996         if (val)
0997             inet->cmsg_flags |=  IP_CMSG_TOS;
0998         else
0999             inet->cmsg_flags &= ~IP_CMSG_TOS;
1000         break;
1001     case IP_RECVOPTS:
1002         if (val)
1003             inet->cmsg_flags |=  IP_CMSG_RECVOPTS;
1004         else
1005             inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
1006         break;
1007     case IP_RETOPTS:
1008         if (val)
1009             inet->cmsg_flags |= IP_CMSG_RETOPTS;
1010         else
1011             inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
1012         break;
1013     case IP_PASSSEC:
1014         if (val)
1015             inet->cmsg_flags |= IP_CMSG_PASSSEC;
1016         else
1017             inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
1018         break;
1019     case IP_RECVORIGDSTADDR:
1020         if (val)
1021             inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
1022         else
1023             inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
1024         break;
1025     case IP_CHECKSUM:
1026         if (val) {
1027             if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) {
1028                 inet_inc_convert_csum(sk);
1029                 inet->cmsg_flags |= IP_CMSG_CHECKSUM;
1030             }
1031         } else {
1032             if (inet->cmsg_flags & IP_CMSG_CHECKSUM) {
1033                 inet_dec_convert_csum(sk);
1034                 inet->cmsg_flags &= ~IP_CMSG_CHECKSUM;
1035             }
1036         }
1037         break;
1038     case IP_RECVFRAGSIZE:
1039         if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
1040             goto e_inval;
1041         if (val)
1042             inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
1043         else
1044             inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
1045         break;
1046     case IP_TOS:    /* This sets both TOS and Precedence */
1047         __ip_sock_set_tos(sk, val);
1048         break;
1049     case IP_TTL:
1050         if (optlen < 1)
1051             goto e_inval;
1052         if (val != -1 && (val < 1 || val > 255))
1053             goto e_inval;
1054         inet->uc_ttl = val;
1055         break;
1056     case IP_HDRINCL:
1057         if (sk->sk_type != SOCK_RAW) {
1058             err = -ENOPROTOOPT;
1059             break;
1060         }
1061         inet->hdrincl = val ? 1 : 0;
1062         break;
1063     case IP_NODEFRAG:
1064         if (sk->sk_type != SOCK_RAW) {
1065             err = -ENOPROTOOPT;
1066             break;
1067         }
1068         inet->nodefrag = val ? 1 : 0;
1069         break;
1070     case IP_BIND_ADDRESS_NO_PORT:
1071         inet->bind_address_no_port = val ? 1 : 0;
1072         break;
1073     case IP_MTU_DISCOVER:
1074         if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
1075             goto e_inval;
1076         inet->pmtudisc = val;
1077         break;
1078     case IP_RECVERR:
1079         inet->recverr = !!val;
1080         if (!val)
1081             skb_queue_purge(&sk->sk_error_queue);
1082         break;
1083     case IP_RECVERR_RFC4884:
1084         if (val < 0 || val > 1)
1085             goto e_inval;
1086         inet->recverr_rfc4884 = !!val;
1087         break;
1088     case IP_MULTICAST_TTL:
1089         if (sk->sk_type == SOCK_STREAM)
1090             goto e_inval;
1091         if (optlen < 1)
1092             goto e_inval;
1093         if (val == -1)
1094             val = 1;
1095         if (val < 0 || val > 255)
1096             goto e_inval;
1097         inet->mc_ttl = val;
1098         break;
1099     case IP_MULTICAST_LOOP:
1100         if (optlen < 1)
1101             goto e_inval;
1102         inet->mc_loop = !!val;
1103         break;
1104     case IP_UNICAST_IF:
1105     {
1106         struct net_device *dev = NULL;
1107         int ifindex;
1108         int midx;
1109 
1110         if (optlen != sizeof(int))
1111             goto e_inval;
1112 
1113         ifindex = (__force int)ntohl((__force __be32)val);
1114         if (ifindex == 0) {
1115             inet->uc_index = 0;
1116             err = 0;
1117             break;
1118         }
1119 
1120         dev = dev_get_by_index(sock_net(sk), ifindex);
1121         err = -EADDRNOTAVAIL;
1122         if (!dev)
1123             break;
1124 
1125         midx = l3mdev_master_ifindex(dev);
1126         dev_put(dev);
1127 
1128         err = -EINVAL;
1129         if (sk->sk_bound_dev_if && midx != sk->sk_bound_dev_if)
1130             break;
1131 
1132         inet->uc_index = ifindex;
1133         err = 0;
1134         break;
1135     }
1136     case IP_MULTICAST_IF:
1137     {
1138         struct ip_mreqn mreq;
1139         struct net_device *dev = NULL;
1140         int midx;
1141 
1142         if (sk->sk_type == SOCK_STREAM)
1143             goto e_inval;
1144         /*
1145          *  Check the arguments are allowable
1146          */
1147 
1148         if (optlen < sizeof(struct in_addr))
1149             goto e_inval;
1150 
1151         err = -EFAULT;
1152         if (optlen >= sizeof(struct ip_mreqn)) {
1153             if (copy_from_sockptr(&mreq, optval, sizeof(mreq)))
1154                 break;
1155         } else {
1156             memset(&mreq, 0, sizeof(mreq));
1157             if (optlen >= sizeof(struct ip_mreq)) {
1158                 if (copy_from_sockptr(&mreq, optval,
1159                               sizeof(struct ip_mreq)))
1160                     break;
1161             } else if (optlen >= sizeof(struct in_addr)) {
1162                 if (copy_from_sockptr(&mreq.imr_address, optval,
1163                               sizeof(struct in_addr)))
1164                     break;
1165             }
1166         }
1167 
1168         if (!mreq.imr_ifindex) {
1169             if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) {
1170                 inet->mc_index = 0;
1171                 inet->mc_addr  = 0;
1172                 err = 0;
1173                 break;
1174             }
1175             dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
1176             if (dev)
1177                 mreq.imr_ifindex = dev->ifindex;
1178         } else
1179             dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
1180 
1181 
1182         err = -EADDRNOTAVAIL;
1183         if (!dev)
1184             break;
1185 
1186         midx = l3mdev_master_ifindex(dev);
1187 
1188         dev_put(dev);
1189 
1190         err = -EINVAL;
1191         if (sk->sk_bound_dev_if &&
1192             mreq.imr_ifindex != sk->sk_bound_dev_if &&
1193             midx != sk->sk_bound_dev_if)
1194             break;
1195 
1196         inet->mc_index = mreq.imr_ifindex;
1197         inet->mc_addr  = mreq.imr_address.s_addr;
1198         err = 0;
1199         break;
1200     }
1201 
1202     case IP_ADD_MEMBERSHIP:
1203     case IP_DROP_MEMBERSHIP:
1204     {
1205         struct ip_mreqn mreq;
1206 
1207         err = -EPROTO;
1208         if (inet_sk(sk)->is_icsk)
1209             break;
1210 
1211         if (optlen < sizeof(struct ip_mreq))
1212             goto e_inval;
1213         err = -EFAULT;
1214         if (optlen >= sizeof(struct ip_mreqn)) {
1215             if (copy_from_sockptr(&mreq, optval, sizeof(mreq)))
1216                 break;
1217         } else {
1218             memset(&mreq, 0, sizeof(mreq));
1219             if (copy_from_sockptr(&mreq, optval,
1220                           sizeof(struct ip_mreq)))
1221                 break;
1222         }
1223 
1224         if (optname == IP_ADD_MEMBERSHIP)
1225             err = ip_mc_join_group(sk, &mreq);
1226         else
1227             err = ip_mc_leave_group(sk, &mreq);
1228         break;
1229     }
1230     case IP_MSFILTER:
1231     {
1232         struct ip_msfilter *msf;
1233 
1234         if (optlen < IP_MSFILTER_SIZE(0))
1235             goto e_inval;
1236         if (optlen > READ_ONCE(sysctl_optmem_max)) {
1237             err = -ENOBUFS;
1238             break;
1239         }
1240         msf = memdup_sockptr(optval, optlen);
1241         if (IS_ERR(msf)) {
1242             err = PTR_ERR(msf);
1243             break;
1244         }
1245         /* numsrc >= (1G-4) overflow in 32 bits */
1246         if (msf->imsf_numsrc >= 0x3ffffffcU ||
1247             msf->imsf_numsrc > READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
1248             kfree(msf);
1249             err = -ENOBUFS;
1250             break;
1251         }
1252         if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
1253             kfree(msf);
1254             err = -EINVAL;
1255             break;
1256         }
1257         err = ip_mc_msfilter(sk, msf, 0);
1258         kfree(msf);
1259         break;
1260     }
1261     case IP_BLOCK_SOURCE:
1262     case IP_UNBLOCK_SOURCE:
1263     case IP_ADD_SOURCE_MEMBERSHIP:
1264     case IP_DROP_SOURCE_MEMBERSHIP:
1265     {
1266         struct ip_mreq_source mreqs;
1267         int omode, add;
1268 
1269         if (optlen != sizeof(struct ip_mreq_source))
1270             goto e_inval;
1271         if (copy_from_sockptr(&mreqs, optval, sizeof(mreqs))) {
1272             err = -EFAULT;
1273             break;
1274         }
1275         if (optname == IP_BLOCK_SOURCE) {
1276             omode = MCAST_EXCLUDE;
1277             add = 1;
1278         } else if (optname == IP_UNBLOCK_SOURCE) {
1279             omode = MCAST_EXCLUDE;
1280             add = 0;
1281         } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
1282             struct ip_mreqn mreq;
1283 
1284             mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
1285             mreq.imr_address.s_addr = mreqs.imr_interface;
1286             mreq.imr_ifindex = 0;
1287             err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
1288             if (err && err != -EADDRINUSE)
1289                 break;
1290             omode = MCAST_INCLUDE;
1291             add = 1;
1292         } else /* IP_DROP_SOURCE_MEMBERSHIP */ {
1293             omode = MCAST_INCLUDE;
1294             add = 0;
1295         }
1296         err = ip_mc_source(add, omode, sk, &mreqs, 0);
1297         break;
1298     }
1299     case MCAST_JOIN_GROUP:
1300     case MCAST_LEAVE_GROUP:
1301         if (in_compat_syscall())
1302             err = compat_ip_mcast_join_leave(sk, optname, optval,
1303                              optlen);
1304         else
1305             err = ip_mcast_join_leave(sk, optname, optval, optlen);
1306         break;
1307     case MCAST_JOIN_SOURCE_GROUP:
1308     case MCAST_LEAVE_SOURCE_GROUP:
1309     case MCAST_BLOCK_SOURCE:
1310     case MCAST_UNBLOCK_SOURCE:
1311         err = do_mcast_group_source(sk, optname, optval, optlen);
1312         break;
1313     case MCAST_MSFILTER:
1314         if (in_compat_syscall())
1315             err = compat_ip_set_mcast_msfilter(sk, optval, optlen);
1316         else
1317             err = ip_set_mcast_msfilter(sk, optval, optlen);
1318         break;
1319     case IP_MULTICAST_ALL:
1320         if (optlen < 1)
1321             goto e_inval;
1322         if (val != 0 && val != 1)
1323             goto e_inval;
1324         inet->mc_all = val;
1325         break;
1326 
1327     case IP_FREEBIND:
1328         if (optlen < 1)
1329             goto e_inval;
1330         inet->freebind = !!val;
1331         break;
1332 
1333     case IP_IPSEC_POLICY:
1334     case IP_XFRM_POLICY:
1335         err = -EPERM;
1336         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1337             break;
1338         err = xfrm_user_policy(sk, optname, optval, optlen);
1339         break;
1340 
1341     case IP_TRANSPARENT:
1342         if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
1343             !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1344             err = -EPERM;
1345             break;
1346         }
1347         if (optlen < 1)
1348             goto e_inval;
1349         inet->transparent = !!val;
1350         break;
1351 
1352     case IP_MINTTL:
1353         if (optlen < 1)
1354             goto e_inval;
1355         if (val < 0 || val > 255)
1356             goto e_inval;
1357 
1358         if (val)
1359             static_branch_enable(&ip4_min_ttl);
1360 
1361         /* tcp_v4_err() and tcp_v4_rcv() might read min_ttl
1362          * while we are changing it.
1363          */
1364         WRITE_ONCE(inet->min_ttl, val);
1365         break;
1366 
1367     default:
1368         err = -ENOPROTOOPT;
1369         break;
1370     }
1371     release_sock(sk);
1372     if (needs_rtnl)
1373         rtnl_unlock();
1374     return err;
1375 
1376 e_inval:
1377     release_sock(sk);
1378     if (needs_rtnl)
1379         rtnl_unlock();
1380     return -EINVAL;
1381 }
1382 
1383 /**
1384  * ipv4_pktinfo_prepare - transfer some info from rtable to skb
1385  * @sk: socket
1386  * @skb: buffer
1387  *
1388  * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
1389  * destination in skb->cb[] before dst drop.
1390  * This way, receiver doesn't make cache line misses to read rtable.
1391  */
1392 void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
1393 {
1394     struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
1395     bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
1396                ipv6_sk_rxinfo(sk);
1397 
1398     if (prepare && skb_rtable(skb)) {
1399         /* skb->cb is overloaded: prior to this point it is IP{6}CB
1400          * which has interface index (iif) as the first member of the
1401          * underlying inet{6}_skb_parm struct. This code then overlays
1402          * PKTINFO_SKB_CB and in_pktinfo also has iif as the first
1403          * element so the iif is picked up from the prior IPCB. If iif
1404          * is the loopback interface, then return the sending interface
1405          * (e.g., process binds socket to eth0 for Tx which is
1406          * redirected to loopback in the rtable/dst).
1407          */
1408         struct rtable *rt = skb_rtable(skb);
1409         bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);
1410 
1411         if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
1412             pktinfo->ipi_ifindex = inet_iif(skb);
1413         else if (l3slave && rt && rt->rt_iif)
1414             pktinfo->ipi_ifindex = rt->rt_iif;
1415 
1416         pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
1417     } else {
1418         pktinfo->ipi_ifindex = 0;
1419         pktinfo->ipi_spec_dst.s_addr = 0;
1420     }
1421     skb_dst_drop(skb);
1422 }
1423 
1424 int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
1425         unsigned int optlen)
1426 {
1427     int err;
1428 
1429     if (level != SOL_IP)
1430         return -ENOPROTOOPT;
1431 
1432     err = do_ip_setsockopt(sk, level, optname, optval, optlen);
1433 #if IS_ENABLED(CONFIG_BPFILTER_UMH)
1434     if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
1435         optname < BPFILTER_IPT_SET_MAX)
1436         err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
1437 #endif
1438 #ifdef CONFIG_NETFILTER
1439     /* we need to exclude all possible ENOPROTOOPTs except default case */
1440     if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
1441             optname != IP_IPSEC_POLICY &&
1442             optname != IP_XFRM_POLICY &&
1443             !ip_mroute_opt(optname))
1444         err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
1445 #endif
1446     return err;
1447 }
1448 EXPORT_SYMBOL(ip_setsockopt);
1449 
1450 /*
1451  *  Get the options. Note for future reference. The GET of IP options gets
1452  *  the _received_ ones. The set sets the _sent_ ones.
1453  */
1454 
1455 static bool getsockopt_needs_rtnl(int optname)
1456 {
1457     switch (optname) {
1458     case IP_MSFILTER:
1459     case MCAST_MSFILTER:
1460         return true;
1461     }
1462     return false;
1463 }
1464 
1465 static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
1466         int __user *optlen, int len)
1467 {
1468     const int size0 = offsetof(struct group_filter, gf_slist_flex);
1469     struct group_filter __user *p = optval;
1470     struct group_filter gsf;
1471     int num;
1472     int err;
1473 
1474     if (len < size0)
1475         return -EINVAL;
1476     if (copy_from_user(&gsf, p, size0))
1477         return -EFAULT;
1478 
1479     num = gsf.gf_numsrc;
1480     err = ip_mc_gsfget(sk, &gsf, p->gf_slist_flex);
1481     if (err)
1482         return err;
1483     if (gsf.gf_numsrc < num)
1484         num = gsf.gf_numsrc;
1485     if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
1486         copy_to_user(p, &gsf, size0))
1487         return -EFAULT;
1488     return 0;
1489 }
1490 
1491 static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
1492         int __user *optlen, int len)
1493 {
1494     const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
1495     struct compat_group_filter __user *p = optval;
1496     struct compat_group_filter gf32;
1497     struct group_filter gf;
1498     int num;
1499     int err;
1500 
1501     if (len < size0)
1502         return -EINVAL;
1503     if (copy_from_user(&gf32, p, size0))
1504         return -EFAULT;
1505 
1506     gf.gf_interface = gf32.gf_interface;
1507     gf.gf_fmode = gf32.gf_fmode;
1508     num = gf.gf_numsrc = gf32.gf_numsrc;
1509     gf.gf_group = gf32.gf_group;
1510 
1511     err = ip_mc_gsfget(sk, &gf, p->gf_slist_flex);
1512     if (err)
1513         return err;
1514     if (gf.gf_numsrc < num)
1515         num = gf.gf_numsrc;
1516     len = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32));
1517     if (put_user(len, optlen) ||
1518         put_user(gf.gf_fmode, &p->gf_fmode) ||
1519         put_user(gf.gf_numsrc, &p->gf_numsrc))
1520         return -EFAULT;
1521     return 0;
1522 }
1523 
/*
 * do_ip_getsockopt - read one SOL_IP socket option.
 *
 * Returns -EOPNOTSUPP when level is not SOL_IP. Multicast-routing options
 * (ip_mroute_opt()) are handed to ip_mroute_getsockopt() before any lock
 * is taken. For everything else the caller's buffer length is read from
 * *optlen (-EFAULT if unreadable, -EINVAL if negative), RTNL is taken when
 * getsockopt_needs_rtnl() says so, and the socket is locked.
 *
 * Most options just load an int into val and break; the common tail after
 * the switch copies that value to user space. A few options (IP_OPTIONS,
 * IP_MULTICAST_IF, IP_PKTOPTIONS, IP_MSFILTER, MCAST_MSFILTER) write their
 * own result and return or jump to the out label instead.
 *
 * Returns 0 on success, -ENOPROTOOPT for an unknown option, or another
 * negative errno.
 */
1524 static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1525                 char __user *optval, int __user *optlen)
1526 {
1527     struct inet_sock *inet = inet_sk(sk);
1528     bool needs_rtnl = getsockopt_needs_rtnl(optname);
1529     int val, err = 0;
1530     int len;
1531 
1532     if (level != SOL_IP)
1533         return -EOPNOTSUPP;
1534 
1535     if (ip_mroute_opt(optname))
1536         return ip_mroute_getsockopt(sk, optname, optval, optlen);
1537 
1538     if (get_user(len, optlen))
1539         return -EFAULT;
1540     if (len < 0)
1541         return -EINVAL;
1542 
         /* RTNL is taken before the socket lock and released after it. */
1543     if (needs_rtnl)
1544         rtnl_lock();
1545     lock_sock(sk);
1546 
1547     switch (optname) {
1548     case IP_OPTIONS:
1549     {
             /* Room for the option header plus up to 40 bytes of data. */
1550         unsigned char optbuf[sizeof(struct ip_options)+40];
1551         struct ip_options *opt = (struct ip_options *)optbuf;
1552         struct ip_options_rcu *inet_opt;
1553 
1554         inet_opt = rcu_dereference_protected(inet->inet_opt,
1555                              lockdep_sock_is_held(sk));
1556         opt->optlen = 0;
             /* Snapshot the options under the lock; user copies happen
              * only after the socket has been released.
              */
1557         if (inet_opt)
1558             memcpy(optbuf, &inet_opt->opt,
1559                    sizeof(struct ip_options) +
1560                    inet_opt->opt.optlen);
1561         release_sock(sk);
1562 
             /* No options set: report a zero length and nothing else. */
1563         if (opt->optlen == 0)
1564             return put_user(0, optlen);
1565 
1566         ip_options_undo(opt);
1567 
             /* Never copy more than the caller's buffer can take. */
1568         len = min_t(unsigned int, len, opt->optlen);
1569         if (put_user(len, optlen))
1570             return -EFAULT;
1571         if (copy_to_user(optval, opt->__data, len))
1572             return -EFAULT;
1573         return 0;
1574     }
         /* The cmsg_flags options below each report 1 if the flag is set,
          * 0 otherwise.
          */
1575     case IP_PKTINFO:
1576         val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
1577         break;
1578     case IP_RECVTTL:
1579         val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
1580         break;
1581     case IP_RECVTOS:
1582         val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
1583         break;
1584     case IP_RECVOPTS:
1585         val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
1586         break;
1587     case IP_RETOPTS:
1588         val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
1589         break;
1590     case IP_PASSSEC:
1591         val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
1592         break;
1593     case IP_RECVORIGDSTADDR:
1594         val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
1595         break;
1596     case IP_CHECKSUM:
1597         val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
1598         break;
1599     case IP_RECVFRAGSIZE:
1600         val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
1601         break;
1602     case IP_TOS:
1603         val = inet->tos;
1604         break;
1605     case IP_TTL:
1606     {
1607         struct net *net = sock_net(sk);
             /* uc_ttl == -1: report the namespace's default TTL sysctl. */
1608         val = (inet->uc_ttl == -1 ?
1609                READ_ONCE(net->ipv4.sysctl_ip_default_ttl) :
1610                inet->uc_ttl);
1611         break;
1612     }
1613     case IP_HDRINCL:
1614         val = inet->hdrincl;
1615         break;
1616     case IP_NODEFRAG:
1617         val = inet->nodefrag;
1618         break;
1619     case IP_BIND_ADDRESS_NO_PORT:
1620         val = inet->bind_address_no_port;
1621         break;
1622     case IP_MTU_DISCOVER:
1623         val = inet->pmtudisc;
1624         break;
1625     case IP_MTU:
1626     {
1627         struct dst_entry *dst;
1628         val = 0;
1629         dst = sk_dst_get(sk);
1630         if (dst) {
1631             val = dst_mtu(dst);
1632             dst_release(dst);
1633         }
             /* No dst on the socket, or an MTU of 0: -ENOTCONN. */
1634         if (!val) {
1635             release_sock(sk);
1636             return -ENOTCONN;
1637         }
1638         break;
1639     }
1640     case IP_RECVERR:
1641         val = inet->recverr;
1642         break;
1643     case IP_RECVERR_RFC4884:
1644         val = inet->recverr_rfc4884;
1645         break;
1646     case IP_MULTICAST_TTL:
1647         val = inet->mc_ttl;
1648         break;
1649     case IP_MULTICAST_LOOP:
1650         val = inet->mc_loop;
1651         break;
1652     case IP_UNICAST_IF:
             /* uc_index is passed through htonl() before being returned. */
1653         val = (__force int)htonl((__u32) inet->uc_index);
1654         break;
1655     case IP_MULTICAST_IF:
1656     {
             /* Returns mc_addr, truncated to the caller's buffer length. */
1657         struct in_addr addr;
1658         len = min_t(unsigned int, len, sizeof(struct in_addr));
1659         addr.s_addr = inet->mc_addr;
1660         release_sock(sk);
1661 
1662         if (put_user(len, optlen))
1663             return -EFAULT;
1664         if (copy_to_user(optval, &addr, len))
1665             return -EFAULT;
1666         return 0;
1667     }
1668     case IP_MSFILTER:
1669     {
1670         struct ip_msfilter msf;
1671 
             /* The buffer must hold at least a filter with no sources. */
1672         if (len < IP_MSFILTER_SIZE(0)) {
1673             err = -EINVAL;
1674             goto out;
1675         }
1676         if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
1677             err = -EFAULT;
1678             goto out;
1679         }
1680         err = ip_mc_msfget(sk, &msf,
1681                    (struct ip_msfilter __user *)optval, optlen);
             /* Leaves via out so that RTNL is released as well. */
1682         goto out;
1683     }
1684     case MCAST_MSFILTER:
1685         if (in_compat_syscall())
1686             err = compat_ip_get_mcast_msfilter(sk, optval, optlen,
1687                                len);
1688         else
1689             err = ip_get_mcast_msfilter(sk, optval, optlen, len);
             /* Leaves via out so that RTNL is released as well. */
1690         goto out;
1691     case IP_MULTICAST_ALL:
1692         val = inet->mc_all;
1693         break;
1694     case IP_PKTOPTIONS:
1695     {
1696         struct msghdr msg;
1697 
1698         release_sock(sk);
1699 
1700         if (sk->sk_type != SOCK_STREAM)
1701             return -ENOPROTOOPT;
1702 
             /* Use the caller's buffer as a user-space control area;
              * only the msghdr fields set here are initialised.
              */
1703         msg.msg_control_is_user = true;
1704         msg.msg_control_user = optval;
1705         msg.msg_controllen = len;
1706         msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0;
1707 
1708         if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
1709             struct in_pktinfo info;
1710 
1711             info.ipi_addr.s_addr = inet->inet_rcv_saddr;
1712             info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr;
1713             info.ipi_ifindex = inet->mc_index;
1714             put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
1715         }
1716         if (inet->cmsg_flags & IP_CMSG_TTL) {
1717             int hlim = inet->mc_ttl;
1718             put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
1719         }
1720         if (inet->cmsg_flags & IP_CMSG_TOS) {
1721             int tos = inet->rcv_tos;
1722             put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
1723         }
             /* NOTE(review): presumably put_cmsg() shrinks
              * msg_controllen as it writes, making this difference the
              * number of bytes produced - confirm against put_cmsg().
              */
1724         len -= msg.msg_controllen;
1725         return put_user(len, optlen);
1726     }
1727     case IP_FREEBIND:
1728         val = inet->freebind;
1729         break;
1730     case IP_TRANSPARENT:
1731         val = inet->transparent;
1732         break;
1733     case IP_MINTTL:
1734         val = inet->min_ttl;
1735         break;
1736     default:
1737         release_sock(sk);
1738         return -ENOPROTOOPT;
1739     }
         /* Common tail for the options that only set val.
          * NOTE(review): this path and the direct returns inside the
          * switch never call rtnl_unlock(); the two options for which
          * getsockopt_needs_rtnl() returns true (IP_MSFILTER and
          * MCAST_MSFILTER) both leave through the out label instead.
          */
1740     release_sock(sk);
1741 
         /* A non-empty buffer shorter than an int gets a single byte,
          * provided the value fits in 0..255; otherwise up to sizeof(int)
          * bytes of val are copied, limited by the caller's length.
          */
1742     if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
1743         unsigned char ucval = (unsigned char)val;
1744         len = 1;
1745         if (put_user(len, optlen))
1746             return -EFAULT;
1747         if (copy_to_user(optval, &ucval, 1))
1748             return -EFAULT;
1749     } else {
1750         len = min_t(unsigned int, sizeof(int), len);
1751         if (put_user(len, optlen))
1752             return -EFAULT;
1753         if (copy_to_user(optval, &val, len))
1754             return -EFAULT;
1755     }
1756     return 0;
1757 
     /* Exit path that undoes both locks, in reverse order of acquisition. */
1758 out:
1759     release_sock(sk);
1760     if (needs_rtnl)
1761         rtnl_unlock();
1762     return err;
1763 }
1764 
1765 int ip_getsockopt(struct sock *sk, int level,
1766           int optname, char __user *optval, int __user *optlen)
1767 {
1768     int err;
1769 
1770     err = do_ip_getsockopt(sk, level, optname, optval, optlen);
1771 
1772 #if IS_ENABLED(CONFIG_BPFILTER_UMH)
1773     if (optname >= BPFILTER_IPT_SO_GET_INFO &&
1774         optname < BPFILTER_IPT_GET_MAX)
1775         err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
1776 #endif
1777 #ifdef CONFIG_NETFILTER
1778     /* we need to exclude all possible ENOPROTOOPTs except default case */
1779     if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
1780             !ip_mroute_opt(optname)) {
1781         int len;
1782 
1783         if (get_user(len, optlen))
1784             return -EFAULT;
1785 
1786         err = nf_getsockopt(sk, PF_INET, optname, optval, &len);
1787         if (err >= 0)
1788             err = put_user(len, optlen);
1789         return err;
1790     }
1791 #endif
1792     return err;
1793 }
1794 EXPORT_SYMBOL(ip_getsockopt);