0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064 #define pr_fmt(fmt) "IPv4: " fmt
0065
0066 #include <linux/err.h>
0067 #include <linux/errno.h>
0068 #include <linux/types.h>
0069 #include <linux/socket.h>
0070 #include <linux/in.h>
0071 #include <linux/kernel.h>
0072 #include <linux/kmod.h>
0073 #include <linux/sched.h>
0074 #include <linux/timer.h>
0075 #include <linux/string.h>
0076 #include <linux/sockios.h>
0077 #include <linux/net.h>
0078 #include <linux/capability.h>
0079 #include <linux/fcntl.h>
0080 #include <linux/mm.h>
0081 #include <linux/interrupt.h>
0082 #include <linux/stat.h>
0083 #include <linux/init.h>
0084 #include <linux/poll.h>
0085 #include <linux/netfilter_ipv4.h>
0086 #include <linux/random.h>
0087 #include <linux/slab.h>
0088
0089 #include <linux/uaccess.h>
0090
0091 #include <linux/inet.h>
0092 #include <linux/igmp.h>
0093 #include <linux/inetdevice.h>
0094 #include <linux/netdevice.h>
0095 #include <net/checksum.h>
0096 #include <net/ip.h>
0097 #include <net/protocol.h>
0098 #include <net/arp.h>
0099 #include <net/route.h>
0100 #include <net/ip_fib.h>
0101 #include <net/inet_connection_sock.h>
0102 #include <net/gro.h>
0103 #include <net/tcp.h>
0104 #include <net/udp.h>
0105 #include <net/udplite.h>
0106 #include <net/ping.h>
0107 #include <linux/skbuff.h>
0108 #include <net/sock.h>
0109 #include <net/raw.h>
0110 #include <net/icmp.h>
0111 #include <net/inet_common.h>
0112 #include <net/ip_tunnels.h>
0113 #include <net/xfrm.h>
0114 #include <net/net_namespace.h>
0115 #include <net/secure_seq.h>
0116 #ifdef CONFIG_IP_MROUTE
0117 #include <linux/mroute.h>
0118 #endif
0119 #include <net/l3mdev.h>
0120 #include <net/compat.h>
0121
0122 #include <trace/events/sock.h>
0123
0124
0125
0126
/* One list of registered struct inet_protosw entries per socket type.
 * inet_create() walks the list for the requested type under RCU and
 * inet_register_protosw() / inet_unregister_protosw() modify it.
 */
static struct list_head inetsw[SOCK_MAX];
/* Taken (with BHs disabled) by the register/unregister paths while they
 * modify an inetsw list.
 */
static DEFINE_SPINLOCK(inetsw_lock);
0129
0130
0131
0132 void inet_sock_destruct(struct sock *sk)
0133 {
0134 struct inet_sock *inet = inet_sk(sk);
0135
0136 __skb_queue_purge(&sk->sk_receive_queue);
0137 __skb_queue_purge(&sk->sk_error_queue);
0138
0139 sk_mem_reclaim_final(sk);
0140
0141 if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
0142 pr_err("Attempt to release TCP socket in state %d %p\n",
0143 sk->sk_state, sk);
0144 return;
0145 }
0146 if (!sock_flag(sk, SOCK_DEAD)) {
0147 pr_err("Attempt to release alive inet socket %p\n", sk);
0148 return;
0149 }
0150
0151 WARN_ON_ONCE(atomic_read(&sk->sk_rmem_alloc));
0152 WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
0153 WARN_ON_ONCE(sk->sk_wmem_queued);
0154 WARN_ON_ONCE(sk_forward_alloc_get(sk));
0155
0156 kfree(rcu_dereference_protected(inet->inet_opt, 1));
0157 dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
0158 dst_release(rcu_dereference_protected(sk->sk_rx_dst, 1));
0159 sk_refcnt_debug_dec(sk);
0160 }
0161 EXPORT_SYMBOL(inet_sock_destruct);
0162
0163
0164
0165
0166
0167
0168
0169
0170
0171
0172
0173 static int inet_autobind(struct sock *sk)
0174 {
0175 struct inet_sock *inet;
0176
0177 lock_sock(sk);
0178 inet = inet_sk(sk);
0179 if (!inet->inet_num) {
0180 if (sk->sk_prot->get_port(sk, 0)) {
0181 release_sock(sk);
0182 return -EAGAIN;
0183 }
0184 inet->inet_sport = htons(inet->inet_num);
0185 }
0186 release_sock(sk);
0187 return 0;
0188 }
0189
0190
0191
0192
0193 int inet_listen(struct socket *sock, int backlog)
0194 {
0195 struct sock *sk = sock->sk;
0196 unsigned char old_state;
0197 int err, tcp_fastopen;
0198
0199 lock_sock(sk);
0200
0201 err = -EINVAL;
0202 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
0203 goto out;
0204
0205 old_state = sk->sk_state;
0206 if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
0207 goto out;
0208
0209 WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
0210
0211
0212
0213 if (old_state != TCP_LISTEN) {
0214
0215
0216
0217
0218
0219
0220 tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
0221 if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
0222 (tcp_fastopen & TFO_SERVER_ENABLE) &&
0223 !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
0224 fastopen_queue_tune(sk, backlog);
0225 tcp_fastopen_init_key_once(sock_net(sk));
0226 }
0227
0228 err = inet_csk_listen_start(sk);
0229 if (err)
0230 goto out;
0231 tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL);
0232 }
0233 err = 0;
0234
0235 out:
0236 release_sock(sk);
0237 return err;
0238 }
0239 EXPORT_SYMBOL(inet_listen);
0240
0241
0242
0243
0244
/*
 * Create an AF_INET socket (the .create hook of inet_family_ops).
 *
 * @net:      network namespace the socket belongs to
 * @sock:     socket being set up; sock->type selects the inetsw list
 * @protocol: requested protocol; IPPROTO_IP (0) acts as a wildcard
 * @kern:     non-zero for kernel-internal sockets (skips the CAP_NET_RAW
 *            check and the cgroup BPF hook)
 *
 * Returns 0 on success or a negative errno: -EINVAL for an out-of-range
 * protocol, -ESOCKTNOSUPPORT / -EPROTONOSUPPORT when no matching protosw
 * entry is found, -EPERM for unprivileged raw sockets, -ENOMEM when
 * sk_alloc() fails, or whatever hash()/init()/the BPF hook return.
 */
static int inet_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	struct sock *sk;
	struct inet_protosw *answer;
	struct inet_sock *inet;
	struct proto *answer_prot;
	unsigned char answer_flags;
	int try_loading_module = 0;
	int err;

	if (protocol < 0 || protocol >= IPPROTO_MAX)
		return -EINVAL;

	sock->state = SS_UNCONNECTED;

	/* Look up the requested type/protocol pair. */
lookup_protocol:
	/* Stays -ESOCKTNOSUPPORT only if the list for this type is empty. */
	err = -ESOCKTNOSUPPORT;
	rcu_read_lock();
	list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {

		err = 0;
		/* Exact match on a non-wildcard protocol. */
		if (protocol == answer->protocol) {
			if (protocol != IPPROTO_IP)
				break;
		} else {
			/* Wildcard request: adopt the entry's protocol. */
			if (IPPROTO_IP == protocol) {
				protocol = answer->protocol;
				break;
			}
			/* Wildcard entry: accepts any requested protocol. */
			if (IPPROTO_IP == answer->protocol)
				break;
		}
		/* No match on this entry; this is the result if none match. */
		err = -EPROTONOSUPPORT;
	}

	if (unlikely(err)) {
		if (try_loading_module < 2) {
			rcu_read_unlock();
			/* First attempt: request the most specific module
			 * alias (family, protocol and socket type).
			 */
			if (++try_loading_module == 1)
				request_module("net-pf-%d-proto-%d-type-%d",
					       PF_INET, protocol, sock->type);
			/* Second attempt: fall back to the alias without
			 * the socket type.
			 */
			else
				request_module("net-pf-%d-proto-%d",
					       PF_INET, protocol);
			goto lookup_protocol;
		} else
			goto out_rcu_unlock;
	}

	/* Raw sockets need CAP_NET_RAW unless created by the kernel. */
	err = -EPERM;
	if (sock->type == SOCK_RAW && !kern &&
	    !ns_capable(net->user_ns, CAP_NET_RAW))
		goto out_rcu_unlock;

	/* Copy what is needed from the entry before leaving the RCU section. */
	sock->ops = answer->ops;
	answer_prot = answer->prot;
	answer_flags = answer->flags;
	rcu_read_unlock();

	WARN_ON(!answer_prot->slab);

	err = -ENOMEM;
	sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);
	if (!sk)
		goto out;

	err = 0;
	if (INET_PROTOSW_REUSE & answer_flags)
		sk->sk_reuse = SK_CAN_REUSE;

	inet = inet_sk(sk);
	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;

	inet->nodefrag = 0;

	/* For raw sockets the protocol number is stored in inet_num. */
	if (SOCK_RAW == sock->type) {
		inet->inet_num = protocol;
		if (IPPROTO_RAW == protocol)
			inet->hdrincl = 1;
	}

	/* Initial path-MTU discovery mode follows the per-netns sysctl. */
	if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
		inet->pmtudisc = IP_PMTUDISC_DONT;
	else
		inet->pmtudisc = IP_PMTUDISC_WANT;

	inet->inet_id = 0;

	sock_init_data(sock, sk);

	sk->sk_destruct	   = inet_sock_destruct;
	sk->sk_protocol	   = protocol;
	sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;

	/* Unicast TTL of -1 and multicast defaults. */
	inet->uc_ttl	= -1;
	inet->mc_loop	= 1;
	inet->mc_ttl	= 1;
	inet->mc_all	= 1;
	inet->mc_index	= 0;
	inet->mc_list	= NULL;
	inet->rcv_tos	= 0;

	sk_refcnt_debug_inc(sk);

	if (inet->inet_num) {
		/* inet_num is already set (the raw socket case above), so
		 * mirror it into inet_sport and hash the socket right away.
		 */
		inet->inet_sport = htons(inet->inet_num);
		/* Add to protocol hash chains. */
		err = sk->sk_prot->hash(sk);
		if (err) {
			sk_common_release(sk);
			goto out;
		}
	}

	/* Protocol-specific initialisation, if the protocol provides one. */
	if (sk->sk_prot->init) {
		err = sk->sk_prot->init(sk);
		if (err) {
			sk_common_release(sk);
			goto out;
		}
	}

	/* The cgroup BPF hook only runs for non-kernel sockets. */
	if (!kern) {
		err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
		if (err) {
			sk_common_release(sk);
			goto out;
		}
	}
out:
	return err;
out_rcu_unlock:
	rcu_read_unlock();
	goto out;
}
0397
0398
0399
0400
0401
0402
0403
0404 int inet_release(struct socket *sock)
0405 {
0406 struct sock *sk = sock->sk;
0407
0408 if (sk) {
0409 long timeout;
0410
0411 if (!sk->sk_kern_sock)
0412 BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk);
0413
0414
0415 ip_mc_drop_socket(sk);
0416
0417
0418
0419
0420
0421
0422
0423
0424 timeout = 0;
0425 if (sock_flag(sk, SOCK_LINGER) &&
0426 !(current->flags & PF_EXITING))
0427 timeout = sk->sk_lingertime;
0428 sk->sk_prot->close(sk, timeout);
0429 sock->sk = NULL;
0430 }
0431 return 0;
0432 }
0433 EXPORT_SYMBOL(inet_release);
0434
0435 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
0436 {
0437 struct sock *sk = sock->sk;
0438 u32 flags = BIND_WITH_LOCK;
0439 int err;
0440
0441
0442 if (sk->sk_prot->bind) {
0443 return sk->sk_prot->bind(sk, uaddr, addr_len);
0444 }
0445 if (addr_len < sizeof(struct sockaddr_in))
0446 return -EINVAL;
0447
0448
0449
0450
0451 err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
0452 CGROUP_INET4_BIND, &flags);
0453 if (err)
0454 return err;
0455
0456 return __inet_bind(sk, uaddr, addr_len, flags);
0457 }
0458 EXPORT_SYMBOL(inet_bind);
0459
/*
 * Core of bind() for AF_INET sockets.
 *
 * @sk:       socket to bind
 * @uaddr:    address to bind to, interpreted as struct sockaddr_in
 * @addr_len: not examined here; inet_bind() validates it before calling
 * @flags:    BIND_* bits: BIND_WITH_LOCK makes this function take and
 *            release the socket lock itself, BIND_NO_CAP_NET_BIND_SERVICE
 *            skips the capability check, BIND_FORCE_ADDRESS_NO_PORT defers
 *            port allocation when no port was given, BIND_FROM_BPF skips
 *            the post-bind cgroup BPF hook
 *
 * Returns 0 on success, or -EAFNOSUPPORT, -EADDRNOTAVAIL, -EACCES,
 * -EINVAL, -EADDRINUSE, or the error from the post-bind BPF hook.
 */
int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
		u32 flags)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
	unsigned short snum;
	int chk_addr_ret;
	u32 tb_id = RT_TABLE_LOCAL;
	int err;

	if (addr->sin_family != AF_INET) {
		/* The only other family accepted is AF_UNSPEC, and only
		 * together with INADDR_ANY.
		 */
		err = -EAFNOSUPPORT;
		if (addr->sin_family != AF_UNSPEC ||
		    addr->sin_addr.s_addr != htonl(INADDR_ANY))
			goto out;
	}

	/* Classify the address in the l3mdev table of the bound device if
	 * there is one, otherwise in the local table.
	 */
	tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
	chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);

	/* Reject addresses that inet_addr_valid_or_nonlocal() does not
	 * accept for this socket.
	 */
	err = -EADDRNOTAVAIL;
	if (!inet_addr_valid_or_nonlocal(net, inet, addr->sin_addr.s_addr,
	                                 chk_addr_ret))
		goto out;

	snum = ntohs(addr->sin_port);
	err = -EACCES;
	/* Ports that require it need CAP_NET_BIND_SERVICE unless waived. */
	if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
	    snum && inet_port_requires_bind_service(net, snum) &&
	    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
		goto out;

	/* Everything below modifies socket state, so take the socket lock
	 * here unless the caller indicated (by omitting BIND_WITH_LOCK)
	 * that locking is handled elsewhere.
	 */
	if (flags & BIND_WITH_LOCK)
		lock_sock(sk);

	/* Check these errors (active socket, double bind). */
	err = -EINVAL;
	if (sk->sk_state != TCP_CLOSE || inet->inet_num)
		goto out_release_sock;

	inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
	/* Multicast/broadcast addresses are kept as the receive address
	 * only; the source address is cleared.
	 */
	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
		inet->inet_saddr = 0;

	/* Allocate the port now unless no port was given and port
	 * allocation is being deferred (bind_address_no_port or
	 * BIND_FORCE_ADDRESS_NO_PORT).
	 */
	if (snum || !(inet->bind_address_no_port ||
		      (flags & BIND_FORCE_ADDRESS_NO_PORT))) {
		if (sk->sk_prot->get_port(sk, snum)) {
			inet->inet_saddr = inet->inet_rcv_saddr = 0;
			err = -EADDRINUSE;
			goto out_release_sock;
		}
		if (!(flags & BIND_FROM_BPF)) {
			err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
			if (err) {
				/* Undo the address and port assignment. */
				inet->inet_saddr = inet->inet_rcv_saddr = 0;
				if (sk->sk_prot->put_port)
					sk->sk_prot->put_port(sk);
				goto out_release_sock;
			}
		}
	}

	/* Record which parts of the local endpoint were fixed by the user. */
	if (inet->inet_rcv_saddr)
		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
	if (snum)
		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
	inet->inet_sport = htons(inet->inet_num);
	/* A freshly bound socket has no peer and no cached route. */
	inet->inet_daddr = 0;
	inet->inet_dport = 0;
	sk_dst_reset(sk);
	err = 0;
out_release_sock:
	if (flags & BIND_WITH_LOCK)
		release_sock(sk);
out:
	return err;
}
0556
0557 int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
0558 int addr_len, int flags)
0559 {
0560 struct sock *sk = sock->sk;
0561 int err;
0562
0563 if (addr_len < sizeof(uaddr->sa_family))
0564 return -EINVAL;
0565 if (uaddr->sa_family == AF_UNSPEC)
0566 return sk->sk_prot->disconnect(sk, flags);
0567
0568 if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
0569 err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
0570 if (err)
0571 return err;
0572 }
0573
0574 if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk))
0575 return -EAGAIN;
0576 return sk->sk_prot->connect(sk, uaddr, addr_len);
0577 }
0578 EXPORT_SYMBOL(inet_dgram_connect);
0579
/*
 * Sleep until the socket leaves the SYN_SENT/SYN_RECV states, a signal is
 * pending, or the timeout runs out.
 *
 * @sk:        socket being connected; the socket lock is released around
 *             each sleep and re-taken before the state is re-checked, so
 *             the caller is expected to hold it on entry
 * @timeo:     timeout handed to wait_woken()
 * @writebias: amount added to sk_write_pending while waiting and removed
 *             again before returning
 *
 * Returns the timeout value last returned by wait_woken() (or @timeo
 * unchanged if no sleep was needed).
 */
static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	add_wait_queue(sk_sleep(sk), &wait);
	sk->sk_write_pending += writebias;

	/* Loop while the handshake is still in progress.  The state test is
	 * done with the socket lock held; the lock is dropped only for the
	 * duration of the sleep.
	 */
	while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		release_sock(sk);
		timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
		lock_sock(sk);
		/* Stop on a pending signal or when no time is left. */
		if (signal_pending(current) || !timeo)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	sk->sk_write_pending -= writebias;
	return timeo;
}
0603
0604
0605
0606
0607
/*
 * Connect a stream socket; lock-free variant whose caller handles the
 * socket lock (inet_stream_connect() wraps this in lock_sock() /
 * release_sock()).
 *
 * @sock:       socket to connect
 * @uaddr:      peer address, may be NULL; AF_UNSPEC requests a disconnect
 * @addr_len:   length of @uaddr
 * @flags:      O_NONBLOCK selects a zero send timeout; also passed to
 *              the protocol's disconnect() callback
 * @is_sendmsg: non-zero when called on behalf of sendmsg; only affects the
 *              error returned for a deferred connect already in progress
 *
 * Returns 0 once connected, or a negative errno such as -EINVAL,
 * -EISCONN, -EALREADY, -EINPROGRESS, a signal-related error from
 * sock_intr_errno(), the socket error, or -ECONNABORTED.
 */
int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			  int addr_len, int flags, int is_sendmsg)
{
	struct sock *sk = sock->sk;
	int err;
	long timeo;

	/* uaddr may be NULL; in that case the AF_UNSPEC disconnect handling
	 * below is skipped and only the state machine runs.
	 */
	if (uaddr) {
		if (addr_len < sizeof(uaddr->sa_family))
			return -EINVAL;

		if (uaddr->sa_family == AF_UNSPEC) {
			err = sk->sk_prot->disconnect(sk, flags);
			sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
			goto out;
		}
	}

	switch (sock->state) {
	default:
		err = -EINVAL;
		goto out;
	case SS_CONNECTED:
		err = -EISCONN;
		goto out;
	case SS_CONNECTING:
		/* A connect is already in flight. */
		if (inet_sk(sk)->defer_connect)
			err = is_sendmsg ? -EINPROGRESS : -EISCONN;
		else
			err = -EALREADY;
		/* Fall out of switch with err, set for this state */
		break;
	case SS_UNCONNECTED:
		err = -EISCONN;
		if (sk->sk_state != TCP_CLOSE)
			goto out;

		if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
			err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
			if (err)
				goto out;
		}

		err = sk->sk_prot->connect(sk, uaddr, addr_len);
		if (err < 0)
			goto out;

		sock->state = SS_CONNECTING;

		/* Deferred connect: report success without waiting. */
		if (!err && inet_sk(sk)->defer_connect)
			goto out;

		/* The connect has only been started; -EINPROGRESS is what
		 * the caller sees if we do not wait for completion below.
		 */
		err = -EINPROGRESS;
		break;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		/* Bias sk_write_pending while waiting if TCP fastopen data
		 * is attached to the connect request.
		 */
		int writebias = (sk->sk_protocol == IPPROTO_TCP) &&
				tcp_sk(sk)->fastopen_req &&
				tcp_sk(sk)->fastopen_req->data ? 1 : 0;

		/* Error code is set above */
		if (!timeo || !inet_wait_for_connect(sk, timeo, writebias))
			goto out;

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
	}

	/* The handshake is over; a socket back in TCP_CLOSE means the
	 * connection attempt failed.
	 */
	if (sk->sk_state == TCP_CLOSE)
		goto sock_error;

	/* Any other state at this point is treated as connected. */
	sock->state = SS_CONNECTED;
	err = 0;
out:
	return err;

sock_error:
	/* Report the pending socket error, or -ECONNABORTED if none. */
	err = sock_error(sk) ? : -ECONNABORTED;
	sock->state = SS_UNCONNECTED;
	if (sk->sk_prot->disconnect(sk, flags))
		sock->state = SS_DISCONNECTING;
	goto out;
}
EXPORT_SYMBOL(__inet_stream_connect);
0717
0718 int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
0719 int addr_len, int flags)
0720 {
0721 int err;
0722
0723 lock_sock(sock->sk);
0724 err = __inet_stream_connect(sock, uaddr, addr_len, flags, 0);
0725 release_sock(sock->sk);
0726 return err;
0727 }
0728 EXPORT_SYMBOL(inet_stream_connect);
0729
0730
0731
0732
0733
0734 int inet_accept(struct socket *sock, struct socket *newsock, int flags,
0735 bool kern)
0736 {
0737 struct sock *sk1 = sock->sk;
0738 int err = -EINVAL;
0739 struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err, kern);
0740
0741 if (!sk2)
0742 goto do_err;
0743
0744 lock_sock(sk2);
0745
0746 sock_rps_record_flow(sk2);
0747 WARN_ON(!((1 << sk2->sk_state) &
0748 (TCPF_ESTABLISHED | TCPF_SYN_RECV |
0749 TCPF_CLOSE_WAIT | TCPF_CLOSE)));
0750
0751 sock_graft(sk2, newsock);
0752
0753 newsock->state = SS_CONNECTED;
0754 err = 0;
0755 release_sock(sk2);
0756 do_err:
0757 return err;
0758 }
0759 EXPORT_SYMBOL(inet_accept);
0760
0761
0762
0763
0764 int inet_getname(struct socket *sock, struct sockaddr *uaddr,
0765 int peer)
0766 {
0767 struct sock *sk = sock->sk;
0768 struct inet_sock *inet = inet_sk(sk);
0769 DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr);
0770
0771 sin->sin_family = AF_INET;
0772 lock_sock(sk);
0773 if (peer) {
0774 if (!inet->inet_dport ||
0775 (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
0776 peer == 1)) {
0777 release_sock(sk);
0778 return -ENOTCONN;
0779 }
0780 sin->sin_port = inet->inet_dport;
0781 sin->sin_addr.s_addr = inet->inet_daddr;
0782 BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
0783 CGROUP_INET4_GETPEERNAME);
0784 } else {
0785 __be32 addr = inet->inet_rcv_saddr;
0786 if (!addr)
0787 addr = inet->inet_saddr;
0788 sin->sin_port = inet->inet_sport;
0789 sin->sin_addr.s_addr = addr;
0790 BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
0791 CGROUP_INET4_GETSOCKNAME);
0792 }
0793 release_sock(sk);
0794 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
0795 return sizeof(*sin);
0796 }
0797 EXPORT_SYMBOL(inet_getname);
0798
0799 int inet_send_prepare(struct sock *sk)
0800 {
0801 sock_rps_record_flow(sk);
0802
0803
0804 if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind &&
0805 inet_autobind(sk))
0806 return -EAGAIN;
0807
0808 return 0;
0809 }
0810 EXPORT_SYMBOL_GPL(inet_send_prepare);
0811
0812 int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
0813 {
0814 struct sock *sk = sock->sk;
0815
0816 if (unlikely(inet_send_prepare(sk)))
0817 return -EAGAIN;
0818
0819 return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udp_sendmsg,
0820 sk, msg, size);
0821 }
0822 EXPORT_SYMBOL(inet_sendmsg);
0823
0824 ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
0825 size_t size, int flags)
0826 {
0827 struct sock *sk = sock->sk;
0828
0829 if (unlikely(inet_send_prepare(sk)))
0830 return -EAGAIN;
0831
0832 if (sk->sk_prot->sendpage)
0833 return sk->sk_prot->sendpage(sk, page, offset, size, flags);
0834 return sock_no_sendpage(sock, page, offset, size, flags);
0835 }
0836 EXPORT_SYMBOL(inet_sendpage);
0837
0838 INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *,
0839 size_t, int, int *));
0840 int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
0841 int flags)
0842 {
0843 struct sock *sk = sock->sk;
0844 int addr_len = 0;
0845 int err;
0846
0847 if (likely(!(flags & MSG_ERRQUEUE)))
0848 sock_rps_record_flow(sk);
0849
0850 err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg,
0851 sk, msg, size, flags, &addr_len);
0852 if (err >= 0)
0853 msg->msg_namelen = addr_len;
0854 return err;
0855 }
0856 EXPORT_SYMBOL(inet_recvmsg);
0857
/*
 * shutdown() for AF_INET sockets.
 *
 * @sock: socket to shut down
 * @how:  direction selector as passed by the caller; it is incremented by
 *        one before use so that it can be checked against, and OR-ed into
 *        sk_shutdown as, SHUTDOWN_MASK bits
 *
 * Returns 0 on success, -EINVAL for an invalid @how, -ENOTCONN when the
 * socket is in TCP_CLOSE (the shutdown bits are still recorded in that
 * case), or the error from the protocol's disconnect() callback.
 */
int inet_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	int err = 0;

	/* Convert the caller's value into a bitmask: after the increment
	 * each direction is one bit of SHUTDOWN_MASK.
	 */
	how++;

	/* Reject bits outside the mask, and a result of zero. */
	if ((how & ~SHUTDOWN_MASK) || !how)
		return -EINVAL;

	lock_sock(sk);
	/* Resolve a pending SS_CONNECTING state from the sock state. */
	if (sock->state == SS_CONNECTING) {
		if ((1 << sk->sk_state) &
		    (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
			sock->state = SS_DISCONNECTING;
		else
			sock->state = SS_CONNECTED;
	}

	switch (sk->sk_state) {
	case TCP_CLOSE:
		err = -ENOTCONN;
		/* The error is reported, but the shutdown bits are still
		 * recorded by the default case below.
		 */
		fallthrough;
	default:
		sk->sk_shutdown |= how;
		if (sk->sk_prot->shutdown)
			sk->sk_prot->shutdown(sk, how);
		break;

	/* A listening socket is only affected when the receive side is
	 * shut down; it is then disconnected like a socket in SYN_SENT.
	 */
	case TCP_LISTEN:
		if (!(how & RCV_SHUTDOWN))
			break;
		fallthrough;
	case TCP_SYN_SENT:
		err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
		break;
	}

	/* Notify waiters of the state change in every case. */
	sk->sk_state_change(sk);
	release_sock(sk);
	return err;
}
EXPORT_SYMBOL(inet_shutdown);
0913
0914
0915
0916
0917
0918
0919
0920
0921
0922
0923
0924 int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
0925 {
0926 struct sock *sk = sock->sk;
0927 int err = 0;
0928 struct net *net = sock_net(sk);
0929 void __user *p = (void __user *)arg;
0930 struct ifreq ifr;
0931 struct rtentry rt;
0932
0933 switch (cmd) {
0934 case SIOCADDRT:
0935 case SIOCDELRT:
0936 if (copy_from_user(&rt, p, sizeof(struct rtentry)))
0937 return -EFAULT;
0938 err = ip_rt_ioctl(net, cmd, &rt);
0939 break;
0940 case SIOCRTMSG:
0941 err = -EINVAL;
0942 break;
0943 case SIOCDARP:
0944 case SIOCGARP:
0945 case SIOCSARP:
0946 err = arp_ioctl(net, cmd, (void __user *)arg);
0947 break;
0948 case SIOCGIFADDR:
0949 case SIOCGIFBRDADDR:
0950 case SIOCGIFNETMASK:
0951 case SIOCGIFDSTADDR:
0952 case SIOCGIFPFLAGS:
0953 if (get_user_ifreq(&ifr, NULL, p))
0954 return -EFAULT;
0955 err = devinet_ioctl(net, cmd, &ifr);
0956 if (!err && put_user_ifreq(&ifr, p))
0957 err = -EFAULT;
0958 break;
0959
0960 case SIOCSIFADDR:
0961 case SIOCSIFBRDADDR:
0962 case SIOCSIFNETMASK:
0963 case SIOCSIFDSTADDR:
0964 case SIOCSIFPFLAGS:
0965 case SIOCSIFFLAGS:
0966 if (get_user_ifreq(&ifr, NULL, p))
0967 return -EFAULT;
0968 err = devinet_ioctl(net, cmd, &ifr);
0969 break;
0970 default:
0971 if (sk->sk_prot->ioctl)
0972 err = sk->sk_prot->ioctl(sk, cmd, arg);
0973 else
0974 err = -ENOIOCTLCMD;
0975 break;
0976 }
0977 return err;
0978 }
0979 EXPORT_SYMBOL(inet_ioctl);
0980
0981 #ifdef CONFIG_COMPAT
0982 static int inet_compat_routing_ioctl(struct sock *sk, unsigned int cmd,
0983 struct compat_rtentry __user *ur)
0984 {
0985 compat_uptr_t rtdev;
0986 struct rtentry rt;
0987
0988 if (copy_from_user(&rt.rt_dst, &ur->rt_dst,
0989 3 * sizeof(struct sockaddr)) ||
0990 get_user(rt.rt_flags, &ur->rt_flags) ||
0991 get_user(rt.rt_metric, &ur->rt_metric) ||
0992 get_user(rt.rt_mtu, &ur->rt_mtu) ||
0993 get_user(rt.rt_window, &ur->rt_window) ||
0994 get_user(rt.rt_irtt, &ur->rt_irtt) ||
0995 get_user(rtdev, &ur->rt_dev))
0996 return -EFAULT;
0997
0998 rt.rt_dev = compat_ptr(rtdev);
0999 return ip_rt_ioctl(sock_net(sk), cmd, &rt);
1000 }
1001
1002 static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1003 {
1004 void __user *argp = compat_ptr(arg);
1005 struct sock *sk = sock->sk;
1006
1007 switch (cmd) {
1008 case SIOCADDRT:
1009 case SIOCDELRT:
1010 return inet_compat_routing_ioctl(sk, cmd, argp);
1011 default:
1012 if (!sk->sk_prot->compat_ioctl)
1013 return -ENOIOCTLCMD;
1014 return sk->sk_prot->compat_ioctl(sk, cmd, arg);
1015 }
1016 }
1017 #endif
1018
1019 const struct proto_ops inet_stream_ops = {
1020 .family = PF_INET,
1021 .owner = THIS_MODULE,
1022 .release = inet_release,
1023 .bind = inet_bind,
1024 .connect = inet_stream_connect,
1025 .socketpair = sock_no_socketpair,
1026 .accept = inet_accept,
1027 .getname = inet_getname,
1028 .poll = tcp_poll,
1029 .ioctl = inet_ioctl,
1030 .gettstamp = sock_gettstamp,
1031 .listen = inet_listen,
1032 .shutdown = inet_shutdown,
1033 .setsockopt = sock_common_setsockopt,
1034 .getsockopt = sock_common_getsockopt,
1035 .sendmsg = inet_sendmsg,
1036 .recvmsg = inet_recvmsg,
1037 #ifdef CONFIG_MMU
1038 .mmap = tcp_mmap,
1039 #endif
1040 .sendpage = inet_sendpage,
1041 .splice_read = tcp_splice_read,
1042 .read_sock = tcp_read_sock,
1043 .read_skb = tcp_read_skb,
1044 .sendmsg_locked = tcp_sendmsg_locked,
1045 .sendpage_locked = tcp_sendpage_locked,
1046 .peek_len = tcp_peek_len,
1047 #ifdef CONFIG_COMPAT
1048 .compat_ioctl = inet_compat_ioctl,
1049 #endif
1050 .set_rcvlowat = tcp_set_rcvlowat,
1051 };
1052 EXPORT_SYMBOL(inet_stream_ops);
1053
1054 const struct proto_ops inet_dgram_ops = {
1055 .family = PF_INET,
1056 .owner = THIS_MODULE,
1057 .release = inet_release,
1058 .bind = inet_bind,
1059 .connect = inet_dgram_connect,
1060 .socketpair = sock_no_socketpair,
1061 .accept = sock_no_accept,
1062 .getname = inet_getname,
1063 .poll = udp_poll,
1064 .ioctl = inet_ioctl,
1065 .gettstamp = sock_gettstamp,
1066 .listen = sock_no_listen,
1067 .shutdown = inet_shutdown,
1068 .setsockopt = sock_common_setsockopt,
1069 .getsockopt = sock_common_getsockopt,
1070 .sendmsg = inet_sendmsg,
1071 .read_skb = udp_read_skb,
1072 .recvmsg = inet_recvmsg,
1073 .mmap = sock_no_mmap,
1074 .sendpage = inet_sendpage,
1075 .set_peek_off = sk_set_peek_off,
1076 #ifdef CONFIG_COMPAT
1077 .compat_ioctl = inet_compat_ioctl,
1078 #endif
1079 };
1080 EXPORT_SYMBOL(inet_dgram_ops);
1081
1082
1083
1084
1085
1086 static const struct proto_ops inet_sockraw_ops = {
1087 .family = PF_INET,
1088 .owner = THIS_MODULE,
1089 .release = inet_release,
1090 .bind = inet_bind,
1091 .connect = inet_dgram_connect,
1092 .socketpair = sock_no_socketpair,
1093 .accept = sock_no_accept,
1094 .getname = inet_getname,
1095 .poll = datagram_poll,
1096 .ioctl = inet_ioctl,
1097 .gettstamp = sock_gettstamp,
1098 .listen = sock_no_listen,
1099 .shutdown = inet_shutdown,
1100 .setsockopt = sock_common_setsockopt,
1101 .getsockopt = sock_common_getsockopt,
1102 .sendmsg = inet_sendmsg,
1103 .recvmsg = inet_recvmsg,
1104 .mmap = sock_no_mmap,
1105 .sendpage = inet_sendpage,
1106 #ifdef CONFIG_COMPAT
1107 .compat_ioctl = inet_compat_ioctl,
1108 #endif
1109 };
1110
1111 static const struct net_proto_family inet_family_ops = {
1112 .family = PF_INET,
1113 .create = inet_create,
1114 .owner = THIS_MODULE,
1115 };
1116
1117
1118
1119
/* Built-in socket type/protocol table.  Each entry ties a socket type and
 * protocol number to the struct proto and proto_ops that implement it.
 * NOTE(review): the registration site is outside this view; the element
 * order is presumably significant for inet_create()'s wildcard matching,
 * so do not reorder without checking.
 */
static struct inet_protosw inetsw_array[] =
{
	/* TCP: permanent, connection-oriented (ICSK) stream sockets. */
	{
		.type =       SOCK_STREAM,
		.protocol =   IPPROTO_TCP,
		.prot =       &tcp_prot,
		.ops =        &inet_stream_ops,
		.flags =      INET_PROTOSW_PERMANENT |
			      INET_PROTOSW_ICSK,
	},

	/* UDP: permanent datagram sockets. */
	{
		.type =       SOCK_DGRAM,
		.protocol =   IPPROTO_UDP,
		.prot =       &udp_prot,
		.ops =        &inet_dgram_ops,
		.flags =      INET_PROTOSW_PERMANENT,
       },

	/* ICMP datagram ("ping") sockets; not permanent. */
       {
		.type =       SOCK_DGRAM,
		.protocol =   IPPROTO_ICMP,
		.prot =       &ping_prot,
		.ops =        &inet_sockraw_ops,
		.flags =      INET_PROTOSW_REUSE,
       },

	/* Raw sockets: IPPROTO_IP is the wildcard protocol in
	 * inet_create(), so this entry accepts any requested protocol.
	 */
       {
	       .type =       SOCK_RAW,
	       .protocol =   IPPROTO_IP,	/* wild card */
	       .prot =       &raw_prot,
	       .ops =        &inet_sockraw_ops,
	       .flags =      INET_PROTOSW_REUSE,
       }
};

/* Number of entries in inetsw_array. */
#define INETSW_ARRAY_LEN ARRAY_SIZE(inetsw_array)
1157
1158 void inet_register_protosw(struct inet_protosw *p)
1159 {
1160 struct list_head *lh;
1161 struct inet_protosw *answer;
1162 int protocol = p->protocol;
1163 struct list_head *last_perm;
1164
1165 spin_lock_bh(&inetsw_lock);
1166
1167 if (p->type >= SOCK_MAX)
1168 goto out_illegal;
1169
1170
1171 last_perm = &inetsw[p->type];
1172 list_for_each(lh, &inetsw[p->type]) {
1173 answer = list_entry(lh, struct inet_protosw, list);
1174
1175 if ((INET_PROTOSW_PERMANENT & answer->flags) == 0)
1176 break;
1177 if (protocol == answer->protocol)
1178 goto out_permanent;
1179 last_perm = lh;
1180 }
1181
1182
1183
1184
1185
1186
1187
1188 list_add_rcu(&p->list, last_perm);
1189 out:
1190 spin_unlock_bh(&inetsw_lock);
1191
1192 return;
1193
1194 out_permanent:
1195 pr_err("Attempt to override permanent protocol %d\n", protocol);
1196 goto out;
1197
1198 out_illegal:
1199 pr_err("Ignoring attempt to register invalid socket type %d\n",
1200 p->type);
1201 goto out;
1202 }
1203 EXPORT_SYMBOL(inet_register_protosw);
1204
1205 void inet_unregister_protosw(struct inet_protosw *p)
1206 {
1207 if (INET_PROTOSW_PERMANENT & p->flags) {
1208 pr_err("Attempt to unregister permanent protocol %d\n",
1209 p->protocol);
1210 } else {
1211 spin_lock_bh(&inetsw_lock);
1212 list_del_rcu(&p->list);
1213 spin_unlock_bh(&inetsw_lock);
1214
1215 synchronize_net();
1216 }
1217 }
1218 EXPORT_SYMBOL(inet_unregister_protosw);
1219
1220 static int inet_sk_reselect_saddr(struct sock *sk)
1221 {
1222 struct inet_sock *inet = inet_sk(sk);
1223 __be32 old_saddr = inet->inet_saddr;
1224 __be32 daddr = inet->inet_daddr;
1225 struct flowi4 *fl4;
1226 struct rtable *rt;
1227 __be32 new_saddr;
1228 struct ip_options_rcu *inet_opt;
1229
1230 inet_opt = rcu_dereference_protected(inet->inet_opt,
1231 lockdep_sock_is_held(sk));
1232 if (inet_opt && inet_opt->opt.srr)
1233 daddr = inet_opt->opt.faddr;
1234
1235
1236 fl4 = &inet->cork.fl.u.ip4;
1237 rt = ip_route_connect(fl4, daddr, 0, sk->sk_bound_dev_if,
1238 sk->sk_protocol, inet->inet_sport,
1239 inet->inet_dport, sk);
1240 if (IS_ERR(rt))
1241 return PTR_ERR(rt);
1242
1243 sk_setup_caps(sk, &rt->dst);
1244
1245 new_saddr = fl4->saddr;
1246
1247 if (new_saddr == old_saddr)
1248 return 0;
1249
1250 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) {
1251 pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
1252 __func__, &old_saddr, &new_saddr);
1253 }
1254
1255 inet->inet_saddr = inet->inet_rcv_saddr = new_saddr;
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265 return __sk_prot_rehash(sk);
1266 }
1267
/*
 * Make sure the socket has a valid cached route, looking one up if needed.
 *
 * Returns 0 if a route is (or becomes) available, otherwise the routing
 * error - or the error from inet_sk_reselect_saddr() when re-selecting the
 * source address was attempted and failed.
 */
int inet_sk_rebuild_header(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
	__be32 daddr;
	struct ip_options_rcu *inet_opt;
	struct flowi4 *fl4;
	int err;

	/* Route is OK, nothing to do. */
	if (rt)
		return 0;

	/* Reroute.  With source routing the lookup targets the first hop
	 * recorded in the IP options rather than the final destination.
	 */
	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	daddr = inet->inet_daddr;
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	rcu_read_unlock();
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr,
				   inet->inet_dport, inet->inet_sport,
				   sk->sk_protocol, RT_CONN_FLAGS(sk),
				   sk->sk_bound_dev_if);
	if (!IS_ERR(rt)) {
		err = 0;
		sk_setup_caps(sk, &rt->dst);
	} else {
		err = PTR_ERR(rt);

		/* Routing failed... */
		sk->sk_route_caps = 0;
		/* Re-selecting the source address is attempted only when
		 * the ip_dynaddr sysctl is non-zero, the socket is in
		 * SYN_SENT and the local address was not fixed by bind().
		 * In every other case, and when the re-selection itself
		 * fails, the error is recorded as a soft error.  Note that
		 * a successful re-selection overwrites err with 0.
		 */
		if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) ||
		    sk->sk_state != TCP_SYN_SENT ||
		    (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
		    (err = inet_sk_reselect_saddr(sk)) != 0)
			sk->sk_err_soft = -err;
	}

	return err;
}
EXPORT_SYMBOL(inet_sk_rebuild_header);
1315
1316 void inet_sk_set_state(struct sock *sk, int state)
1317 {
1318 trace_inet_sock_set_state(sk, sk->sk_state, state);
1319 sk->sk_state = state;
1320 }
1321 EXPORT_SYMBOL(inet_sk_set_state);
1322
1323 void inet_sk_state_store(struct sock *sk, int newstate)
1324 {
1325 trace_inet_sock_set_state(sk, sk->sk_state, newstate);
1326 smp_store_release(&sk->sk_state, newstate);
1327 }
1328
/*
 * GSO segmentation for IPv4.
 *
 * Strips the IP header, lets the transport protocol's gso_segment callback
 * split the payload, and then fixes up the IP header (id, fragment offset,
 * total length, checksum) of every resulting segment.
 *
 * Returns the list of segments, NULL if the callback returned NULL, or an
 * ERR_PTR: -EINVAL for a malformed/short IP header, -EPROTONOSUPPORT when
 * no suitable offload callback exists or a fixed-ID request lacks DF.
 */
struct sk_buff *inet_gso_segment(struct sk_buff *skb,
				 netdev_features_t features)
{
	bool udpfrag = false, fixedid = false, gso_partial, encap;
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	const struct net_offload *ops;
	unsigned int offset = 0;
	struct iphdr *iph;
	int proto, tot_len;
	int nhoff;
	int ihl;
	int id;

	skb_reset_network_header(skb);
	/* Offset of the IP header from the MAC header; used below to find
	 * the IP header again in each segment.
	 */
	nhoff = skb_network_header(skb) - skb_mac_header(skb);
	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
		goto out;

	iph = ip_hdr(skb);
	ihl = iph->ihl * 4;
	if (ihl < sizeof(*iph))
		goto out;

	/* Save what is needed from the header before pulling more data. */
	id = ntohs(iph->id);
	proto = iph->protocol;

	/* Warning: iph is not re-read after this pull; the code below uses
	 * ip_hdr(skb) or recomputes the pointer instead.
	 */
	if (unlikely(!pskb_may_pull(skb, ihl)))
		goto out;
	__skb_pull(skb, ihl);

	/* A non-zero encap_level means this is an inner header; restrict
	 * the features to what the device supports for encapsulation.
	 */
	encap = SKB_GSO_CB(skb)->encap_level > 0;
	if (encap)
		features &= skb->dev->hw_enc_features;
	SKB_GSO_CB(skb)->encap_level += ihl;

	skb_reset_transport_header(skb);

	segs = ERR_PTR(-EPROTONOSUPPORT);

	if (!skb->encapsulation || encap) {
		udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
		fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);

		/* fixed ID is invalid if DF bit is not set */
		if (fixedid && !(ip_hdr(skb)->frag_off & htons(IP_DF)))
			goto out;
	}

	ops = rcu_dereference(inet_offloads[proto]);
	if (likely(ops && ops->callbacks.gso_segment)) {
		segs = ops->callbacks.gso_segment(skb, features);
		/* NULL result: restore the network header offset that was
		 * in effect on entry.
		 */
		if (!segs)
			skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
	}

	if (IS_ERR_OR_NULL(segs))
		goto out;

	gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL);

	/* Fix up the IP header of every segment. */
	skb = segs;
	do {
		iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
		if (udpfrag) {
			/* Segments become IP fragments: offset in 8-byte
			 * units, MF set on all but the last one.
			 */
			iph->frag_off = htons(offset >> 3);
			if (skb->next)
				iph->frag_off |= htons(IP_MF);
			offset += skb->len - nhoff - ihl;
			tot_len = skb->len - nhoff;
		} else if (skb_is_gso(skb)) {
			/* Segment is itself still GSO: advance the IP id by
			 * the number of segments it stands for.
			 */
			if (!fixedid) {
				iph->id = htons(id);
				id += skb_shinfo(skb)->gso_segs;
			}

			if (gso_partial)
				tot_len = skb_shinfo(skb)->gso_size +
					  SKB_GSO_CB(skb)->data_offset +
					  skb->head - (unsigned char *)iph;
			else
				tot_len = skb->len - nhoff;
		} else {
			if (!fixedid)
				iph->id = htons(id++);
			tot_len = skb->len - nhoff;
		}
		iph->tot_len = htons(tot_len);
		ip_send_check(iph);
		if (encap)
			skb_reset_inner_headers(skb);
		skb->network_header = (u8 *)iph - skb->head;
		skb_reset_mac_len(skb);
	} while ((skb = skb->next));

out:
	return segs;
}
1427
1428 static struct sk_buff *ipip_gso_segment(struct sk_buff *skb,
1429 netdev_features_t features)
1430 {
1431 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4))
1432 return ERR_PTR(-EINVAL);
1433
1434 return inet_gso_segment(skb, features);
1435 }
1436
/*
 * inet_gro_receive - GRO receive callback for IPv4 packets.
 * @head: list of packets currently held for aggregation
 * @skb: newly received packet; its GRO offset points at the IPv4 header
 *
 * Validates the IPv4 header of @skb, compares it with every held packet
 * still marked same_flow, updates their same_flow/flush/flush_id state,
 * and then passes @skb on to the gro_receive callback registered in
 * inet_offloads[] for the header's protocol.
 *
 * Returns the value of the transport callback, or NULL when the packet
 * is not eligible (header unavailable, no callback, first byte not 0x45,
 * fragment, or bad header checksum). In every case
 * skb_gro_flush_final() is invoked with the computed flush value.
 */
struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
{
	const struct net_offload *ops;
	struct sk_buff *pp = NULL;
	const struct iphdr *iph;
	struct sk_buff *p;
	unsigned int hlen;
	unsigned int off;
	unsigned int id;
	int flush = 1;
	int proto;

	/* Try the fast header accessor first, fall back to the slow one. */
	off = skb_gro_offset(skb);
	hlen = off + sizeof(*iph);
	iph = skb_gro_header_fast(skb, off);
	if (skb_gro_header_hard(skb, hlen)) {
		iph = skb_gro_header_slow(skb, hlen, off);
		if (unlikely(!iph))
			goto out;
	}

	proto = iph->protocol;

	ops = rcu_dereference(inet_offloads[proto]);
	if (!ops || !ops->callbacks.gro_receive)
		goto out;

	/* First byte must be 0x45: version 4 with a 5-word header (no options). */
	if (*(u8 *)iph != 0x45)
		goto out;

	if (ip_is_fragment(iph))
		goto out;

	/* Checksum over the 5-word header must fold to zero. */
	if (unlikely(ip_fast_csum((u8 *)iph, 5)))
		goto out;

	/*
	 * One 32-bit load starting at iph->id: after ntohl() the upper 16
	 * bits hold the IP ID and the lower 16 bits hold frag_off.
	 * flush becomes non-zero when the low 16 bits of the first header
	 * word (assumes struct iphdr places tot_len there) differ from
	 * skb_gro_len(), or when any frag_off bit other than DF is set.
	 */
	id = ntohl(*(__be32 *)&iph->id);
	flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
	id >>= 16;

	list_for_each_entry(p, head, list) {
		struct iphdr *iph2;
		u16 flush_id;

		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		/*
		 * The held packet's IPv4 header is read at the same offset
		 * as ours. NOTE(review): this presumes headers below this
		 * layer have equal length in both packets - confirm.
		 */
		iph2 = (struct iphdr *)(p->data + off);

		/* Different protocol or addresses: not the same flow. */
		if ((iph->protocol ^ iph2->protocol) |
		    ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
		    ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}

		/* TTL, TOS and the DF bit must match, otherwise flush. */
		NAPI_GRO_CB(p)->flush |=
			(iph->ttl ^ iph2->ttl) |
			(iph->tos ^ iph2->tos) |
			((iph->frag_off ^ iph2->frag_off) & htons(IP_DF));

		NAPI_GRO_CB(p)->flush |= flush;

		/*
		 * Record the IP ID delta against the held packet in
		 * flush_id rather than flushing right away.
		 */
		flush_id = (u16)(id - ntohs(iph2->id));

		/*
		 * Unless the held packet is atomic and this packet has DF
		 * set, the delta is compared with the held packet's count
		 * and collapsed to 0 (match) or 0xFFFF (mismatch).
		 */
		if (!NAPI_GRO_CB(p)->is_atomic ||
		    !(iph->frag_off & htons(IP_DF))) {
			flush_id ^= NAPI_GRO_CB(p)->count;
			flush_id = flush_id ? 0xFFFF : 0;
		}

		/*
		 * Overwrite the stored flush_id when skb's is_atomic is
		 * already set on entry; otherwise accumulate into it.
		 */
		if (NAPI_GRO_CB(skb)->is_atomic)
			NAPI_GRO_CB(p)->flush_id = flush_id;
		else
			NAPI_GRO_CB(p)->flush_id |= flush_id;
	}

	/* A packet is treated as "atomic" when its DF bit is set. */
	NAPI_GRO_CB(skb)->is_atomic = !!(iph->frag_off & htons(IP_DF));
	NAPI_GRO_CB(skb)->flush |= flush;
	skb_set_network_header(skb, off);

	/*
	 * Advance past the fixed-size IPv4 header and mark the transport
	 * header there before calling the transport-level callback.
	 */
	skb_gro_pull(skb, sizeof(*iph));
	skb_set_transport_header(skb, skb_gro_offset(skb));

	pp = indirect_call_gro_receive(tcp4_gro_receive, udp4_gro_receive,
				       ops->callbacks.gro_receive, head, skb);

out:
	skb_gro_flush_final(skb, pp, flush);

	return pp;
}
1553
1554 static struct sk_buff *ipip_gro_receive(struct list_head *head,
1555 struct sk_buff *skb)
1556 {
1557 if (NAPI_GRO_CB(skb)->encap_mark) {
1558 NAPI_GRO_CB(skb)->flush = 1;
1559 return NULL;
1560 }
1561
1562 NAPI_GRO_CB(skb)->encap_mark = 1;
1563
1564 return inet_gro_receive(head, skb);
1565 }
1566
1567 #define SECONDS_PER_DAY 86400
1568
1569
1570
1571
1572
1573 __be32 inet_current_timestamp(void)
1574 {
1575 u32 secs;
1576 u32 msecs;
1577 struct timespec64 ts;
1578
1579 ktime_get_real_ts64(&ts);
1580
1581
1582 (void)div_u64_rem(ts.tv_sec, SECONDS_PER_DAY, &secs);
1583
1584 msecs = secs * MSEC_PER_SEC;
1585
1586 msecs += (u32)ts.tv_nsec / NSEC_PER_MSEC;
1587
1588
1589 return htonl(msecs);
1590 }
1591 EXPORT_SYMBOL(inet_current_timestamp);
1592
1593 int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
1594 {
1595 if (sk->sk_family == AF_INET)
1596 return ip_recv_error(sk, msg, len, addr_len);
1597 #if IS_ENABLED(CONFIG_IPV6)
1598 if (sk->sk_family == AF_INET6)
1599 return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len);
1600 #endif
1601 return -EINVAL;
1602 }
1603
1604 int inet_gro_complete(struct sk_buff *skb, int nhoff)
1605 {
1606 __be16 newlen = htons(skb->len - nhoff);
1607 struct iphdr *iph = (struct iphdr *)(skb->data + nhoff);
1608 const struct net_offload *ops;
1609 int proto = iph->protocol;
1610 int err = -ENOSYS;
1611
1612 if (skb->encapsulation) {
1613 skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP));
1614 skb_set_inner_network_header(skb, nhoff);
1615 }
1616
1617 csum_replace2(&iph->check, iph->tot_len, newlen);
1618 iph->tot_len = newlen;
1619
1620 ops = rcu_dereference(inet_offloads[proto]);
1621 if (WARN_ON(!ops || !ops->callbacks.gro_complete))
1622 goto out;
1623
1624
1625
1626
1627
1628 err = INDIRECT_CALL_2(ops->callbacks.gro_complete,
1629 tcp4_gro_complete, udp4_gro_complete,
1630 skb, nhoff + sizeof(*iph));
1631
1632 out:
1633 return err;
1634 }
1635
1636 static int ipip_gro_complete(struct sk_buff *skb, int nhoff)
1637 {
1638 skb->encapsulation = 1;
1639 skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4;
1640 return inet_gro_complete(skb, nhoff);
1641 }
1642
1643 int inet_ctl_sock_create(struct sock **sk, unsigned short family,
1644 unsigned short type, unsigned char protocol,
1645 struct net *net)
1646 {
1647 struct socket *sock;
1648 int rc = sock_create_kern(net, family, type, protocol, &sock);
1649
1650 if (rc == 0) {
1651 *sk = sock->sk;
1652 (*sk)->sk_allocation = GFP_ATOMIC;
1653
1654
1655
1656
1657 (*sk)->sk_prot->unhash(*sk);
1658 }
1659 return rc;
1660 }
1661 EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
1662
1663 unsigned long snmp_fold_field(void __percpu *mib, int offt)
1664 {
1665 unsigned long res = 0;
1666 int i;
1667
1668 for_each_possible_cpu(i)
1669 res += snmp_get_cpu_field(mib, i, offt);
1670 return res;
1671 }
1672 EXPORT_SYMBOL_GPL(snmp_fold_field);
1673
1674 #if BITS_PER_LONG==32
1675
1676 u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offt,
1677 size_t syncp_offset)
1678 {
1679 void *bhptr;
1680 struct u64_stats_sync *syncp;
1681 u64 v;
1682 unsigned int start;
1683
1684 bhptr = per_cpu_ptr(mib, cpu);
1685 syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
1686 do {
1687 start = u64_stats_fetch_begin_irq(syncp);
1688 v = *(((u64 *)bhptr) + offt);
1689 } while (u64_stats_fetch_retry_irq(syncp, start));
1690
1691 return v;
1692 }
1693 EXPORT_SYMBOL_GPL(snmp_get_cpu_field64);
1694
1695 u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
1696 {
1697 u64 res = 0;
1698 int cpu;
1699
1700 for_each_possible_cpu(cpu) {
1701 res += snmp_get_cpu_field64(mib, cpu, offt, syncp_offset);
1702 }
1703 return res;
1704 }
1705 EXPORT_SYMBOL_GPL(snmp_fold_field64);
1706 #endif
1707
#ifdef CONFIG_IP_MULTICAST
/* IGMP input hook; added for IPPROTO_IGMP in inet_init(). */
static const struct net_protocol igmp_protocol = {
	.handler = igmp_rcv,
};
#endif
1713
/*
 * TCP input and error hooks; added for IPPROTO_TCP in inet_init().
 * The no_policy and icmp_strict_tag_validation flags are consumed
 * outside this file.
 */
static const struct net_protocol tcp_protocol = {
	.handler = tcp_v4_rcv,
	.err_handler = tcp_v4_err,
	.no_policy = 1,
	.icmp_strict_tag_validation = 1,
};
1720
/* UDP input and error hooks; added for IPPROTO_UDP in inet_init(). */
static const struct net_protocol udp_protocol = {
	.handler = udp_rcv,
	.err_handler = udp_err,
	.no_policy = 1,
};
1726
/* ICMP input and error hooks; added for IPPROTO_ICMP in inet_init(). */
static const struct net_protocol icmp_protocol = {
	.handler = icmp_rcv,
	.err_handler = icmp_err,
	.no_policy = 1,
};
1732
1733 static __net_init int ipv4_mib_init_net(struct net *net)
1734 {
1735 int i;
1736
1737 net->mib.tcp_statistics = alloc_percpu(struct tcp_mib);
1738 if (!net->mib.tcp_statistics)
1739 goto err_tcp_mib;
1740 net->mib.ip_statistics = alloc_percpu(struct ipstats_mib);
1741 if (!net->mib.ip_statistics)
1742 goto err_ip_mib;
1743
1744 for_each_possible_cpu(i) {
1745 struct ipstats_mib *af_inet_stats;
1746 af_inet_stats = per_cpu_ptr(net->mib.ip_statistics, i);
1747 u64_stats_init(&af_inet_stats->syncp);
1748 }
1749
1750 net->mib.net_statistics = alloc_percpu(struct linux_mib);
1751 if (!net->mib.net_statistics)
1752 goto err_net_mib;
1753 net->mib.udp_statistics = alloc_percpu(struct udp_mib);
1754 if (!net->mib.udp_statistics)
1755 goto err_udp_mib;
1756 net->mib.udplite_statistics = alloc_percpu(struct udp_mib);
1757 if (!net->mib.udplite_statistics)
1758 goto err_udplite_mib;
1759 net->mib.icmp_statistics = alloc_percpu(struct icmp_mib);
1760 if (!net->mib.icmp_statistics)
1761 goto err_icmp_mib;
1762 net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib),
1763 GFP_KERNEL);
1764 if (!net->mib.icmpmsg_statistics)
1765 goto err_icmpmsg_mib;
1766
1767 tcp_mib_init(net);
1768 return 0;
1769
1770 err_icmpmsg_mib:
1771 free_percpu(net->mib.icmp_statistics);
1772 err_icmp_mib:
1773 free_percpu(net->mib.udplite_statistics);
1774 err_udplite_mib:
1775 free_percpu(net->mib.udp_statistics);
1776 err_udp_mib:
1777 free_percpu(net->mib.net_statistics);
1778 err_net_mib:
1779 free_percpu(net->mib.ip_statistics);
1780 err_ip_mib:
1781 free_percpu(net->mib.tcp_statistics);
1782 err_tcp_mib:
1783 return -ENOMEM;
1784 }
1785
1786 static __net_exit void ipv4_mib_exit_net(struct net *net)
1787 {
1788 kfree(net->mib.icmpmsg_statistics);
1789 free_percpu(net->mib.icmp_statistics);
1790 free_percpu(net->mib.udplite_statistics);
1791 free_percpu(net->mib.udp_statistics);
1792 free_percpu(net->mib.net_statistics);
1793 free_percpu(net->mib.ip_statistics);
1794 free_percpu(net->mib.tcp_statistics);
1795 #ifdef CONFIG_MPTCP
1796
1797 free_percpu(net->mib.mptcp_statistics);
1798 #endif
1799 }
1800
/* Per-namespace hooks that allocate and free the IPv4 MIB blocks. */
static __net_initdata struct pernet_operations ipv4_mib_ops = {
	.init = ipv4_mib_init_net,
	.exit = ipv4_mib_exit_net,
};
1805
/*
 * Register ipv4_mib_ops as a pernet subsystem; returns the result of
 * register_pernet_subsys(). Called from inet_init(), which panics on a
 * non-zero return.
 */
static int __init init_ipv4_mibs(void)
{
	return register_pernet_subsys(&ipv4_mib_ops);
}
1810
1811 static __net_init int inet_init_net(struct net *net)
1812 {
1813
1814
1815
1816 seqlock_init(&net->ipv4.ip_local_ports.lock);
1817 net->ipv4.ip_local_ports.range[0] = 32768;
1818 net->ipv4.ip_local_ports.range[1] = 60999;
1819
1820 seqlock_init(&net->ipv4.ping_group_range.lock);
1821
1822
1823
1824
1825 net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1);
1826 net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0);
1827
1828
1829
1830
1831 net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
1832 net->ipv4.sysctl_ip_fwd_update_priority = 1;
1833 net->ipv4.sysctl_ip_dynaddr = 0;
1834 net->ipv4.sysctl_ip_early_demux = 1;
1835 net->ipv4.sysctl_udp_early_demux = 1;
1836 net->ipv4.sysctl_tcp_early_demux = 1;
1837 net->ipv4.sysctl_nexthop_compat_mode = 1;
1838 #ifdef CONFIG_SYSCTL
1839 net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
1840 #endif
1841
1842
1843 net->ipv4.sysctl_igmp_max_memberships = 20;
1844 net->ipv4.sysctl_igmp_max_msf = 10;
1845
1846 net->ipv4.sysctl_igmp_llm_reports = 1;
1847 net->ipv4.sysctl_igmp_qrv = 2;
1848
1849 net->ipv4.sysctl_fib_notify_on_flag_change = 0;
1850
1851 return 0;
1852 }
1853
/* Per-namespace hook that installs the IPv4 defaults (init only, no exit). */
static __net_initdata struct pernet_operations af_inet_ops = {
	.init = inet_init_net,
};
1857
/*
 * Register af_inet_ops as a pernet subsystem; returns the result of
 * register_pernet_subsys(). inet_init() only logs a failure.
 */
static int __init init_inet_pernet_ops(void)
{
	return register_pernet_subsys(&af_inet_ops);
}
1862
1863 static int ipv4_proc_init(void);
1864
1865
1866
1867
1868
/*
 * Packet-level GSO/GRO callbacks for ETH_P_IP; handed to
 * dev_add_offload() in ipv4_offload_init().
 */
static struct packet_offload ip_packet_offload __read_mostly = {
	.type = cpu_to_be16(ETH_P_IP),
	.callbacks = {
		.gso_segment = inet_gso_segment,
		.gro_receive = inet_gro_receive,
		.gro_complete = inet_gro_complete,
	},
};
1877
/*
 * GSO/GRO callbacks for IP-in-IP; registered for IPPROTO_IPIP by
 * ipip_offload_init().
 */
static const struct net_offload ipip_offload = {
	.callbacks = {
		.gso_segment = ipip_gso_segment,
		.gro_receive = ipip_gro_receive,
		.gro_complete = ipip_gro_complete,
	},
};
1885
/*
 * Register ipip_offload for IPPROTO_IPIP; returns the result of
 * inet_add_offload().
 */
static int __init ipip_offload_init(void)
{
	return inet_add_offload(&ipip_offload, IPPROTO_IPIP);
}
1890
1891 static int __init ipv4_offload_init(void)
1892 {
1893
1894
1895
1896 if (udpv4_offload_init() < 0)
1897 pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
1898 if (tcpv4_offload_init() < 0)
1899 pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
1900 if (ipip_offload_init() < 0)
1901 pr_crit("%s: Cannot add IPIP protocol offload\n", __func__);
1902
1903 dev_add_offload(&ip_packet_offload);
1904 return 0;
1905 }
1906
1907 fs_initcall(ipv4_offload_init);
1908
/*
 * Receive hooks for ETH_P_IP frames (single-packet and list variants);
 * registered with dev_add_pack() near the end of inet_init().
 */
static struct packet_type ip_packet_type __read_mostly = {
	.type = cpu_to_be16(ETH_P_IP),
	.func = ip_rcv,
	.list_func = ip_list_rcv,
};
1914
/*
 * inet_init - boot-time initialisation of the IPv4 socket family.
 *
 * Registers the tcp/udp/raw/ping protos, the socket family, the base
 * transport protocols and the inetsw table, then brings up the
 * individual IPv4 sub-modules in a fixed order.
 *
 * Returns 0 on success. The only failures reported to the caller are
 * those of the four proto_register() calls; in that case the protos
 * registered so far are unregistered and the error is returned. Later
 * failures are either logged with pr_crit(), cause a panic (MIB and
 * ICMP setup), or are ignored.
 */
static int __init inet_init(void)
{
	struct inet_protosw *q;
	struct list_head *r;
	int rc;

	sock_skb_cb_check_size(sizeof(struct inet_skb_parm));

	raw_hashinfo_init(&raw_v4_hashinfo);

	/* Register the struct proto instances; unwind on failure. */
	rc = proto_register(&tcp_prot, 1);
	if (rc)
		goto out;

	rc = proto_register(&udp_prot, 1);
	if (rc)
		goto out_unregister_tcp_proto;

	rc = proto_register(&raw_prot, 1);
	if (rc)
		goto out_unregister_udp_proto;

	rc = proto_register(&ping_prot, 1);
	if (rc)
		goto out_unregister_raw_proto;

	/*
	 * Register the address family with the socket layer; the return
	 * value is deliberately discarded.
	 */
	(void)sock_register(&inet_family_ops);

#ifdef CONFIG_SYSCTL
	ip_static_sysctl_init();
#endif

	/*
	 * Add the base protocols. Failures are logged but not fatal.
	 */
	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		pr_crit("%s: Cannot add ICMP protocol\n", __func__);
	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
		pr_crit("%s: Cannot add UDP protocol\n", __func__);
	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
		pr_crit("%s: Cannot add TCP protocol\n", __func__);
#ifdef CONFIG_IP_MULTICAST
	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
		pr_crit("%s: Cannot add IGMP protocol\n", __func__);
#endif

	/* Initialise every inetsw[] list head, then register the built-in entries. */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q);

	/*
	 * Set up the ARP module.
	 */
	arp_init();

	/*
	 * Set up the IP module.
	 */
	ip_init();

	/* Register the per-namespace IPv4 MIBs; failure is fatal. */
	if (init_ipv4_mibs())
		panic("%s: Cannot init ipv4 mibs\n", __func__);

	/* TCP initialisation. */
	tcp_init();

	/* UDP initialisation. */
	udp_init();

	/* UDP-Lite registration. */
	udplite4_register();

	raw_init();

	ping_init();

	/*
	 * Set up the ICMP layer; failure is fatal.
	 */
	if (icmp_init() < 0)
		panic("Failed to create the ICMP control socket.\n");

	/*
	 * Initialise the multicast router, when configured.
	 */
#if defined(CONFIG_IP_MROUTE)
	if (ip_mr_init())
		pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
#endif

	if (init_inet_pernet_ops())
		pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);

	/* The return value of ipv4_proc_init() is not checked. */
	ipv4_proc_init();

	ipfrag_init();

	/* Start receiving ETH_P_IP frames through ip_packet_type. */
	dev_add_pack(&ip_packet_type);

	ip_tunnel_core_init();

	rc = 0;
out:
	return rc;
	/* Error unwind for the proto_register() calls above. */
out_unregister_raw_proto:
	proto_unregister(&raw_prot);
out_unregister_udp_proto:
	proto_unregister(&udp_prot);
out_unregister_tcp_proto:
	proto_unregister(&tcp_prot);
	goto out;
}

fs_initcall(inet_init);
2041
2042
2043
2044 #ifdef CONFIG_PROC_FS
2045 static int __init ipv4_proc_init(void)
2046 {
2047 int rc = 0;
2048
2049 if (raw_proc_init())
2050 goto out_raw;
2051 if (tcp4_proc_init())
2052 goto out_tcp;
2053 if (udp4_proc_init())
2054 goto out_udp;
2055 if (ping_proc_init())
2056 goto out_ping;
2057 if (ip_misc_proc_init())
2058 goto out_misc;
2059 out:
2060 return rc;
2061 out_misc:
2062 ping_proc_exit();
2063 out_ping:
2064 udp4_proc_exit();
2065 out_udp:
2066 tcp4_proc_exit();
2067 out_tcp:
2068 raw_proc_exit();
2069 out_raw:
2070 rc = -ENOMEM;
2071 goto out;
2072 }
2073
2074 #else
2075 static int __init ipv4_proc_init(void)
2076 {
2077 return 0;
2078 }
2079 #endif