/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Support for INET connection oriented protocols.
 *
 * Authors:	See the TCP sources
 */
#include <linux/module.h>
#include <linux/jhash.h>

#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp_states.h>
#include <net/xfrm.h>
#include <net/tcp.h>
#include <net/sock_reuseport.h>
#include <net/addrconf.h>

#if IS_ENABLED(CONFIG_IPV6)
/* match_sk*_wildcard == true:  IPV6_ADDR_ANY equals to any IPv6 addresses
 *				if IPv6 only, and any IPv4 addresses
 *				if not IPv6 only
 * match_sk*_wildcard == false: addresses are exactly the same, i.e.
 *				IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
 *				and 0.0.0.0 equals to 0.0.0.0 only
 */
static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
				 const struct in6_addr *sk2_rcv_saddr6,
				 __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
				 bool sk1_ipv6only, bool sk2_ipv6only,
				 bool match_sk1_wildcard,
				 bool match_sk2_wildcard)
{
	int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
	int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;

	/* if both are mapped, treat as IPv4 */
	if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
		if (!sk2_ipv6only) {
			if (sk1_rcv_saddr == sk2_rcv_saddr)
				return true;
			return (match_sk1_wildcard && !sk1_rcv_saddr) ||
				(match_sk2_wildcard && !sk2_rcv_saddr);
		}
		return false;
	}

	if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
		return true;

	if (addr_type2 == IPV6_ADDR_ANY && match_sk2_wildcard &&
	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
		return true;

	if (addr_type == IPV6_ADDR_ANY && match_sk1_wildcard &&
	    !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
		return true;

	if (sk2_rcv_saddr6 &&
	    ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
		return true;

	return false;
}
#endif

/* match_sk*_wildcard == true:  0.0.0.0 equals to any IPv4 addresses
 * match_sk*_wildcard == false: addresses are exactly the same, i.e.
 *				0.0.0.0 only equals to 0.0.0.0
 */
static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
				 bool sk2_ipv6only, bool match_sk1_wildcard,
				 bool match_sk2_wildcard)
{
	if (!sk2_ipv6only) {
		if (sk1_rcv_saddr == sk2_rcv_saddr)
			return true;
		return (match_sk1_wildcard && !sk1_rcv_saddr) ||
			(match_sk2_wildcard && !sk2_rcv_saddr);
	}
	return false;
}

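/* Compare the bound receive addresses of two sockets, dispatching to the
 * IPv6 or IPv4 helper above depending on sk's family.  With match_wildcard
 * set, a wildcard address on either socket counts as a match.
 */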
bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
			  bool match_wildcard)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		return ipv6_rcv_saddr_equal(&sk->sk_v6_rcv_saddr,
					    inet6_rcv_saddr(sk2),
					    sk->sk_rcv_saddr,
					    sk2->sk_rcv_saddr,
					    ipv6_only_sock(sk),
					    ipv6_only_sock(sk2),
					    match_wildcard,
					    match_wildcard);
#endif
	return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr,
				    ipv6_only_sock(sk2), match_wildcard,
				    match_wildcard);
}
EXPORT_SYMBOL(inet_rcv_saddr_equal);

bool inet_rcv_saddr_any(const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		return ipv6_addr_any(&sk->sk_v6_rcv_saddr);
#endif
	return !sk->sk_rcv_saddr;
}

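/* Read the per-netns ephemeral port range.  The seqlock retry loop yields a
 * consistent low/high pair even if the sysctl is being rewritten concurrently.
 */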
void inet_get_local_port_range(struct net *net, int *low, int *high)
{
	unsigned int seq;

	do {
		seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);

		*low = net->ipv4.ip_local_ports.range[0];
		*high = net->ipv4.ip_local_ports.range[1];
	} while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
}
EXPORT_SYMBOL(inet_get_local_port_range);

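/* Walk the owners of a bind bucket and decide whether sk may share the local
 * port.  Returns non-zero when a conflicting socket is found; SO_REUSEADDR and
 * SO_REUSEPORT (subject to uid and reuseport-group checks) relax the test.
 */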
static int inet_csk_bind_conflict(const struct sock *sk,
				  const struct inet_bind_bucket *tb,
				  bool relax, bool reuseport_ok)
{
	struct sock *sk2;
	bool reuseport_cb_ok;
	bool reuse = sk->sk_reuse;
	bool reuseport = !!sk->sk_reuseport;
	struct sock_reuseport *reuseport_cb;
	kuid_t uid = sock_i_uid((struct sock *)sk);

	rcu_read_lock();
	reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
	/* paired with WRITE_ONCE() of num_closed_socks in sock_reuseport.c */
	reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks);
	rcu_read_unlock();

	/*
	 * Unlike other sk lookup places we do not check
	 * for sk_net here, since _all_ the socks listed
	 * in tb->owners list belong to the same net - the
	 * one this bucket belongs to.
	 */

	sk_for_each_bound(sk2, &tb->owners) {
		int bound_dev_if2;

		if (sk == sk2)
			continue;
		bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
		if ((!sk->sk_bound_dev_if ||
		     !bound_dev_if2 ||
		     sk->sk_bound_dev_if == bound_dev_if2)) {
			if (reuse && sk2->sk_reuse &&
			    sk2->sk_state != TCP_LISTEN) {
				if ((!relax ||
				     (!reuseport_ok &&
				      reuseport && sk2->sk_reuseport &&
				      reuseport_cb_ok &&
				      (sk2->sk_state == TCP_TIME_WAIT ||
				       uid_eq(uid, sock_i_uid(sk2))))) &&
				    inet_rcv_saddr_equal(sk, sk2, true))
					break;
			} else if (!reuseport_ok ||
				   !reuseport || !sk2->sk_reuseport ||
				   !reuseport_cb_ok ||
				   (sk2->sk_state != TCP_TIME_WAIT &&
				    !uid_eq(uid, sock_i_uid(sk2)))) {
				if (inet_rcv_saddr_equal(sk, sk2, true))
					break;
			}
		}
	}
	return sk2 != NULL;
}

/*
 * Find an open port number for the socket.  Returns with the
 * inet_bind_hashbucket lock held on success.
 */
static struct inet_bind_hashbucket *
inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
{
	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
	int port = 0;
	struct inet_bind_hashbucket *head;
	struct net *net = sock_net(sk);
	bool relax = false;
	int i, low, high, attempt_half;
	struct inet_bind_bucket *tb;
	u32 remaining, offset;
	int l3mdev;

	l3mdev = inet_sk_bound_l3mdev(sk);
ports_exhausted:
	attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
	inet_get_local_port_range(net, &low, &high);
	high++;		/* [32768, 60999] -> [32768, 61000[ */
	if (high - low < 4)
		attempt_half = 0;
	if (attempt_half) {
		int half = low + (((high - low) >> 2) << 1);

		if (attempt_half == 1)
			high = half;
		else
			low = half;
	}
	remaining = high - low;
	if (likely(remaining > 1))
		remaining &= ~1U;

	offset = prandom_u32() % remaining;
	/* __inet_hash_connect() favors ports having @low parity
	 * We do the opposite to not pollute connect() users.
	 */
	offset |= 1U;

other_parity_scan:
	port = low + offset;
	for (i = 0; i < remaining; i += 2, port += 2) {
		if (unlikely(port >= high))
			port -= remaining;
		if (inet_is_local_reserved_port(net, port))
			continue;
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		spin_lock_bh(&head->lock);
		inet_bind_bucket_for_each(tb, &head->chain)
			if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
			    tb->port == port) {
				if (!inet_csk_bind_conflict(sk, tb, relax, false))
					goto success;
				goto next_port;
			}
		tb = NULL;
		goto success;
next_port:
		spin_unlock_bh(&head->lock);
		cond_resched();
	}

	offset--;
	if (!(offset & 1))
		goto other_parity_scan;

	if (attempt_half == 1) {
		/* OK we now try the upper half of the range */
		attempt_half = 2;
		goto other_half_scan;
	}

	if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) {
		/* We still have a chance to connect to different destinations */
		relax = true;
		goto ports_exhausted;
	}
	return NULL;
success:
	*port_ret = port;
	*tb_ret = tb;
	return head;
}

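/* Fast path check for SO_REUSEPORT binds: the bucket caches the uid, family
 * and address of the first reuseport owner, so a matching socket can skip the
 * full bind-conflict walk.
 */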
static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
				     struct sock *sk)
{
	kuid_t uid = sock_i_uid(sk);

	if (tb->fastreuseport <= 0)
		return 0;
	if (!sk->sk_reuseport)
		return 0;
	if (rcu_access_pointer(sk->sk_reuseport_cb))
		return 0;
	if (!uid_eq(tb->fastuid, uid))
		return 0;
	/* We only need to check the rcv_saddr if this tb was once marked
	 * without fastreuseport and then was reset, as we can only know that
	 * the fast_*rcv_saddr doesn't have any conflicts with the socks on the
	 * owners list.
	 */
	if (tb->fastreuseport == FASTREUSEPORT_ANY)
		return 1;
#if IS_ENABLED(CONFIG_IPV6)
	if (tb->fast_sk_family == AF_INET6)
		return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr,
					    inet6_rcv_saddr(sk),
					    tb->fast_rcv_saddr,
					    sk->sk_rcv_saddr,
					    tb->fast_ipv6_only,
					    ipv6_only_sock(sk), true, false);
#endif
	return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr,
				    ipv6_only_sock(sk), true, false);
}

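/* Refresh the bucket's cached fastreuse/fastreuseport state when sk joins it:
 * a first owner seeds the cache, later owners can only keep or downgrade it.
 */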
void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
			       struct sock *sk)
{
	kuid_t uid = sock_i_uid(sk);
	bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;

	if (hlist_empty(&tb->owners)) {
		tb->fastreuse = reuse;
		if (sk->sk_reuseport) {
			tb->fastreuseport = FASTREUSEPORT_ANY;
			tb->fastuid = uid;
			tb->fast_rcv_saddr = sk->sk_rcv_saddr;
			tb->fast_ipv6_only = ipv6_only_sock(sk);
			tb->fast_sk_family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
			tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
#endif
		} else {
			tb->fastreuseport = 0;
		}
	} else {
		if (!reuse)
			tb->fastreuse = 0;
		if (sk->sk_reuseport) {
			/* We didn't match or we don't have fastreuseport set on
			 * the tb, but we have sk_reuseport set on this socket
			 * and we know that there are no bind conflicts with
			 * this socket in this tb, so reset our tb's reuseport
			 * settings so that any subsequent sockets that match
			 * our current socket will be put on the fast path.
			 *
			 * If we reset we need to set FASTREUSEPORT_STRICT so we
			 * do extra checking for all subsequent sk_reuseport
			 * socks.
			 */
			if (!sk_reuseport_match(tb, sk)) {
				tb->fastreuseport = FASTREUSEPORT_STRICT;
				tb->fastuid = uid;
				tb->fast_rcv_saddr = sk->sk_rcv_saddr;
				tb->fast_ipv6_only = ipv6_only_sock(sk);
				tb->fast_sk_family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
				tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
#endif
			}
		} else {
			tb->fastreuseport = 0;
		}
	}
}

/* Obtain a reference to a local port for the given sock,
 * if snum is zero it means select any available local port.
 * We try to allocate an odd port (and leave even ports for connect())
 */
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
	bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
	int ret = 1, port = snum;
	struct inet_bind_hashbucket *head;
	struct net *net = sock_net(sk);
	struct inet_bind_bucket *tb = NULL;
	int l3mdev;

	l3mdev = inet_sk_bound_l3mdev(sk);

	if (!port) {
		head = inet_csk_find_open_port(sk, &tb, &port);
		if (!head)
			return ret;
		if (!tb)
			goto tb_not_found;
		goto success;
	}
	head = &hinfo->bhash[inet_bhashfn(net, port,
					  hinfo->bhash_size)];
	spin_lock_bh(&head->lock);
	inet_bind_bucket_for_each(tb, &head->chain)
		if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
		    tb->port == port)
			goto tb_found;
tb_not_found:
	tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
				     net, head, port, l3mdev);
	if (!tb)
		goto fail_unlock;
tb_found:
	if (!hlist_empty(&tb->owners)) {
		if (sk->sk_reuse == SK_FORCE_REUSE)
			goto success;

		if ((tb->fastreuse > 0 && reuse) ||
		    sk_reuseport_match(tb, sk))
			goto success;
		if (inet_csk_bind_conflict(sk, tb, true, true))
			goto fail_unlock;
	}
success:
	inet_csk_update_fastreuse(tb, sk);

	if (!inet_csk(sk)->icsk_bind_hash)
		inet_bind_hash(sk, tb, port);
	WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
	ret = 0;

fail_unlock:
	spin_unlock_bh(&head->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(inet_csk_get_port);

/*
 * Wait for an incoming connection, avoid race conditions. This must be called
 * with the socket locked.
 */
static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	DEFINE_WAIT(wait);
	int err;

	/*
	 * True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	 *
	 * Subtle issue: "add_wait_queue_exclusive()" will be added
	 * after any current non-exclusive waiters, and we know that
	 * it will always _stay_ after any new non-exclusive waiters
	 * because all non-exclusive waiters are added at the
	 * beginning of the wait-queue. As such, it's ok to "drop"
	 * our exclusiveness temporarily when we get woken up without
	 * having to remove and re-insert us on the wait queue.
	 */
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		release_sock(sk);
		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
			timeo = schedule_timeout(timeo);
		sched_annotate_sleep();
		lock_sock(sk);
		err = 0;
		if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
			break;
		err = -EINVAL;
		if (sk->sk_state != TCP_LISTEN)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

/*
 * This will accept the next outstanding connection.
 */
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
	struct request_sock *req;
	struct sock *newsk;
	int error;

	lock_sock(sk);

	/* We need to make sure that this socket is listening,
	 * and that it has something pending.
	 */
	error = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out_err;

	/* Find already established connection */
	if (reqsk_queue_empty(queue)) {
		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);

		/* If this is a non blocking socket don't sleep */
		error = -EAGAIN;
		if (!timeo)
			goto out_err;

		error = inet_csk_wait_for_connect(sk, timeo);
		if (error)
			goto out_err;
	}
	req = reqsk_queue_remove(queue, sk);
	newsk = req->sk;

	if (sk->sk_protocol == IPPROTO_TCP &&
	    tcp_rsk(req)->tfo_listener) {
		spin_lock_bh(&queue->fastopenq.lock);
		if (tcp_rsk(req)->tfo_listener) {
			/* We are still waiting for the final ACK from 3WHS
			 * so can't free req now. Instead, we set req->sk to
			 * NULL to signify that the child socket is not yet
			 * created; the request stays around until the
			 * handshake completes or is aborted.
			 */
			req->sk = NULL;
			req = NULL;
		}
		spin_unlock_bh(&queue->fastopenq.lock);
	}

out:
	release_sock(sk);
	if (newsk && mem_cgroup_sockets_enabled) {
		int amt;

		/* atomically get the memory usage, set and charge the
		 * newsk->sk_memcg.
		 */
		lock_sock(newsk);

		/* The socket has not been accepted yet, no need to look at
		 * newsk->sk_wmem_queued.
		 */
		amt = sk_mem_pages(newsk->sk_forward_alloc +
				   atomic_read(&newsk->sk_rmem_alloc));
		mem_cgroup_sk_alloc(newsk);
		if (newsk->sk_memcg && amt)
			mem_cgroup_charge_skmem(newsk->sk_memcg, amt,
						GFP_KERNEL | __GFP_NOFAIL);

		release_sock(newsk);
	}
	if (req)
		reqsk_put(req);
	return newsk;
out_err:
	newsk = NULL;
	req = NULL;
	*err = error;
	goto out;
}
EXPORT_SYMBOL(inet_csk_accept);

/*
 * Using different timers for retransmit, delayed acks and probes
 * We may wish use just one timer maintaining a list of expire jiffies
 * to optimize.
 */
void inet_csk_init_xmit_timers(struct sock *sk,
			       void (*retransmit_handler)(struct timer_list *t),
			       void (*delack_handler)(struct timer_list *t),
			       void (*keepalive_handler)(struct timer_list *t))
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
	timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
	timer_setup(&sk->sk_timer, keepalive_handler, 0);
	icsk->icsk_pending = icsk->icsk_ack.pending = 0;
}
EXPORT_SYMBOL(inet_csk_init_xmit_timers);

void inet_csk_clear_xmit_timers(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_pending = icsk->icsk_ack.pending = 0;

	sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
	sk_stop_timer(sk, &icsk->icsk_delack_timer);
	sk_stop_timer(sk, &sk->sk_timer);
}
EXPORT_SYMBOL(inet_csk_clear_xmit_timers);

void inet_csk_delete_keepalive_timer(struct sock *sk)
{
	sk_stop_timer(sk, &sk->sk_timer);
}
EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);

void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
{
	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
}
EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);

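/* Build the IPv4 route for a reply (typically the SYN-ACK) on behalf of a
 * request sock, honouring any source-routing option carried in the request.
 */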
struct dst_entry *inet_csk_route_req(const struct sock *sk,
				     struct flowi4 *fl4,
				     const struct request_sock *req)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct net *net = read_pnet(&ireq->ireq_net);
	struct ip_options_rcu *opt;
	struct rtable *rt;

	rcu_read_lock();
	opt = rcu_dereference(ireq->ireq_opt);

	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   sk->sk_protocol, inet_sk_flowi_flags(sk),
			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
			   ireq->ir_loc_addr, ireq->ir_rmt_port,
			   htons(ireq->ir_num), sk->sk_uid);
	security_req_classify_flow(req, flowi4_to_flowi_common(fl4));
	rt = ip_route_output_flow(net, fl4, sk);
	if (IS_ERR(rt))
		goto no_route;
	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
		goto route_err;
	rcu_read_unlock();
	return &rt->dst;

route_err:
	ip_rt_put(rt);
no_route:
	rcu_read_unlock();
	__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
	return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_req);

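/* Like inet_csk_route_req(), but the flow is built in the child socket's
 * cork so the route can be reused once the connection is established.
 */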
struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
					    struct sock *newsk,
					    const struct request_sock *req)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct net *net = read_pnet(&ireq->ireq_net);
	struct inet_sock *newinet = inet_sk(newsk);
	struct ip_options_rcu *opt;
	struct flowi4 *fl4;
	struct rtable *rt;

	opt = rcu_dereference(ireq->ireq_opt);
	fl4 = &newinet->cork.fl.u.ip4;

	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   sk->sk_protocol, inet_sk_flowi_flags(sk),
			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
			   ireq->ir_loc_addr, ireq->ir_rmt_port,
			   htons(ireq->ir_num), sk->sk_uid);
	security_req_classify_flow(req, flowi4_to_flowi_common(fl4));
	rt = ip_route_output_flow(net, fl4, sk);
	if (IS_ERR(rt))
		goto no_route;
	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
		goto route_err;
	return &rt->dst;

route_err:
	ip_rt_put(rt);
no_route:
	__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
	return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);

/* Decide when to expire the request and when to resend SYN-ACK */
static void syn_ack_recalc(struct request_sock *req,
			   const int max_syn_ack_retries,
			   const u8 rskq_defer_accept,
			   int *expire, int *resend)
{
	if (!rskq_defer_accept) {
		*expire = req->num_timeout >= max_syn_ack_retries;
		*resend = 1;
		return;
	}
	*expire = req->num_timeout >= max_syn_ack_retries &&
		  (!inet_rsk(req)->acked || req->num_timeout >= rskq_defer_accept);
	/* Do not resend while waiting for data after ACK,
	 * start to resend on end of deferring period to give
	 * last chance for data or ACK to prevail.
	 */
	*resend = !inet_rsk(req)->acked ||
		  req->num_timeout >= rskq_defer_accept - 1;
}

int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
{
	int err = req->rsk_ops->rtx_syn_ack(parent, req);

	if (!err)
		req->num_retrans++;
	return err;
}
EXPORT_SYMBOL(inet_rtx_syn_ack);

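/* Clone a request sock onto a new listener for reuseport migration.  The
 * clone takes over the reference on @sk that reuseport_migrate_sock() took;
 * on allocation failure that reference is dropped here.
 */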
static struct request_sock *inet_reqsk_clone(struct request_sock *req,
					     struct sock *sk)
{
	struct sock *req_sk, *nreq_sk;
	struct request_sock *nreq;

	nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN);
	if (!nreq) {
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);

		/* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */
		sock_put(sk);
		return NULL;
	}

	req_sk = req_to_sk(req);
	nreq_sk = req_to_sk(nreq);

	memcpy(nreq_sk, req_sk,
	       offsetof(struct sock, sk_dontcopy_begin));
	memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end,
	       req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end));

	sk_node_init(&nreq_sk->sk_node);
	nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping;
#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
	nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping;
#endif
	nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu;

	nreq->rsk_listener = sk;

	/* We need not acquire fastopenq->lock
	 * because the child socket is locked in inet_csk_listen_stop().
	 */
	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(nreq)->tfo_listener)
		rcu_assign_pointer(tcp_sk(nreq->sk)->fastopen_rsk, nreq);

	return nreq;
}

static void reqsk_queue_migrated(struct request_sock_queue *queue,
				 const struct request_sock *req)
{
	if (req->num_timeout == 0)
		atomic_inc(&queue->young);
	atomic_inc(&queue->qlen);
}

static void reqsk_migrate_reset(struct request_sock *req)
{
	req->saved_syn = NULL;
#if IS_ENABLED(CONFIG_IPV6)
	inet_rsk(req)->ipv6_opt = NULL;
	inet_rsk(req)->pktopts = NULL;
#else
	inet_rsk(req)->ireq_opt = NULL;
#endif
}

/* return true if req was found in the ehash table */
static bool reqsk_queue_unlink(struct request_sock *req)
{
	struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
	bool found = false;

	if (sk_hashed(req_to_sk(req))) {
		spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);

		spin_lock(lock);
		found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
		spin_unlock(lock);
	}
	if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
		reqsk_put(req);
	return found;
}

bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
{
	bool unlinked = reqsk_queue_unlink(req);

	if (unlinked) {
		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
		reqsk_put(req);
	}
	return unlinked;
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);

void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
{
	inet_csk_reqsk_queue_drop(sk, req);
	reqsk_put(req);
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);

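/* Per-request timer: retransmit the SYN-ACK a bounded number of times, prune
 * old entries when the accept queue is under pressure, and migrate the
 * request to another reuseport listener if the original one is going away.
 */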
static void reqsk_timer_handler(struct timer_list *t)
{
	struct request_sock *req = from_timer(req, t, rsk_timer);
	struct request_sock *nreq = NULL, *oreq = req;
	struct sock *sk_listener = req->rsk_listener;
	struct inet_connection_sock *icsk;
	struct request_sock_queue *queue;
	struct net *net;
	int max_syn_ack_retries, qlen, expire = 0, resend = 0;

	if (inet_sk_state_load(sk_listener) != TCP_LISTEN) {
		struct sock *nsk;

		nsk = reuseport_migrate_sock(sk_listener, req_to_sk(req), NULL);
		if (!nsk)
			goto drop;

		nreq = inet_reqsk_clone(req, nsk);
		if (!nreq)
			goto drop;

		/* The new timer for the cloned req can decrease the 2
		 * by calling inet_csk_reqsk_queue_drop_and_put(), so
		 * hold another count to prevent use-after-free and
		 * call reqsk_put() just before return.
		 */
		refcount_set(&nreq->rsk_refcnt, 2 + 1);
		timer_setup(&nreq->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
		reqsk_queue_migrated(&inet_csk(nsk)->icsk_accept_queue, req);

		req = nreq;
		sk_listener = nsk;
	}

	icsk = inet_csk(sk_listener);
	net = sock_net(sk_listener);
	max_syn_ack_retries = icsk->icsk_syn_retries ? :
		READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
	/* Normally all the openreqs are young and become mature
	 * (i.e. converted to established socket) for first timeout.
	 * If synack was not acknowledged for 1 second, it means
	 * one of the following things: synack was lost, ack was lost,
	 * rtt is high or nobody planned to ack (i.e. synflood).
	 * When server is a bit loaded, queue is populated with old
	 * open requests, reducing effective size of queue.
	 * When server is well loaded, queue size reduces to zero
	 * after several minutes of work. It is not synflood,
	 * it is normal operation. The solution is pruning
	 * too old entries overriding normal timeout, when
	 * situation becomes dangerous.
	 *
	 * Essentially, we reserve half of room for young
	 * embrions; and abort old ones without pity, if old
	 * ones are about to clog our table.
	 */
	queue = &icsk->icsk_accept_queue;
	qlen = reqsk_queue_len(queue);
	if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
		int young = reqsk_queue_len_young(queue) << 1;

		while (max_syn_ack_retries > 2) {
			if (qlen < young)
				break;
			max_syn_ack_retries--;
			young <<= 1;
		}
	}
	syn_ack_recalc(req, max_syn_ack_retries, READ_ONCE(queue->rskq_defer_accept),
		       &expire, &resend);
	req->rsk_ops->syn_ack_timeout(req);
	if (!expire &&
	    (!resend ||
	     !inet_rtx_syn_ack(sk_listener, req) ||
	     inet_rsk(req)->acked)) {
		if (req->num_timeout++ == 0)
			atomic_dec(&queue->young);
		mod_timer(&req->rsk_timer, jiffies + reqsk_timeout(req, TCP_RTO_MAX));

		if (!nreq)
			return;

		if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
			/* delete timer */
			inet_csk_reqsk_queue_drop(sk_listener, nreq);
			goto no_ownership;
		}

		__NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQSUCCESS);
		reqsk_migrate_reset(oreq);
		reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq);
		reqsk_put(oreq);

		reqsk_put(nreq);
		return;
	}

	/* Even if we can clone the req, we may need not retransmit any more
	 * SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc), or another
	 * CPU may win the "own_req" race so that inet_ehash_insert() fails.
	 */
	if (nreq) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQFAILURE);
no_ownership:
		reqsk_migrate_reset(nreq);
		reqsk_queue_removed(queue, nreq);
		__reqsk_free(nreq);
	}

drop:
	inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
}

static void reqsk_queue_hash_req(struct request_sock *req,
				 unsigned long timeout)
{
	timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
	mod_timer(&req->rsk_timer, jiffies + timeout);

	inet_ehash_insert(req_to_sk(req), NULL, NULL);
	/* before letting lookups find us, make sure all req fields
	 * are committed to memory and refcnt initialized.
	 */
	smp_wmb();
	refcount_set(&req->rsk_refcnt, 2 + 1);
}

void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
				   unsigned long timeout)
{
	reqsk_queue_hash_req(req, timeout);
	inet_csk_reqsk_queue_added(sk);
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);

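/* Give the attached ULP, if any, a chance to set itself up on the freshly
 * cloned child socket.
 */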
static void inet_clone_ulp(const struct request_sock *req, struct sock *newsk,
			   const gfp_t priority)
{
	struct inet_connection_sock *icsk = inet_csk(newsk);

	if (!icsk->icsk_ulp_ops)
		return;

	if (icsk->icsk_ulp_ops->clone)
		icsk->icsk_ulp_ops->clone(req, newsk, priority);
}

/**
 *	inet_csk_clone_lock - clone an inet socket, and lock its clone
 *	@sk: the socket to clone
 *	@req: request_sock
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */
struct sock *inet_csk_clone_lock(const struct sock *sk,
				 const struct request_sock *req,
				 const gfp_t priority)
{
	struct sock *newsk = sk_clone_lock(sk, priority);

	if (newsk) {
		struct inet_connection_sock *newicsk = inet_csk(newsk);

		inet_sk_set_state(newsk, TCP_SYN_RECV);
		newicsk->icsk_bind_hash = NULL;

		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
		inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
		inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);

		/* listeners have SOCK_RCU_FREE, not the children */
		sock_reset_flag(newsk, SOCK_RCU_FREE);

		inet_sk(newsk)->mc_list = NULL;

		newsk->sk_mark = inet_rsk(req)->ir_mark;
		atomic64_set(&newsk->sk_cookie,
			     atomic64_read(&inet_rsk(req)->ir_cookie));

		newicsk->icsk_retransmits = 0;
		newicsk->icsk_backoff = 0;
		newicsk->icsk_probes_out = 0;
		newicsk->icsk_probes_tstamp = 0;

		/* Deinitialize accept_queue to trap illegal accesses. */
		memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));

		inet_clone_ulp(req, newsk, priority);

		security_inet_csk_clone(newsk, req);
	}
	return newsk;
}
EXPORT_SYMBOL_GPL(inet_csk_clone_lock);

/*
 * At this point, there should be no process reference to this
 * socket, and thus no user references at all.  Therefore we
 * can assume the socket waitqueue is inactive and nobody will
 * try to jump onto it.
 */
void inet_csk_destroy_sock(struct sock *sk)
{
	WARN_ON(sk->sk_state != TCP_CLOSE);
	WARN_ON(!sock_flag(sk, SOCK_DEAD));

	/* It cannot be in hash table! */
	WARN_ON(!sk_unhashed(sk));

	/* If it has not 0 inet_sk(sk)->inet_num, it must be bound */
	WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash);

	sk->sk_prot->destroy(sk);

	sk_stream_kill_queues(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);

	this_cpu_dec(*sk->sk_prot->orphan_count);

	sock_put(sk);
}
EXPORT_SYMBOL(inet_csk_destroy_sock);

/* This function allows to force a closure of a socket after the call to
 * tcp/dccp_create_openreq_child().
 */
void inet_csk_prepare_forced_close(struct sock *sk)
	__releases(&sk->sk_lock.slock)
{
	/* sk_clone_lock locked the socket and set refcnt to 2 */
	bh_unlock_sock(sk);
	sock_put(sk);
	inet_csk_prepare_for_destroy_sock(sk);
	inet_sk(sk)->inet_num = 0;
}
EXPORT_SYMBOL(inet_csk_prepare_forced_close);

int inet_csk_listen_start(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = -EADDRINUSE;

	reqsk_queue_alloc(&icsk->icsk_accept_queue);

	sk->sk_ack_backlog = 0;
	inet_csk_delack_init(sk);

	if (sk->sk_txrehash == SOCK_TXREHASH_DEFAULT)
		sk->sk_txrehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);

	/* There is race window here: we announce ourselves listening,
	 * but this transition is still not validated by get_port().
	 * It is OK, because this socket enters to hash table only
	 * after validation is complete.
	 */
	inet_sk_state_store(sk, TCP_LISTEN);
	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
		inet->inet_sport = htons(inet->inet_num);

		sk_dst_reset(sk);
		err = sk->sk_prot->hash(sk);

		if (likely(!err))
			return 0;
	}

	inet_sk_set_state(sk, TCP_CLOSE);
	return err;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);

static void inet_child_forget(struct sock *sk, struct request_sock *req,
			      struct sock *child)
{
	sk->sk_prot->disconnect(child, O_NONBLOCK);

	sock_orphan(child);

	this_cpu_inc(*sk->sk_prot->orphan_count);

	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
		BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req);
		BUG_ON(sk != req->rsk_listener);

		/* Clear the child's back-pointer to the request before the
		 * child is destroyed, so the destroy path does not touch
		 * req again; the caller still owns and frees req.
		 */
		RCU_INIT_POINTER(tcp_sk(child)->fastopen_rsk, NULL);
	}
	inet_csk_destroy_sock(child);
}

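/* Append an accepted child to the listener's accept queue, or drop it if the
 * listener has already left TCP_LISTEN.  Returns the child on success, NULL
 * if it was discarded.
 */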
struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
				      struct request_sock *req,
				      struct sock *child)
{
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;

	spin_lock(&queue->rskq_lock);
	if (unlikely(sk->sk_state != TCP_LISTEN)) {
		inet_child_forget(sk, req, child);
		child = NULL;
	} else {
		req->sk = child;
		req->dl_next = NULL;
		if (queue->rskq_accept_head == NULL)
			WRITE_ONCE(queue->rskq_accept_head, req);
		else
			queue->rskq_accept_tail->dl_next = req;
		queue->rskq_accept_tail = req;
		sk_acceptq_added(sk);
	}
	spin_unlock(&queue->rskq_lock);
	return child;
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_add);

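/* Final step of the handshake: unhash the request, then either queue the
 * child on the (possibly migrated) listener's accept queue or drop it when
 * this CPU lost the "own_req" race or the queueing failed.
 */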
struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
					 struct request_sock *req, bool own_req)
{
	if (own_req) {
		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
		reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req);

		if (sk != req->rsk_listener) {
			/* another listening sk has been selected,
			 * migrate the req to it.
			 */
			struct request_sock *nreq;

			/* hold a refcnt for the nreq->rsk_listener
			 * which is assigned in inet_reqsk_clone()
			 */
			sock_hold(sk);
			nreq = inet_reqsk_clone(req, sk);
			if (!nreq) {
				inet_child_forget(sk, req, child);
				goto child_put;
			}

			refcount_set(&nreq->rsk_refcnt, 1);
			if (inet_csk_reqsk_queue_add(sk, nreq, child)) {
				__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQSUCCESS);
				reqsk_migrate_reset(req);
				reqsk_put(req);
				return child;
			}

			__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
			reqsk_migrate_reset(nreq);
			__reqsk_free(nreq);
		} else if (inet_csk_reqsk_queue_add(sk, req, child)) {
			return child;
		}
	}
	/* Too bad, another child took ownership of the request, undo. */
child_put:
	bh_unlock_sock(child);
	sock_put(child);
	return NULL;
}
EXPORT_SYMBOL(inet_csk_complete_hashdance);

/*
 *	This routine closes sockets which have been at least partially
 *	opened, but not yet accepted.
 */
void inet_csk_listen_stop(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
	struct request_sock *next, *req;

	/* Following specs, it would be better either to send FIN
	 * (and enter FIN-WAIT-1, it is normal close)
	 * or to send active reset (abort).
	 * Certainly, it is pretty dangerous while synflood, but it is
	 * bad justification for our negligence 8)
	 * To be honest, we are not able to make either
	 * of the variants now.			--ANK
	 */
	while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
		struct sock *child = req->sk, *nsk;
		struct request_sock *nreq;

		local_bh_disable();
		bh_lock_sock(child);
		WARN_ON(sock_owned_by_user(child));
		sock_hold(child);

		nsk = reuseport_migrate_sock(sk, child, NULL);
		if (nsk) {
			nreq = inet_reqsk_clone(req, nsk);
			if (nreq) {
				refcount_set(&nreq->rsk_refcnt, 1);

				if (inet_csk_reqsk_queue_add(nsk, nreq, child)) {
					__NET_INC_STATS(sock_net(nsk),
							LINUX_MIB_TCPMIGRATEREQSUCCESS);
					reqsk_migrate_reset(req);
				} else {
					__NET_INC_STATS(sock_net(nsk),
							LINUX_MIB_TCPMIGRATEREQFAILURE);
					reqsk_migrate_reset(nreq);
					__reqsk_free(nreq);
				}

				/* inet_csk_reqsk_queue_add() has already
				 * called inet_child_forget() on failure case.
				 */
				goto skip_child_forget;
			}
		}

		inet_child_forget(sk, req, child);
skip_child_forget:
		reqsk_put(req);
		bh_unlock_sock(child);
		local_bh_enable();
		sock_put(child);

		cond_resched();
	}
	if (queue->fastopenq.rskq_rst_head) {
		/* Free all the reqs queued in rskq_rst_head. */
		spin_lock_bh(&queue->fastopenq.lock);
		req = queue->fastopenq.rskq_rst_head;
		queue->fastopenq.rskq_rst_head = NULL;
		spin_unlock_bh(&queue->fastopenq.lock);
		while (req != NULL) {
			next = req->dl_next;
			reqsk_put(req);
			req = next;
		}
	}
	WARN_ON_ONCE(sk->sk_ack_backlog);
}
EXPORT_SYMBOL_GPL(inet_csk_listen_stop);

void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
	const struct inet_sock *inet = inet_sk(sk);

	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = inet->inet_daddr;
	sin->sin_port = inet->inet_dport;
}
EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);

static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;
	struct flowi4 *fl4;
	struct rtable *rt;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	fl4 = &fl->u.ip4;
	rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr,
				   inet->inet_saddr, inet->inet_dport,
				   inet->inet_sport, sk->sk_protocol,
				   RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
	if (IS_ERR(rt))
		rt = NULL;
	if (rt)
		sk_setup_caps(sk, &rt->dst);
	rcu_read_unlock();

	/* &rt->dst is NULL when rt is NULL, as dst is the first member */
	return &rt->dst;
}

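/* Propagate a PMTU update to the cached route, rebuilding the route first
 * (and re-checking afterwards) if the cached dst has been invalidated.
 */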
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);
	struct inet_sock *inet = inet_sk(sk);

	if (!dst) {
		dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
		if (!dst)
			goto out;
	}
	dst->ops->update_pmtu(dst, sk, NULL, mtu, true);

	dst = __sk_dst_check(sk, 0);
	if (!dst)
		dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
out:
	return dst;
}
EXPORT_SYMBOL_GPL(inet_csk_update_pmtu);