// SPDX-License-Identifier: GPL-2.0
/*
 * Implementation of the Transmission Control Protocol (TCP):
 * input (receive-path and ACK-processing) engine.
 */
0065 #define pr_fmt(fmt) "TCP: " fmt
0066
0067 #include <linux/mm.h>
0068 #include <linux/slab.h>
0069 #include <linux/module.h>
0070 #include <linux/sysctl.h>
0071 #include <linux/kernel.h>
0072 #include <linux/prefetch.h>
0073 #include <net/dst.h>
0074 #include <net/tcp.h>
0075 #include <net/inet_common.h>
0076 #include <linux/ipsec.h>
0077 #include <asm/unaligned.h>
0078 #include <linux/errqueue.h>
0079 #include <trace/events/tcp.h>
0080 #include <linux/jump_label_ratelimit.h>
0081 #include <net/busy_poll.h>
0082 #include <net/mptcp.h>
0083
0084 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
0085
0086 #define FLAG_DATA 0x01
0087 #define FLAG_WIN_UPDATE 0x02
0088 #define FLAG_DATA_ACKED 0x04
0089 #define FLAG_RETRANS_DATA_ACKED 0x08
0090 #define FLAG_SYN_ACKED 0x10
0091 #define FLAG_DATA_SACKED 0x20
0092 #define FLAG_ECE 0x40
0093 #define FLAG_LOST_RETRANS 0x80
0094 #define FLAG_SLOWPATH 0x100
0095 #define FLAG_ORIG_SACK_ACKED 0x200
0096 #define FLAG_SND_UNA_ADVANCED 0x400
0097 #define FLAG_DSACKING_ACK 0x800
0098 #define FLAG_SET_XMIT_TIMER 0x1000
0099 #define FLAG_SACK_RENEGING 0x2000
0100 #define FLAG_UPDATE_TS_RECENT 0x4000
0101 #define FLAG_NO_CHALLENGE_ACK 0x8000
0102 #define FLAG_ACK_MAYBE_DELAYED 0x10000
0103 #define FLAG_DSACK_TLP 0x20000
0104
0105 #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
0106 #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
0107 #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
0108 #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
0109
0110 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
0111 #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
0112
0113 #define REXMIT_NONE 0
0114 #define REXMIT_LOST 1
0115 #define REXMIT_NEW 2
0116
0117 #if IS_ENABLED(CONFIG_TLS_DEVICE)
0118 static DEFINE_STATIC_KEY_DEFERRED_FALSE(clean_acked_data_enabled, HZ);
0119
0120 void clean_acked_data_enable(struct inet_connection_sock *icsk,
0121 void (*cad)(struct sock *sk, u32 ack_seq))
0122 {
0123 icsk->icsk_clean_acked = cad;
0124 static_branch_deferred_inc(&clean_acked_data_enabled);
0125 }
0126 EXPORT_SYMBOL_GPL(clean_acked_data_enable);
0127
0128 void clean_acked_data_disable(struct inet_connection_sock *icsk)
0129 {
0130 static_branch_slow_dec_deferred(&clean_acked_data_enabled);
0131 icsk->icsk_clean_acked = NULL;
0132 }
0133 EXPORT_SYMBOL_GPL(clean_acked_data_disable);
0134
0135 void clean_acked_data_flush(void)
0136 {
0137 static_key_deferred_flush(&clean_acked_data_enabled);
0138 }
0139 EXPORT_SYMBOL_GPL(clean_acked_data_flush);
0140 #endif
0141
0142 #ifdef CONFIG_CGROUP_BPF
0143 static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
0144 {
0145 bool unknown_opt = tcp_sk(sk)->rx_opt.saw_unknown &&
0146 BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
0147 BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG);
0148 bool parse_all_opt = BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
0149 BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
0150 struct bpf_sock_ops_kern sock_ops;
0151
0152 if (likely(!unknown_opt && !parse_all_opt))
0153 return;
0154
0155
0156
0157
0158
0159 switch (sk->sk_state) {
0160 case TCP_SYN_RECV:
0161 case TCP_SYN_SENT:
0162 case TCP_LISTEN:
0163 return;
0164 }
0165
0166 sock_owned_by_me(sk);
0167
0168 memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
0169 sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB;
0170 sock_ops.is_fullsock = 1;
0171 sock_ops.sk = sk;
0172 bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
0173
0174 BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
0175 }
0176
0177 static void bpf_skops_established(struct sock *sk, int bpf_op,
0178 struct sk_buff *skb)
0179 {
0180 struct bpf_sock_ops_kern sock_ops;
0181
0182 sock_owned_by_me(sk);
0183
0184 memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
0185 sock_ops.op = bpf_op;
0186 sock_ops.is_fullsock = 1;
0187 sock_ops.sk = sk;
0188
0189 if (skb)
0190 bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
0191
0192 BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
0193 }
0194 #else
0195 static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
0196 {
0197 }
0198
0199 static void bpf_skops_established(struct sock *sk, int bpf_op,
0200 struct sk_buff *skb)
0201 {
0202 }
0203 #endif
0204
0205 static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
0206 unsigned int len)
0207 {
0208 static bool __once __read_mostly;
0209
0210 if (!__once) {
0211 struct net_device *dev;
0212
0213 __once = true;
0214
0215 rcu_read_lock();
0216 dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
0217 if (!dev || len >= dev->mtu)
0218 pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
0219 dev ? dev->name : "Unknown driver");
0220 rcu_read_unlock();
0221 }
0222 }

/* Adapt the MSS value used for delayed-ACK decisions (icsk_ack.rcv_mss) to
 * the segment sizes the peer actually sends.
 */
0227 static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
0228 {
0229 struct inet_connection_sock *icsk = inet_csk(sk);
0230 const unsigned int lss = icsk->icsk_ack.last_seg_size;
0231 unsigned int len;
0232
0233 icsk->icsk_ack.last_seg_size = 0;
0234
0235
0236
0237
0238 len = skb_shinfo(skb)->gso_size ? : skb->len;
0239 if (len >= icsk->icsk_ack.rcv_mss) {
0240 icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
0241 tcp_sk(sk)->advmss);
0242
0243 if (unlikely(len > icsk->icsk_ack.rcv_mss +
0244 MAX_TCP_OPTION_SPACE))
0245 tcp_gro_dev_warn(sk, skb, len);
0246 } else {
0247
0248
0249
0250
0251
0252 len += skb->data - skb_transport_header(skb);
0253 if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
0254
0255
0256
0257
0258
0259 (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
0260 !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
0261
0262
0263
0264
0265 len -= tcp_sk(sk)->tcp_header_len;
0266 icsk->icsk_ack.last_seg_size = len;
0267 if (len == lss) {
0268 icsk->icsk_ack.rcv_mss = len;
0269 return;
0270 }
0271 }
0272 if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
0273 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
0274 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
0275 }
0276 }
0277
0278 static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
0279 {
0280 struct inet_connection_sock *icsk = inet_csk(sk);
0281 unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
0282
0283 if (quickacks == 0)
0284 quickacks = 2;
0285 quickacks = min(quickacks, max_quickacks);
0286 if (quickacks > icsk->icsk_ack.quick)
0287 icsk->icsk_ack.quick = quickacks;
0288 }
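
/* A minimal sketch of the quick-ACK budget computed above (helper name and
 * values are illustrative, not kernel API):
 *
 *	static unsigned int quickack_budget(unsigned int rcv_wnd,
 *					    unsigned int rcv_mss,
 *					    unsigned int max_quickacks)
 *	{
 *		unsigned int q = rcv_wnd / (2 * rcv_mss);
 *
 *		if (q == 0)
 *			q = 2;
 *		return q < max_quickacks ? q : max_quickacks;
 *	}
 *
 * e.g. quickack_budget(65535, 1460, 16) == 16: up to 16 segments get
 * immediate ACKs before the delayed-ACK machinery takes over again.
 */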
0289
0290 void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
0291 {
0292 struct inet_connection_sock *icsk = inet_csk(sk);
0293
0294 tcp_incr_quickack(sk, max_quickacks);
0295 inet_csk_exit_pingpong_mode(sk);
0296 icsk->icsk_ack.ato = TCP_ATO_MIN;
0297 }
0298 EXPORT_SYMBOL(tcp_enter_quickack_mode);
0299
0300
0301
0302
0303
0304 static bool tcp_in_quickack_mode(struct sock *sk)
0305 {
0306 const struct inet_connection_sock *icsk = inet_csk(sk);
0307 const struct dst_entry *dst = __sk_dst_get(sk);
0308
0309 return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
0310 (icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
0311 }
0312
0313 static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
0314 {
0315 if (tp->ecn_flags & TCP_ECN_OK)
0316 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
0317 }
0318
0319 static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb)
0320 {
0321 if (tcp_hdr(skb)->cwr) {
0322 tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
0323
0324
0325
0326
0327
0328 if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
0329 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
0330 }
0331 }
0332
0333 static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
0334 {
0335 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
0336 }
0337
0338 static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
0339 {
0340 struct tcp_sock *tp = tcp_sk(sk);
0341
0342 switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
0343 case INET_ECN_NOT_ECT:
0344
0345
0346
0347
0348 if (tp->ecn_flags & TCP_ECN_SEEN)
0349 tcp_enter_quickack_mode(sk, 2);
0350 break;
0351 case INET_ECN_CE:
0352 if (tcp_ca_needs_ecn(sk))
0353 tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
0354
0355 if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
0356
0357 tcp_enter_quickack_mode(sk, 2);
0358 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
0359 }
0360 tp->ecn_flags |= TCP_ECN_SEEN;
0361 break;
0362 default:
0363 if (tcp_ca_needs_ecn(sk))
0364 tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
0365 tp->ecn_flags |= TCP_ECN_SEEN;
0366 break;
0367 }
0368 }
0369
0370 static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
0371 {
0372 if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
0373 __tcp_ecn_check_ce(sk, skb);
0374 }
0375
0376 static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
0377 {
0378 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
0379 tp->ecn_flags &= ~TCP_ECN_OK;
0380 }
0381
0382 static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
0383 {
0384 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
0385 tp->ecn_flags &= ~TCP_ECN_OK;
0386 }
0387
0388 static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
0389 {
0390 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
0391 return true;
0392 return false;
0393 }
0394
0395
0396
0397
0398
0399
0400 static void tcp_sndbuf_expand(struct sock *sk)
0401 {
0402 const struct tcp_sock *tp = tcp_sk(sk);
0403 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
0404 int sndmem, per_mss;
0405 u32 nr_segs;
0406
0407
0408
0409
0410 per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
0411 MAX_TCP_HEADER +
0412 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
0413
0414 per_mss = roundup_pow_of_two(per_mss) +
0415 SKB_DATA_ALIGN(sizeof(struct sk_buff));
0416
0417 nr_segs = max_t(u32, TCP_INIT_CWND, tcp_snd_cwnd(tp));
0418 nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
0419
0420
0421
0422
0423
0424 sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
0425 sndmem *= nr_segs * per_mss;
0426
0427 if (sk->sk_sndbuf < sndmem)
0428 WRITE_ONCE(sk->sk_sndbuf,
0429 min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2])));
0430 }
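
/* A simplified view of the sizing rule above (a sketch, not the exact
 * byte-accurate computation):
 *
 *	sndbuf_target = expand_factor * nr_segs * per_mss
 *
 * where per_mss is one MSS plus header, struct sk_buff and skb_shared_info
 * overhead rounded up to a power of two, nr_segs is
 * max(TCP_INIT_CWND, cwnd, reordering + 1), and expand_factor defaults to 2
 * so the buffer can hold both the data in flight and a batch queued behind
 * it.  E.g. with an effective per_mss of ~2 KB and cwnd = 10 the target is
 * about 2 * 10 * 2 KB = 40 KB, capped by tcp_wmem[2].
 */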

/* Receive-window auto-tuning: decide whether rcv_ssthresh may grow in
 * response to this skb.  The skb's true size is checked against its payload
 * at successive halvings of the tcp_rmem[2] budget; as long as the overhead
 * stays proportionate, growing the advertised window is considered safe.
 */
0458 static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
0459 unsigned int skbtruesize)
0460 {
0461 struct tcp_sock *tp = tcp_sk(sk);
0462
0463 int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
0464 int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1;
0465
0466 while (tp->rcv_ssthresh <= window) {
0467 if (truesize <= skb->len)
0468 return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
0469
0470 truesize >>= 1;
0471 window >>= 1;
0472 }
0473 return 0;
0474 }
0475
0476
0477
0478
0479
0480
0481
0482 static u32 truesize_adjust(bool adjust, const struct sk_buff *skb)
0483 {
0484 u32 truesize = skb->truesize;
0485
0486 if (adjust && !skb_headlen(skb)) {
0487 truesize -= SKB_TRUESIZE(skb_end_offset(skb));
0488
0489 if (unlikely((int)truesize < (int)skb->len))
0490 truesize = skb->truesize;
0491 }
0492 return truesize;
0493 }
0494
0495 static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
0496 bool adjust)
0497 {
0498 struct tcp_sock *tp = tcp_sk(sk);
0499 int room;
0500
0501 room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;
0502
0503 if (room <= 0)
0504 return;
0505
0506
0507 if (!tcp_under_memory_pressure(sk)) {
0508 unsigned int truesize = truesize_adjust(adjust, skb);
0509 int incr;
0510
0511
0512
0513
0514 if (tcp_win_from_space(sk, truesize) <= skb->len)
0515 incr = 2 * tp->advmss;
0516 else
0517 incr = __tcp_grow_window(sk, skb, truesize);
0518
0519 if (incr) {
0520 incr = max_t(int, incr, 2 * skb->len);
0521 tp->rcv_ssthresh += min(room, incr);
0522 inet_csk(sk)->icsk_ack.quick |= 1;
0523 }
0524 } else {
0525
0526
0527
0528 tcp_adjust_rcv_ssthresh(sk);
0529 }
0530 }
0531
0532
0533
0534
0535 static void tcp_init_buffer_space(struct sock *sk)
0536 {
0537 int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
0538 struct tcp_sock *tp = tcp_sk(sk);
0539 int maxwin;
0540
0541 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
0542 tcp_sndbuf_expand(sk);
0543
0544 tcp_mstamp_refresh(tp);
0545 tp->rcvq_space.time = tp->tcp_mstamp;
0546 tp->rcvq_space.seq = tp->copied_seq;
0547
0548 maxwin = tcp_full_space(sk);
0549
0550 if (tp->window_clamp >= maxwin) {
0551 tp->window_clamp = maxwin;
0552
0553 if (tcp_app_win && maxwin > 4 * tp->advmss)
0554 tp->window_clamp = max(maxwin -
0555 (maxwin >> tcp_app_win),
0556 4 * tp->advmss);
0557 }
0558
0559
0560 if (tcp_app_win &&
0561 tp->window_clamp > 2 * tp->advmss &&
0562 tp->window_clamp + tp->advmss > maxwin)
0563 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
0564
0565 tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
0566 tp->snd_cwnd_stamp = tcp_jiffies32;
0567 tp->rcvq_space.space = min3(tp->rcv_ssthresh, tp->rcv_wnd,
0568 (u32)TCP_INIT_CWND * tp->advmss);
0569 }
0570
0571
0572 static void tcp_clamp_window(struct sock *sk)
0573 {
0574 struct tcp_sock *tp = tcp_sk(sk);
0575 struct inet_connection_sock *icsk = inet_csk(sk);
0576 struct net *net = sock_net(sk);
0577 int rmem2;
0578
0579 icsk->icsk_ack.quick = 0;
0580 rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
0581
0582 if (sk->sk_rcvbuf < rmem2 &&
0583 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
0584 !tcp_under_memory_pressure(sk) &&
0585 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
0586 WRITE_ONCE(sk->sk_rcvbuf,
0587 min(atomic_read(&sk->sk_rmem_alloc), rmem2));
0588 }
0589 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
0590 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
0591 }

/* Initialize the guess for the peer's sending MSS (icsk_ack.rcv_mss), used
 * by the delayed-ACK machinery, before any data has been received.
 */
0600 void tcp_initialize_rcv_mss(struct sock *sk)
0601 {
0602 const struct tcp_sock *tp = tcp_sk(sk);
0603 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
0604
0605 hint = min(hint, tp->rcv_wnd / 2);
0606 hint = min(hint, TCP_MSS_DEFAULT);
0607 hint = max(hint, TCP_MIN_MSS);
0608
0609 inet_csk(sk)->icsk_ack.rcv_mss = hint;
0610 }
0611 EXPORT_SYMBOL(tcp_initialize_rcv_mss);
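
/* Worked example (illustrative values): with advmss = 1460, mss_cache = 1448
 * and rcv_wnd = 65535, the initial guess is
 * max(min(1460, 1448, 65535 / 2, TCP_MSS_DEFAULT = 536), TCP_MIN_MSS = 88)
 * = 536; rcv_mss then adapts upward in tcp_measure_rcv_mss() once full-sized
 * segments start arriving.
 */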

/* Receiver-side RTT estimator feeding receive-buffer auto-tuning.  When
 * win_dep is zero the sample enters a 7/8 EWMA; when win_dep is set
 * (window-limited samples) only smaller samples may pull the estimate down.
 */
0624 static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
0625 {
0626 u32 new_sample = tp->rcv_rtt_est.rtt_us;
0627 long m = sample;
0628
0629 if (new_sample != 0) {
0630
0631
0632
0633
0634
0635
0636
0637
0638
0639
0640 if (!win_dep) {
0641 m -= (new_sample >> 3);
0642 new_sample += m;
0643 } else {
0644 m <<= 3;
0645 if (m < new_sample)
0646 new_sample = m;
0647 }
0648 } else {
0649
0650 new_sample = m << 3;
0651 }
0652
0653 tp->rcv_rtt_est.rtt_us = new_sample;
0654 }
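
/* A condensed sketch of the filter above, with rtt8 holding 8 * RTT as in
 * rcv_rtt_est.rtt_us (helper name is illustrative, not kernel API):
 *
 *	static u32 rcv_rtt_filter(u32 rtt8, long sample_us, int win_dep)
 *	{
 *		if (!rtt8)
 *			return sample_us << 3;		// first sample
 *		if (!win_dep)				// 7/8 old + 1/8 new
 *			return rtt8 + (sample_us - (rtt8 >> 3));
 *		// window-limited sample: may only lower the estimate
 *		return min_t(u32, rtt8, sample_us << 3);
 *	}
 */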
0655
0656 static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
0657 {
0658 u32 delta_us;
0659
0660 if (tp->rcv_rtt_est.time == 0)
0661 goto new_measure;
0662 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
0663 return;
0664 delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
0665 if (!delta_us)
0666 delta_us = 1;
0667 tcp_rcv_rtt_update(tp, delta_us, 1);
0668
0669 new_measure:
0670 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
0671 tp->rcv_rtt_est.time = tp->tcp_mstamp;
0672 }
0673
0674 static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
0675 const struct sk_buff *skb)
0676 {
0677 struct tcp_sock *tp = tcp_sk(sk);
0678
0679 if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
0680 return;
0681 tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
0682
0683 if (TCP_SKB_CB(skb)->end_seq -
0684 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
0685 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
0686 u32 delta_us;
0687
0688 if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
0689 if (!delta)
0690 delta = 1;
0691 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
0692 tcp_rcv_rtt_update(tp, delta_us, 0);
0693 }
0694 }
0695 }

/* Dynamic Right-Sizing: adjust sk_rcvbuf and the window clamp so the receive
 * buffer can hold roughly what the sender is able to transmit in one RTT.
 */
0701 void tcp_rcv_space_adjust(struct sock *sk)
0702 {
0703 struct tcp_sock *tp = tcp_sk(sk);
0704 u32 copied;
0705 int time;
0706
0707 trace_tcp_rcv_space_adjust(sk);
0708
0709 tcp_mstamp_refresh(tp);
0710 time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
0711 if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
0712 return;
0713
0714
0715 copied = tp->copied_seq - tp->rcvq_space.seq;
0716 if (copied <= tp->rcvq_space.space)
0717 goto new_measure;
0718
0719
0720
0721
0722
0723
0724
0725
0726
0727
0728 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
0729 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
0730 int rcvmem, rcvbuf;
0731 u64 rcvwin, grow;
0732
0733
0734
0735
0736 rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
0737
0738
0739 grow = rcvwin * (copied - tp->rcvq_space.space);
0740 do_div(grow, tp->rcvq_space.space);
0741 rcvwin += (grow << 1);
0742
0743 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
0744 while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
0745 rcvmem += 128;
0746
0747 do_div(rcvwin, tp->advmss);
0748 rcvbuf = min_t(u64, rcvwin * rcvmem,
0749 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
0750 if (rcvbuf > sk->sk_rcvbuf) {
0751 WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
0752
0753
0754 tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
0755 }
0756 }
0757 tp->rcvq_space.space = copied;
0758
0759 new_measure:
0760 tp->rcvq_space.seq = tp->copied_seq;
0761 tp->rcvq_space.time = tp->tcp_mstamp;
0762 }
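
/* Rough worked example (illustrative numbers): if the application consumed
 * copied = 256 KB during the last RTT while rcvq_space.space was 128 KB, the
 * base target is rcvwin = 2 * 256 KB + 16 * advmss ~= 535 KB, and the grow
 * term adds another 2 * 535 KB ~= 1070 KB of headroom because consumption
 * doubled since the last measurement.  The result is converted to a buffer
 * size by multiplying the segment count (rcvwin / advmss) by the true
 * per-segment cost (rcvmem) and is capped by tcp_rmem[2].
 */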

/* Per-segment receive bookkeeping: schedule an ACK, refresh the rcv_mss and
 * receiver-RTT estimates, update the delayed-ACK timeout (ato), check for
 * ECN CE marks, and possibly grow the offered receive window.
 */
0774 static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
0775 {
0776 struct tcp_sock *tp = tcp_sk(sk);
0777 struct inet_connection_sock *icsk = inet_csk(sk);
0778 u32 now;
0779
0780 inet_csk_schedule_ack(sk);
0781
0782 tcp_measure_rcv_mss(sk, skb);
0783
0784 tcp_rcv_rtt_measure(tp);
0785
0786 now = tcp_jiffies32;
0787
0788 if (!icsk->icsk_ack.ato) {
0789
0790
0791
0792 tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
0793 icsk->icsk_ack.ato = TCP_ATO_MIN;
0794 } else {
0795 int m = now - icsk->icsk_ack.lrcvtime;
0796
0797 if (m <= TCP_ATO_MIN / 2) {
0798
0799 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
0800 } else if (m < icsk->icsk_ack.ato) {
0801 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
0802 if (icsk->icsk_ack.ato > icsk->icsk_rto)
0803 icsk->icsk_ack.ato = icsk->icsk_rto;
0804 } else if (m > icsk->icsk_rto) {
0805
0806
0807
0808 tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
0809 }
0810 }
0811 icsk->icsk_ack.lrcvtime = now;
0812
0813 tcp_ecn_check_ce(sk, skb);
0814
0815 if (skb->len >= 128)
0816 tcp_grow_window(sk, skb, true);
0817 }
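
/* A condensed sketch of the delayed-ACK timeout (ato) update above, where m
 * is the time since the previous data segment arrived:
 *
 *	if (m <= TCP_ATO_MIN / 2)		// back-to-back segments
 *		ato = ato / 2 + TCP_ATO_MIN / 2;
 *	else if (m < ato)			// stream slowing down
 *		ato = min(ato / 2 + m, icsk->icsk_rto);
 *	else if (m > icsk->icsk_rto)		// long idle gap
 *		enter quick-ACK mode and ACK immediately for a while;
 */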

/* Sender-side RTT estimation in the spirit of Jacobson/Karels and RFC 6298:
 * srtt_us holds 8 times the smoothed RTT, mdev_us tracks the smoothed mean
 * deviation, and rttvar_us is the deviation bound actually used for the RTO.
 */
0828 static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
0829 {
0830 struct tcp_sock *tp = tcp_sk(sk);
0831 long m = mrtt_us;
0832 u32 srtt = tp->srtt_us;

/* srtt_us is kept scaled by 8 and mdev_us settles at roughly four times the
 * mean deviation, so that rto = srtt/8 + rttvar below matches RFC 6298's
 * SRTT + 4*RTTVAR.
 */
0850 if (srtt != 0) {
0851 m -= (srtt >> 3);
0852 srtt += m;
0853 if (m < 0) {
0854 m = -m;
0855 m -= (tp->mdev_us >> 2);
0856
0857
0858
0859
0860
0861
0862
0863
0864 if (m > 0)
0865 m >>= 3;
0866 } else {
0867 m -= (tp->mdev_us >> 2);
0868 }
0869 tp->mdev_us += m;
0870 if (tp->mdev_us > tp->mdev_max_us) {
0871 tp->mdev_max_us = tp->mdev_us;
0872 if (tp->mdev_max_us > tp->rttvar_us)
0873 tp->rttvar_us = tp->mdev_max_us;
0874 }
0875 if (after(tp->snd_una, tp->rtt_seq)) {
0876 if (tp->mdev_max_us < tp->rttvar_us)
0877 tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
0878 tp->rtt_seq = tp->snd_nxt;
0879 tp->mdev_max_us = tcp_rto_min_us(sk);
0880
0881 tcp_bpf_rtt(sk);
0882 }
0883 } else {
0884
0885 srtt = m << 3;
0886 tp->mdev_us = m << 1;
0887 tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
0888 tp->mdev_max_us = tp->rttvar_us;
0889 tp->rtt_seq = tp->snd_nxt;
0890
0891 tcp_bpf_rtt(sk);
0892 }
0893 tp->srtt_us = max(1U, srtt);
0894 }
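
/* Worked example (illustrative numbers): on a first sample of 100 ms,
 * srtt_us = 8 * 100000 = 800000 and mdev_us = 2 * 100000; with the default
 * 200 ms RTO floor, rttvar_us = 200000 and the timeout becomes
 * srtt/8 + rttvar = 100 ms + 200 ms = 300 ms.  A second sample of 120 ms
 * yields an error of +20 ms, so srtt_us grows to 820000 (SRTT = 102.5 ms)
 * while mdev_us decays from 200000 toward the smaller observed deviation.
 */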
0895
0896 static void tcp_update_pacing_rate(struct sock *sk)
0897 {
0898 const struct tcp_sock *tp = tcp_sk(sk);
0899 u64 rate;
0900
0901
0902 rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);

/* rate ends up as mss * cwnd / srtt in bytes per second: the << 3 above
 * cancels the 8x scaling of srtt_us, and the / 100 turns the pacing ratio
 * sysctls (200 % in slow start, 120 % in congestion avoidance by default)
 * into a plain multiplier.
 */
0912 if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
0913 rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio);
0914 else
0915 rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio);
0916
0917 rate *= max(tcp_snd_cwnd(tp), tp->packets_out);
0918
0919 if (likely(tp->srtt_us))
0920 do_div(rate, tp->srtt_us);
0921
0922
0923
0924
0925
0926 WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
0927 sk->sk_max_pacing_rate));
0928 }
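
/* Worked example (illustrative numbers): with mss_cache = 1448, cwnd = 100
 * and SRTT = 10 ms, the raw rate is 1448 * 100 / 0.01 s ~= 14.5 MB/s; the
 * default 120 % congestion-avoidance ratio lifts this to ~17.4 MB/s (about
 * 139 Mbit/s), while the 200 % slow-start ratio would allow ~29 MB/s so that
 * pacing never throttles cwnd growth.
 */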
0929
0930
0931
0932
0933 static void tcp_set_rto(struct sock *sk)
0934 {
0935 const struct tcp_sock *tp = tcp_sk(sk);

/* RFC 6298-style timeout: rto = srtt/8 + rttvar (srtt_us is stored scaled by
 * 8, and rttvar_us already incorporates the RTO floor), then bounded above
 * by TCP_RTO_MAX in tcp_bound_rto().
 */
0946 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
0947
0948
0949
0950
0951
0952
0953
0954
0955
0956
0957 tcp_bound_rto(sk);
0958 }
0959
0960 __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
0961 {
0962 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
0963
0964 if (!cwnd)
0965 cwnd = TCP_INIT_CWND;
0966 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
0967 }
0968
0969 struct tcp_sacktag_state {
/* Send timestamps of the earliest and latest segments newly SACKed by this
 * ACK (never-retransmitted data only); zero when no such sample exists.
 */
0974 u64 first_sackt;
0975 u64 last_sackt;
0976 u32 reord;
0977 u32 sack_delivered;
0978 int flag;
0979 unsigned int mss_now;
0980 struct rate_sample *rate;
0981 };

/* Account for a D-SACK block reported by the receiver.  Returns the number
 * of duplicate segments the block covers, or 0 if the block looks bogus, and
 * updates the sacktag state flags accordingly.
 */
0989 static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
0990 u32 end_seq, struct tcp_sacktag_state *state)
0991 {
0992 u32 seq_len, dup_segs = 1;
0993
0994 if (!before(start_seq, end_seq))
0995 return 0;
0996
0997 seq_len = end_seq - start_seq;
0998
0999 if (seq_len > tp->max_window)
1000 return 0;
1001 if (seq_len > tp->mss_cache)
1002 dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);
1003 else if (tp->tlp_high_seq && tp->tlp_high_seq == end_seq)
1004 state->flag |= FLAG_DSACK_TLP;
1005
1006 tp->dsack_dups += dup_segs;
1007
1008 if (tp->dsack_dups > tp->total_retrans)
1009 return 0;
1010
1011 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
1012
1013
1014
1015
1016
1017
1018 if (tp->reord_seen && !(state->flag & FLAG_DSACK_TLP))
1019 tp->rack.dsack_seen = 1;
1020
1021 state->flag |= FLAG_DSACKING_ACK;
1022
1023 state->sack_delivered += dup_segs;
1024
1025 return dup_segs;
1026 }
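
/* Worked example (illustrative): a D-SACK covering 4380 bytes with
 * mss_cache = 1460 counts as DIV_ROUND_UP(4380, 1460) = 3 duplicate
 * segments, feeding both the dsack_dups bookkeeping and the sack_delivered
 * count used by delivery-rate estimation.
 */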

/* Detect reordering: a SACK arriving for data below the highest SACKed
 * sequence means segments were delivered out of order; widen tp->reordering
 * accordingly.
 */
1032 static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
1033 const int ts)
1034 {
1035 struct tcp_sock *tp = tcp_sk(sk);
1036 const u32 mss = tp->mss_cache;
1037 u32 fack, metric;
1038
1039 fack = tcp_highest_sack_seq(tp);
1040 if (!before(low_seq, fack))
1041 return;
1042
1043 metric = fack - low_seq;
1044 if ((metric > tp->reordering * mss) && mss) {
1045 #if FASTRETRANS_DEBUG > 1
1046 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
1047 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
1048 tp->reordering,
1049 0,
1050 tp->sacked_out,
1051 tp->undo_marker ? tp->undo_retrans : 0);
1052 #endif
1053 tp->reordering = min_t(u32, (metric + mss - 1) / mss,
1054 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
1055 }
1056
1057
1058 tp->reord_seen++;
1059 NET_INC_STATS(sock_net(sk),
1060 ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
1061 }
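
/* Worked example (illustrative): if the highest SACKed sequence is 30000 and
 * a hole starting at low_seq = 10000 is newly ACKed, the reordering distance
 * is 20000 bytes; with mss = 1000 that is 20 segments, so tp->reordering is
 * raised to 20 (bounded by sysctl_tcp_max_reordering, default 300).
 */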
1062
1063
1064
1065
1066
1067
1068 static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
1069 {
1070 if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) ||
1071 (tp->retransmit_skb_hint &&
1072 before(TCP_SKB_CB(skb)->seq,
1073 TCP_SKB_CB(tp->retransmit_skb_hint)->seq)))
1074 tp->retransmit_skb_hint = skb;
1075 }
1076
1077
1078
1079
1080 static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
1081 {
1082 tp->lost += tcp_skb_pcount(skb);
1083 }
1084
1085 void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
1086 {
1087 __u8 sacked = TCP_SKB_CB(skb)->sacked;
1088 struct tcp_sock *tp = tcp_sk(sk);
1089
1090 if (sacked & TCPCB_SACKED_ACKED)
1091 return;
1092
1093 tcp_verify_retransmit_hint(tp, skb);
1094 if (sacked & TCPCB_LOST) {
1095 if (sacked & TCPCB_SACKED_RETRANS) {
1096
1097 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1098 tp->retrans_out -= tcp_skb_pcount(skb);
1099 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT,
1100 tcp_skb_pcount(skb));
1101 tcp_notify_skb_loss_event(tp, skb);
1102 }
1103 } else {
1104 tp->lost_out += tcp_skb_pcount(skb);
1105 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1106 tcp_notify_skb_loss_event(tp, skb);
1107 }
1108 }
1109
1110
1111 static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
1112 bool ece_ack)
1113 {
1114 tp->delivered += delivered;
1115 if (ece_ack)
1116 tp->delivered_ce += delivered;
1117 }

/* Validity checks for incoming (D)SACK blocks: a block must not be empty or
 * reference data beyond snd_nxt, and blocks at or below snd_una are only
 * acceptable as D-SACKs that still fall inside the current undo window
 * (bounded by undo_marker and max_window).
 */
1212 static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
1213 u32 start_seq, u32 end_seq)
1214 {
1215
1216 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
1217 return false;
1218
1219
1220 if (!before(start_seq, tp->snd_nxt))
1221 return false;
1222
1223
1224
1225
1226 if (after(start_seq, tp->snd_una))
1227 return true;
1228
1229 if (!is_dsack || !tp->undo_marker)
1230 return false;
1231
1232
1233 if (after(end_seq, tp->snd_una))
1234 return false;
1235
1236 if (!before(start_seq, tp->undo_marker))
1237 return true;
1238
1239
1240 if (!after(end_seq, tp->undo_marker))
1241 return false;
1242
1243
1244
1245
1246 return !before(start_seq, end_seq - tp->max_window);
1247 }
1248
1249 static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1250 struct tcp_sack_block_wire *sp, int num_sacks,
1251 u32 prior_snd_una, struct tcp_sacktag_state *state)
1252 {
1253 struct tcp_sock *tp = tcp_sk(sk);
1254 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1255 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1256 u32 dup_segs;
1257
1258 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1259 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1260 } else if (num_sacks > 1) {
1261 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1262 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1263
1264 if (after(end_seq_0, end_seq_1) || before(start_seq_0, start_seq_1))
1265 return false;
1266 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKOFORECV);
1267 } else {
1268 return false;
1269 }
1270
1271 dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state);
1272 if (!dup_segs) {
1273 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKIGNOREDDUBIOUS);
1274 return false;
1275 }
1276
1277 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECVSEGS, dup_segs);
1278
1279
1280 if (tp->undo_marker && tp->undo_retrans > 0 &&
1281 !after(end_seq_0, prior_snd_una) &&
1282 after(end_seq_0, tp->undo_marker))
1283 tp->undo_retrans = max_t(int, 0, tp->undo_retrans - dup_segs);
1284
1285 return true;
1286 }
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296 static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1297 u32 start_seq, u32 end_seq)
1298 {
1299 int err;
1300 bool in_sack;
1301 unsigned int pkt_len;
1302 unsigned int mss;
1303
1304 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1305 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1306
1307 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1308 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1309 mss = tcp_skb_mss(skb);
1310 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1311
1312 if (!in_sack) {
1313 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1314 if (pkt_len < mss)
1315 pkt_len = mss;
1316 } else {
1317 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1318 if (pkt_len < mss)
1319 return -EINVAL;
1320 }
1321
1322
1323
1324
1325 if (pkt_len > mss) {
1326 unsigned int new_len = (pkt_len / mss) * mss;
1327 if (!in_sack && new_len < pkt_len)
1328 new_len += mss;
1329 pkt_len = new_len;
1330 }
1331
1332 if (pkt_len >= skb->len && !in_sack)
1333 return 0;
1334
1335 err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
1336 pkt_len, mss, GFP_ATOMIC);
1337 if (err < 0)
1338 return err;
1339 }
1340
1341 return in_sack;
1342 }
1343
1344
1345 static u8 tcp_sacktag_one(struct sock *sk,
1346 struct tcp_sacktag_state *state, u8 sacked,
1347 u32 start_seq, u32 end_seq,
1348 int dup_sack, int pcount,
1349 u64 xmit_time)
1350 {
1351 struct tcp_sock *tp = tcp_sk(sk);
1352
1353
1354 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1355 if (tp->undo_marker && tp->undo_retrans > 0 &&
1356 after(end_seq, tp->undo_marker))
1357 tp->undo_retrans = max_t(int, 0, tp->undo_retrans - pcount);
1358 if ((sacked & TCPCB_SACKED_ACKED) &&
1359 before(start_seq, state->reord))
1360 state->reord = start_seq;
1361 }
1362
1363
1364 if (!after(end_seq, tp->snd_una))
1365 return sacked;
1366
1367 if (!(sacked & TCPCB_SACKED_ACKED)) {
1368 tcp_rack_advance(tp, sacked, end_seq, xmit_time);
1369
1370 if (sacked & TCPCB_SACKED_RETRANS) {
1371
1372
1373
1374
1375 if (sacked & TCPCB_LOST) {
1376 sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1377 tp->lost_out -= pcount;
1378 tp->retrans_out -= pcount;
1379 }
1380 } else {
1381 if (!(sacked & TCPCB_RETRANS)) {
1382
1383
1384
1385 if (before(start_seq,
1386 tcp_highest_sack_seq(tp)) &&
1387 before(start_seq, state->reord))
1388 state->reord = start_seq;
1389
1390 if (!after(end_seq, tp->high_seq))
1391 state->flag |= FLAG_ORIG_SACK_ACKED;
1392 if (state->first_sackt == 0)
1393 state->first_sackt = xmit_time;
1394 state->last_sackt = xmit_time;
1395 }
1396
1397 if (sacked & TCPCB_LOST) {
1398 sacked &= ~TCPCB_LOST;
1399 tp->lost_out -= pcount;
1400 }
1401 }
1402
1403 sacked |= TCPCB_SACKED_ACKED;
1404 state->flag |= FLAG_DATA_SACKED;
1405 tp->sacked_out += pcount;
1406
1407 state->sack_delivered += pcount;
1408
1409
1410 if (tp->lost_skb_hint &&
1411 before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
1412 tp->lost_cnt_hint += pcount;
1413 }
1414
1415
1416
1417
1418
1419 if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
1420 sacked &= ~TCPCB_SACKED_RETRANS;
1421 tp->retrans_out -= pcount;
1422 }
1423
1424 return sacked;
1425 }
1426
1427
1428
1429
1430 static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
1431 struct sk_buff *skb,
1432 struct tcp_sacktag_state *state,
1433 unsigned int pcount, int shifted, int mss,
1434 bool dup_sack)
1435 {
1436 struct tcp_sock *tp = tcp_sk(sk);
1437 u32 start_seq = TCP_SKB_CB(skb)->seq;
1438 u32 end_seq = start_seq + shifted;
1439
1440 BUG_ON(!pcount);
1441
1442
1443
1444
1445
1446
1447
1448 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1449 start_seq, end_seq, dup_sack, pcount,
1450 tcp_skb_timestamp_us(skb));
1451 tcp_rate_skb_delivered(sk, skb, state->rate);
1452
1453 if (skb == tp->lost_skb_hint)
1454 tp->lost_cnt_hint += pcount;
1455
1456 TCP_SKB_CB(prev)->end_seq += shifted;
1457 TCP_SKB_CB(skb)->seq += shifted;
1458
1459 tcp_skb_pcount_add(prev, pcount);
1460 WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
1461 tcp_skb_pcount_add(skb, -pcount);
1462
1463
1464
1465
1466
1467
1468 if (!TCP_SKB_CB(prev)->tcp_gso_size)
1469 TCP_SKB_CB(prev)->tcp_gso_size = mss;
1470
1471
1472 if (tcp_skb_pcount(skb) <= 1)
1473 TCP_SKB_CB(skb)->tcp_gso_size = 0;
1474
1475
1476 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1477
1478 if (skb->len > 0) {
1479 BUG_ON(!tcp_skb_pcount(skb));
1480 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1481 return false;
1482 }
1483
1484
1485
1486 if (skb == tp->retransmit_skb_hint)
1487 tp->retransmit_skb_hint = prev;
1488 if (skb == tp->lost_skb_hint) {
1489 tp->lost_skb_hint = prev;
1490 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1491 }
1492
1493 TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1494 TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor;
1495 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1496 TCP_SKB_CB(prev)->end_seq++;
1497
1498 if (skb == tcp_highest_sack(sk))
1499 tcp_advance_highest_sack(sk, skb);
1500
1501 tcp_skb_collapse_tstamp(prev, skb);
1502 if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
1503 TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
1504
1505 tcp_rtx_queue_unlink_and_free(skb, sk);
1506
1507 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
1508
1509 return true;
1510 }
1511
1512
1513
1514
1515 static int tcp_skb_seglen(const struct sk_buff *skb)
1516 {
1517 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1518 }
1519
1520
1521 static int skb_can_shift(const struct sk_buff *skb)
1522 {
1523 return !skb_headlen(skb) && skb_is_nonlinear(skb);
1524 }
1525
1526 int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
1527 int pcount, int shiftlen)
1528 {
1529
1530
1531
1532
1533
1534 if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
1535 return 0;
1536 if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
1537 return 0;
1538 return skb_shift(to, from, shiftlen);
1539 }
1540
1541
1542
1543
1544 static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1545 struct tcp_sacktag_state *state,
1546 u32 start_seq, u32 end_seq,
1547 bool dup_sack)
1548 {
1549 struct tcp_sock *tp = tcp_sk(sk);
1550 struct sk_buff *prev;
1551 int mss;
1552 int pcount = 0;
1553 int len;
1554 int in_sack;
1555
1556
1557 if (!dup_sack &&
1558 (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
1559 goto fallback;
1560 if (!skb_can_shift(skb))
1561 goto fallback;
1562
1563 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1564 goto fallback;
1565
1566
1567 prev = skb_rb_prev(skb);
1568 if (!prev)
1569 goto fallback;
1570
1571 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1572 goto fallback;
1573
1574 if (!tcp_skb_can_collapse(prev, skb))
1575 goto fallback;
1576
1577 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1578 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1579
1580 if (in_sack) {
1581 len = skb->len;
1582 pcount = tcp_skb_pcount(skb);
1583 mss = tcp_skb_seglen(skb);
1584
1585
1586
1587
1588 if (mss != tcp_skb_seglen(prev))
1589 goto fallback;
1590 } else {
1591 if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
1592 goto noop;
1593
1594
1595
1596
1597 if (tcp_skb_pcount(skb) <= 1)
1598 goto noop;
1599
1600 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1601 if (!in_sack) {
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613 goto fallback;
1614 }
1615
1616 len = end_seq - TCP_SKB_CB(skb)->seq;
1617 BUG_ON(len < 0);
1618 BUG_ON(len > skb->len);
1619
1620
1621
1622
1623
1624 mss = tcp_skb_mss(skb);
1625
1626
1627
1628
1629 if (mss != tcp_skb_seglen(prev))
1630 goto fallback;
1631
1632 if (len == mss) {
1633 pcount = 1;
1634 } else if (len < mss) {
1635 goto noop;
1636 } else {
1637 pcount = len / mss;
1638 len = pcount * mss;
1639 }
1640 }
1641
1642
1643 if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
1644 goto fallback;
1645
1646 if (!tcp_skb_shift(prev, skb, pcount, len))
1647 goto fallback;
1648 if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
1649 goto out;
1650
1651
1652
1653
1654 skb = skb_rb_next(prev);
1655 if (!skb)
1656 goto out;
1657
1658 if (!skb_can_shift(skb) ||
1659 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1660 (mss != tcp_skb_seglen(skb)))
1661 goto out;
1662
1663 if (!tcp_skb_can_collapse(prev, skb))
1664 goto out;
1665 len = skb->len;
1666 pcount = tcp_skb_pcount(skb);
1667 if (tcp_skb_shift(prev, skb, pcount, len))
1668 tcp_shifted_skb(sk, prev, skb, state, pcount,
1669 len, mss, 0);
1670
1671 out:
1672 return prev;
1673
1674 noop:
1675 return skb;
1676
1677 fallback:
1678 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
1679 return NULL;
1680 }
1681
1682 static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1683 struct tcp_sack_block *next_dup,
1684 struct tcp_sacktag_state *state,
1685 u32 start_seq, u32 end_seq,
1686 bool dup_sack_in)
1687 {
1688 struct tcp_sock *tp = tcp_sk(sk);
1689 struct sk_buff *tmp;
1690
1691 skb_rbtree_walk_from(skb) {
1692 int in_sack = 0;
1693 bool dup_sack = dup_sack_in;
1694
1695
1696 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1697 break;
1698
1699 if (next_dup &&
1700 before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
1701 in_sack = tcp_match_skb_to_sack(sk, skb,
1702 next_dup->start_seq,
1703 next_dup->end_seq);
1704 if (in_sack > 0)
1705 dup_sack = true;
1706 }
1707
1708
1709
1710
1711
1712 if (in_sack <= 0) {
1713 tmp = tcp_shift_skb_data(sk, skb, state,
1714 start_seq, end_seq, dup_sack);
1715 if (tmp) {
1716 if (tmp != skb) {
1717 skb = tmp;
1718 continue;
1719 }
1720
1721 in_sack = 0;
1722 } else {
1723 in_sack = tcp_match_skb_to_sack(sk, skb,
1724 start_seq,
1725 end_seq);
1726 }
1727 }
1728
1729 if (unlikely(in_sack < 0))
1730 break;
1731
1732 if (in_sack) {
1733 TCP_SKB_CB(skb)->sacked =
1734 tcp_sacktag_one(sk,
1735 state,
1736 TCP_SKB_CB(skb)->sacked,
1737 TCP_SKB_CB(skb)->seq,
1738 TCP_SKB_CB(skb)->end_seq,
1739 dup_sack,
1740 tcp_skb_pcount(skb),
1741 tcp_skb_timestamp_us(skb));
1742 tcp_rate_skb_delivered(sk, skb, state->rate);
1743 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1744 list_del_init(&skb->tcp_tsorted_anchor);
1745
1746 if (!before(TCP_SKB_CB(skb)->seq,
1747 tcp_highest_sack_seq(tp)))
1748 tcp_advance_highest_sack(sk, skb);
1749 }
1750 }
1751 return skb;
1752 }
1753
1754 static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, u32 seq)
1755 {
1756 struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
1757 struct sk_buff *skb;
1758
1759 while (*p) {
1760 parent = *p;
1761 skb = rb_to_skb(parent);
1762 if (before(seq, TCP_SKB_CB(skb)->seq)) {
1763 p = &parent->rb_left;
1764 continue;
1765 }
1766 if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
1767 p = &parent->rb_right;
1768 continue;
1769 }
1770 return skb;
1771 }
1772 return NULL;
1773 }
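
/* The retransmit queue is an rbtree keyed by sequence number, so the lookup
 * above is a plain binary search:
 *	seq <  skb->seq      -> descend left,
 *	seq >= skb->end_seq  -> descend right,
 * otherwise the skb covering 'seq' has been found (NULL if none does).
 */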
1774
1775 static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1776 u32 skip_to_seq)
1777 {
1778 if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
1779 return skb;
1780
1781 return tcp_sacktag_bsearch(sk, skip_to_seq);
1782 }
1783
1784 static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1785 struct sock *sk,
1786 struct tcp_sack_block *next_dup,
1787 struct tcp_sacktag_state *state,
1788 u32 skip_to_seq)
1789 {
1790 if (!next_dup)
1791 return skb;
1792
1793 if (before(next_dup->start_seq, skip_to_seq)) {
1794 skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
1795 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1796 next_dup->start_seq, next_dup->end_seq,
1797 1);
1798 }
1799
1800 return skb;
1801 }
1802
1803 static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
1804 {
1805 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1806 }
1807
1808 static int
1809 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1810 u32 prior_snd_una, struct tcp_sacktag_state *state)
1811 {
1812 struct tcp_sock *tp = tcp_sk(sk);
1813 const unsigned char *ptr = (skb_transport_header(ack_skb) +
1814 TCP_SKB_CB(ack_skb)->sacked);
1815 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1816 struct tcp_sack_block sp[TCP_NUM_SACKS];
1817 struct tcp_sack_block *cache;
1818 struct sk_buff *skb;
1819 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1820 int used_sacks;
1821 bool found_dup_sack = false;
1822 int i, j;
1823 int first_sack_index;
1824
1825 state->flag = 0;
1826 state->reord = tp->snd_nxt;
1827
1828 if (!tp->sacked_out)
1829 tcp_highest_sack_reset(sk);
1830
1831 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1832 num_sacks, prior_snd_una, state);
1833
1834
1835
1836
1837
1838 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1839 return 0;
1840
1841 if (!tp->packets_out)
1842 goto out;
1843
1844 used_sacks = 0;
1845 first_sack_index = 0;
1846 for (i = 0; i < num_sacks; i++) {
1847 bool dup_sack = !i && found_dup_sack;
1848
1849 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
1850 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
1851
1852 if (!tcp_is_sackblock_valid(tp, dup_sack,
1853 sp[used_sacks].start_seq,
1854 sp[used_sacks].end_seq)) {
1855 int mib_idx;
1856
1857 if (dup_sack) {
1858 if (!tp->undo_marker)
1859 mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
1860 else
1861 mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
1862 } else {
1863
1864 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1865 !after(sp[used_sacks].end_seq, tp->snd_una))
1866 continue;
1867 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1868 }
1869
1870 NET_INC_STATS(sock_net(sk), mib_idx);
1871 if (i == 0)
1872 first_sack_index = -1;
1873 continue;
1874 }
1875
1876
1877 if (!after(sp[used_sacks].end_seq, prior_snd_una)) {
1878 if (i == 0)
1879 first_sack_index = -1;
1880 continue;
1881 }
1882
1883 used_sacks++;
1884 }
1885
1886
1887 for (i = used_sacks - 1; i > 0; i--) {
1888 for (j = 0; j < i; j++) {
1889 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1890 swap(sp[j], sp[j + 1]);
1891
1892
1893 if (j == first_sack_index)
1894 first_sack_index = j + 1;
1895 }
1896 }
1897 }
1898
1899 state->mss_now = tcp_current_mss(sk);
1900 skb = NULL;
1901 i = 0;
1902
1903 if (!tp->sacked_out) {
1904
1905 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1906 } else {
1907 cache = tp->recv_sack_cache;
1908
1909 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1910 !cache->end_seq)
1911 cache++;
1912 }
1913
1914 while (i < used_sacks) {
1915 u32 start_seq = sp[i].start_seq;
1916 u32 end_seq = sp[i].end_seq;
1917 bool dup_sack = (found_dup_sack && (i == first_sack_index));
1918 struct tcp_sack_block *next_dup = NULL;
1919
1920 if (found_dup_sack && ((i + 1) == first_sack_index))
1921 next_dup = &sp[i + 1];
1922
1923
1924 while (tcp_sack_cache_ok(tp, cache) &&
1925 !before(start_seq, cache->end_seq))
1926 cache++;
1927
1928
1929 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1930 after(end_seq, cache->start_seq)) {
1931
1932
1933 if (before(start_seq, cache->start_seq)) {
1934 skb = tcp_sacktag_skip(skb, sk, start_seq);
1935 skb = tcp_sacktag_walk(skb, sk, next_dup,
1936 state,
1937 start_seq,
1938 cache->start_seq,
1939 dup_sack);
1940 }
1941
1942
1943 if (!after(end_seq, cache->end_seq))
1944 goto advance_sp;
1945
1946 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1947 state,
1948 cache->end_seq);
1949
1950
1951 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1952
1953 skb = tcp_highest_sack(sk);
1954 if (!skb)
1955 break;
1956 cache++;
1957 goto walk;
1958 }
1959
1960 skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
1961
1962 cache++;
1963 continue;
1964 }
1965
1966 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1967 skb = tcp_highest_sack(sk);
1968 if (!skb)
1969 break;
1970 }
1971 skb = tcp_sacktag_skip(skb, sk, start_seq);
1972
1973 walk:
1974 skb = tcp_sacktag_walk(skb, sk, next_dup, state,
1975 start_seq, end_seq, dup_sack);
1976
1977 advance_sp:
1978 i++;
1979 }
1980
1981
1982 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
1983 tp->recv_sack_cache[i].start_seq = 0;
1984 tp->recv_sack_cache[i].end_seq = 0;
1985 }
1986 for (j = 0; j < used_sacks; j++)
1987 tp->recv_sack_cache[i++] = sp[j];
1988
1989 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
1990 tcp_check_sack_reordering(sk, state->reord, 0);
1991
1992 tcp_verify_left_out(tp);
1993 out:
1994
1995 #if FASTRETRANS_DEBUG > 0
1996 WARN_ON((int)tp->sacked_out < 0);
1997 WARN_ON((int)tp->lost_out < 0);
1998 WARN_ON((int)tp->retrans_out < 0);
1999 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
2000 #endif
2001 return state->flag;
2002 }
2003
2004
2005
2006
2007 static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
2008 {
2009 u32 holes;
2010
2011 holes = max(tp->lost_out, 1U);
2012 holes = min(holes, tp->packets_out);
2013
2014 if ((tp->sacked_out + holes) > tp->packets_out) {
2015 tp->sacked_out = tp->packets_out - holes;
2016 return true;
2017 }
2018 return false;
2019 }
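
/* Worked example (illustrative): with packets_out = 10, lost_out = 2 and a
 * dupACK-inferred sacked_out of 12, sacked_out plus the assumed holes would
 * exceed packets_out, so sacked_out is clamped to 10 - 2 = 8 and the caller
 * treats the excess as reordering.
 */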
2020
2021
2022
2023
2024
2025 static void tcp_check_reno_reordering(struct sock *sk, const int addend)
2026 {
2027 struct tcp_sock *tp = tcp_sk(sk);
2028
2029 if (!tcp_limit_reno_sacked(tp))
2030 return;
2031
2032 tp->reordering = min_t(u32, tp->packets_out + addend,
2033 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
2034 tp->reord_seen++;
2035 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
2036 }
2037
2038
2039
2040 static void tcp_add_reno_sack(struct sock *sk, int num_dupack, bool ece_ack)
2041 {
2042 if (num_dupack) {
2043 struct tcp_sock *tp = tcp_sk(sk);
2044 u32 prior_sacked = tp->sacked_out;
2045 s32 delivered;
2046
2047 tp->sacked_out += num_dupack;
2048 tcp_check_reno_reordering(sk, 0);
2049 delivered = tp->sacked_out - prior_sacked;
2050 if (delivered > 0)
2051 tcp_count_delivered(tp, delivered, ece_ack);
2052 tcp_verify_left_out(tp);
2053 }
2054 }
2055
2056
2057
2058 static void tcp_remove_reno_sacks(struct sock *sk, int acked, bool ece_ack)
2059 {
2060 struct tcp_sock *tp = tcp_sk(sk);
2061
2062 if (acked > 0) {
2063
2064 tcp_count_delivered(tp, max_t(int, acked - tp->sacked_out, 1),
2065 ece_ack);
2066 if (acked - 1 >= tp->sacked_out)
2067 tp->sacked_out = 0;
2068 else
2069 tp->sacked_out -= acked - 1;
2070 }
2071 tcp_check_reno_reordering(sk, acked);
2072 tcp_verify_left_out(tp);
2073 }
2074
2075 static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
2076 {
2077 tp->sacked_out = 0;
2078 }
2079
2080 void tcp_clear_retrans(struct tcp_sock *tp)
2081 {
2082 tp->retrans_out = 0;
2083 tp->lost_out = 0;
2084 tp->undo_marker = 0;
2085 tp->undo_retrans = -1;
2086 tp->sacked_out = 0;
2087 }
2088
2089 static inline void tcp_init_undo(struct tcp_sock *tp)
2090 {
2091 tp->undo_marker = tp->snd_una;
2092
2093 tp->undo_retrans = tp->retrans_out ? : -1;
2094 }
2095
2096 static bool tcp_is_rack(const struct sock *sk)
2097 {
2098 return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
2099 TCP_RACK_LOSS_DETECTION;
2100 }
2101
2102
2103
2104
2105
2106 static void tcp_timeout_mark_lost(struct sock *sk)
2107 {
2108 struct tcp_sock *tp = tcp_sk(sk);
2109 struct sk_buff *skb, *head;
2110 bool is_reneg;
2111
2112 head = tcp_rtx_queue_head(sk);
2113 is_reneg = head && (TCP_SKB_CB(head)->sacked & TCPCB_SACKED_ACKED);
2114 if (is_reneg) {
2115 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
2116 tp->sacked_out = 0;
2117
2118 tp->is_sack_reneg = 1;
2119 } else if (tcp_is_reno(tp)) {
2120 tcp_reset_reno_sack(tp);
2121 }
2122
2123 skb = head;
2124 skb_rbtree_walk_from(skb) {
2125 if (is_reneg)
2126 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
2127 else if (tcp_is_rack(sk) && skb != head &&
2128 tcp_rack_skb_timeout(tp, skb, 0) > 0)
2129 continue;
2130 tcp_mark_skb_lost(sk, skb);
2131 }
2132 tcp_verify_left_out(tp);
2133 tcp_clear_all_retrans_hints(tp);
2134 }
2135
2136
2137 void tcp_enter_loss(struct sock *sk)
2138 {
2139 const struct inet_connection_sock *icsk = inet_csk(sk);
2140 struct tcp_sock *tp = tcp_sk(sk);
2141 struct net *net = sock_net(sk);
2142 bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
2143 u8 reordering;
2144
2145 tcp_timeout_mark_lost(sk);
2146
2147
2148 if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
2149 !after(tp->high_seq, tp->snd_una) ||
2150 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
2151 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2152 tp->prior_cwnd = tcp_snd_cwnd(tp);
2153 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2154 tcp_ca_event(sk, CA_EVENT_LOSS);
2155 tcp_init_undo(tp);
2156 }
2157 tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + 1);
2158 tp->snd_cwnd_cnt = 0;
2159 tp->snd_cwnd_stamp = tcp_jiffies32;
2160
2161
2162
2163
2164 reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering);
2165 if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
2166 tp->sacked_out >= reordering)
2167 tp->reordering = min_t(unsigned int, tp->reordering,
2168 reordering);
2169
2170 tcp_set_ca_state(sk, TCP_CA_Loss);
2171 tp->high_seq = tp->snd_nxt;
2172 tcp_ecn_queue_cwr(tp);
2173
2174
2175
2176
2177
2178 tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
2179 (new_recovery || icsk->icsk_retransmits) &&
2180 !inet_csk(sk)->icsk_mtup.probe_size;
2181 }

/* The receiver appears to have discarded (reneged on) data it previously
 * SACKed.  Rather than retransmitting immediately, rearm the retransmit
 * timer with a short delay and let the normal RTO path recover.
 */
2193 static bool tcp_check_sack_reneging(struct sock *sk, int flag)
2194 {
2195 if (flag & FLAG_SACK_RENEGING) {
2196 struct tcp_sock *tp = tcp_sk(sk);
2197 unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
2198 msecs_to_jiffies(10));
2199
2200 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2201 delay, TCP_RTO_MAX);
2202 return true;
2203 }
2204 return false;
2205 }
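
/* Worked example (illustrative): with srtt_us = 800000 (SRTT = 100 ms) the
 * rearm delay is max(srtt_us >> 4, 10 ms) = 50 ms, long enough for reordered
 * ACKs to resolve an apparent reneging before we fall back to RTO recovery.
 */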

/* Classic (non-RACK) duplicate-ACK heuristics: the number of SACKed segments
 * plus one is compared against tp->reordering to decide when enough
 * duplicate information has accumulated to trigger recovery.
 */
2218 static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2219 {
2220 return tp->sacked_out + 1;
2221 }

/* Decide whether to leave Open/Disorder and enter fast recovery: either some
 * segments have already been marked lost (by RACK or a timeout), or the
 * classic duplicate-ACK/SACK count has exceeded the reordering threshold.
 */
2320 static bool tcp_time_to_recover(struct sock *sk, int flag)
2321 {
2322 struct tcp_sock *tp = tcp_sk(sk);
2323
2324
2325 if (tp->lost_out)
2326 return true;
2327
2328
2329 if (!tcp_is_rack(sk) && tcp_dupack_heuristics(tp) > tp->reordering)
2330 return true;
2331
2332 return false;
2333 }

/* Mark up to 'packets' leading not-yet-lost segments as lost, resuming from
 * the lost_skb_hint cache when possible; with mark_head set, at most the
 * first segment is marked.
 */
2340 static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2341 {
2342 struct tcp_sock *tp = tcp_sk(sk);
2343 struct sk_buff *skb;
2344 int cnt;
2345
2346 const u32 loss_high = tp->snd_nxt;
2347
2348 WARN_ON(packets > tp->packets_out);
2349 skb = tp->lost_skb_hint;
2350 if (skb) {
2351
2352 if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
2353 return;
2354 cnt = tp->lost_cnt_hint;
2355 } else {
2356 skb = tcp_rtx_queue_head(sk);
2357 cnt = 0;
2358 }
2359
2360 skb_rbtree_walk_from(skb) {
2361
2362
2363 tp->lost_skb_hint = skb;
2364 tp->lost_cnt_hint = cnt;
2365
2366 if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
2367 break;
2368
2369 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2370 cnt += tcp_skb_pcount(skb);
2371
2372 if (cnt > packets)
2373 break;
2374
2375 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST))
2376 tcp_mark_skb_lost(sk, skb);
2377
2378 if (mark_head)
2379 break;
2380 }
2381 tcp_verify_left_out(tp);
2382 }
2383
2384
2385
2386 static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2387 {
2388 struct tcp_sock *tp = tcp_sk(sk);
2389
2390 if (tcp_is_sack(tp)) {
2391 int sacked_upto = tp->sacked_out - tp->reordering;
2392 if (sacked_upto >= 0)
2393 tcp_mark_head_lost(sk, sacked_upto, 0);
2394 else if (fast_rexmit)
2395 tcp_mark_head_lost(sk, 1, 1);
2396 }
2397 }
2398
2399 static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
2400 {
2401 return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2402 before(tp->rx_opt.rcv_tsecr, when);
2403 }
2404
2405
2406
2407
2408 static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
2409 const struct sk_buff *skb)
2410 {
2411 return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
2412 tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb));
2413 }
2414
2415
2416
2417
2418 static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
2419 {
2420 return tp->retrans_stamp &&
2421 tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
2422 }

/* Undo support: a cwnd reduction can be undone when D-SACKs or timestamps
 * show that the retransmissions were spurious.  tcp_any_retrans_done()
 * reports whether anything was (ever) retransmitted in the current episode,
 * which gates clearing retrans_stamp.
 */
2440 static bool tcp_any_retrans_done(const struct sock *sk)
2441 {
2442 const struct tcp_sock *tp = tcp_sk(sk);
2443 struct sk_buff *skb;
2444
2445 if (tp->retrans_out)
2446 return true;
2447
2448 skb = tcp_rtx_queue_head(sk);
2449 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2450 return true;
2451
2452 return false;
2453 }
2454
2455 static void DBGUNDO(struct sock *sk, const char *msg)
2456 {
2457 #if FASTRETRANS_DEBUG > 1
2458 struct tcp_sock *tp = tcp_sk(sk);
2459 struct inet_sock *inet = inet_sk(sk);
2460
2461 if (sk->sk_family == AF_INET) {
2462 pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2463 msg,
2464 &inet->inet_daddr, ntohs(inet->inet_dport),
2465 tcp_snd_cwnd(tp), tcp_left_out(tp),
2466 tp->snd_ssthresh, tp->prior_ssthresh,
2467 tp->packets_out);
2468 }
2469 #if IS_ENABLED(CONFIG_IPV6)
2470 else if (sk->sk_family == AF_INET6) {
2471 pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2472 msg,
2473 &sk->sk_v6_daddr, ntohs(inet->inet_dport),
2474 tcp_snd_cwnd(tp), tcp_left_out(tp),
2475 tp->snd_ssthresh, tp->prior_ssthresh,
2476 tp->packets_out);
2477 }
2478 #endif
2479 #endif
2480 }
2481
2482 static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
2483 {
2484 struct tcp_sock *tp = tcp_sk(sk);
2485
2486 if (unmark_loss) {
2487 struct sk_buff *skb;
2488
2489 skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
2490 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2491 }
2492 tp->lost_out = 0;
2493 tcp_clear_all_retrans_hints(tp);
2494 }
2495
2496 if (tp->prior_ssthresh) {
2497 const struct inet_connection_sock *icsk = inet_csk(sk);
2498
2499 tcp_snd_cwnd_set(tp, icsk->icsk_ca_ops->undo_cwnd(sk));
2500
2501 if (tp->prior_ssthresh > tp->snd_ssthresh) {
2502 tp->snd_ssthresh = tp->prior_ssthresh;
2503 tcp_ecn_withdraw_cwr(tp);
2504 }
2505 }
2506 tp->snd_cwnd_stamp = tcp_jiffies32;
2507 tp->undo_marker = 0;
2508 tp->rack.advanced = 1;
2509 }
2510
2511 static inline bool tcp_may_undo(const struct tcp_sock *tp)
2512 {
2513 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2514 }
2515
2516 static bool tcp_is_non_sack_preventing_reopen(struct sock *sk)
2517 {
2518 struct tcp_sock *tp = tcp_sk(sk);
2519
2520 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2521
2522
2523
2524 if (!tcp_any_retrans_done(sk))
2525 tp->retrans_stamp = 0;
2526 return true;
2527 }
2528 return false;
2529 }
2530
2531
2532 static bool tcp_try_undo_recovery(struct sock *sk)
2533 {
2534 struct tcp_sock *tp = tcp_sk(sk);
2535
2536 if (tcp_may_undo(tp)) {
2537 int mib_idx;
2538
2539
2540
2541
2542 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2543 tcp_undo_cwnd_reduction(sk, false);
2544 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2545 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2546 else
2547 mib_idx = LINUX_MIB_TCPFULLUNDO;
2548
2549 NET_INC_STATS(sock_net(sk), mib_idx);
2550 } else if (tp->rack.reo_wnd_persist) {
2551 tp->rack.reo_wnd_persist--;
2552 }
2553 if (tcp_is_non_sack_preventing_reopen(sk))
2554 return true;
2555 tcp_set_ca_state(sk, TCP_CA_Open);
2556 tp->is_sack_reneg = 0;
2557 return false;
2558 }
2559
2560
2561 static bool tcp_try_undo_dsack(struct sock *sk)
2562 {
2563 struct tcp_sock *tp = tcp_sk(sk);
2564
2565 if (tp->undo_marker && !tp->undo_retrans) {
2566 tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
2567 tp->rack.reo_wnd_persist + 1);
2568 DBGUNDO(sk, "D-SACK");
2569 tcp_undo_cwnd_reduction(sk, false);
2570 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2571 return true;
2572 }
2573 return false;
2574 }
2575
2576
2577 static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
2578 {
2579 struct tcp_sock *tp = tcp_sk(sk);
2580
2581 if (frto_undo || tcp_may_undo(tp)) {
2582 tcp_undo_cwnd_reduction(sk, true);
2583
2584 DBGUNDO(sk, "partial loss");
2585 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2586 if (frto_undo)
2587 NET_INC_STATS(sock_net(sk),
2588 LINUX_MIB_TCPSPURIOUSRTOS);
2589 inet_csk(sk)->icsk_retransmits = 0;
2590 if (tcp_is_non_sack_preventing_reopen(sk))
2591 return true;
2592 if (frto_undo || tcp_is_sack(tp)) {
2593 tcp_set_ca_state(sk, TCP_CA_Open);
2594 tp->is_sack_reneg = 0;
2595 }
2596 return true;
2597 }
2598 return false;
2599 }

/* Proportional Rate Reduction (PRR, RFC 6937): instead of halving cwnd in
 * one step, reduce it gradually during recovery so that the amount of new
 * data sent tracks the amount newly delivered, converging on ssthresh by the
 * time recovery ends.
 */
2610 static void tcp_init_cwnd_reduction(struct sock *sk)
2611 {
2612 struct tcp_sock *tp = tcp_sk(sk);
2613
2614 tp->high_seq = tp->snd_nxt;
2615 tp->tlp_high_seq = 0;
2616 tp->snd_cwnd_cnt = 0;
2617 tp->prior_cwnd = tcp_snd_cwnd(tp);
2618 tp->prr_delivered = 0;
2619 tp->prr_out = 0;
2620 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
2621 tcp_ecn_queue_cwr(tp);
2622 }
2623
2624 void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag)
2625 {
2626 struct tcp_sock *tp = tcp_sk(sk);
2627 int sndcnt = 0;
2628 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2629
2630 if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
2631 return;
2632
2633 tp->prr_delivered += newly_acked_sacked;
2634 if (delta < 0) {
2635 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
2636 tp->prior_cwnd - 1;
2637 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
2638 } else {
2639 sndcnt = max_t(int, tp->prr_delivered - tp->prr_out,
2640 newly_acked_sacked);
2641 if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost)
2642 sndcnt++;
2643 sndcnt = min(delta, sndcnt);
2644 }
2645
2646 sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
2647 tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + sndcnt);
2648 }
2649
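/* Finish the cwnd reduction: unless the CA module takes full control
 * through ->cong_control, clamp cwnd to ssthresh when leaving CWR or an
 * undo-able recovery.
 */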
2650 static inline void tcp_end_cwnd_reduction(struct sock *sk)
2651 {
2652 struct tcp_sock *tp = tcp_sk(sk);
2653
2654 if (inet_csk(sk)->icsk_ca_ops->cong_control)
2655 return;
2656
2657
2658 if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
2659 (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
2660 tcp_snd_cwnd_set(tp, tp->snd_ssthresh);
2661 tp->snd_cwnd_stamp = tcp_jiffies32;
2662 }
2663 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2664 }
2665
2666
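/* Enter CWR state: reduce cwnd in response to a congestion signal that
 * did not involve packet loss (e.g. an ECN echo or a local congestion
 * notification), with any later undo disabled.
 */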
2667 void tcp_enter_cwr(struct sock *sk)
2668 {
2669 struct tcp_sock *tp = tcp_sk(sk);
2670
2671 tp->prior_ssthresh = 0;
2672 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2673 tp->undo_marker = 0;
2674 tcp_init_cwnd_reduction(sk);
2675 tcp_set_ca_state(sk, TCP_CA_CWR);
2676 }
2677 }
2678 EXPORT_SYMBOL(tcp_enter_cwr);
2679
2680 static void tcp_try_keep_open(struct sock *sk)
2681 {
2682 struct tcp_sock *tp = tcp_sk(sk);
2683 int state = TCP_CA_Open;
2684
2685 if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
2686 state = TCP_CA_Disorder;
2687
2688 if (inet_csk(sk)->icsk_ca_state != state) {
2689 tcp_set_ca_state(sk, state);
2690 tp->high_seq = tp->snd_nxt;
2691 }
2692 }
2693
2694 static void tcp_try_to_open(struct sock *sk, int flag)
2695 {
2696 struct tcp_sock *tp = tcp_sk(sk);
2697
2698 tcp_verify_left_out(tp);
2699
2700 if (!tcp_any_retrans_done(sk))
2701 tp->retrans_stamp = 0;
2702
2703 if (flag & FLAG_ECE)
2704 tcp_enter_cwr(sk);
2705
2706 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2707 tcp_try_keep_open(sk);
2708 }
2709 }
2710
2711 static void tcp_mtup_probe_failed(struct sock *sk)
2712 {
2713 struct inet_connection_sock *icsk = inet_csk(sk);
2714
2715 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2716 icsk->icsk_mtup.probe_size = 0;
2717 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
2718 }
2719
2720 static void tcp_mtup_probe_success(struct sock *sk)
2721 {
2722 struct tcp_sock *tp = tcp_sk(sk);
2723 struct inet_connection_sock *icsk = inet_csk(sk);
2724 u64 val;
2725
2726 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2727
2728 val = (u64)tcp_snd_cwnd(tp) * tcp_mss_to_mtu(sk, tp->mss_cache);
2729 do_div(val, icsk->icsk_mtup.probe_size);
2730 DEBUG_NET_WARN_ON_ONCE((u32)val != val);
2731 tcp_snd_cwnd_set(tp, max_t(u32, 1U, val));
2732
2733 tp->snd_cwnd_cnt = 0;
2734 tp->snd_cwnd_stamp = tcp_jiffies32;
2735 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2736
2737 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2738 icsk->icsk_mtup.probe_size = 0;
2739 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2740 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
2741 }
2742
2743
2744
2745
2746
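/* Used when the path MTU shrank (an MTU probe failure or an incoming
 * "fragmentation needed" error): segments larger than the current MSS
 * are marked lost and retransmitted right away, bypassing the usual
 * ssthresh/cwnd reduction.
 */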
2747 void tcp_simple_retransmit(struct sock *sk)
2748 {
2749 const struct inet_connection_sock *icsk = inet_csk(sk);
2750 struct tcp_sock *tp = tcp_sk(sk);
2751 struct sk_buff *skb;
2752 int mss;
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
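	/* A Fast Open SYN carrying data is stored as two skbs in the
	 * retransmit queue (see tcp_send_syn_data()), so the per-skb size
	 * check below cannot work while in SYN_SENT.  Use an mss of -1 so
	 * every frame, including the SYN, is marked lost.
	 */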
2764 if (tp->syn_data && sk->sk_state == TCP_SYN_SENT)
2765 mss = -1;
2766 else
2767 mss = tcp_current_mss(sk);
2768
2769 skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
2770 if (tcp_skb_seglen(skb) > mss)
2771 tcp_mark_skb_lost(sk, skb);
2772 }
2773
2774 tcp_clear_retrans_hints_partial(tp);
2775
2776 if (!tp->lost_out)
2777 return;
2778
2779 if (tcp_is_reno(tp))
2780 tcp_limit_reno_sacked(tp);
2781
2782 tcp_verify_left_out(tp);
2783
2784
2785
2786
2787
2788
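	/* Do not touch cwnd or ssthresh here; only switch the state
	 * machine to Loss (if not already there) before retransmitting
	 * the oversized frames.
	 */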
2789 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2790 tp->high_seq = tp->snd_nxt;
2791 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2792 tp->prior_ssthresh = 0;
2793 tp->undo_marker = 0;
2794 tcp_set_ca_state(sk, TCP_CA_Loss);
2795 }
2796 tcp_xmit_retransmit_queue(sk);
2797 }
2798 EXPORT_SYMBOL(tcp_simple_retransmit);
2799
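/* Enter fast recovery: count the event, remember ssthresh for a later
 * undo (unless the entry was forced by an ECE mark) and start the PRR
 * cwnd reduction if one is not already in progress.
 */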
2800 void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2801 {
2802 struct tcp_sock *tp = tcp_sk(sk);
2803 int mib_idx;
2804
2805 if (tcp_is_reno(tp))
2806 mib_idx = LINUX_MIB_TCPRENORECOVERY;
2807 else
2808 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2809
2810 NET_INC_STATS(sock_net(sk), mib_idx);
2811
2812 tp->prior_ssthresh = 0;
2813 tcp_init_undo(tp);
2814
2815 if (!tcp_in_cwnd_reduction(sk)) {
2816 if (!ece_ack)
2817 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2818 tcp_init_cwnd_reduction(sk);
2819 }
2820 tcp_set_ca_state(sk, TCP_CA_Recovery);
2821 }
2822
2823
2824
2825
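/* Process an ACK received while in TCP_CA_Loss.  Also runs the F-RTO
 * (RFC 5682) spurious-RTO detection and decides whether to transmit new
 * data (REXMIT_NEW) or keep retransmitting what was marked lost.
 */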
2826 static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
2827 int *rexmit)
2828 {
2829 struct tcp_sock *tp = tcp_sk(sk);
2830 bool recovered = !before(tp->snd_una, tp->high_seq);
2831
2832 if ((flag & FLAG_SND_UNA_ADVANCED || rcu_access_pointer(tp->fastopen_rsk)) &&
2833 tcp_try_undo_loss(sk, false))
2834 return;
2835
2836 if (tp->frto) {
2837
2838
2839
2840 if ((flag & FLAG_ORIG_SACK_ACKED) &&
2841 tcp_try_undo_loss(sk, true))
2842 return;
2843
2844 if (after(tp->snd_nxt, tp->high_seq)) {
2845 if (flag & FLAG_DATA_SACKED || num_dupack)
2846 tp->frto = 0;
2847 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
2848 tp->high_seq = tp->snd_nxt;
2849
2850
2851
2852
2853 if (!tcp_write_queue_empty(sk) &&
2854 after(tcp_wnd_end(tp), tp->snd_nxt)) {
2855 *rexmit = REXMIT_NEW;
2856 return;
2857 }
2858 tp->frto = 0;
2859 }
2860 }
2861
2862 if (recovered) {
2863
2864 tcp_try_undo_recovery(sk);
2865 return;
2866 }
2867 if (tcp_is_reno(tp)) {
2868
2869
2870
2871 if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
2872 tcp_add_reno_sack(sk, num_dupack, flag & FLAG_ECE);
2873 else if (flag & FLAG_SND_UNA_ADVANCED)
2874 tcp_reset_reno_sack(tp);
2875 }
2876 *rexmit = REXMIT_LOST;
2877 }
2878
2879 static bool tcp_force_fast_retransmit(struct sock *sk)
2880 {
2881 struct tcp_sock *tp = tcp_sk(sk);
2882
2883 return after(tcp_highest_sack_seq(tp),
2884 tp->snd_una + tp->reordering * tp->mss_cache);
2885 }
2886
2887
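/* Handle a partial ACK during fast recovery: if timestamps prove the
 * never-retransmitted data was merely delayed, record the reordering
 * and, once no retransmissions remain outstanding, undo the cwnd
 * reduction and try to keep the connection in Open/Disorder.
 */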
2888 static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una,
2889 bool *do_lost)
2890 {
2891 struct tcp_sock *tp = tcp_sk(sk);
2892
2893 if (tp->undo_marker && tcp_packet_delayed(tp)) {
2894
2895
2896
2897 tcp_check_sack_reordering(sk, prior_snd_una, 1);
2898
2899
2900
2901
2902
2903
2904 if (tp->retrans_out)
2905 return true;
2906
2907 if (!tcp_any_retrans_done(sk))
2908 tp->retrans_stamp = 0;
2909
2910 DBGUNDO(sk, "partial recovery");
2911 tcp_undo_cwnd_reduction(sk, true);
2912 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2913 tcp_try_keep_open(sk);
2914 } else {
2915
2916 *do_lost = tcp_force_fast_retransmit(sk);
2917 }
2918 return false;
2919 }
2920
2921 static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
2922 {
2923 struct tcp_sock *tp = tcp_sk(sk);
2924
2925 if (tcp_rtx_queue_empty(sk))
2926 return;
2927
2928 if (unlikely(tcp_is_reno(tp))) {
2929 tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED);
2930 } else if (tcp_is_rack(sk)) {
2931 u32 prior_retrans = tp->retrans_out;
2932
2933 if (tcp_rack_mark_lost(sk))
2934 *ack_flag &= ~FLAG_SET_XMIT_TIMER;
2935 if (prior_retrans > tp->retrans_out)
2936 *ack_flag |= FLAG_LOST_RETRANS;
2937 }
2938 }
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
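/* The heart of TCP loss recovery.  Called for every "dubious" ACK to
 * decide, from the SACK/dupack/ECE evidence, whether to stay in Open,
 * move between Disorder, CWR, Recovery and Loss, undo an earlier cwnd
 * reduction, and what to (re)transmit next (*rexmit).
 */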
2952 static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
2953 int num_dupack, int *ack_flag, int *rexmit)
2954 {
2955 struct inet_connection_sock *icsk = inet_csk(sk);
2956 struct tcp_sock *tp = tcp_sk(sk);
2957 int fast_rexmit = 0, flag = *ack_flag;
2958 bool ece_ack = flag & FLAG_ECE;
2959 bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
2960 tcp_force_fast_retransmit(sk));
2961
2962 if (!tp->packets_out && tp->sacked_out)
2963 tp->sacked_out = 0;
2964
2965
2966
2967 if (ece_ack)
2968 tp->prior_ssthresh = 0;
2969
2970
2971 if (tcp_check_sack_reneging(sk, flag))
2972 return;
2973
2974
2975 tcp_verify_left_out(tp);
2976
2977
2978
2979 if (icsk->icsk_ca_state == TCP_CA_Open) {
2980 WARN_ON(tp->retrans_out != 0 && !tp->syn_data);
2981 tp->retrans_stamp = 0;
2982 } else if (!before(tp->snd_una, tp->high_seq)) {
2983 switch (icsk->icsk_ca_state) {
2984 case TCP_CA_CWR:
2985
2986
2987 if (tp->snd_una != tp->high_seq) {
2988 tcp_end_cwnd_reduction(sk);
2989 tcp_set_ca_state(sk, TCP_CA_Open);
2990 }
2991 break;
2992
2993 case TCP_CA_Recovery:
2994 if (tcp_is_reno(tp))
2995 tcp_reset_reno_sack(tp);
2996 if (tcp_try_undo_recovery(sk))
2997 return;
2998 tcp_end_cwnd_reduction(sk);
2999 break;
3000 }
3001 }
3002
3003
3004 switch (icsk->icsk_ca_state) {
3005 case TCP_CA_Recovery:
3006 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
3007 if (tcp_is_reno(tp))
3008 tcp_add_reno_sack(sk, num_dupack, ece_ack);
3009 } else if (tcp_try_undo_partial(sk, prior_snd_una, &do_lost))
3010 return;
3011
3012 if (tcp_try_undo_dsack(sk))
3013 tcp_try_keep_open(sk);
3014
3015 tcp_identify_packet_loss(sk, ack_flag);
3016 if (icsk->icsk_ca_state != TCP_CA_Recovery) {
3017 if (!tcp_time_to_recover(sk, flag))
3018 return;
3019
3020
3021
3022 tcp_enter_recovery(sk, ece_ack);
3023 }
3024 break;
3025 case TCP_CA_Loss:
3026 tcp_process_loss(sk, flag, num_dupack, rexmit);
3027 tcp_identify_packet_loss(sk, ack_flag);
3028 if (!(icsk->icsk_ca_state == TCP_CA_Open ||
3029 (*ack_flag & FLAG_LOST_RETRANS)))
3030 return;
3031
3032 fallthrough;
3033 default:
3034 if (tcp_is_reno(tp)) {
3035 if (flag & FLAG_SND_UNA_ADVANCED)
3036 tcp_reset_reno_sack(tp);
3037 tcp_add_reno_sack(sk, num_dupack, ece_ack);
3038 }
3039
3040 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
3041 tcp_try_undo_dsack(sk);
3042
3043 tcp_identify_packet_loss(sk, ack_flag);
3044 if (!tcp_time_to_recover(sk, flag)) {
3045 tcp_try_to_open(sk, flag);
3046 return;
3047 }
3048
3049
3050 if (icsk->icsk_ca_state < TCP_CA_CWR &&
3051 icsk->icsk_mtup.probe_size &&
3052 tp->snd_una == tp->mtu_probe.probe_seq_start) {
3053 tcp_mtup_probe_failed(sk);
3054
3055 tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
3056 tcp_simple_retransmit(sk);
3057 return;
3058 }
3059
3060
3061 tcp_enter_recovery(sk, ece_ack);
3062 fast_rexmit = 1;
3063 }
3064
3065 if (!tcp_is_rack(sk) && do_lost)
3066 tcp_update_scoreboard(sk, fast_rexmit);
3067 *rexmit = REXMIT_LOST;
3068 }
3069
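/* Feed an RTT sample into the windowed min-RTT filter, ignoring samples
 * that may be inflated by a delayed ACK and already exceed the minimum.
 */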
3070 static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
3071 {
3072 u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
3073 struct tcp_sock *tp = tcp_sk(sk);
3074
3075 if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
3076
3077
3078
3079
3080 return;
3081 }
3082 minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
3083 rtt_us ? : jiffies_to_usecs(1));
3084 }
3085
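/* Update the RTT estimators from this ACK: prefer a sample measured on
 * newly acked, never-retransmitted data, fall back to SACK timing, and
 * finally to the echoed timestamp for cumulative ACKs.
 */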
3086 static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
3087 long seq_rtt_us, long sack_rtt_us,
3088 long ca_rtt_us, struct rate_sample *rs)
3089 {
3090 const struct tcp_sock *tp = tcp_sk(sk);
3091
3092
3093
3094
3095
3096
3097 if (seq_rtt_us < 0)
3098 seq_rtt_us = sack_rtt_us;
3099
3100
3101
3102
3103
3104
3105
3106 if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
3107 flag & FLAG_ACKED) {
3108 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
3109
3110 if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
3111 if (!delta)
3112 delta = 1;
3113 seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
3114 ca_rtt_us = seq_rtt_us;
3115 }
3116 }
3117 rs->rtt_us = ca_rtt_us;
3118 if (seq_rtt_us < 0)
3119 return false;
3120
3121
3122
3123
3124
3125 tcp_update_rtt_min(sk, ca_rtt_us, flag);
3126 tcp_rtt_estimator(sk, seq_rtt_us);
3127 tcp_set_rto(sk);
3128
3129
3130 inet_csk(sk)->icsk_backoff = 0;
3131 return true;
3132 }
3133
3134
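/* Take an RTT sample from the SYNACK of a passive open, but only when
 * the SYNACK was not retransmitted.
 */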
3135 void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
3136 {
3137 struct rate_sample rs;
3138 long rtt_us = -1L;
3139
3140 if (req && !req->num_retrans && tcp_rsk(req)->snt_synack)
3141 rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack);
3142
3143 tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us, &rs);
3144 }
3145
3146
3147 static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
3148 {
3149 const struct inet_connection_sock *icsk = inet_csk(sk);
3150
3151 icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
3152 tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32;
3153 }
3154
3155
3156
3157
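/* Re-arm the retransmission timer: clear it when nothing is in flight,
 * otherwise restart it, shortened so that it still fires relative to
 * the earliest outstanding skb when a RACK reorder timer or loss probe
 * was pending.
 */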
3158 void tcp_rearm_rto(struct sock *sk)
3159 {
3160 const struct inet_connection_sock *icsk = inet_csk(sk);
3161 struct tcp_sock *tp = tcp_sk(sk);
3162
3163
3164
3165
3166 if (rcu_access_pointer(tp->fastopen_rsk))
3167 return;
3168
3169 if (!tp->packets_out) {
3170 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3171 } else {
3172 u32 rto = inet_csk(sk)->icsk_rto;
3173
3174 if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
3175 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
3176 s64 delta_us = tcp_rto_delta_us(sk);
3177
3178
3179
3180 rto = usecs_to_jiffies(max_t(int, delta_us, 1));
3181 }
3182 tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
3183 TCP_RTO_MAX);
3184 }
3185 }
3186
3187
3188 static void tcp_set_xmit_timer(struct sock *sk)
3189 {
3190 if (!tcp_schedule_loss_probe(sk, true))
3191 tcp_rearm_rto(sk);
3192 }
3193
3194
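/* The skb was only partly acknowledged (it is a TSO aggregate): trim
 * the acked head and return how many packets that removed, or 0 if the
 * trim failed.
 */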
3195 static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3196 {
3197 struct tcp_sock *tp = tcp_sk(sk);
3198 u32 packets_acked;
3199
3200 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3201
3202 packets_acked = tcp_skb_pcount(skb);
3203 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3204 return 0;
3205 packets_acked -= tcp_skb_pcount(skb);
3206
3207 if (packets_acked) {
3208 BUG_ON(tcp_skb_pcount(skb) == 0);
3209 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3210 }
3211
3212 return packets_acked;
3213 }
3214
3215 static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3216 const struct sk_buff *ack_skb, u32 prior_snd_una)
3217 {
3218 const struct skb_shared_info *shinfo;
3219
3220
3221 if (likely(!TCP_SKB_CB(skb)->txstamp_ack))
3222 return;
3223
3224 shinfo = skb_shinfo(skb);
3225 if (!before(shinfo->tskey, prior_snd_una) &&
3226 before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
3227 tcp_skb_tsorted_save(skb) {
3228 __skb_tstamp_tx(skb, ack_skb, NULL, sk, SCM_TSTAMP_ACK);
3229 } tcp_skb_tsorted_restore(skb);
3230 }
3231 }
3232
3233
3234
3235
3236
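/* Remove fully acknowledged skbs from the retransmission queue,
 * collecting RTT samples, SACK/loss/delivery accounting and the FLAG_*
 * summary that drives the rest of the ACK processing.
 */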
3237 static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
3238 u32 prior_fack, u32 prior_snd_una,
3239 struct tcp_sacktag_state *sack, bool ece_ack)
3240 {
3241 const struct inet_connection_sock *icsk = inet_csk(sk);
3242 u64 first_ackt, last_ackt;
3243 struct tcp_sock *tp = tcp_sk(sk);
3244 u32 prior_sacked = tp->sacked_out;
3245 u32 reord = tp->snd_nxt;
3246 struct sk_buff *skb, *next;
3247 bool fully_acked = true;
3248 long sack_rtt_us = -1L;
3249 long seq_rtt_us = -1L;
3250 long ca_rtt_us = -1L;
3251 u32 pkts_acked = 0;
3252 bool rtt_update;
3253 int flag = 0;
3254
3255 first_ackt = 0;
3256
3257 for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
3258 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3259 const u32 start_seq = scb->seq;
3260 u8 sacked = scb->sacked;
3261 u32 acked_pcount;
3262
3263
3264 if (after(scb->end_seq, tp->snd_una)) {
3265 if (tcp_skb_pcount(skb) == 1 ||
3266 !after(tp->snd_una, scb->seq))
3267 break;
3268
3269 acked_pcount = tcp_tso_acked(sk, skb);
3270 if (!acked_pcount)
3271 break;
3272 fully_acked = false;
3273 } else {
3274 acked_pcount = tcp_skb_pcount(skb);
3275 }
3276
3277 if (unlikely(sacked & TCPCB_RETRANS)) {
3278 if (sacked & TCPCB_SACKED_RETRANS)
3279 tp->retrans_out -= acked_pcount;
3280 flag |= FLAG_RETRANS_DATA_ACKED;
3281 } else if (!(sacked & TCPCB_SACKED_ACKED)) {
3282 last_ackt = tcp_skb_timestamp_us(skb);
3283 WARN_ON_ONCE(last_ackt == 0);
3284 if (!first_ackt)
3285 first_ackt = last_ackt;
3286
3287 if (before(start_seq, reord))
3288 reord = start_seq;
3289 if (!after(scb->end_seq, tp->high_seq))
3290 flag |= FLAG_ORIG_SACK_ACKED;
3291 }
3292
3293 if (sacked & TCPCB_SACKED_ACKED) {
3294 tp->sacked_out -= acked_pcount;
3295 } else if (tcp_is_sack(tp)) {
3296 tcp_count_delivered(tp, acked_pcount, ece_ack);
3297 if (!tcp_skb_spurious_retrans(tp, skb))
3298 tcp_rack_advance(tp, sacked, scb->end_seq,
3299 tcp_skb_timestamp_us(skb));
3300 }
3301 if (sacked & TCPCB_LOST)
3302 tp->lost_out -= acked_pcount;
3303
3304 tp->packets_out -= acked_pcount;
3305 pkts_acked += acked_pcount;
3306 tcp_rate_skb_delivered(sk, skb, sack->rate);
3307
3308
3309
3310
3311
3312
3313
3314
3315 if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
3316 flag |= FLAG_DATA_ACKED;
3317 } else {
3318 flag |= FLAG_SYN_ACKED;
3319 tp->retrans_stamp = 0;
3320 }
3321
3322 if (!fully_acked)
3323 break;
3324
3325 tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
3326
3327 next = skb_rb_next(skb);
3328 if (unlikely(skb == tp->retransmit_skb_hint))
3329 tp->retransmit_skb_hint = NULL;
3330 if (unlikely(skb == tp->lost_skb_hint))
3331 tp->lost_skb_hint = NULL;
3332 tcp_highest_sack_replace(sk, skb, next);
3333 tcp_rtx_queue_unlink_and_free(skb, sk);
3334 }
3335
3336 if (!skb)
3337 tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
3338
3339 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3340 tp->snd_up = tp->snd_una;
3341
3342 if (skb) {
3343 tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
3344 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
3345 flag |= FLAG_SACK_RENEGING;
3346 }
3347
3348 if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
3349 seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
3350 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
3351
3352 if (pkts_acked == 1 && fully_acked && !prior_sacked &&
3353 (tp->snd_una - prior_snd_una) < tp->mss_cache &&
3354 sack->rate->prior_delivered + 1 == tp->delivered &&
3355 !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
3356
3357
3358
3359
3360 flag |= FLAG_ACK_MAYBE_DELAYED;
3361 }
3362 }
3363 if (sack->first_sackt) {
3364 sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
3365 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt);
3366 }
3367 rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
3368 ca_rtt_us, sack->rate);
3369
3370 if (flag & FLAG_ACKED) {
3371 flag |= FLAG_SET_XMIT_TIMER;
3372 if (unlikely(icsk->icsk_mtup.probe_size &&
3373 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3374 tcp_mtup_probe_success(sk);
3375 }
3376
3377 if (tcp_is_reno(tp)) {
3378 tcp_remove_reno_sacks(sk, pkts_acked, ece_ack);
3379
3380
3381
3382
3383
3384
3385
3386 if (flag & FLAG_RETRANS_DATA_ACKED)
3387 flag &= ~FLAG_ORIG_SACK_ACKED;
3388 } else {
3389 int delta;
3390
3391
3392 if (before(reord, prior_fack))
3393 tcp_check_sack_reordering(sk, reord, 0);
3394
3395 delta = prior_sacked - tp->sacked_out;
3396 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3397 }
3398 } else if (skb && rtt_update && sack_rtt_us >= 0 &&
3399 sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp,
3400 tcp_skb_timestamp_us(skb))) {
3401
3402
3403
3404
3405 flag |= FLAG_SET_XMIT_TIMER;
3406 }
3407
3408 if (icsk->icsk_ca_ops->pkts_acked) {
3409 struct ack_sample sample = { .pkts_acked = pkts_acked,
3410 .rtt_us = sack->rate->rtt_us };
3411
3412 sample.in_flight = tp->mss_cache *
3413 (tp->delivered - sack->rate->prior_delivered);
3414 icsk->icsk_ca_ops->pkts_acked(sk, &sample);
3415 }
3416
3417 #if FASTRETRANS_DEBUG > 0
3418 WARN_ON((int)tp->sacked_out < 0);
3419 WARN_ON((int)tp->lost_out < 0);
3420 WARN_ON((int)tp->retrans_out < 0);
3421 if (!tp->packets_out && tcp_is_sack(tp)) {
3422 icsk = inet_csk(sk);
3423 if (tp->lost_out) {
3424 pr_debug("Leak l=%u %d\n",
3425 tp->lost_out, icsk->icsk_ca_state);
3426 tp->lost_out = 0;
3427 }
3428 if (tp->sacked_out) {
3429 pr_debug("Leak s=%u %d\n",
3430 tp->sacked_out, icsk->icsk_ca_state);
3431 tp->sacked_out = 0;
3432 }
3433 if (tp->retrans_out) {
3434 pr_debug("Leak r=%u %d\n",
3435 tp->retrans_out, icsk->icsk_ca_state);
3436 tp->retrans_out = 0;
3437 }
3438 }
3439 #endif
3440 return flag;
3441 }
3442
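/* The send head is blocked by the peer's receive window: stop the
 * zero-window probe timer once the head fits, otherwise re-arm it
 * (clamped to the user timeout).
 */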
3443 static void tcp_ack_probe(struct sock *sk)
3444 {
3445 struct inet_connection_sock *icsk = inet_csk(sk);
3446 struct sk_buff *head = tcp_send_head(sk);
3447 const struct tcp_sock *tp = tcp_sk(sk);
3448
3449
3450 if (!head)
3451 return;
3452 if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
3453 icsk->icsk_backoff = 0;
3454 icsk->icsk_probes_tstamp = 0;
3455 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3456
3457
3458
3459 } else {
3460 unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
3461
3462 when = tcp_clamp_probe0_to_user_timeout(sk, when);
3463 tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX);
3464 }
3465 }
3466
3467 static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3468 {
3469 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3470 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3471 }
3472
3473
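/* Decide whether this ACK may grow cwnd: under heavy reordering only
 * ACKs making forward progress count, otherwise any ACK of new data.
 */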
3474 static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3475 {
3476
3477
3478
3479
3480
3481
3482 if (tcp_sk(sk)->reordering >
3483 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering))
3484 return flag & FLAG_FORWARD_PROGRESS;
3485
3486 return flag & FLAG_DATA_ACKED;
3487 }
3488
3489
3490
3491
3492
3493
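/* Central congestion-control step for an ACK: delegate fully to the CA
 * module when it implements ->cong_control, otherwise run the PRR
 * reduction while it is in progress or the normal cwnd increase, and
 * finally refresh the pacing rate.
 */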
3494 static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
3495 int flag, const struct rate_sample *rs)
3496 {
3497 const struct inet_connection_sock *icsk = inet_csk(sk);
3498
3499 if (icsk->icsk_ca_ops->cong_control) {
3500 icsk->icsk_ca_ops->cong_control(sk, rs);
3501 return;
3502 }
3503
3504 if (tcp_in_cwnd_reduction(sk)) {
3505
3506 tcp_cwnd_reduction(sk, acked_sacked, rs->losses, flag);
3507 } else if (tcp_may_raise_cwnd(sk, flag)) {
3508
3509 tcp_cong_avoid(sk, ack, acked_sacked);
3510 }
3511 tcp_update_pacing_rate(sk);
3512 }
3513
3514
3515
3516
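/* A window update is acceptable when the ACK advances snd_una, carries
 * a newer sequence than the previous update, or advertises a larger
 * window for the same sequence (assumes snd_una <= ack <= snd_nxt).
 */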
3517 static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3518 const u32 ack, const u32 ack_seq,
3519 const u32 nwin)
3520 {
3521 return after(ack, tp->snd_una) ||
3522 after(ack_seq, tp->snd_wl1) ||
3523 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3524 }
3525
3526
3527 static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
3528 {
3529 u32 delta = ack - tp->snd_una;
3530
3531 sock_owned_by_me((struct sock *)tp);
3532 tp->bytes_acked += delta;
3533 tp->snd_una = ack;
3534 }
3535
3536
3537 static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
3538 {
3539 u32 delta = seq - tp->rcv_nxt;
3540
3541 sock_owned_by_me((struct sock *)tp);
3542 tp->bytes_received += delta;
3543 WRITE_ONCE(tp->rcv_nxt, seq);
3544 }
3545
3546
3547
3548
3549
3550
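/* Update the send window from an acceptable ACK and advance snd_una.
 * A changed window invalidates header prediction and may require an
 * MSS re-sync when a new maximum window is seen.
 */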
3551 static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3552 u32 ack_seq)
3553 {
3554 struct tcp_sock *tp = tcp_sk(sk);
3555 int flag = 0;
3556 u32 nwin = ntohs(tcp_hdr(skb)->window);
3557
3558 if (likely(!tcp_hdr(skb)->syn))
3559 nwin <<= tp->rx_opt.snd_wscale;
3560
3561 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3562 flag |= FLAG_WIN_UPDATE;
3563 tcp_update_wl(tp, ack_seq);
3564
3565 if (tp->snd_wnd != nwin) {
3566 tp->snd_wnd = nwin;
3567
3568
3569
3570
3571 tp->pred_flags = 0;
3572 tcp_fast_path_check(sk);
3573
3574 if (!tcp_write_queue_empty(sk))
3575 tcp_slow_start_after_idle_check(sk);
3576
3577 if (nwin > tp->max_window) {
3578 tp->max_window = nwin;
3579 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3580 }
3581 }
3582 }
3583
3584 tcp_snd_una_update(tp, ack);
3585
3586 return flag;
3587 }
3588
3589 static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
3590 u32 *last_oow_ack_time)
3591 {
3592 if (*last_oow_ack_time) {
3593 s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
3594
3595 if (0 <= elapsed &&
3596 elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
3597 NET_INC_STATS(net, mib_idx);
3598 return true;
3599 }
3600 }
3601
3602 *last_oow_ack_time = tcp_jiffies32;
3603
3604 return false;
3605 }
3606
3607
3608
3609
3610
3611
3612
3613
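/* Rate-limit dupacks sent in response to out-of-window SYNs or ACKs to
 * mitigate ACK loops; only pure ACKs and SYNs are subject to the limit,
 * data segments are not.
 */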
3614 bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
3615 int mib_idx, u32 *last_oow_ack_time)
3616 {
3617
3618 if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
3619 !tcp_hdr(skb)->syn)
3620 return false;
3621
3622 return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
3623 }
3624
3625
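/* Send a challenge ACK (RFC 5961) for a suspicious segment, subject to
 * the per-socket out-of-window limit and to a global, randomized
 * per-second budget.
 */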
3626 static void tcp_send_challenge_ack(struct sock *sk)
3627 {
3628 struct tcp_sock *tp = tcp_sk(sk);
3629 struct net *net = sock_net(sk);
3630 u32 count, now, ack_limit;
3631
3632
3633 if (__tcp_oow_rate_limited(net,
3634 LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
3635 &tp->last_oow_ack_time))
3636 return;
3637
3638 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
3639 if (ack_limit == INT_MAX)
3640 goto send_ack;
3641
3642
3643 now = jiffies / HZ;
3644 if (now != READ_ONCE(net->ipv4.tcp_challenge_timestamp)) {
3645 u32 half = (ack_limit + 1) >> 1;
3646
3647 WRITE_ONCE(net->ipv4.tcp_challenge_timestamp, now);
3648 WRITE_ONCE(net->ipv4.tcp_challenge_count, half + prandom_u32_max(ack_limit));
3649 }
3650 count = READ_ONCE(net->ipv4.tcp_challenge_count);
3651 if (count > 0) {
3652 WRITE_ONCE(net->ipv4.tcp_challenge_count, count - 1);
3653 send_ack:
3654 NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
3655 tcp_send_ack(sk);
3656 }
3657 }
3658
3659 static void tcp_store_ts_recent(struct tcp_sock *tp)
3660 {
3661 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3662 tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
3663 }
3664
3665 static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3666 {
3667 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3668
3669
3670
3671
3672
3673
3674
3675 if (tcp_paws_check(&tp->rx_opt, 0))
3676 tcp_store_ts_recent(tp);
3677 }
3678 }
3679
3680
3681
3682
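/* Handle ACKs during a tail loss probe episode: detect whether the
 * probe repaired a real loss (then perform a one-off cwnd reduction) or
 * was spurious, and end the episode by clearing tlp_high_seq.
 */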
3683 static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3684 {
3685 struct tcp_sock *tp = tcp_sk(sk);
3686
3687 if (before(ack, tp->tlp_high_seq))
3688 return;
3689
3690 if (!tp->tlp_retrans) {
3691
3692 tp->tlp_high_seq = 0;
3693 } else if (flag & FLAG_DSACK_TLP) {
3694
3695 tp->tlp_high_seq = 0;
3696 } else if (after(ack, tp->tlp_high_seq)) {
3697
3698
3699
3700 tcp_init_cwnd_reduction(sk);
3701 tcp_set_ca_state(sk, TCP_CA_CWR);
3702 tcp_end_cwnd_reduction(sk);
3703 tcp_try_keep_open(sk);
3704 NET_INC_STATS(sock_net(sk),
3705 LINUX_MIB_TCPLOSSPROBERECOVERY);
3706 } else if (!(flag & (FLAG_SND_UNA_ADVANCED |
3707 FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
3708
3709 tp->tlp_high_seq = 0;
3710 }
3711 }
3712
3713 static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
3714 {
3715 const struct inet_connection_sock *icsk = inet_csk(sk);
3716
3717 if (icsk->icsk_ca_ops->in_ack_event)
3718 icsk->icsk_ca_ops->in_ack_event(sk, flags);
3719 }
3720
3721
3722
3723
3724
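/* After the congestion state has been processed, transmit what the
 * state machine asked for: previously unsent data when F-RTO requested
 * REXMIT_NEW, otherwise the marked-lost part of the retransmit queue.
 */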
3725 static void tcp_xmit_recovery(struct sock *sk, int rexmit)
3726 {
3727 struct tcp_sock *tp = tcp_sk(sk);
3728
3729 if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT)
3730 return;
3731
3732 if (unlikely(rexmit == REXMIT_NEW)) {
3733 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
3734 TCP_NAGLE_OFF);
3735 if (after(tp->snd_nxt, tp->high_seq))
3736 return;
3737 tp->frto = 0;
3738 }
3739 tcp_xmit_retransmit_queue(sk);
3740 }
3741
3742
3743 static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
3744 {
3745 const struct net *net = sock_net(sk);
3746 struct tcp_sock *tp = tcp_sk(sk);
3747 u32 delivered;
3748
3749 delivered = tp->delivered - prior_delivered;
3750 NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered);
3751 if (flag & FLAG_ECE)
3752 NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered);
3753
3754 return delivered;
3755 }
3756
3757
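/* This routine deals with incoming ACKs (not outgoing ones): validate
 * the ACK, update the send window, clean the retransmit queue, run loss
 * recovery and congestion control, then (re)transmit as needed.
 */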
3758 static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3759 {
3760 struct inet_connection_sock *icsk = inet_csk(sk);
3761 struct tcp_sock *tp = tcp_sk(sk);
3762 struct tcp_sacktag_state sack_state;
3763 struct rate_sample rs = { .prior_delivered = 0 };
3764 u32 prior_snd_una = tp->snd_una;
3765 bool is_sack_reneg = tp->is_sack_reneg;
3766 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3767 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3768 int num_dupack = 0;
3769 int prior_packets = tp->packets_out;
3770 u32 delivered = tp->delivered;
3771 u32 lost = tp->lost;
3772 int rexmit = REXMIT_NONE;
3773 u32 prior_fack;
3774
3775 sack_state.first_sackt = 0;
3776 sack_state.rate = &rs;
3777 sack_state.sack_delivered = 0;
3778
3779
3780 prefetch(sk->tcp_rtx_queue.rb_node);
3781
3782
3783
3784
3785 if (before(ack, prior_snd_una)) {
3786
3787 if (before(ack, prior_snd_una - tp->max_window)) {
3788 if (!(flag & FLAG_NO_CHALLENGE_ACK))
3789 tcp_send_challenge_ack(sk);
3790 return -SKB_DROP_REASON_TCP_TOO_OLD_ACK;
3791 }
3792 goto old_ack;
3793 }
3794
3795
3796
3797
3798 if (after(ack, tp->snd_nxt))
3799 return -SKB_DROP_REASON_TCP_ACK_UNSENT_DATA;
3800
3801 if (after(ack, prior_snd_una)) {
3802 flag |= FLAG_SND_UNA_ADVANCED;
3803 icsk->icsk_retransmits = 0;
3804
3805 #if IS_ENABLED(CONFIG_TLS_DEVICE)
3806 if (static_branch_unlikely(&clean_acked_data_enabled.key))
3807 if (icsk->icsk_clean_acked)
3808 icsk->icsk_clean_acked(sk, ack);
3809 #endif
3810 }
3811
3812 prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
3813 rs.prior_in_flight = tcp_packets_in_flight(tp);
3814
3815
3816
3817
3818 if (flag & FLAG_UPDATE_TS_RECENT)
3819 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
3820
3821 if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) ==
3822 FLAG_SND_UNA_ADVANCED) {
3823
3824
3825
3826
3827 tcp_update_wl(tp, ack_seq);
3828 tcp_snd_una_update(tp, ack);
3829 flag |= FLAG_WIN_UPDATE;
3830
3831 tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
3832
3833 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
3834 } else {
3835 u32 ack_ev_flags = CA_ACK_SLOWPATH;
3836
3837 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3838 flag |= FLAG_DATA;
3839 else
3840 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3841
3842 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3843
3844 if (TCP_SKB_CB(skb)->sacked)
3845 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3846 &sack_state);
3847
3848 if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
3849 flag |= FLAG_ECE;
3850 ack_ev_flags |= CA_ACK_ECE;
3851 }
3852
3853 if (sack_state.sack_delivered)
3854 tcp_count_delivered(tp, sack_state.sack_delivered,
3855 flag & FLAG_ECE);
3856
3857 if (flag & FLAG_WIN_UPDATE)
3858 ack_ev_flags |= CA_ACK_WIN_UPDATE;
3859
3860 tcp_in_ack_event(sk, ack_ev_flags);
3861 }
3862
3863
3864
3865
3866
3867
3868
3869
3870 tcp_ecn_accept_cwr(sk, skb);
3871
3872
3873
3874
3875 sk->sk_err_soft = 0;
3876 icsk->icsk_probes_out = 0;
3877 tp->rcv_tstamp = tcp_jiffies32;
3878 if (!prior_packets)
3879 goto no_queue;
3880
3881
3882 flag |= tcp_clean_rtx_queue(sk, skb, prior_fack, prior_snd_una,
3883 &sack_state, flag & FLAG_ECE);
3884
3885 tcp_rack_update_reo_wnd(sk, &rs);
3886
3887 if (tp->tlp_high_seq)
3888 tcp_process_tlp_ack(sk, ack, flag);
3889
3890 if (tcp_ack_is_dubious(sk, flag)) {
3891 if (!(flag & (FLAG_SND_UNA_ADVANCED |
3892 FLAG_NOT_DUP | FLAG_DSACKING_ACK))) {
3893 num_dupack = 1;
3894
3895 if (!(flag & FLAG_DATA))
3896 num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
3897 }
3898 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3899 &rexmit);
3900 }
3901
3902
3903 if (flag & FLAG_SET_XMIT_TIMER)
3904 tcp_set_xmit_timer(sk);
3905
3906 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3907 sk_dst_confirm(sk);
3908
3909 delivered = tcp_newly_delivered(sk, delivered, flag);
3910 lost = tp->lost - lost;
3911 rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
3912 tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
3913 tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
3914 tcp_xmit_recovery(sk, rexmit);
3915 return 1;
3916
3917 no_queue:
3918
3919 if (flag & FLAG_DSACKING_ACK) {
3920 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3921 &rexmit);
3922 tcp_newly_delivered(sk, delivered, flag);
3923 }
3924
3925
3926
3927
3928 tcp_ack_probe(sk);
3929
3930 if (tp->tlp_high_seq)
3931 tcp_process_tlp_ack(sk, ack, flag);
3932 return 1;
3933
3934 old_ack:
3935
3936
3937
3938 if (TCP_SKB_CB(skb)->sacked) {
3939 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3940 &sack_state);
3941 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3942 &rexmit);
3943 tcp_newly_delivered(sk, delivered, flag);
3944 tcp_xmit_recovery(sk, rexmit);
3945 }
3946
3947 return 0;
3948 }
3949
3950 static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
3951 bool syn, struct tcp_fastopen_cookie *foc,
3952 bool exp_opt)
3953 {
3954
3955 if (!foc || !syn || len < 0 || (len & 1))
3956 return;
3957
3958 if (len >= TCP_FASTOPEN_COOKIE_MIN &&
3959 len <= TCP_FASTOPEN_COOKIE_MAX)
3960 memcpy(foc->val, cookie, len);
3961 else if (len != 0)
3962 len = -1;
3963 foc->len = len;
3964 foc->exp = exp_opt;
3965 }
3966
3967 static bool smc_parse_options(const struct tcphdr *th,
3968 struct tcp_options_received *opt_rx,
3969 const unsigned char *ptr,
3970 int opsize)
3971 {
3972 #if IS_ENABLED(CONFIG_SMC)
3973 if (static_branch_unlikely(&tcp_have_smc)) {
3974 if (th->syn && !(opsize & 1) &&
3975 opsize >= TCPOLEN_EXP_SMC_BASE &&
3976 get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) {
3977 opt_rx->smc_ok = 1;
3978 return true;
3979 }
3980 }
3981 #endif
3982 return false;
3983 }
3984
3985
3986
3987
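/* Scan the TCP options for an MSS option and return its value (clamped
 * to user_mss when set), or 0 when no valid MSS option is present.
 */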
3988 u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
3989 {
3990 const unsigned char *ptr = (const unsigned char *)(th + 1);
3991 int length = (th->doff * 4) - sizeof(struct tcphdr);
3992 u16 mss = 0;
3993
3994 while (length > 0) {
3995 int opcode = *ptr++;
3996 int opsize;
3997
3998 switch (opcode) {
3999 case TCPOPT_EOL:
4000 return mss;
4001 case TCPOPT_NOP:
4002 length--;
4003 continue;
4004 default:
4005 if (length < 2)
4006 return mss;
4007 opsize = *ptr++;
4008 if (opsize < 2)
4009 return mss;
4010 if (opsize > length)
4011 return mss;
4012 if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
4013 u16 in_mss = get_unaligned_be16(ptr);
4014
4015 if (in_mss) {
4016 if (user_mss && user_mss < in_mss)
4017 in_mss = user_mss;
4018 mss = in_mss;
4019 }
4020 }
4021 ptr += opsize - 2;
4022 length -= opsize;
4023 }
4024 }
4025 return mss;
4026 }
4027 EXPORT_SYMBOL_GPL(tcp_parse_mss_option);
4028
4029
4030
4031
4032
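/* Parse all TCP options.  Normally only used on SYN and SYN-ACK
 * packets; established-flow segments also end up here when the aligned
 * timestamp fast parse fails.
 */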
4033 void tcp_parse_options(const struct net *net,
4034 const struct sk_buff *skb,
4035 struct tcp_options_received *opt_rx, int estab,
4036 struct tcp_fastopen_cookie *foc)
4037 {
4038 const unsigned char *ptr;
4039 const struct tcphdr *th = tcp_hdr(skb);
4040 int length = (th->doff * 4) - sizeof(struct tcphdr);
4041
4042 ptr = (const unsigned char *)(th + 1);
4043 opt_rx->saw_tstamp = 0;
4044 opt_rx->saw_unknown = 0;
4045
4046 while (length > 0) {
4047 int opcode = *ptr++;
4048 int opsize;
4049
4050 switch (opcode) {
4051 case TCPOPT_EOL:
4052 return;
4053 case TCPOPT_NOP:
4054 length--;
4055 continue;
4056 default:
4057 if (length < 2)
4058 return;
4059 opsize = *ptr++;
4060 if (opsize < 2)
4061 return;
4062 if (opsize > length)
4063 return;
4064 switch (opcode) {
4065 case TCPOPT_MSS:
4066 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
4067 u16 in_mss = get_unaligned_be16(ptr);
4068 if (in_mss) {
4069 if (opt_rx->user_mss &&
4070 opt_rx->user_mss < in_mss)
4071 in_mss = opt_rx->user_mss;
4072 opt_rx->mss_clamp = in_mss;
4073 }
4074 }
4075 break;
4076 case TCPOPT_WINDOW:
4077 if (opsize == TCPOLEN_WINDOW && th->syn &&
4078 !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) {
4079 __u8 snd_wscale = *(__u8 *)ptr;
4080 opt_rx->wscale_ok = 1;
4081 if (snd_wscale > TCP_MAX_WSCALE) {
4082 net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n",
4083 __func__,
4084 snd_wscale,
4085 TCP_MAX_WSCALE);
4086 snd_wscale = TCP_MAX_WSCALE;
4087 }
4088 opt_rx->snd_wscale = snd_wscale;
4089 }
4090 break;
4091 case TCPOPT_TIMESTAMP:
4092 if ((opsize == TCPOLEN_TIMESTAMP) &&
4093 ((estab && opt_rx->tstamp_ok) ||
4094 (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) {
4095 opt_rx->saw_tstamp = 1;
4096 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
4097 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
4098 }
4099 break;
4100 case TCPOPT_SACK_PERM:
4101 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
4102 !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) {
4103 opt_rx->sack_ok = TCP_SACK_SEEN;
4104 tcp_sack_reset(opt_rx);
4105 }
4106 break;
4107
4108 case TCPOPT_SACK:
4109 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
4110 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
4111 opt_rx->sack_ok) {
4112 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
4113 }
4114 break;
4115 #ifdef CONFIG_TCP_MD5SIG
4116 case TCPOPT_MD5SIG:
4117
4118
4119
4120
4121 break;
4122 #endif
4123 case TCPOPT_FASTOPEN:
4124 tcp_parse_fastopen_option(
4125 opsize - TCPOLEN_FASTOPEN_BASE,
4126 ptr, th->syn, foc, false);
4127 break;
4128
4129 case TCPOPT_EXP:
4130
4131
4132
4133 if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
4134 get_unaligned_be16(ptr) ==
4135 TCPOPT_FASTOPEN_MAGIC) {
4136 tcp_parse_fastopen_option(opsize -
4137 TCPOLEN_EXP_FASTOPEN_BASE,
4138 ptr + 2, th->syn, foc, true);
4139 break;
4140 }
4141
4142 if (smc_parse_options(th, opt_rx, ptr, opsize))
4143 break;
4144
4145 opt_rx->saw_unknown = 1;
4146 break;
4147
4148 default:
4149 opt_rx->saw_unknown = 1;
4150 }
4151 ptr += opsize-2;
4152 length -= opsize;
4153 }
4154 }
4155 }
4156 EXPORT_SYMBOL(tcp_parse_options);
4157
4158 static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
4159 {
4160 const __be32 *ptr = (const __be32 *)(th + 1);
4161
4162 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
4163 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
4164 tp->rx_opt.saw_tstamp = 1;
4165 ++ptr;
4166 tp->rx_opt.rcv_tsval = ntohl(*ptr);
4167 ++ptr;
4168 if (*ptr)
4169 tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
4170 else
4171 tp->rx_opt.rcv_tsecr = 0;
4172 return true;
4173 }
4174 return false;
4175 }
4176
4177
4178
4179
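/* Fast option parse for established connections: no options at all, or
 * a single well-aligned timestamp option, are handled inline; anything
 * else falls back to tcp_parse_options().
 */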
4180 static bool tcp_fast_parse_options(const struct net *net,
4181 const struct sk_buff *skb,
4182 const struct tcphdr *th, struct tcp_sock *tp)
4183 {
4184
4185
4186
4187 if (th->doff == (sizeof(*th) / 4)) {
4188 tp->rx_opt.saw_tstamp = 0;
4189 return false;
4190 } else if (tp->rx_opt.tstamp_ok &&
4191 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
4192 if (tcp_parse_aligned_timestamp(tp, th))
4193 return true;
4194 }
4195
4196 tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
4197 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
4198 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
4199
4200 return true;
4201 }
4202
4203 #ifdef CONFIG_TCP_MD5SIG
4204
4205
4206
4207 const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
4208 {
4209 int length = (th->doff << 2) - sizeof(*th);
4210 const u8 *ptr = (const u8 *)(th + 1);
4211
4212
4213 while (length >= TCPOLEN_MD5SIG) {
4214 int opcode = *ptr++;
4215 int opsize;
4216
4217 switch (opcode) {
4218 case TCPOPT_EOL:
4219 return NULL;
4220 case TCPOPT_NOP:
4221 length--;
4222 continue;
4223 default:
4224 opsize = *ptr++;
4225 if (opsize < 2 || opsize > length)
4226 return NULL;
4227 if (opcode == TCPOPT_MD5SIG)
4228 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
4229 }
4230 ptr += opsize - 2;
4231 length -= opsize;
4232 }
4233 return NULL;
4234 }
4235 EXPORT_SYMBOL(tcp_parse_md5sig_option);
4236 #endif
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
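/* A pure ACK that acknowledges nothing new, updates no window state and
 * carries a timestamp no older than roughly one RTO is treated as plain
 * reordering rather than a PAWS violation, so it is not discarded.
 */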
4261 static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
4262 {
4263 const struct tcp_sock *tp = tcp_sk(sk);
4264 const struct tcphdr *th = tcp_hdr(skb);
4265 u32 seq = TCP_SKB_CB(skb)->seq;
4266 u32 ack = TCP_SKB_CB(skb)->ack_seq;
4267
4268 return (
4269 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
4270
4271
4272 ack == tp->snd_una &&
4273
4274
4275 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
4276
4277
4278 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
4279 }
4280
4281 static inline bool tcp_paws_discard(const struct sock *sk,
4282 const struct sk_buff *skb)
4283 {
4284 const struct tcp_sock *tp = tcp_sk(sk);
4285
4286 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
4287 !tcp_disordered_ack(sk, skb);
4288 }
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303 static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
4304 {
4305 return !before(end_seq, tp->rcv_wup) &&
4306 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
4307 }
4308
4309
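/* We got a reset: report the error matching the current state and tear
 * the connection down.
 */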
4310 void tcp_reset(struct sock *sk, struct sk_buff *skb)
4311 {
4312 trace_tcp_receive_reset(sk);
4313
4314
4315
4316
4317 if (sk_is_mptcp(sk))
4318 mptcp_incoming_options(sk, skb);
4319
4320
4321 switch (sk->sk_state) {
4322 case TCP_SYN_SENT:
4323 sk->sk_err = ECONNREFUSED;
4324 break;
4325 case TCP_CLOSE_WAIT:
4326 sk->sk_err = EPIPE;
4327 break;
4328 case TCP_CLOSE:
4329 return;
4330 default:
4331 sk->sk_err = ECONNRESET;
4332 }
4333
4334 smp_wmb();
4335
4336 tcp_write_queue_purge(sk);
4337 tcp_done(sk);
4338
4339 if (!sock_flag(sk, SOCK_DEAD))
4340 sk_error_report(sk);
4341 }
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
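/* Process an in-sequence FIN: schedule the ACK, shut down the receive
 * side and walk the state machine (ESTABLISHED -> CLOSE_WAIT,
 * FIN_WAIT1 -> CLOSING, FIN_WAIT2 -> TIME_WAIT).  Queued out-of-order
 * data is useless beyond the FIN and is purged.
 */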
4357 void tcp_fin(struct sock *sk)
4358 {
4359 struct tcp_sock *tp = tcp_sk(sk);
4360
4361 inet_csk_schedule_ack(sk);
4362
4363 sk->sk_shutdown |= RCV_SHUTDOWN;
4364 sock_set_flag(sk, SOCK_DONE);
4365
4366 switch (sk->sk_state) {
4367 case TCP_SYN_RECV:
4368 case TCP_ESTABLISHED:
4369
4370 tcp_set_state(sk, TCP_CLOSE_WAIT);
4371 inet_csk_enter_pingpong_mode(sk);
4372 break;
4373
4374 case TCP_CLOSE_WAIT:
4375 case TCP_CLOSING:
4376
4377
4378
4379 break;
4380 case TCP_LAST_ACK:
4381
4382 break;
4383
4384 case TCP_FIN_WAIT1:
4385
4386
4387
4388
4389 tcp_send_ack(sk);
4390 tcp_set_state(sk, TCP_CLOSING);
4391 break;
4392 case TCP_FIN_WAIT2:
4393
4394 tcp_send_ack(sk);
4395 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4396 break;
4397 default:
4398
4399
4400
4401 pr_err("%s: Impossible, sk->sk_state=%d\n",
4402 __func__, sk->sk_state);
4403 break;
4404 }
4405
4406
4407
4408
4409 skb_rbtree_purge(&tp->out_of_order_queue);
4410 if (tcp_is_sack(tp))
4411 tcp_sack_reset(&tp->rx_opt);
4412
4413 if (!sock_flag(sk, SOCK_DEAD)) {
4414 sk->sk_state_change(sk);
4415
4416
4417 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4418 sk->sk_state == TCP_CLOSE)
4419 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4420 else
4421 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4422 }
4423 }
4424
4425 static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4426 u32 end_seq)
4427 {
4428 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4429 if (before(seq, sp->start_seq))
4430 sp->start_seq = seq;
4431 if (after(end_seq, sp->end_seq))
4432 sp->end_seq = end_seq;
4433 return true;
4434 }
4435 return false;
4436 }
4437
4438 static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4439 {
4440 struct tcp_sock *tp = tcp_sk(sk);
4441
4442 if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
4443 int mib_idx;
4444
4445 if (before(seq, tp->rcv_nxt))
4446 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4447 else
4448 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4449
4450 NET_INC_STATS(sock_net(sk), mib_idx);
4451
4452 tp->rx_opt.dsack = 1;
4453 tp->duplicate_sack[0].start_seq = seq;
4454 tp->duplicate_sack[0].end_seq = end_seq;
4455 }
4456 }
4457
4458 static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4459 {
4460 struct tcp_sock *tp = tcp_sk(sk);
4461
4462 if (!tp->rx_opt.dsack)
4463 tcp_dsack_set(sk, seq, end_seq);
4464 else
4465 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4466 }
4467
4468 static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
4469 {
4470
4471
4472
4473
4474
4475 if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq &&
4476 sk_rethink_txhash(sk))
4477 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
4478 }
4479
4480 static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4481 {
4482 struct tcp_sock *tp = tcp_sk(sk);
4483
4484 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4485 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4486 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4487 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
4488
4489 if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
4490 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4491
4492 tcp_rcv_spurious_retrans(sk, skb);
4493 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4494 end_seq = tp->rcv_nxt;
4495 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4496 }
4497 }
4498
4499 tcp_send_ack(sk);
4500 }
4501
4502
4503
4504
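/* These routines update the SACK blocks in selective_acks[] as
 * out-of-order data arrives or in-order data closes up the sequence
 * space; adjacent or overlapping blocks are merged.
 */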
4505 static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4506 {
4507 int this_sack;
4508 struct tcp_sack_block *sp = &tp->selective_acks[0];
4509 struct tcp_sack_block *swalk = sp + 1;
4510
4511
4512
4513
4514 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4515 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4516 int i;
4517
4518
4519
4520
4521 tp->rx_opt.num_sacks--;
4522 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4523 sp[i] = sp[i + 1];
4524 continue;
4525 }
4526 this_sack++;
4527 swalk++;
4528 }
4529 }
4530
4531 static void tcp_sack_compress_send_ack(struct sock *sk)
4532 {
4533 struct tcp_sock *tp = tcp_sk(sk);
4534
4535 if (!tp->compressed_ack)
4536 return;
4537
4538 if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
4539 __sock_put(sk);
4540
4541
4542
4543
4544
4545 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
4546 tp->compressed_ack - 1);
4547
4548 tp->compressed_ack = 0;
4549 tcp_send_ack(sk);
4550 }
4551
4552
4553
4554
4555
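/* Up to 4 SACK blocks fit into the option space (3 when timestamps are
 * in use); be conservative and flush a pending compressed ACK whenever
 * more than 2 blocks would be needed.
 */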
4556 #define TCP_SACK_BLOCKS_EXPECTED 2
4557
4558 static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4559 {
4560 struct tcp_sock *tp = tcp_sk(sk);
4561 struct tcp_sack_block *sp = &tp->selective_acks[0];
4562 int cur_sacks = tp->rx_opt.num_sacks;
4563 int this_sack;
4564
4565 if (!cur_sacks)
4566 goto new_sack;
4567
4568 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4569 if (tcp_sack_extend(sp, seq, end_seq)) {
4570 if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
4571 tcp_sack_compress_send_ack(sk);
4572
4573 for (; this_sack > 0; this_sack--, sp--)
4574 swap(*sp, *(sp - 1));
4575 if (cur_sacks > 1)
4576 tcp_sack_maybe_coalesce(tp);
4577 return;
4578 }
4579 }
4580
4581 if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
4582 tcp_sack_compress_send_ack(sk);
4583
4584
4585
4586
4587
4588
4589
4590 if (this_sack >= TCP_NUM_SACKS) {
4591 this_sack--;
4592 tp->rx_opt.num_sacks--;
4593 sp--;
4594 }
4595 for (; this_sack > 0; this_sack--, sp--)
4596 *sp = *(sp - 1);
4597
4598 new_sack:
4599
4600 sp->start_seq = seq;
4601 sp->end_seq = end_seq;
4602 tp->rx_opt.num_sacks++;
4603 }
4604
4605
4606
4607 static void tcp_sack_remove(struct tcp_sock *tp)
4608 {
4609 struct tcp_sack_block *sp = &tp->selective_acks[0];
4610 int num_sacks = tp->rx_opt.num_sacks;
4611 int this_sack;
4612
4613
4614 if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4615 tp->rx_opt.num_sacks = 0;
4616 return;
4617 }
4618
4619 for (this_sack = 0; this_sack < num_sacks;) {
4620
4621 if (!before(tp->rcv_nxt, sp->start_seq)) {
4622 int i;
4623
4624
4625 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4626
4627
4628 for (i = this_sack+1; i < num_sacks; i++)
4629 tp->selective_acks[i-1] = tp->selective_acks[i];
4630 num_sacks--;
4631 continue;
4632 }
4633 this_sack++;
4634 sp++;
4635 }
4636 tp->rx_opt.num_sacks = num_sacks;
4637 }
4638
4639
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649
4650
4651
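/* Try to append 'from' to the tail skb 'to' to save memory and per-skb
 * overhead; only contiguous segments with compatible MPTCP and TLS
 * state can be merged.
 */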
4652 static bool tcp_try_coalesce(struct sock *sk,
4653 struct sk_buff *to,
4654 struct sk_buff *from,
4655 bool *fragstolen)
4656 {
4657 int delta;
4658
4659 *fragstolen = false;
4660
4661
4662 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
4663 return false;
4664
4665 if (!mptcp_skb_can_collapse(to, from))
4666 return false;
4667
4668 #ifdef CONFIG_TLS_DEVICE
4669 if (from->decrypted != to->decrypted)
4670 return false;
4671 #endif
4672
4673 if (!skb_try_coalesce(to, from, fragstolen, &delta))
4674 return false;
4675
4676 atomic_add(delta, &sk->sk_rmem_alloc);
4677 sk_mem_charge(sk, delta);
4678 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4679 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4680 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4681 TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
4682
4683 if (TCP_SKB_CB(from)->has_rxtstamp) {
4684 TCP_SKB_CB(to)->has_rxtstamp = true;
4685 to->tstamp = from->tstamp;
4686 skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
4687 }
4688
4689 return true;
4690 }
4691
4692 static bool tcp_ooo_try_coalesce(struct sock *sk,
4693 struct sk_buff *to,
4694 struct sk_buff *from,
4695 bool *fragstolen)
4696 {
4697 bool res = tcp_try_coalesce(sk, to, from, fragstolen);
4698
4699
4700 if (res) {
4701 u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
4702 max_t(u16, 1, skb_shinfo(from)->gso_segs);
4703
4704 skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
4705 }
4706 return res;
4707 }
4708
4709 static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
4710 enum skb_drop_reason reason)
4711 {
4712 sk_drops_add(sk, skb);
4713 kfree_skb_reason(skb, reason);
4714 }
4715
4716
4717
4718
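/* Move now in-order segments from the out-of-order rb-tree onto the
 * receive queue, generating D-SACK information for any parts that
 * duplicate data we have already received.
 */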
4719 static void tcp_ofo_queue(struct sock *sk)
4720 {
4721 struct tcp_sock *tp = tcp_sk(sk);
4722 __u32 dsack_high = tp->rcv_nxt;
4723 bool fin, fragstolen, eaten;
4724 struct sk_buff *skb, *tail;
4725 struct rb_node *p;
4726
4727 p = rb_first(&tp->out_of_order_queue);
4728 while (p) {
4729 skb = rb_to_skb(p);
4730 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4731 break;
4732
4733 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4734 __u32 dsack = dsack_high;
4735 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4736 dsack_high = TCP_SKB_CB(skb)->end_seq;
4737 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4738 }
4739 p = rb_next(p);
4740 rb_erase(&skb->rbnode, &tp->out_of_order_queue);
4741
4742 if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
4743 tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_DROP);
4744 continue;
4745 }
4746
4747 tail = skb_peek_tail(&sk->sk_receive_queue);
4748 eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
4749 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
4750 fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
4751 if (!eaten)
4752 __skb_queue_tail(&sk->sk_receive_queue, skb);
4753 else
4754 kfree_skb_partial(skb, fragstolen);
4755
4756 if (unlikely(fin)) {
4757 tcp_fin(sk);
4758
4759
4760
4761 break;
4762 }
4763 }
4764 }
4765
4766 static bool tcp_prune_ofo_queue(struct sock *sk);
4767 static int tcp_prune_queue(struct sock *sk);
4768
4769 static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4770 unsigned int size)
4771 {
4772 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4773 !sk_rmem_schedule(sk, skb, size)) {
4774
4775 if (tcp_prune_queue(sk) < 0)
4776 return -1;
4777
4778 while (!sk_rmem_schedule(sk, skb, size)) {
4779 if (!tcp_prune_ofo_queue(sk))
4780 return -1;
4781 }
4782 }
4783 return 0;
4784 }
4785
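/* Queue an out-of-order segment in the OFO rb-tree: coalesce with its
 * neighbours where possible, drop duplicates or fully covered skbs
 * (reporting them via D-SACK) and update the advertised SACK blocks.
 */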
4786 static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4787 {
4788 struct tcp_sock *tp = tcp_sk(sk);
4789 struct rb_node **p, *parent;
4790 struct sk_buff *skb1;
4791 u32 seq, end_seq;
4792 bool fragstolen;
4793
4794 tcp_ecn_check_ce(sk, skb);
4795
4796 if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
4797 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
4798 sk->sk_data_ready(sk);
4799 tcp_drop_reason(sk, skb, SKB_DROP_REASON_PROTO_MEM);
4800 return;
4801 }
4802
4803
4804 tp->pred_flags = 0;
4805 inet_csk_schedule_ack(sk);
4806
4807 tp->rcv_ooopack += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
4808 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
4809 seq = TCP_SKB_CB(skb)->seq;
4810 end_seq = TCP_SKB_CB(skb)->end_seq;
4811
4812 p = &tp->out_of_order_queue.rb_node;
4813 if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4814
4815 if (tcp_is_sack(tp)) {
4816 tp->rx_opt.num_sacks = 1;
4817 tp->selective_acks[0].start_seq = seq;
4818 tp->selective_acks[0].end_seq = end_seq;
4819 }
4820 rb_link_node(&skb->rbnode, NULL, p);
4821 rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
4822 tp->ooo_last_skb = skb;
4823 goto end;
4824 }
4825
4826
4827
4828
4829 if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
4830 skb, &fragstolen)) {
4831 coalesce_done:
4832
4833
4834
4835 if (tcp_is_sack(tp))
4836 tcp_grow_window(sk, skb, true);
4837 kfree_skb_partial(skb, fragstolen);
4838 skb = NULL;
4839 goto add_sack;
4840 }
4841
4842 if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {
4843 parent = &tp->ooo_last_skb->rbnode;
4844 p = &parent->rb_right;
4845 goto insert;
4846 }
4847
4848
4849 parent = NULL;
4850 while (*p) {
4851 parent = *p;
4852 skb1 = rb_to_skb(parent);
4853 if (before(seq, TCP_SKB_CB(skb1)->seq)) {
4854 p = &parent->rb_left;
4855 continue;
4856 }
4857 if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4858 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4859
4860 NET_INC_STATS(sock_net(sk),
4861 LINUX_MIB_TCPOFOMERGE);
4862 tcp_drop_reason(sk, skb,
4863 SKB_DROP_REASON_TCP_OFOMERGE);
4864 skb = NULL;
4865 tcp_dsack_set(sk, seq, end_seq);
4866 goto add_sack;
4867 }
4868 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4869
4870 tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
4871 } else {
4872
4873
4874
4875 rb_replace_node(&skb1->rbnode, &skb->rbnode,
4876 &tp->out_of_order_queue);
4877 tcp_dsack_extend(sk,
4878 TCP_SKB_CB(skb1)->seq,
4879 TCP_SKB_CB(skb1)->end_seq);
4880 NET_INC_STATS(sock_net(sk),
4881 LINUX_MIB_TCPOFOMERGE);
4882 tcp_drop_reason(sk, skb1,
4883 SKB_DROP_REASON_TCP_OFOMERGE);
4884 goto merge_right;
4885 }
4886 } else if (tcp_ooo_try_coalesce(sk, skb1,
4887 skb, &fragstolen)) {
4888 goto coalesce_done;
4889 }
4890 p = &parent->rb_right;
4891 }
4892 insert:
4893
4894 rb_link_node(&skb->rbnode, parent, p);
4895 rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
4896
4897 merge_right:
4898
4899 while ((skb1 = skb_rb_next(skb)) != NULL) {
4900 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4901 break;
4902 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4903 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4904 end_seq);
4905 break;
4906 }
4907 rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
4908 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4909 TCP_SKB_CB(skb1)->end_seq);
4910 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4911 tcp_drop_reason(sk, skb1, SKB_DROP_REASON_TCP_OFOMERGE);
4912 }
4913
4914 if (!skb1)
4915 tp->ooo_last_skb = skb;
4916
4917 add_sack:
4918 if (tcp_is_sack(tp))
4919 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4920 end:
4921 if (skb) {
4922
4923
4924
4925 if (tcp_is_sack(tp))
4926 tcp_grow_window(sk, skb, false);
4927 skb_condense(skb);
4928 skb_set_owner_r(skb, sk);
4929 }
4930 }
4931
4932 static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
4933 bool *fragstolen)
4934 {
4935 int eaten;
4936 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
4937
4938 eaten = (tail &&
4939 tcp_try_coalesce(sk, tail,
4940 skb, fragstolen)) ? 1 : 0;
4941 tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
4942 if (!eaten) {
4943 __skb_queue_tail(&sk->sk_receive_queue, skb);
4944 skb_set_owner_r(skb, sk);
4945 }
4946 return eaten;
4947 }
4948
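/* Inject data directly into the receive queue; used by the TCP repair
 * interface when restoring a connection's receive queue.
 */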
4949 int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4950 {
4951 struct sk_buff *skb;
4952 int err = -ENOMEM;
4953 int data_len = 0;
4954 bool fragstolen;
4955
4956 if (size == 0)
4957 return 0;
4958
4959 if (size > PAGE_SIZE) {
4960 int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
4961
4962 data_len = npages << PAGE_SHIFT;
4963 size = data_len + (size & ~PAGE_MASK);
4964 }
4965 skb = alloc_skb_with_frags(size - data_len, data_len,
4966 PAGE_ALLOC_COSTLY_ORDER,
4967 &err, sk->sk_allocation);
4968 if (!skb)
4969 goto err;
4970
4971 skb_put(skb, size - data_len);
4972 skb->data_len = data_len;
4973 skb->len = size;
4974
4975 if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
4976 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
4977 goto err_free;
4978 }
4979
4980 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
4981 if (err)
4982 goto err_free;
4983
4984 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
4985 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
4986 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4987
4988 if (tcp_queue_rcv(sk, skb, &fragstolen)) {
4989 WARN_ON_ONCE(fragstolen);
4990 __kfree_skb(skb);
4991 }
4992 return size;
4993
4994 err_free:
4995 kfree_skb(skb);
4996 err:
4997 return err;
4998
4999 }
5000
5001 void tcp_data_ready(struct sock *sk)
5002 {
5003 if (tcp_epollin_ready(sk, sk->sk_rcvlowat) || sock_flag(sk, SOCK_DONE))
5004 sk->sk_data_ready(sk);
5005 }
5006
5007 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
5008 {
5009 struct tcp_sock *tp = tcp_sk(sk);
5010 enum skb_drop_reason reason;
5011 bool fragstolen;
5012 int eaten;
5013
5014 /* If a subflow has been reset, the packet should not continue
5015  * to be processed, drop the packet.
5016  */
5017 if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) {
5018 __kfree_skb(skb);
5019 return;
5020 }
5021
5022 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
5023 __kfree_skb(skb);
5024 return;
5025 }
5026 skb_dst_drop(skb);
5027 __skb_pull(skb, tcp_hdr(skb)->doff * 4);
5028
5029 reason = SKB_DROP_REASON_NOT_SPECIFIED;
5030 tp->rx_opt.dsack = 0;
5031
5032 /* Queue data for delivery to the user.
5033  * Packets in sequence go to the receive queue.
5034  * Out of sequence packets to the out_of_order_queue.
5035  */
5036 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
5037 if (tcp_receive_window(tp) == 0) {
5038 reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
5039 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
5040 goto out_of_window;
5041 }
5042
5043 /* Ok. In sequence. In window. */
5044 queue_and_out:
5045 if (skb_queue_len(&sk->sk_receive_queue) == 0)
5046 sk_forced_mem_schedule(sk, skb->truesize);
5047 else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
5048 reason = SKB_DROP_REASON_PROTO_MEM;
5049 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
5050 sk->sk_data_ready(sk);
5051 goto drop;
5052 }
5053
5054 eaten = tcp_queue_rcv(sk, skb, &fragstolen);
5055 if (skb->len)
5056 tcp_event_data_recv(sk, skb);
5057 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
5058 tcp_fin(sk);
5059
5060 if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
5061 tcp_ofo_queue(sk);
5062
5063 /* RFC 5681. 4.2. SHOULD send immediate ACK, when
5064  * gap in queue is filled.
5065  */
5066 if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
5067 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
5068 }
5069
5070 if (tp->rx_opt.num_sacks)
5071 tcp_sack_remove(tp);
5072
5073 tcp_fast_path_check(sk);
5074
5075 if (eaten > 0)
5076 kfree_skb_partial(skb, fragstolen);
5077 if (!sock_flag(sk, SOCK_DEAD))
5078 tcp_data_ready(sk);
5079 return;
5080 }
5081
5082 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
5083 tcp_rcv_spurious_retrans(sk, skb);
5084 /* A retransmit, 2nd most common case. Force an immediate ack. */
5085 reason = SKB_DROP_REASON_TCP_OLD_DATA;
5086 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
5087 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
5088
5089 out_of_window:
5090 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
5091 inet_csk_schedule_ack(sk);
5092 drop:
5093 tcp_drop_reason(sk, skb, reason);
5094 return;
5095 }
5096
5097 /* Out of window. F.e. zero window probe. */
5098 if (!before(TCP_SKB_CB(skb)->seq,
5099 tp->rcv_nxt + tcp_receive_window(tp))) {
5100 reason = SKB_DROP_REASON_TCP_OVERWINDOW;
5101 goto out_of_window;
5102 }
5103
5104 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
5105 /* Partial packet, seq < rcv_next < end_seq */
5106 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
5107
5108 /* If window is closed, drop tail of packet. But after
5109  * remembering D-SACK for its head made in previous line.
5110  */
5111 if (!tcp_receive_window(tp)) {
5112 reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
5113 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
5114 goto out_of_window;
5115 }
5116 goto queue_and_out;
5117 }
5118
5119 tcp_data_queue_ofo(sk, skb);
5120 }
5121
5122 static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
5123 {
5124 if (list)
5125 return !skb_queue_is_last(list, skb) ? skb->next : NULL;
5126
5127 return skb_rb_next(skb);
5128 }
5129
5130 static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
5131 struct sk_buff_head *list,
5132 struct rb_root *root)
5133 {
5134 struct sk_buff *next = tcp_skb_next(skb, list);
5135
5136 if (list)
5137 __skb_unlink(skb, list);
5138 else
5139 rb_erase(&skb->rbnode, root);
5140
5141 __kfree_skb(skb);
5142 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
5143
5144 return next;
5145 }
5146
5147 /* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
5148 void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
5149 {
5150 struct rb_node **p = &root->rb_node;
5151 struct rb_node *parent = NULL;
5152 struct sk_buff *skb1;
5153
5154 while (*p) {
5155 parent = *p;
5156 skb1 = rb_to_skb(parent);
5157 if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
5158 p = &parent->rb_left;
5159 else
5160 p = &parent->rb_right;
5161 }
5162 rb_link_node(&skb->rbnode, parent, p);
5163 rb_insert_color(&skb->rbnode, root);
5164 }
5165
5166 /* Collapse contiguous sequence of skbs head..tail with
5167  * sequence numbers start..end.
5168  *
5169  * If tail is NULL, this means until the end of the queue.
5170  *
5171  * Segments with FIN/SYN are not collapsed (only because this
5172  * simplifies code)
5173  */
5174 static void
5175 tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
5176 struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
5177 {
5178 struct sk_buff *skb = head, *n;
5179 struct sk_buff_head tmp;
5180 bool end_of_skbs;
5181
5182 /* First, check that queue is collapsible and find
5183  * the point where collapsing can be useful.
5184  */
5185 restart:
5186 for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
5187 n = tcp_skb_next(skb, list);
5188
5189 /* No new bits? It is possible on ofo queue. */
5190 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
5191 skb = tcp_collapse_one(sk, skb, list, root);
5192 if (!skb)
5193 break;
5194 goto restart;
5195 }
5196
5197 /* The first skb to collapse is:
5198  * - not SYN/FIN and
5199  * - bloated or contains data before "start" or
5200  *   overlaps to the next one and mptcp allow collapsing.
5201  */
5202 if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
5203 (tcp_win_from_space(sk, skb->truesize) > skb->len ||
5204 before(TCP_SKB_CB(skb)->seq, start))) {
5205 end_of_skbs = false;
5206 break;
5207 }
5208
5209 if (n && n != tail && mptcp_skb_can_collapse(skb, n) &&
5210 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
5211 end_of_skbs = false;
5212 break;
5213 }
5214
5215
5216 start = TCP_SKB_CB(skb)->end_seq;
5217 }
5218 if (end_of_skbs ||
5219 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
5220 return;
5221
5222 __skb_queue_head_init(&tmp);
5223
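/* Copy the range [start, end) into freshly allocated, tightly sized
 * skbs, releasing the bloated originals as their data is consumed.
 */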
5224 while (before(start, end)) {
5225 int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
5226 struct sk_buff *nskb;
5227
5228 nskb = alloc_skb(copy, GFP_ATOMIC);
5229 if (!nskb)
5230 break;
5231
5232 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
5233 #ifdef CONFIG_TLS_DEVICE
5234 nskb->decrypted = skb->decrypted;
5235 #endif
5236 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
5237 if (list)
5238 __skb_queue_before(list, skb, nskb);
5239 else
5240 __skb_queue_tail(&tmp, nskb);
5241 skb_set_owner_r(nskb, sk);
5242 mptcp_skb_ext_move(nskb, skb);
5243
5244 /* Copy data, releasing collapsed skbs. */
5245 while (copy > 0) {
5246 int offset = start - TCP_SKB_CB(skb)->seq;
5247 int size = TCP_SKB_CB(skb)->end_seq - start;
5248
5249 BUG_ON(offset < 0);
5250 if (size > 0) {
5251 size = min(copy, size);
5252 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
5253 BUG();
5254 TCP_SKB_CB(nskb)->end_seq += size;
5255 copy -= size;
5256 start += size;
5257 }
5258 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
5259 skb = tcp_collapse_one(sk, skb, list, root);
5260 if (!skb ||
5261 skb == tail ||
5262 !mptcp_skb_can_collapse(nskb, skb) ||
5263 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
5264 goto end;
5265 #ifdef CONFIG_TLS_DEVICE
5266 if (skb->decrypted != nskb->decrypted)
5267 goto end;
5268 #endif
5269 }
5270 }
5271 }
5272 end:
5273 skb_queue_walk_safe(&tmp, skb, n)
5274 tcp_rbtree_insert(root, skb);
5275 }
5276
5277 /* Collapse ofo queue. Algorithm: select contiguous sequence of skbs
5278  * and tcp_collapse() them until all the queue is collapsed.
5279  */
5280 static void tcp_collapse_ofo_queue(struct sock *sk)
5281 {
5282 struct tcp_sock *tp = tcp_sk(sk);
5283 u32 range_truesize, sum_tiny = 0;
5284 struct sk_buff *skb, *head;
5285 u32 start, end;
5286
5287 skb = skb_rb_first(&tp->out_of_order_queue);
5288 new_range:
5289 if (!skb) {
5290 tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
5291 return;
5292 }
5293 start = TCP_SKB_CB(skb)->seq;
5294 end = TCP_SKB_CB(skb)->end_seq;
5295 range_truesize = skb->truesize;
5296
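/* Accumulate a contiguous range [start, end) of ofo segments; when a
 * gap or the end of the queue is reached, decide whether the range is
 * large (or bloated) enough to be worth collapsing.
 */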
5297 for (head = skb;;) {
5298 skb = skb_rb_next(skb);
5299
5300 /* Range is terminated when we see a gap or when
5301  * we are at the queue end.
5302  */
5303 if (!skb ||
5304 after(TCP_SKB_CB(skb)->seq, end) ||
5305 before(TCP_SKB_CB(skb)->end_seq, start)) {
5306
5307 if (range_truesize != head->truesize ||
5308 end - start >= SKB_WITH_OVERHEAD(PAGE_SIZE)) {
5309 tcp_collapse(sk, NULL, &tp->out_of_order_queue,
5310 head, skb, start, end);
5311 } else {
5312 sum_tiny += range_truesize;
5313 if (sum_tiny > sk->sk_rcvbuf >> 3)
5314 return;
5315 }
5316 goto new_range;
5317 }
5318
5319 range_truesize += skb->truesize;
5320 if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
5321 start = TCP_SKB_CB(skb)->seq;
5322 if (after(TCP_SKB_CB(skb)->end_seq, end))
5323 end = TCP_SKB_CB(skb)->end_seq;
5324 }
5325 }
5326
5327 /* Clean the out-of-order queue to make room.
5328  * We drop high sequences packets to :
5329  * 1) Let a chance for holes to be filled.
5330  *    This means we do not drop packets from ooo queue if their sequence
5331  *    is before incoming packet sequence.
5332  * 2) not add too big latencies if thousands of packets sit there.
5333  *    (But if application shrinks SO_RCVBUF, we could still end up
5334  *     freeing whole queue here)
5335  * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
5336  * Return true if queue has shrunk.
5337  */
5338 static bool tcp_prune_ofo_queue(struct sock *sk)
5339 {
5340 struct tcp_sock *tp = tcp_sk(sk);
5341 struct rb_node *node, *prev;
5342 int goal;
5343
5344 if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
5345 return false;
5346
5347 NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
5348 goal = sk->sk_rcvbuf >> 3;
5349 node = &tp->ooo_last_skb->rbnode;
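/* Walk the rbtree backwards from the highest sequence (ooo_last_skb),
 * freeing skbs until roughly 1/8 of sk_rcvbuf has been reclaimed or
 * memory pressure has eased.
 */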
5350 do {
5351 prev = rb_prev(node);
5352 rb_erase(node, &tp->out_of_order_queue);
5353 goal -= rb_to_skb(node)->truesize;
5354 tcp_drop_reason(sk, rb_to_skb(node),
5355 SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE);
5356 if (!prev || goal <= 0) {
5357 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
5358 !tcp_under_memory_pressure(sk))
5359 break;
5360 goal = sk->sk_rcvbuf >> 3;
5361 }
5362 node = prev;
5363 } while (node);
5364 tp->ooo_last_skb = rb_to_skb(prev);
5365
5366 /* Reset SACK state.  A conforming SACK implementation will
5367  * do the same at a timeout based retransmit.  When a connection
5368  * is in a sad state like this, we care only about integrity
5369  * of the connection not performance.
5370  */
5371 if (tp->rx_opt.sack_ok)
5372 tcp_sack_reset(&tp->rx_opt);
5373 return true;
5374 }
5375
5376 /* Reduce allocated memory if we can, trying to get
5377  * the socket within its memory limits again.
5378  *
5379  * Return less than zero if we should start dropping frames
5380  * until the socket owning process reads some of the data
5381  * to stabilize the situation.
5382  */
5383 static int tcp_prune_queue(struct sock *sk)
5384 {
5385 struct tcp_sock *tp = tcp_sk(sk);
5386
5387 NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
5388
5389 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
5390 tcp_clamp_window(sk);
5391 else if (tcp_under_memory_pressure(sk))
5392 tcp_adjust_rcv_ssthresh(sk);
5393
5394 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5395 return 0;
5396
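/* Still over the limit: reclaim memory by collapsing bloated skbs in
 * the ofo queue and the receive queue before resorting to dropping data.
 */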
5397 tcp_collapse_ofo_queue(sk);
5398 if (!skb_queue_empty(&sk->sk_receive_queue))
5399 tcp_collapse(sk, &sk->sk_receive_queue, NULL,
5400 skb_peek(&sk->sk_receive_queue),
5401 NULL,
5402 tp->copied_seq, tp->rcv_nxt);
5403
5404 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5405 return 0;
5406
5407
5408
5409
5410 tcp_prune_ofo_queue(sk);
5411
5412 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5413 return 0;
5414
5415 /* If we are really being abused, tell the caller to silently
5416  * drop receive data on the floor.  It will get retransmitted
5417  * and hopefully then we'll have sufficient space.
5418  */
5419 NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
5420
5421
5422 tp->pred_flags = 0;
5423 return -1;
5424 }
5425
5426 static bool tcp_should_expand_sndbuf(struct sock *sk)
5427 {
5428 const struct tcp_sock *tp = tcp_sk(sk);
5429
5430 /* If the user specified a specific send buffer setting, do
5431  * not modify it.
5432  */
5433 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
5434 return false;
5435
5436
5437 if (tcp_under_memory_pressure(sk)) {
5438 int unused_mem = sk_unused_reserved_mem(sk);
5439
5440
5441
5442
5443
5444 if (unused_mem > SOCK_MIN_SNDBUF)
5445 WRITE_ONCE(sk->sk_sndbuf, unused_mem);
5446
5447 return false;
5448 }
5449
5450
5451 if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
5452 return false;
5453
5454
5455 if (tcp_packets_in_flight(tp) >= tcp_snd_cwnd(tp))
5456 return false;
5457
5458 return true;
5459 }
5460
5461 static void tcp_new_space(struct sock *sk)
5462 {
5463 struct tcp_sock *tp = tcp_sk(sk);
5464
5465 if (tcp_should_expand_sndbuf(sk)) {
5466 tcp_sndbuf_expand(sk);
5467 tp->snd_cwnd_stamp = tcp_jiffies32;
5468 }
5469
5470 INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
5471 }
5472
5473
5474
5475
5476 /* Called when the caller may have freed send buffer space, either by
5477  * freeing acked skbs in the rtx queue (tp->snd_una has advanced) or
5478  * by transmitting queued skbs (tp->snd_nxt has advanced).
5479  *
5480  * If the socket previously reported it was out of buffer space
5481  * (SOCK_NOSPACE), try to expand the send buffer and wake the writer.
5482  */
5483 void tcp_check_space(struct sock *sk)
5484 {
5485 /* pairs with tcp_poll() */
5486 smp_mb();
5487 if (sk->sk_socket &&
5488 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
5489 tcp_new_space(sk);
5490 if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
5491 tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
5492 }
5493 }
5494
5495 static inline void tcp_data_snd_check(struct sock *sk)
5496 {
5497 tcp_push_pending_frames(sk);
5498 tcp_check_space(sk);
5499 }
5500
5501 /*
5502  * Check if sending an ack is needed.
5503  */
5504 static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
5505 {
5506 struct tcp_sock *tp = tcp_sk(sk);
5507 unsigned long rtt, delay;
5508
5509 /* More than one full frame received... */
5510 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
5511 /* ... and right edge of window advances far enough.
5512  * (tcp_recvmsg() will send ACK otherwise).
5513  * If application uses SO_RCVLOWAT, we want send ack now if
5514  * we have not received enough bytes to satisfy the condition.
5515  */
5516 (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
5517 __tcp_select_window(sk) >= tp->rcv_wnd)) ||
5518 /* We ACK each frame or... */
5519 tcp_in_quickack_mode(sk) ||
5520
5521 inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) {
5522 send_now:
5523 tcp_send_ack(sk);
5524 return;
5525 }
5526
5527 if (!ofo_possible || RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
5528 tcp_send_delayed_ack(sk);
5529 return;
5530 }
5531
5532 if (!tcp_is_sack(tp) ||
5533 tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
5534 goto send_now;
5535
5536 if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
5537 tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
5538 tp->dup_ack_counter = 0;
5539 }
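/* Send the first TCP_FASTRETRANS_THRESH duplicate ACKs for a given
 * rcv_nxt immediately so the sender's fast retransmit is not delayed;
 * only later SACKs are candidates for compression.
 */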
5540 if (tp->dup_ack_counter < TCP_FASTRETRANS_THRESH) {
5541 tp->dup_ack_counter++;
5542 goto send_now;
5543 }
5544 tp->compressed_ack++;
5545 if (hrtimer_is_queued(&tp->compressed_ack_timer))
5546 return;
5547
5548 /* compress ack timer : 5 % of rtt, but no more than tcp_comp_sack_delay_ns */
5549
5550 rtt = tp->rcv_rtt_est.rtt_us;
5551 if (tp->srtt_us && tp->srtt_us < rtt)
5552 rtt = tp->srtt_us;
5553
5554 delay = min_t(unsigned long,
5555 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
5556 rtt * (NSEC_PER_USEC >> 3)/20);
5557 sock_hold(sk);
5558 hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
5559 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
5560 HRTIMER_MODE_REL_PINNED_SOFT);
5561 }
5562
5563 static inline void tcp_ack_snd_check(struct sock *sk)
5564 {
5565 if (!inet_csk_ack_scheduled(sk)) {
5566
5567 return;
5568 }
5569 __tcp_ack_snd_check(sk, 1);
5570 }
5571
5572
5573 /*
5574  * This routine is only called when we have urgent data
5575  * signalled. Its the 'slow' part of tcp_urg. It could be
5576  * moved inline now as tcp_urg is only called from one
5577  * place. We handle URGent data wrong. We have to - as
5578  * BSD still doesn't use the correction from RFC961.
5579  * For 1003.1g we should support a new option TCP_STDURG to permit
5580  * either form (or just set the sysctl tcp_stdurg).
5581  */
5582 static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
5583 {
5584 struct tcp_sock *tp = tcp_sk(sk);
5585 u32 ptr = ntohs(th->urg_ptr);
5586
5587 if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg))
5588 ptr--;
5589 ptr += ntohl(th->seq);
5590
5591 /* Ignore urgent data that we've already seen and read. */
5592 if (after(tp->copied_seq, ptr))
5593 return;
5594
5595
5596
5597
5598
5599
5600
5601
5602
5603
5604 /* The urgent pointer falls before rcv_nxt, i.e. into data that has already arrived; ignore it. */
5605 if (before(ptr, tp->rcv_nxt))
5606 return;
5607
5608
5609 if (tp->urg_data && !after(ptr, tp->urg_seq))
5610 return;
5611
5612 /* Tell the world about our new urgent pointer. */
5613 sk_send_sigurg(sk);
5614
5615 /* We may be adding urgent data when the last byte read was
5616  * urgent. To do this requires some care. We cannot just ignore
5617  * tp->copied_seq since we would read the last urgent byte again
5618  * as data, nor can we alter copied_seq until this data arrives
5619  * or we break the semantic of SIOCATMARK (and thus sockatmark()).
5620  *
5621  * BSD-style handling: if the previously signalled urgent byte sits
5622  * exactly at copied_seq, was not read inline and there is still
5623  * unread data, silently skip over that byte (freeing its skb if it
5624  * carried nothing else) so the out-of-band byte is not later
5625  * returned to the application as ordinary stream data.
5626  *
5627  * Then remember the new urgent pointer and mark its payload as not
5628  * yet seen (TCP_URG_NOTYET) until the byte itself arrives in
5629  * tcp_urg(). */
5630 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
5631 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
5632 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
5633 tp->copied_seq++;
5634 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
5635 __skb_unlink(skb, &sk->sk_receive_queue);
5636 __kfree_skb(skb);
5637 }
5638 }
5639
5640 WRITE_ONCE(tp->urg_data, TCP_URG_NOTYET);
5641 WRITE_ONCE(tp->urg_seq, ptr);
5642
5643 /* Disable header prediction. */
5644 tp->pred_flags = 0;
5645 }
5646
5647 /* This is the 'fast' part of urgent handling. */
5648 static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
5649 {
5650 struct tcp_sock *tp = tcp_sk(sk);
5651
5652
5653 if (unlikely(th->urg))
5654 tcp_check_urg(sk, th);
5655
5656
5657 if (unlikely(tp->urg_data == TCP_URG_NOTYET)) {
5658 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
5659 th->syn;
5660
5661
5662 if (ptr < skb->len) {
5663 u8 tmp;
5664 if (skb_copy_bits(skb, ptr, &tmp, 1))
5665 BUG();
5666 WRITE_ONCE(tp->urg_data, TCP_URG_VALID | tmp);
5667 if (!sock_flag(sk, SOCK_DEAD))
5668 sk->sk_data_ready(sk);
5669 }
5670 }
5671 }
5672
5673 /* Accept RST for rcv_nxt - 1 after a FIN.
5674  * When tcp connections are abruptly terminated from Mac OSX (via ^C), a
5675  * FIN is sent followed by a RST packet. The RST is sent with the same
5676  * sequence number as the FIN, and thus according to RFC 5961 a challenge
5677  * ACK should be sent. However, Mac OSX rate limits replies to challenge
5678  * ACKs, so accept such a RST here when the socket is already closing
5679  * (CLOSE_WAIT, LAST_ACK or CLOSING) instead of leaving it dangling.
5680  */
5681 static bool tcp_reset_check(const struct sock *sk, const struct sk_buff *skb)
5682 {
5683 struct tcp_sock *tp = tcp_sk(sk);
5684
5685 return unlikely(TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1) &&
5686 (1 << sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK |
5687 TCPF_CLOSING));
5688 }
5689
5690 /* Does PAWS and seqno based validation of an incoming segment, flags will
5691  * play significant role here.
5692  */
5693 static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5694 const struct tcphdr *th, int syn_inerr)
5695 {
5696 struct tcp_sock *tp = tcp_sk(sk);
5697 SKB_DR(reason);
5698
5699 /* RFC1323: H1. Apply PAWS check first. */
5700 if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
5701 tp->rx_opt.saw_tstamp &&
5702 tcp_paws_discard(sk, skb)) {
5703 if (!th->rst) {
5704 NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
5705 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5706 LINUX_MIB_TCPACKSKIPPEDPAWS,
5707 &tp->last_oow_ack_time))
5708 tcp_send_dupack(sk, skb);
5709 SKB_DR_SET(reason, TCP_RFC7323_PAWS);
5710 goto discard;
5711 }
5712
5713 }
5714
5715 /* Step 1: check sequence number */
5716 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
5717 /* RFC793, page 37: "In all states except SYN-SENT, all reset
5718  * (RST) segments are validated by checking their SEQ-fields."
5719  * And page 69: "If an incoming segment is not acceptable,
5720  * an acknowledgment should be sent in reply (unless the RST
5721  * bit is set, if so drop the segment and return)".
5722  */
5723 if (!th->rst) {
5724 if (th->syn)
5725 goto syn_challenge;
5726 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5727 LINUX_MIB_TCPACKSKIPPEDSEQ,
5728 &tp->last_oow_ack_time))
5729 tcp_send_dupack(sk, skb);
5730 } else if (tcp_reset_check(sk, skb)) {
5731 goto reset;
5732 }
5733 SKB_DR_SET(reason, TCP_INVALID_SEQUENCE);
5734 goto discard;
5735 }
5736
5737
5738 if (th->rst) {
5739 /* RFC 5961 3.2 (extend to match against (RCV.NXT - 1) after a
5740  * FIN and SACK too if available):
5741  * If seq num matches RCV.NXT or (RCV.NXT - 1) after a FIN, or
5742  * the right-most SACK block,
5743  * then
5744  *     RESET the connection
5745  * else
5746  *     Send a challenge ACK
5747  */
5748 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
5749 tcp_reset_check(sk, skb))
5750 goto reset;
5751
5752 if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
5753 struct tcp_sack_block *sp = &tp->selective_acks[0];
5754 int max_sack = sp[0].end_seq;
5755 int this_sack;
5756
5757 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;
5758 ++this_sack) {
5759 max_sack = after(sp[this_sack].end_seq,
5760 max_sack) ?
5761 sp[this_sack].end_seq : max_sack;
5762 }
5763
5764 if (TCP_SKB_CB(skb)->seq == max_sack)
5765 goto reset;
5766 }
5767
5768
5769
5770
5771
5772 if (tp->syn_fastopen && !tp->data_segs_in &&
5773 sk->sk_state == TCP_ESTABLISHED)
5774 tcp_fastopen_active_disable(sk);
5775 tcp_send_challenge_ack(sk);
5776 SKB_DR_SET(reason, TCP_RESET);
5777 goto discard;
5778 }
5779
5780 /* step 3: check security and precedence [ignored] */
5781
5782 /* step 4: Check for a SYN
5783  * RFC 5961 4.2 : Send a challenge ack
5784  */
5785 if (th->syn) {
5786 syn_challenge:
5787 if (syn_inerr)
5788 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5789 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
5790 tcp_send_challenge_ack(sk);
5791 SKB_DR_SET(reason, TCP_INVALID_SYN);
5792 goto discard;
5793 }
5794
5795 bpf_skops_parse_hdr(sk, skb);
5796
5797 return true;
5798
5799 discard:
5800 tcp_drop_reason(sk, skb, reason);
5801 return false;
5802
5803 reset:
5804 tcp_reset(sk, skb);
5805 __kfree_skb(skb);
5806 return false;
5807 }
5808
5809 /*
5810  * TCP receive function for the ESTABLISHED state.
5811  *
5812  * It is split into a fast path and a slow path. The fast path is
5813  * disabled when:
5814  * - A zero window was announced from us - zero window probing
5815  *   is only handled properly in the slow path.
5816  * - Out of order segments arrived.
5817  * - Urgent data is expected.
5818  * - There is no buffer space left.
5819  * - Unexpected TCP flags/window values/header lengths are received
5820  *   (detected by checking the TCP header against pred_flags).
5821  * - Data is sent in both directions. The fast path only supports pure
5822  *   senders or pure receivers (this means either the sequence number
5823  *   or the ack value must stay constant).
5824  * - Unexpected TCP option.
5825  *
5826  * When these conditions are satisfied, it drops to a standard
5827  * receive procedure patterned after RFC793 to handle all cases.
5828  * The first three cases are guaranteed by proper pred_flags setting,
5829  * the rest is checked inline. Fast processing is turned on in
5830  * tcp_data_queue when everything is OK.
5831  */
5832 void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
5833 {
5834 enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
5835 const struct tcphdr *th = (const struct tcphdr *)skb->data;
5836 struct tcp_sock *tp = tcp_sk(sk);
5837 unsigned int len = skb->len;
5838
5839
5840 trace_tcp_probe(sk, skb);
5841
5842 tcp_mstamp_refresh(tp);
5843 if (unlikely(!rcu_access_pointer(sk->sk_rx_dst)))
5844 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
5845
5846
5847
5848
5849
5850
5851
5852
5853
5854
5855
5856
5857
5858
5859
5860 tp->rx_opt.saw_tstamp = 0;
5861
5862
5863 /* pred_flags is 0xS?10 << 16 + snd_wnd
5864  * if header_prediction is to be made
5865  * 'S' will always be tp->tcp_header_len >> 2
5866  * '?' will be 0 for the fast path, otherwise pred_flags is 0 to
5867  * turn it off (when there are holes in the receive
5868  * space for instance)
5869  * PSH flag is ignored.
5870  */
5871 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
5872 TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
5873 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
5874 int tcp_header_len = tp->tcp_header_len;
5875
5876 /* Timestamp header prediction: tcp_header_len
5877  * is automatically equal to th->doff*4 due to pred_flags
5878  * match.
5879  */
5880
5881 /* Check timestamp */
5882 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
5883 /* No? Slow path! */
5884 if (!tcp_parse_aligned_timestamp(tp, th))
5885 goto slow_path;
5886
5887
5888 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
5889 goto slow_path;
5890
5891 /* DO NOT update ts_recent here, if checksum fails
5892  * and timestamp was corrupted part, it will result
5893  * in a hung connection since we will drop all
5894  * future packets due to the PAWS test.
5895  */
5896 }
5897
5898 if (len <= tcp_header_len) {
5899
5900 if (len == tcp_header_len) {
5901 /* Predicted packet is in window by definition.
5902  * seq == rcv_nxt and rcv_wup <= rcv_nxt.
5903  * Hence, check seq<=rcv_wup reduces to:
5904  */
5905 if (tcp_header_len ==
5906 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5907 tp->rcv_nxt == tp->rcv_wup)
5908 tcp_store_ts_recent(tp);
5909
5910
5911
5912
5913 tcp_ack(sk, skb, 0);
5914 __kfree_skb(skb);
5915 tcp_data_snd_check(sk);
5916
5917 /* When receiving pure ack in fast path, update
5918  * last ts ecr directly instead of calling tcp_rcv_rtt_measure_ts()
5919  */
5920 tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
5921 return;
5922 } else {
5923 reason = SKB_DROP_REASON_PKT_TOO_SMALL;
5924 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5925 goto discard;
5926 }
5927 } else {
5928 int eaten = 0;
5929 bool fragstolen = false;
5930
5931 if (tcp_checksum_complete(skb))
5932 goto csum_error;
5933
5934 if ((int)skb->truesize > sk->sk_forward_alloc)
5935 goto step5;
5936
5937 /* Predicted packet is in window by definition.
5938  * seq == rcv_nxt and rcv_wup <= rcv_nxt.
5939  * Hence, check seq<=rcv_wup reduces to:
5940  */
5941 if (tcp_header_len ==
5942 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5943 tp->rcv_nxt == tp->rcv_wup)
5944 tcp_store_ts_recent(tp);
5945
5946 tcp_rcv_rtt_measure_ts(sk, skb);
5947
5948 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
5949
5950 /* Bulk data transfer: receiver */
5951 skb_dst_drop(skb);
5952 __skb_pull(skb, tcp_header_len);
5953 eaten = tcp_queue_rcv(sk, skb, &fragstolen);
5954
5955 tcp_event_data_recv(sk, skb);
5956
5957 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
5958
5959 tcp_ack(sk, skb, FLAG_DATA);
5960 tcp_data_snd_check(sk);
5961 if (!inet_csk_ack_scheduled(sk))
5962 goto no_ack;
5963 } else {
5964 tcp_update_wl(tp, TCP_SKB_CB(skb)->seq);
5965 }
5966
5967 __tcp_ack_snd_check(sk, 0);
5968 no_ack:
5969 if (eaten)
5970 kfree_skb_partial(skb, fragstolen);
5971 tcp_data_ready(sk);
5972 return;
5973 }
5974 }
5975
5976 slow_path:
5977 if (len < (th->doff << 2) || tcp_checksum_complete(skb))
5978 goto csum_error;
5979
5980 if (!th->ack && !th->rst && !th->syn) {
5981 reason = SKB_DROP_REASON_TCP_FLAGS;
5982 goto discard;
5983 }
5984
5985 /*
5986  * Standard slow path.
5987  */
5988
5989 if (!tcp_validate_incoming(sk, skb, th, 1))
5990 return;
5991
5992 step5:
5993 reason = tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT);
5994 if ((int)reason < 0) {
5995 reason = -reason;
5996 goto discard;
5997 }
5998 tcp_rcv_rtt_measure_ts(sk, skb);
5999
6000
6001 tcp_urg(sk, skb, th);
6002
6003
6004 tcp_data_queue(sk, skb);
6005
6006 tcp_data_snd_check(sk);
6007 tcp_ack_snd_check(sk);
6008 return;
6009
6010 csum_error:
6011 reason = SKB_DROP_REASON_TCP_CSUM;
6012 trace_tcp_bad_csum(skb);
6013 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
6014 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
6015
6016 discard:
6017 tcp_drop_reason(sk, skb, reason);
6018 }
6019 EXPORT_SYMBOL(tcp_rcv_established);
6020
6021 void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
6022 {
6023 struct inet_connection_sock *icsk = inet_csk(sk);
6024 struct tcp_sock *tp = tcp_sk(sk);
6025
6026 tcp_mtup_init(sk);
6027 icsk->icsk_af_ops->rebuild_header(sk);
6028 tcp_init_metrics(sk);
6029
6030 /* Initialize the congestion window to start the transfer.
6031  * Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
6032  * retransmitted. In light of RFC6298 more aggressive 1sec
6033  * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
6034  * retransmission has occurred.
6035  */
6036 if (tp->total_retrans > 1 && tp->undo_marker)
6037 tcp_snd_cwnd_set(tp, 1);
6038 else
6039 tcp_snd_cwnd_set(tp, tcp_init_cwnd(tp, __sk_dst_get(sk)));
6040 tp->snd_cwnd_stamp = tcp_jiffies32;
6041
6042 bpf_skops_established(sk, bpf_op, skb);
6043
6044 if (!icsk->icsk_ca_initialized)
6045 tcp_init_congestion_control(sk);
6046 tcp_init_buffer_space(sk);
6047 }
6048
6049 void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
6050 {
6051 struct tcp_sock *tp = tcp_sk(sk);
6052 struct inet_connection_sock *icsk = inet_csk(sk);
6053
6054 tcp_set_state(sk, TCP_ESTABLISHED);
6055 icsk->icsk_ack.lrcvtime = tcp_jiffies32;
6056
6057 if (skb) {
6058 icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
6059 security_inet_conn_established(sk, skb);
6060 sk_mark_napi_id(sk, skb);
6061 }
6062
6063 tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, skb);
6064
6065 /* Prevent spurious tcp_cwnd_restart() on first data
6066  * packet.
6067  */
6068 tp->lsndtime = tcp_jiffies32;
6069
6070 if (sock_flag(sk, SOCK_KEEPOPEN))
6071 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
6072
6073 if (!tp->rx_opt.snd_wscale)
6074 __tcp_fast_path_on(tp, tp->snd_wnd);
6075 else
6076 tp->pred_flags = 0;
6077 }
6078
6079 static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
6080 struct tcp_fastopen_cookie *cookie)
6081 {
6082 struct tcp_sock *tp = tcp_sk(sk);
6083 struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
6084 u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
6085 bool syn_drop = false;
6086
6087 if (mss == tp->rx_opt.user_mss) {
6088 struct tcp_options_received opt;
6089
6090
6091 tcp_clear_options(&opt);
6092 opt.user_mss = opt.mss_clamp = 0;
6093 tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
6094 mss = opt.mss_clamp;
6095 }
6096
6097 if (!tp->syn_fastopen) {
6098
6099 cookie->len = -1;
6100 } else if (tp->total_retrans) {
6101 /* SYN timed out and the SYN-ACK neither has a cookie nor
6102  * acknowledges data. Presumably the remote received only
6103  * the retransmitted (regular) SYNs: either the original
6104  * SYN-data or the corresponding SYN-ACK was dropped.
6105  */
6106 syn_drop = (cookie->len < 0 && data);
6107 } else if (cookie->len < 0 && !tp->syn_data) {
6108 /* We requested a cookie but didn't get it. If we did not use
6109  * the (old) exp opt format then try so next time (try_exp=1).
6110  * Otherwise we go back to use the RFC7413 opt (try_exp=2).
6111  */
6112 try_exp = tp->syn_fastopen_exp ? 2 : 1;
6113 }
6114
6115 tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
6116
6117 if (data) {
6118 if (tp->total_retrans)
6119 tp->fastopen_client_fail = TFO_SYN_RETRANSMITTED;
6120 else
6121 tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
6122 skb_rbtree_walk_from(data)
6123 tcp_mark_skb_lost(sk, data);
6124 tcp_xmit_retransmit_queue(sk);
6125 NET_INC_STATS(sock_net(sk),
6126 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
6127 return true;
6128 }
6129 tp->syn_data_acked = tp->syn_data;
6130 if (tp->syn_data_acked) {
6131 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
6132
6133 if (tp->delivered > 1)
6134 --tp->delivered;
6135 }
6136
6137 tcp_fastopen_add_skb(sk, synack);
6138
6139 return false;
6140 }
6141
6142 static void smc_check_reset_syn(struct tcp_sock *tp)
6143 {
6144 #if IS_ENABLED(CONFIG_SMC)
6145 if (static_branch_unlikely(&tcp_have_smc)) {
6146 if (tp->syn_smc && !tp->rx_opt.smc_ok)
6147 tp->syn_smc = 0;
6148 }
6149 #endif
6150 }
6151
6152 static void tcp_try_undo_spurious_syn(struct sock *sk)
6153 {
6154 struct tcp_sock *tp = tcp_sk(sk);
6155 u32 syn_stamp;
6156
6157 /* undo_marker is set when SYN or SYNACK times out. The timeout is
6158  * spurious if the ACK's timestamp option echo value matches the
6159  * original SYN timestamp.
6160  */
6161 syn_stamp = tp->retrans_stamp;
6162 if (tp->undo_marker && syn_stamp && tp->rx_opt.saw_tstamp &&
6163 syn_stamp == tp->rx_opt.rcv_tsecr)
6164 tp->undo_marker = 0;
6165 }
6166
6167 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
6168 const struct tcphdr *th)
6169 {
6170 struct inet_connection_sock *icsk = inet_csk(sk);
6171 struct tcp_sock *tp = tcp_sk(sk);
6172 struct tcp_fastopen_cookie foc = { .len = -1 };
6173 int saved_clamp = tp->rx_opt.mss_clamp;
6174 bool fastopen_fail;
6175 SKB_DR(reason);
6176
6177 tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
6178 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
6179 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
6180
6181 if (th->ack) {
6182 /* rfc793:
6183  * "If the state is SYN-SENT then
6184  *    first check the ACK bit
6185  *      If the ACK bit is set
6186  *        If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
6187  *        a reset (unless the RST bit is set, if so drop
6188  *        the segment and return)"
6189  */
6190 if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
6191 after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
6192
6193 if (icsk->icsk_retransmits == 0)
6194 inet_csk_reset_xmit_timer(sk,
6195 ICSK_TIME_RETRANS,
6196 TCP_TIMEOUT_MIN, TCP_RTO_MAX);
6197 goto reset_and_undo;
6198 }
6199
6200 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
6201 !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
6202 tcp_time_stamp(tp))) {
6203 NET_INC_STATS(sock_net(sk),
6204 LINUX_MIB_PAWSACTIVEREJECTED);
6205 goto reset_and_undo;
6206 }
6207
6208
6209
6210
6211
6212
6213
6214
6215
6216 if (th->rst) {
6217 tcp_reset(sk, skb);
6218 consume:
6219 __kfree_skb(skb);
6220 return 0;
6221 }
6222
6223
6224
6225
6226
6227
6228
6229
6230 if (!th->syn) {
6231 SKB_DR_SET(reason, TCP_FLAGS);
6232 goto discard_and_undo;
6233 }
6234
6235
6236
6237
6238
6239
6240
6241 tcp_ecn_rcv_synack(tp, th);
6242
6243 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
6244 tcp_try_undo_spurious_syn(sk);
6245 tcp_ack(sk, skb, FLAG_SLOWPATH);
6246
6247 /* Ok.. it's good. Set up sequence numbers and
6248  * move to established.
6249  */
6250 WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
6251 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
6252
6253 /* RFC1323: The window in SYN & SYN/ACK segments is
6254  * never scaled.
6255  */
6256 tp->snd_wnd = ntohs(th->window);
6257
6258 if (!tp->rx_opt.wscale_ok) {
6259 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
6260 tp->window_clamp = min(tp->window_clamp, 65535U);
6261 }
6262
6263 if (tp->rx_opt.saw_tstamp) {
6264 tp->rx_opt.tstamp_ok = 1;
6265 tp->tcp_header_len =
6266 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
6267 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
6268 tcp_store_ts_recent(tp);
6269 } else {
6270 tp->tcp_header_len = sizeof(struct tcphdr);
6271 }
6272
6273 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
6274 tcp_initialize_rcv_mss(sk);
6275
6276 /* Remember, tcp_poll() does not lock socket!
6277  * Change state from SYN-SENT only after copied_seq
6278  * is initialized. */
6279 WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
6280
6281 smc_check_reset_syn(tp);
6282
6283 smp_mb();
6284
6285 tcp_finish_connect(sk, skb);
6286
6287 fastopen_fail = (tp->syn_fastopen || tp->syn_data) &&
6288 tcp_rcv_fastopen_synack(sk, skb, &foc);
6289
6290 if (!sock_flag(sk, SOCK_DEAD)) {
6291 sk->sk_state_change(sk);
6292 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
6293 }
6294 if (fastopen_fail)
6295 return -1;
6296 if (sk->sk_write_pending ||
6297 icsk->icsk_accept_queue.rskq_defer_accept ||
6298 inet_csk_in_pingpong_mode(sk)) {
6299
6300
6301
6302
6303
6304
6305
6306 inet_csk_schedule_ack(sk);
6307 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
6308 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
6309 TCP_DELACK_MAX, TCP_RTO_MAX);
6310 goto consume;
6311 }
6312 tcp_send_ack(sk);
6313 return -1;
6314 }
6315
6316 /* No ACK in the segment */
6317
6318 if (th->rst) {
6319
6320
6321
6322
6323
6324 SKB_DR_SET(reason, TCP_RESET);
6325 goto discard_and_undo;
6326 }
6327
6328
6329 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
6330 tcp_paws_reject(&tp->rx_opt, 0)) {
6331 SKB_DR_SET(reason, TCP_RFC7323_PAWS);
6332 goto discard_and_undo;
6333 }
6334 if (th->syn) {
6335 /* We see SYN without ACK. It is attempt of
6336  * simultaneous connect with crossed SYNs.
6337  * Particularly, it can be connect to self.
6338  */
6339 tcp_set_state(sk, TCP_SYN_RECV);
6340
6341 if (tp->rx_opt.saw_tstamp) {
6342 tp->rx_opt.tstamp_ok = 1;
6343 tcp_store_ts_recent(tp);
6344 tp->tcp_header_len =
6345 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
6346 } else {
6347 tp->tcp_header_len = sizeof(struct tcphdr);
6348 }
6349
6350 WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
6351 WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
6352 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
6353
6354
6355
6356
6357 tp->snd_wnd = ntohs(th->window);
6358 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
6359 tp->max_window = tp->snd_wnd;
6360
6361 tcp_ecn_rcv_syn(tp, th);
6362
6363 tcp_mtup_init(sk);
6364 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
6365 tcp_initialize_rcv_mss(sk);
6366
6367 tcp_send_synack(sk);
6368 #if 0
6369
6370
6371
6372
6373
6374
6375
6376
6377
6378
6379
6380 return -1;
6381 #else
6382 goto consume;
6383 #endif
6384 }
6385
6386
6387
6388
6389 discard_and_undo:
6390 tcp_clear_options(&tp->rx_opt);
6391 tp->rx_opt.mss_clamp = saved_clamp;
6392 tcp_drop_reason(sk, skb, reason);
6393 return 0;
6394
6395 reset_and_undo:
6396 tcp_clear_options(&tp->rx_opt);
6397 tp->rx_opt.mss_clamp = saved_clamp;
6398 return 1;
6399 }
6400
6401 static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
6402 {
6403 struct request_sock *req;
6404
6405
6406
6407
6408 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
6409 tcp_try_undo_loss(sk, false);
6410
6411
6412 tcp_sk(sk)->retrans_stamp = 0;
6413 inet_csk(sk)->icsk_retransmits = 0;
6414
6415
6416
6417
6418 req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
6419 lockdep_sock_is_held(sk));
6420 reqsk_fastopen_remove(sk, req, false);
6421
6422 /* Re-arm the timer because data may have been sent out.
6423  * This is similar to the regular data transmission case
6424  * when new data has just been ack'ed.
6425  *
6426  * (TFO) - we could try to be more aggressive and
6427  * retransmit any data sooner based on when it
6428  * is sent out.
6429  */
6430 tcp_rearm_rto(sk);
6431 }
6432
6433
6434 /*
6435  * This function implements the receiving procedure of RFC 793 for
6436  * all states except ESTABLISHED and TIME_WAIT.
6437  * It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
6438  * address independent.
6439  */
6440 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
6441 {
6442 struct tcp_sock *tp = tcp_sk(sk);
6443 struct inet_connection_sock *icsk = inet_csk(sk);
6444 const struct tcphdr *th = tcp_hdr(skb);
6445 struct request_sock *req;
6446 int queued = 0;
6447 bool acceptable;
6448 SKB_DR(reason);
6449
6450 switch (sk->sk_state) {
6451 case TCP_CLOSE:
6452 SKB_DR_SET(reason, TCP_CLOSE);
6453 goto discard;
6454
6455 case TCP_LISTEN:
6456 if (th->ack)
6457 return 1;
6458
6459 if (th->rst) {
6460 SKB_DR_SET(reason, TCP_RESET);
6461 goto discard;
6462 }
6463 if (th->syn) {
6464 if (th->fin) {
6465 SKB_DR_SET(reason, TCP_FLAGS);
6466 goto discard;
6467 }
6468
6469
6470
6471 rcu_read_lock();
6472 local_bh_disable();
6473 acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
6474 local_bh_enable();
6475 rcu_read_unlock();
6476
6477 if (!acceptable)
6478 return 1;
6479 consume_skb(skb);
6480 return 0;
6481 }
6482 SKB_DR_SET(reason, TCP_FLAGS);
6483 goto discard;
6484
6485 case TCP_SYN_SENT:
6486 tp->rx_opt.saw_tstamp = 0;
6487 tcp_mstamp_refresh(tp);
6488 queued = tcp_rcv_synsent_state_process(sk, skb, th);
6489 if (queued >= 0)
6490 return queued;
6491
6492
6493 tcp_urg(sk, skb, th);
6494 __kfree_skb(skb);
6495 tcp_data_snd_check(sk);
6496 return 0;
6497 }
6498
6499 tcp_mstamp_refresh(tp);
6500 tp->rx_opt.saw_tstamp = 0;
6501 req = rcu_dereference_protected(tp->fastopen_rsk,
6502 lockdep_sock_is_held(sk));
6503 if (req) {
6504 bool req_stolen;
6505
6506 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
6507 sk->sk_state != TCP_FIN_WAIT1);
6508
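/* A deferred Fast Open request is attached to this socket; let
 * tcp_check_req() validate the segment against it first.
 */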
6509 if (!tcp_check_req(sk, skb, req, true, &req_stolen)) {
6510 SKB_DR_SET(reason, TCP_FASTOPEN);
6511 goto discard;
6512 }
6513 }
6514
6515 if (!th->ack && !th->rst && !th->syn) {
6516 SKB_DR_SET(reason, TCP_FLAGS);
6517 goto discard;
6518 }
6519 if (!tcp_validate_incoming(sk, skb, th, 0))
6520 return 0;
6521
6522 /* step 5: check the ACK field */
6523 acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
6524 FLAG_UPDATE_TS_RECENT |
6525 FLAG_NO_CHALLENGE_ACK) > 0;
6526
6527 if (!acceptable) {
6528 if (sk->sk_state == TCP_SYN_RECV)
6529 return 1;
6530 tcp_send_challenge_ack(sk);
6531 SKB_DR_SET(reason, TCP_OLD_ACK);
6532 goto discard;
6533 }
6534 switch (sk->sk_state) {
6535 case TCP_SYN_RECV:
6536 tp->delivered++;
6537 if (!tp->srtt_us)
6538 tcp_synack_rtt_meas(sk, req);
6539
6540 if (req) {
6541 tcp_rcv_synrecv_state_fastopen(sk);
6542 } else {
6543 tcp_try_undo_spurious_syn(sk);
6544 tp->retrans_stamp = 0;
6545 tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB,
6546 skb);
6547 WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
6548 }
6549 smp_mb();
6550 tcp_set_state(sk, TCP_ESTABLISHED);
6551 sk->sk_state_change(sk);
6552
6553 /* Note, that this wakeup is only for marginal crossed SYN case.
6554  * Passively open sockets are not waked up, because
6555  * sk->sk_sleep == NULL and sk->sk_socket == NULL.
6556  */
6557 if (sk->sk_socket)
6558 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
6559
6560 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
6561 tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
6562 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
6563
6564 if (tp->rx_opt.tstamp_ok)
6565 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
6566
6567 if (!inet_csk(sk)->icsk_ca_ops->cong_control)
6568 tcp_update_pacing_rate(sk);
6569
6570
6571 tp->lsndtime = tcp_jiffies32;
6572
6573 tcp_initialize_rcv_mss(sk);
6574 tcp_fast_path_on(tp);
6575 break;
6576
6577 case TCP_FIN_WAIT1: {
6578 int tmo;
6579
6580 if (req)
6581 tcp_rcv_synrecv_state_fastopen(sk);
6582
6583 if (tp->snd_una != tp->write_seq)
6584 break;
6585
6586 tcp_set_state(sk, TCP_FIN_WAIT2);
6587 sk->sk_shutdown |= SEND_SHUTDOWN;
6588
6589 sk_dst_confirm(sk);
6590
6591 if (!sock_flag(sk, SOCK_DEAD)) {
6592
6593 sk->sk_state_change(sk);
6594 break;
6595 }
6596
6597 if (tp->linger2 < 0) {
6598 tcp_done(sk);
6599 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6600 return 1;
6601 }
6602 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
6603 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
6604
6605 if (tp->syn_fastopen && th->fin)
6606 tcp_fastopen_active_disable(sk);
6607 tcp_done(sk);
6608 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6609 return 1;
6610 }
6611
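/* Our FIN is acked; choose how to wait for the peer's FIN: use the
 * keepalive timer for long timeouts (or when a FIN is present or the
 * socket is owned by the user), otherwise hand the socket over to a
 * FIN_WAIT2 timewait structure.
 */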
6612 tmo = tcp_fin_time(sk);
6613 if (tmo > TCP_TIMEWAIT_LEN) {
6614 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
6615 } else if (th->fin || sock_owned_by_user(sk)) {
6616 /* Bad case. We could lose such FIN otherwise.
6617  * It is not a big problem, but it looks confusing
6618  * and not so rare event. We still can lose it now,
6619  * if it spins in bh_lock_sock(), but it is really
6620  * marginal case.
6621  */
6622 inet_csk_reset_keepalive_timer(sk, tmo);
6623 } else {
6624 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
6625 goto consume;
6626 }
6627 break;
6628 }
6629
6630 case TCP_CLOSING:
6631 if (tp->snd_una == tp->write_seq) {
6632 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
6633 goto consume;
6634 }
6635 break;
6636
6637 case TCP_LAST_ACK:
6638 if (tp->snd_una == tp->write_seq) {
6639 tcp_update_metrics(sk);
6640 tcp_done(sk);
6641 goto consume;
6642 }
6643 break;
6644 }
6645
6646
6647 tcp_urg(sk, skb, th);
6648
6649
6650 switch (sk->sk_state) {
6651 case TCP_CLOSE_WAIT:
6652 case TCP_CLOSING:
6653 case TCP_LAST_ACK:
6654 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
6655
6656
6657
6658 if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb))
6659 goto discard;
6660 break;
6661 }
6662 fallthrough;
6663 case TCP_FIN_WAIT1:
6664 case TCP_FIN_WAIT2:
6665 /* RFC 793 says to queue data in these states,
6666  * RFC 1122 says we MUST send a reset.
6667  * BSD 4.4 also does reset.
6668  */
6669 if (sk->sk_shutdown & RCV_SHUTDOWN) {
6670 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
6671 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
6672 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6673 tcp_reset(sk, skb);
6674 return 1;
6675 }
6676 }
6677 fallthrough;
6678 case TCP_ESTABLISHED:
6679 tcp_data_queue(sk, skb);
6680 queued = 1;
6681 break;
6682 }
6683
6684
6685 if (sk->sk_state != TCP_CLOSE) {
6686 tcp_data_snd_check(sk);
6687 tcp_ack_snd_check(sk);
6688 }
6689
6690 if (!queued) {
6691 discard:
6692 tcp_drop_reason(sk, skb, reason);
6693 }
6694 return 0;
6695
6696 consume:
6697 __kfree_skb(skb);
6698 return 0;
6699 }
6700 EXPORT_SYMBOL(tcp_rcv_state_process);
6701
6702 static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
6703 {
6704 struct inet_request_sock *ireq = inet_rsk(req);
6705
6706 if (family == AF_INET)
6707 net_dbg_ratelimited("drop open request from %pI4/%u\n",
6708 &ireq->ir_rmt_addr, port);
6709 #if IS_ENABLED(CONFIG_IPV6)
6710 else if (family == AF_INET6)
6711 net_dbg_ratelimited("drop open request from %pI6/%u\n",
6712 &ireq->ir_v6_rmt_addr, port);
6713 #endif
6714 }
6715
6716
6717
6718
6719
6720
6721 /* RFC3168 : 6.1.1 SYN packets must not have ECT codepoint
6722  *
6723  * If we receive a SYN packet with these bits set, it means a
6724  * network is playing bad games with TOS bits. In order to
6725  * avoid possible false congestion notifications, we disable
6726  * TCP ECN negotiation.
6727  *
6728  * Exception: tcp_ca wants ECN. This is required for DCTCP
6729  * congestion control: Linux DCTCP asserts ECT on all packets,
6730  * including SYN. We also accept ECN when the route carries the
6731  * ECN_CA feature or a BPF program requests it for this request.
6732  */
6733 static void tcp_ecn_create_request(struct request_sock *req,
6734 const struct sk_buff *skb,
6735 const struct sock *listen_sk,
6736 const struct dst_entry *dst)
6737 {
6738 const struct tcphdr *th = tcp_hdr(skb);
6739 const struct net *net = sock_net(listen_sk);
6740 bool th_ecn = th->ece && th->cwr;
6741 bool ect, ecn_ok;
6742 u32 ecn_ok_dst;
6743
6744 if (!th_ecn)
6745 return;
6746
6747 ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
6748 ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
6749 ecn_ok = READ_ONCE(net->ipv4.sysctl_tcp_ecn) || ecn_ok_dst;
6750
6751 if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
6752 (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
6753 tcp_bpf_ca_needs_ecn((struct sock *)req))
6754 inet_rsk(req)->ecn_ok = 1;
6755 }
6756
6757 static void tcp_openreq_init(struct request_sock *req,
6758 const struct tcp_options_received *rx_opt,
6759 struct sk_buff *skb, const struct sock *sk)
6760 {
6761 struct inet_request_sock *ireq = inet_rsk(req);
6762
6763 req->rsk_rcv_wnd = 0;
6764 tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
6765 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
6766 tcp_rsk(req)->snt_synack = 0;
6767 tcp_rsk(req)->last_oow_ack_time = 0;
6768 req->mss = rx_opt->mss_clamp;
6769 req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
6770 ireq->tstamp_ok = rx_opt->tstamp_ok;
6771 ireq->sack_ok = rx_opt->sack_ok;
6772 ireq->snd_wscale = rx_opt->snd_wscale;
6773 ireq->wscale_ok = rx_opt->wscale_ok;
6774 ireq->acked = 0;
6775 ireq->ecn_ok = 0;
6776 ireq->ir_rmt_port = tcp_hdr(skb)->source;
6777 ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
6778 ireq->ir_mark = inet_request_mark(sk, skb);
6779 #if IS_ENABLED(CONFIG_SMC)
6780 ireq->smc_ok = rx_opt->smc_ok && !(tcp_sk(sk)->smc_hs_congested &&
6781 tcp_sk(sk)->smc_hs_congested(sk));
6782 #endif
6783 }
6784
6785 struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
6786 struct sock *sk_listener,
6787 bool attach_listener)
6788 {
6789 struct request_sock *req = reqsk_alloc(ops, sk_listener,
6790 attach_listener);
6791
6792 if (req) {
6793 struct inet_request_sock *ireq = inet_rsk(req);
6794
6795 ireq->ireq_opt = NULL;
6796 #if IS_ENABLED(CONFIG_IPV6)
6797 ireq->pktopts = NULL;
6798 #endif
6799 atomic64_set(&ireq->ir_cookie, 0);
6800 ireq->ireq_state = TCP_NEW_SYN_RECV;
6801 write_pnet(&ireq->ireq_net, sock_net(sk_listener));
6802 ireq->ireq_family = sk_listener->sk_family;
6803 req->timeout = TCP_TIMEOUT_INIT;
6804 }
6805
6806 return req;
6807 }
6808 EXPORT_SYMBOL(inet_reqsk_alloc);
6809
6810 /*
6811  * Return true if a syncookie should be sent
6812  */
6813 static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
6814 {
6815 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
6816 const char *msg = "Dropping request";
6817 struct net *net = sock_net(sk);
6818 bool want_cookie = false;
6819 u8 syncookies;
6820
6821 syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
6822
6823 #ifdef CONFIG_SYN_COOKIES
6824 if (syncookies) {
6825 msg = "Sending cookies";
6826 want_cookie = true;
6827 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
6828 } else
6829 #endif
6830 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
6831
6832 if (!queue->synflood_warned && syncookies != 2 &&
6833 xchg(&queue->synflood_warned, 1) == 0)
6834 net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
6835 proto, sk->sk_num, msg);
6836
6837 return want_cookie;
6838 }
6839
6840 static void tcp_reqsk_record_syn(const struct sock *sk,
6841 struct request_sock *req,
6842 const struct sk_buff *skb)
6843 {
6844 if (tcp_sk(sk)->save_syn) {
6845 u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
6846 struct saved_syn *saved_syn;
6847 u32 mac_hdrlen;
6848 void *base;
6849
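/* save_syn == 2 means the MAC header was requested as well;
 * otherwise the saved copy starts at the network header.
 */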
6850 if (tcp_sk(sk)->save_syn == 2) {
6851 base = skb_mac_header(skb);
6852 mac_hdrlen = skb_mac_header_len(skb);
6853 len += mac_hdrlen;
6854 } else {
6855 base = skb_network_header(skb);
6856 mac_hdrlen = 0;
6857 }
6858
6859 saved_syn = kmalloc(struct_size(saved_syn, data, len),
6860 GFP_ATOMIC);
6861 if (saved_syn) {
6862 saved_syn->mac_hdrlen = mac_hdrlen;
6863 saved_syn->network_hdrlen = skb_network_header_len(skb);
6864 saved_syn->tcp_hdrlen = tcp_hdrlen(skb);
6865 memcpy(saved_syn->data, base, len);
6866 req->saved_syn = saved_syn;
6867 }
6868 }
6869 }
6870
6871 /* If a SYN cookie is required and supported, returns a clamped MSS value to be
6872  * used for SYN cookie generation.
6873  */
6874 u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
6875 const struct tcp_request_sock_ops *af_ops,
6876 struct sock *sk, struct tcphdr *th)
6877 {
6878 struct tcp_sock *tp = tcp_sk(sk);
6879 u16 mss;
6880
6881 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 &&
6882 !inet_csk_reqsk_queue_is_full(sk))
6883 return 0;
6884
6885 if (!tcp_syn_flood_action(sk, rsk_ops->slab_name))
6886 return 0;
6887
6888 if (sk_acceptq_is_full(sk)) {
6889 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
6890 return 0;
6891 }
6892
6893 mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss);
6894 if (!mss)
6895 mss = af_ops->mss_clamp;
6896
6897 return mss;
6898 }
6899 EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss);
6900
6901 int tcp_conn_request(struct request_sock_ops *rsk_ops,
6902 const struct tcp_request_sock_ops *af_ops,
6903 struct sock *sk, struct sk_buff *skb)
6904 {
6905 struct tcp_fastopen_cookie foc = { .len = -1 };
6906 __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
6907 struct tcp_options_received tmp_opt;
6908 struct tcp_sock *tp = tcp_sk(sk);
6909 struct net *net = sock_net(sk);
6910 struct sock *fastopen_sk = NULL;
6911 struct request_sock *req;
6912 bool want_cookie = false;
6913 struct dst_entry *dst;
6914 struct flowi fl;
6915 u8 syncookies;
6916
6917 syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
6918
6919 /* TW buckets are converted to open requests without
6920  * limitations, they conserve resources and peer is
6921  * evidently real one.
6922  */
6923 if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
6924 want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
6925 if (!want_cookie)
6926 goto drop;
6927 }
6928
6929 if (sk_acceptq_is_full(sk)) {
6930 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
6931 goto drop;
6932 }
6933
6934 req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
6935 if (!req)
6936 goto drop;
6937
6938 req->syncookie = want_cookie;
6939 tcp_rsk(req)->af_specific = af_ops;
6940 tcp_rsk(req)->ts_off = 0;
6941 #if IS_ENABLED(CONFIG_MPTCP)
6942 tcp_rsk(req)->is_mptcp = 0;
6943 #endif
6944
6945 tcp_clear_options(&tmp_opt);
6946 tmp_opt.mss_clamp = af_ops->mss_clamp;
6947 tmp_opt.user_mss = tp->rx_opt.user_mss;
6948 tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
6949 want_cookie ? NULL : &foc);
6950
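/* When answering with a syncookie and the client sent no timestamps,
 * there is nowhere to encode the negotiated options, so drop them all.
 */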
6951 if (want_cookie && !tmp_opt.saw_tstamp)
6952 tcp_clear_options(&tmp_opt);
6953
6954 if (IS_ENABLED(CONFIG_SMC) && want_cookie)
6955 tmp_opt.smc_ok = 0;
6956
6957 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
6958 tcp_openreq_init(req, &tmp_opt, skb, sk);
6959 inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
6960
6961
6962 inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
6963
6964 dst = af_ops->route_req(sk, skb, &fl, req);
6965 if (!dst)
6966 goto drop_and_free;
6967
6968 if (tmp_opt.tstamp_ok)
6969 tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
6970
6971 if (!want_cookie && !isn) {
6972 int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
6973
6974
6975 if (!syncookies &&
6976 (max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
6977 (max_syn_backlog >> 2)) &&
6978 !tcp_peer_is_proven(req, dst)) {
6979 /* Without syncookies last quarter of
6980  * backlog is filled with destinations,
6981  * proven to be alive.
6982  * It means that we continue to communicate
6983  * to destinations, already remembered
6984  * to the moment of synflood.
6985  */
6986 pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
6987 rsk_ops->family);
6988 goto drop_and_release;
6989 }
6990
6991 isn = af_ops->init_seq(skb);
6992 }
6993
6994 tcp_ecn_create_request(req, skb, sk, dst);
6995
6996 if (want_cookie) {
6997 isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
6998 if (!tmp_opt.tstamp_ok)
6999 inet_rsk(req)->ecn_ok = 0;
7000 }
7001
7002 tcp_rsk(req)->snt_isn = isn;
7003 tcp_rsk(req)->txhash = net_tx_rndhash();
7004 tcp_rsk(req)->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
7005 tcp_openreq_init_rwin(req, sk, dst);
7006 sk_rx_queue_set(req_to_sk(req), skb);
7007 if (!want_cookie) {
7008 tcp_reqsk_record_syn(sk, req, skb);
7009 fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
7010 }
7011 if (fastopen_sk) {
7012 af_ops->send_synack(fastopen_sk, dst, &fl, req,
7013 &foc, TCP_SYNACK_FASTOPEN, skb);
7014
7015 if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
7016 reqsk_fastopen_remove(fastopen_sk, req, false);
7017 bh_unlock_sock(fastopen_sk);
7018 sock_put(fastopen_sk);
7019 goto drop_and_free;
7020 }
7021 sk->sk_data_ready(sk);
7022 bh_unlock_sock(fastopen_sk);
7023 sock_put(fastopen_sk);
7024 } else {
7025 tcp_rsk(req)->tfo_listener = false;
7026 if (!want_cookie) {
7027 req->timeout = tcp_timeout_init((struct sock *)req);
7028 inet_csk_reqsk_queue_hash_add(sk, req, req->timeout);
7029 }
7030 af_ops->send_synack(sk, dst, &fl, req, &foc,
7031 !want_cookie ? TCP_SYNACK_NORMAL :
7032 TCP_SYNACK_COOKIE,
7033 skb);
7034 if (want_cookie) {
7035 reqsk_free(req);
7036 return 0;
7037 }
7038 }
7039 reqsk_put(req);
7040 return 0;
7041
7042 drop_and_release:
7043 dst_release(dst);
7044 drop_and_free:
7045 __reqsk_free(req);
7046 drop:
7047 tcp_listendrop(sk);
7048 return 0;
7049 }
7050 EXPORT_SYMBOL(tcp_conn_request);