net/ipv4/tcp_vegas.c

0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * TCP Vegas congestion control
0004  *
0005  * This is based on the congestion detection/avoidance scheme described in
0006  *    Lawrence S. Brakmo and Larry L. Peterson.
0007  *    "TCP Vegas: End to end congestion avoidance on a global internet."
0008  *    IEEE Journal on Selected Areas in Communications, 13(8):1465--1480,
0009  *    October 1995. Available from:
0010  *  ftp://ftp.cs.arizona.edu/xkernel/Papers/jsac.ps
0011  *
0012  * See http://www.cs.arizona.edu/xkernel/ for their implementation.
0013  * The main aspects that distinguish this implementation from the
0014  * Arizona Vegas implementation are:
0015  *   o We do not change the loss detection or recovery mechanisms of
0016  *     Linux in any way. Linux already recovers from losses quite well,
0017  *     using fine-grained timers, NewReno, and FACK.
0018  *   o To avoid the performance penalty imposed by increasing cwnd
0019  *     only every-other RTT during slow start, we increase during
0020  *     every RTT during slow start, just like Reno.
0021  *   o Largely to allow continuous cwnd growth during slow start,
0022  *     we use the rate at which ACKs come back as the "actual"
0023  *     rate, rather than the rate at which data is sent.
0024  *   o To speed convergence to the right rate, we set the cwnd
0025  *     to achieve the right ("actual") rate when we exit slow start.
0026  *   o To filter out the noise caused by delayed ACKs, we use the
0027  *     minimum RTT sample observed during the last RTT to calculate
0028  *     the actual rate.
0029  *   o When the sender re-starts from idle, it waits until it has
0030  *     received ACKs for an entire flight of new data before making
0031  *     a cwnd adjustment decision. The original Vegas implementation
0032  *     assumed senders never went idle.
0033  */
0034
0035 #include <linux/mm.h>
0036 #include <linux/module.h>
0037 #include <linux/skbuff.h>
0038 #include <linux/inet_diag.h>
0039
0040 #include <net/tcp.h>
0041
0042 #include "tcp_vegas.h"
0043
0044 static int alpha = 2;
0045 static int beta  = 4;
0046 static int gamma = 1;
0047
0048 module_param(alpha, int, 0644);
0049 MODULE_PARM_DESC(alpha, "lower bound of packets in network");
0050 module_param(beta, int, 0644);
0051 MODULE_PARM_DESC(beta, "upper bound of packets in network");
0052 module_param(gamma, int, 0644);
0053 MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)");
0054
0055 /* There are several situations when we must "re-start" Vegas:
0056  *
0057  *  o when a connection is established
0058  *  o after an RTO
0059  *  o after fast recovery
0060  *  o when we send a packet and there is no outstanding
0061  *    unacknowledged data (restarting an idle connection)
0062  *
0063  * In these circumstances we cannot do a Vegas calculation at the
0064  * end of the first RTT, because any calculation we do is using
0065  * stale info -- both the saved cwnd and congestion feedback are
0066  * stale.
0067  *
0068  * Instead we must wait until the completion of an RTT during
0069  * which we actually receive ACKs.
0070  */
0071 static void vegas_enable(struct sock *sk)
0072 {
0073     const struct tcp_sock *tp = tcp_sk(sk);
0074     struct vegas *vegas = inet_csk_ca(sk);
0075
0076     /* Begin taking Vegas samples next time we send something. */
0077     vegas->doing_vegas_now = 1;
0078
0079     /* Set the beginning of the next send window. */
0080     vegas->beg_snd_nxt = tp->snd_nxt;
0081
0082     vegas->cntRTT = 0;
0083     vegas->minRTT = 0x7fffffff;
0084 }
0085
0086 /* Stop taking Vegas samples for now. */
0087 static inline void vegas_disable(struct sock *sk)
0088 {
0089     struct vegas *vegas = inet_csk_ca(sk);
0090
0091     vegas->doing_vegas_now = 0;
0092 }
0093
0094 void tcp_vegas_init(struct sock *sk)
0095 {
0096     struct vegas *vegas = inet_csk_ca(sk);
0097
0098     vegas->baseRTT = 0x7fffffff;
0099     vegas_enable(sk);
0100 }
0101 EXPORT_SYMBOL_GPL(tcp_vegas_init);
0102
0103 /* Do RTT sampling needed for Vegas.
0104  * Basically we:
0105  *   o min-filter RTT samples from within an RTT to get the current
0106  *     propagation delay + queuing delay (we are min-filtering to try to
0107  *     avoid the effects of delayed ACKs)
0108  *   o min-filter RTT samples from a much longer window (forever for now)
0109  *     to find the propagation delay (baseRTT)
0110  */
0111 void tcp_vegas_pkts_acked(struct sock *sk, const struct ack_sample *sample)
0112 {
0113     struct vegas *vegas = inet_csk_ca(sk);
0114     u32 vrtt;
0115
0116     if (sample->rtt_us < 0)
0117         return;
0118
0119     /* Never allow zero rtt or baseRTT */
0120     vrtt = sample->rtt_us + 1;
0121
0122     /* Filter to find propagation delay: */
0123     if (vrtt < vegas->baseRTT)
0124         vegas->baseRTT = vrtt;
0125
0126     /* Find the min RTT during the last RTT to find
0127      * the current prop. delay + queuing delay:
0128      */
0129     vegas->minRTT = min(vegas->minRTT, vrtt);
0130     vegas->cntRTT++;
0131 }
0132 EXPORT_SYMBOL_GPL(tcp_vegas_pkts_acked);
0133
0134 void tcp_vegas_state(struct sock *sk, u8 ca_state)
0135 {
0136     if (ca_state == TCP_CA_Open)
0137         vegas_enable(sk);
0138     else
0139         vegas_disable(sk);
0140 }
0141 EXPORT_SYMBOL_GPL(tcp_vegas_state);
0142
0143 /*
0144  * If the connection is idle and we are restarting,
0145  * then we don't want to do any Vegas calculations
0146  * until we get fresh RTT samples.  So when we
0147  * restart, we reset our Vegas state to a clean
0148  * slate. After we get acks for this flight of
0149  * packets, _then_ we can make Vegas calculations
0150  * again.
0151  */
0152 void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
0153 {
0154     if (event == CA_EVENT_CWND_RESTART ||
0155         event == CA_EVENT_TX_START)
0156         tcp_vegas_init(sk);
0157 }
0158 EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
0159
0160 static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
0161 {
0162     return  min(tp->snd_ssthresh, tcp_snd_cwnd(tp));
0163 }
0164
0165 static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
0166 {
0167     struct tcp_sock *tp = tcp_sk(sk);
0168     struct vegas *vegas = inet_csk_ca(sk);
0169
0170     if (!vegas->doing_vegas_now) {
0171         tcp_reno_cong_avoid(sk, ack, acked);
0172         return;
0173     }
0174
0175     if (after(ack, vegas->beg_snd_nxt)) {
0176         /* Do the Vegas once-per-RTT cwnd adjustment. */
0177
0178         /* Save the extent of the current window so we can use this
0179          * at the end of the next RTT.
0180          */
0181         vegas->beg_snd_nxt  = tp->snd_nxt;
0182
0183         /* We do the Vegas calculations only if we got enough RTT
0184          * samples that we can be reasonably sure that we got
0185          * at least one RTT sample that wasn't from a delayed ACK.
0186          * If we only had 2 samples total,
0187          * then that means we're getting only 1 ACK per RTT, which
0188          * means they're almost certainly delayed ACKs.
0189          * If  we have 3 samples, we should be OK.
0190          */
0191
0192         if (vegas->cntRTT <= 2) {
0193             /* We don't have enough RTT samples to do the Vegas
0194              * calculation, so we'll behave like Reno.
0195              */
0196             tcp_reno_cong_avoid(sk, ack, acked);
0197         } else {
0198             u32 rtt, diff;
0199             u64 target_cwnd;
0200
0201             /* We have enough RTT samples, so, using the Vegas
0202              * algorithm, we determine if we should increase or
0203              * decrease cwnd, and by how much.
0204              */
0205
0206             /* Pluck out the RTT we are using for the Vegas
0207              * calculations. This is the min RTT seen during the
0208              * last RTT. Taking the min filters out the effects
0209              * of delayed ACKs, at the cost of noticing congestion
0210              * a bit later.
0211              */
0212             rtt = vegas->minRTT;
0213
0214             /* Calculate the cwnd we should have, if we weren't
0215              * going too fast.
0216              *
0217              * This is:
0218              *     (actual rate in segments) * baseRTT
0219              */
0220             target_cwnd = (u64)tcp_snd_cwnd(tp) * vegas->baseRTT;
0221             do_div(target_cwnd, rtt);
0222
0223             /* Calculate the difference between the window we had,
0224              * and the window we would like to have. This quantity
0225              * is the "Diff" from the Arizona Vegas papers.
0226              */
0227             diff = tcp_snd_cwnd(tp) * (rtt-vegas->baseRTT) / vegas->baseRTT;
0228
0229             if (diff > gamma && tcp_in_slow_start(tp)) {
0230                 /* Going too fast. Time to slow down
0231                  * and switch to congestion avoidance.
0232                  */
0233
0234                 /* Set cwnd to match the actual rate
0235                  * exactly:
0236                  *   cwnd = (actual rate) * baseRTT
0237                  * Then we add 1 because the integer
0238                  * truncation robs us of full link
0239                  * utilization.
0240                  */
0241                 tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp),
0242                              (u32)target_cwnd + 1));
0243                 tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
0244
0245             } else if (tcp_in_slow_start(tp)) {
0246                 /* Slow start.  */
0247                 tcp_slow_start(tp, acked);
0248             } else {
0249                 /* Congestion avoidance. */
0250
0251                 /* Figure out where we would like cwnd
0252                  * to be.
0253                  */
0254                 if (diff > beta) {
0255                     /* The old window was too fast, so
0256                      * we slow down.
0257                      */
0258                     tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1);
0259                     tp->snd_ssthresh
0260                         = tcp_vegas_ssthresh(tp);
0261                 } else if (diff < alpha) {
0262                     /* We don't have enough extra packets
0263                      * in the network, so speed up.
0264                      */
0265                     tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
0266                 } else {
0267                     /* Sending just as fast as we
0268                      * should be.
0269                      */
0270                 }
0271             }
0272
0273             if (tcp_snd_cwnd(tp) < 2)
0274                 tcp_snd_cwnd_set(tp, 2);
0275             else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp)
0276                 tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp);
0277
0278             tp->snd_ssthresh = tcp_current_ssthresh(sk);
0279         }
0280
0281         /* Wipe the slate clean for the next RTT. */
0282         vegas->cntRTT = 0;
0283         vegas->minRTT = 0x7fffffff;
0284     }
0285     /* Use normal slow start */
0286     else if (tcp_in_slow_start(tp))
0287         tcp_slow_start(tp, acked);
0288 }
0289
0290 /* Extract info for Tcp socket info provided via netlink. */
0291 size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr,
0292               union tcp_cc_info *info)
0293 {
0294     const struct vegas *ca = inet_csk_ca(sk);
0295
0296     if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
0297         info->vegas.tcpv_enabled = ca->doing_vegas_now;
0298         info->vegas.tcpv_rttcnt = ca->cntRTT;
0299         info->vegas.tcpv_rtt = ca->baseRTT;
0300         info->vegas.tcpv_minrtt = ca->minRTT;
0301
0302         *attr = INET_DIAG_VEGASINFO;
0303         return sizeof(struct tcpvegas_info);
0304     }
0305     return 0;
0306 }
0307 EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
0308
0309 static struct tcp_congestion_ops tcp_vegas __read_mostly = {
0310     .init       = tcp_vegas_init,
0311     .ssthresh   = tcp_reno_ssthresh,
0312     .undo_cwnd  = tcp_reno_undo_cwnd,
0313     .cong_avoid = tcp_vegas_cong_avoid,
0314     .pkts_acked = tcp_vegas_pkts_acked,
0315     .set_state  = tcp_vegas_state,
0316     .cwnd_event = tcp_vegas_cwnd_event,
0317     .get_info   = tcp_vegas_get_info,
0318
0319     .owner      = THIS_MODULE,
0320     .name       = "vegas",
0321 };
0322
0323 static int __init tcp_vegas_register(void)
0324 {
0325     BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
0326     tcp_register_congestion_control(&tcp_vegas);
0327     return 0;
0328 }
0329
0330 static void __exit tcp_vegas_unregister(void)
0331 {
0332     tcp_unregister_congestion_control(&tcp_vegas);
0333 }
0334
0335 module_init(tcp_vegas_register);
0336 module_exit(tcp_vegas_unregister);
0337
0338 MODULE_AUTHOR("Stephen Hemminger");
0339 MODULE_LICENSE("GPL");
0340 MODULE_DESCRIPTION("TCP Vegas");