Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * TCP Low Priority (TCP-LP)
0004  *
0005  * TCP Low Priority is a distributed algorithm whose goal is to utilize only
0006  *   the excess network bandwidth as compared to the ``fair share`` of
0007  *   bandwidth as targeted by TCP.
0008  *
0009  * As of 2.6.13, Linux supports pluggable congestion control algorithms.
0010  * Due to the limitation of the API, we make the following changes to
0011  * the original TCP-LP implementation:
0012  *   o We use newReno in most core CA handling. Only add some checking
0013  *     within cong_avoid.
0014  *   o Error correction for remote HZ: the remote HZ estimate is kept
0015  *     under continuous checking and updating.
0016  *   o Handling calculation of One-Way-Delay (OWD) within rtt_sample, since
0017  *     OWD has a similar meaning to RTT. Also correct the buggy formula.
0018  *   o Handle reaction for Early Congestion Indication (ECI) within
0019  *     pkts_acked, as mentioned within pseudo code.
0020  *   o OWD is handled in relative format, where the local time stamp will
0021  *     be in tcp_time_stamp format.
0022  *
0023  * Original Author:
0024  *   Aleksandar Kuzmanovic <akuzma@northwestern.edu>
0025  * Available from:
0026  *   http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf
0027  * Original implementation for 2.4.19:
0028  *   http://www-ece.rice.edu/networks/TCP-LP/
0029  *
0030  * 2.6.x module Authors:
0031  *   Wong Hoi Sing, Edison <hswong3i@gmail.com>
0032  *   Hung Hing Lun, Mike <hlhung3i@gmail.com>
0033  * SourceForge project page:
0034  *   http://tcp-lp-mod.sourceforge.net/
0035  */
0036 
0037 #include <linux/module.h>
0038 #include <net/tcp.h>
0039 
/* Resolution used for one-way-delay (OWD) values; an alias of TCP_TS_HZ. */
#define LP_RESOL TCP_TS_HZ
0042 
/**
 * enum tcp_lp_state - bit flags kept in &struct lp.flag
 * @LP_VALID_RHZ: the remote HZ estimate is usable (greater than zero)
 * @LP_VALID_OWD: the most recent OWD computation produced a non-zero value
 * @LP_WITHIN_THR: the smoothed OWD is below the delay threshold
 * @LP_WITHIN_INF: the last drop is more recent than the inference period
 *
 * The flags exist mainly to make the debug output readable.
 * Bit 2 (0x04) is not assigned to any flag.
 */
enum tcp_lp_state {
    LP_VALID_RHZ  = 0x01,
    LP_VALID_OWD  = 0x02,
    LP_WITHIN_THR = 0x08,
    LP_WITHIN_INF = 0x10,
};
0059 
0060 /**
0061  * struct lp
0062  * @flag: TCP-LP state flag
0063  * @sowd: smoothed OWD << 3
0064  * @owd_min: min OWD
0065  * @owd_max: max OWD
0066  * @owd_max_rsv: reserved max owd
0067  * @remote_hz: estimated remote HZ
0068  * @remote_ref_time: remote reference time
0069  * @local_ref_time: local reference time
0070  * @last_drop: time for last active drop
0071  * @inference: current inference
0072  *
0073  * TCP-LP's private struct.
0074  * We get the idea from original TCP-LP implementation where only left those we
0075  * found are really useful.
0076  */
0077 struct lp {
0078     u32 flag;
0079     u32 sowd;
0080     u32 owd_min;
0081     u32 owd_max;
0082     u32 owd_max_rsv;
0083     u32 remote_hz;
0084     u32 remote_ref_time;
0085     u32 local_ref_time;
0086     u32 last_drop;
0087     u32 inference;
0088 };
0089 
0090 /**
0091  * tcp_lp_init
0092  * @sk: socket to initialize congestion control algorithm for
0093  *
0094  * Init all required variables.
0095  * Clone the handling from Vegas module implementation.
0096  */
0097 static void tcp_lp_init(struct sock *sk)
0098 {
0099     struct lp *lp = inet_csk_ca(sk);
0100 
0101     lp->flag = 0;
0102     lp->sowd = 0;
0103     lp->owd_min = 0xffffffff;
0104     lp->owd_max = 0;
0105     lp->owd_max_rsv = 0;
0106     lp->remote_hz = 0;
0107     lp->remote_ref_time = 0;
0108     lp->local_ref_time = 0;
0109     lp->last_drop = 0;
0110     lp->inference = 0;
0111 }
0112 
0113 /**
0114  * tcp_lp_cong_avoid
0115  * @sk: socket to avoid congesting
0116  *
0117  * Implementation of cong_avoid.
0118  * Will only call newReno CA when away from inference.
0119  * From TCP-LP's paper, this will be handled in additive increasement.
0120  */
0121 static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
0122 {
0123     struct lp *lp = inet_csk_ca(sk);
0124 
0125     if (!(lp->flag & LP_WITHIN_INF))
0126         tcp_reno_cong_avoid(sk, ack, acked);
0127 }
0128 
0129 /**
0130  * tcp_lp_remote_hz_estimator
0131  * @sk: socket which needs an estimate for the remote HZs
0132  *
0133  * Estimate remote HZ.
0134  * We keep on updating the estimated value, where original TCP-LP
0135  * implementation only guest it for once and use forever.
0136  */
0137 static u32 tcp_lp_remote_hz_estimator(struct sock *sk)
0138 {
0139     struct tcp_sock *tp = tcp_sk(sk);
0140     struct lp *lp = inet_csk_ca(sk);
0141     s64 rhz = lp->remote_hz << 6;   /* remote HZ << 6 */
0142     s64 m = 0;
0143 
0144     /* not yet record reference time
0145      * go away!! record it before come back!! */
0146     if (lp->remote_ref_time == 0 || lp->local_ref_time == 0)
0147         goto out;
0148 
0149     /* we can't calc remote HZ with no different!! */
0150     if (tp->rx_opt.rcv_tsval == lp->remote_ref_time ||
0151         tp->rx_opt.rcv_tsecr == lp->local_ref_time)
0152         goto out;
0153 
0154     m = TCP_TS_HZ *
0155         (tp->rx_opt.rcv_tsval - lp->remote_ref_time) /
0156         (tp->rx_opt.rcv_tsecr - lp->local_ref_time);
0157     if (m < 0)
0158         m = -m;
0159 
0160     if (rhz > 0) {
0161         m -= rhz >> 6;  /* m is now error in remote HZ est */
0162         rhz += m;   /* 63/64 old + 1/64 new */
0163     } else
0164         rhz = m << 6;
0165 
0166  out:
0167     /* record time for successful remote HZ calc */
0168     if ((rhz >> 6) > 0)
0169         lp->flag |= LP_VALID_RHZ;
0170     else
0171         lp->flag &= ~LP_VALID_RHZ;
0172 
0173     /* record reference time stamp */
0174     lp->remote_ref_time = tp->rx_opt.rcv_tsval;
0175     lp->local_ref_time = tp->rx_opt.rcv_tsecr;
0176 
0177     return rhz >> 6;
0178 }
0179 
0180 /**
0181  * tcp_lp_owd_calculator
0182  * @sk: socket to calculate one way delay for
0183  *
0184  * Calculate one way delay (in relative format).
0185  * Original implement OWD as minus of remote time difference to local time
0186  * difference directly. As this time difference just simply equal to RTT, when
0187  * the network status is stable, remote RTT will equal to local RTT, and result
0188  * OWD into zero.
0189  * It seems to be a bug and so we fixed it.
0190  */
0191 static u32 tcp_lp_owd_calculator(struct sock *sk)
0192 {
0193     struct tcp_sock *tp = tcp_sk(sk);
0194     struct lp *lp = inet_csk_ca(sk);
0195     s64 owd = 0;
0196 
0197     lp->remote_hz = tcp_lp_remote_hz_estimator(sk);
0198 
0199     if (lp->flag & LP_VALID_RHZ) {
0200         owd =
0201             tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) -
0202             tp->rx_opt.rcv_tsecr * (LP_RESOL / TCP_TS_HZ);
0203         if (owd < 0)
0204             owd = -owd;
0205     }
0206 
0207     if (owd > 0)
0208         lp->flag |= LP_VALID_OWD;
0209     else
0210         lp->flag &= ~LP_VALID_OWD;
0211 
0212     return owd;
0213 }
0214 
0215 /**
0216  * tcp_lp_rtt_sample
0217  * @sk: socket to add a rtt sample to
0218  * @rtt: round trip time, which is ignored!
0219  *
0220  * Implementation or rtt_sample.
0221  * Will take the following action,
0222  *   1. calc OWD,
0223  *   2. record the min/max OWD,
0224  *   3. calc smoothed OWD (SOWD).
0225  * Most ideas come from the original TCP-LP implementation.
0226  */
0227 static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt)
0228 {
0229     struct lp *lp = inet_csk_ca(sk);
0230     s64 mowd = tcp_lp_owd_calculator(sk);
0231 
0232     /* sorry that we don't have valid data */
0233     if (!(lp->flag & LP_VALID_RHZ) || !(lp->flag & LP_VALID_OWD))
0234         return;
0235 
0236     /* record the next min owd */
0237     if (mowd < lp->owd_min)
0238         lp->owd_min = mowd;
0239 
0240     /* always forget the max of the max
0241      * we just set owd_max as one below it */
0242     if (mowd > lp->owd_max) {
0243         if (mowd > lp->owd_max_rsv) {
0244             if (lp->owd_max_rsv == 0)
0245                 lp->owd_max = mowd;
0246             else
0247                 lp->owd_max = lp->owd_max_rsv;
0248             lp->owd_max_rsv = mowd;
0249         } else
0250             lp->owd_max = mowd;
0251     }
0252 
0253     /* calc for smoothed owd */
0254     if (lp->sowd != 0) {
0255         mowd -= lp->sowd >> 3;  /* m is now error in owd est */
0256         lp->sowd += mowd;   /* owd = 7/8 owd + 1/8 new */
0257     } else
0258         lp->sowd = mowd << 3;   /* take the measured time be owd */
0259 }
0260 
0261 /**
0262  * tcp_lp_pkts_acked
0263  * @sk: socket requiring congestion avoidance calculations
0264  *
0265  * Implementation of pkts_acked.
0266  * Deal with active drop under Early Congestion Indication.
0267  * Only drop to half and 1 will be handle, because we hope to use back
0268  * newReno in increase case.
0269  * We work it out by following the idea from TCP-LP's paper directly
0270  */
0271 static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
0272 {
0273     struct tcp_sock *tp = tcp_sk(sk);
0274     struct lp *lp = inet_csk_ca(sk);
0275     u32 now = tcp_time_stamp(tp);
0276     u32 delta;
0277 
0278     if (sample->rtt_us > 0)
0279         tcp_lp_rtt_sample(sk, sample->rtt_us);
0280 
0281     /* calc inference */
0282     delta = now - tp->rx_opt.rcv_tsecr;
0283     if ((s32)delta > 0)
0284         lp->inference = 3 * delta;
0285 
0286     /* test if within inference */
0287     if (lp->last_drop && (now - lp->last_drop < lp->inference))
0288         lp->flag |= LP_WITHIN_INF;
0289     else
0290         lp->flag &= ~LP_WITHIN_INF;
0291 
0292     /* test if within threshold */
0293     if (lp->sowd >> 3 <
0294         lp->owd_min + 15 * (lp->owd_max - lp->owd_min) / 100)
0295         lp->flag |= LP_WITHIN_THR;
0296     else
0297         lp->flag &= ~LP_WITHIN_THR;
0298 
0299     pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag,
0300          tcp_snd_cwnd(tp), lp->remote_hz, lp->owd_min, lp->owd_max,
0301          lp->sowd >> 3);
0302 
0303     if (lp->flag & LP_WITHIN_THR)
0304         return;
0305 
0306     /* FIXME: try to reset owd_min and owd_max here
0307      * so decrease the chance the min/max is no longer suitable
0308      * and will usually within threshold when within inference */
0309     lp->owd_min = lp->sowd >> 3;
0310     lp->owd_max = lp->sowd >> 2;
0311     lp->owd_max_rsv = lp->sowd >> 2;
0312 
0313     /* happened within inference
0314      * drop snd_cwnd into 1 */
0315     if (lp->flag & LP_WITHIN_INF)
0316         tcp_snd_cwnd_set(tp, 1U);
0317 
0318     /* happened after inference
0319      * cut snd_cwnd into half */
0320     else
0321         tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp) >> 1U, 1U));
0322 
0323     /* record this drop time */
0324     lp->last_drop = now;
0325 }
0326 
0327 static struct tcp_congestion_ops tcp_lp __read_mostly = {
0328     .init = tcp_lp_init,
0329     .ssthresh = tcp_reno_ssthresh,
0330     .undo_cwnd = tcp_reno_undo_cwnd,
0331     .cong_avoid = tcp_lp_cong_avoid,
0332     .pkts_acked = tcp_lp_pkts_acked,
0333 
0334     .owner = THIS_MODULE,
0335     .name = "lp"
0336 };
0337 
0338 static int __init tcp_lp_register(void)
0339 {
0340     BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE);
0341     return tcp_register_congestion_control(&tcp_lp);
0342 }
0343 
0344 static void __exit tcp_lp_unregister(void)
0345 {
0346     tcp_unregister_congestion_control(&tcp_lp);
0347 }
0348 
0349 module_init(tcp_lp_register);
0350 module_exit(tcp_lp_unregister);
0351 
0352 MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun Mike");
0353 MODULE_LICENSE("GPL");
0354 MODULE_DESCRIPTION("TCP Low Priority");