0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028 #include <linux/module.h>
0029 #include <linux/math64.h>
0030 #include <net/tcp.h>
0031 #include <linux/inet_diag.h>
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
/* TCP NV tuning knobs. Several are fixed-point scaled: a value noted as
 * ">>7", ">>8", ">>10" etc. below is divided by that power of two at the
 * point of use.
 */
static int nv_pad __read_mostly = 10;		/* max queued pkts above ideal cwnd */
static int nv_pad_buffer __read_mostly = 2;	/* no-change band below max_win */
static int nv_reset_period __read_mostly = 5;	/* nv_min_rtt reset period, seconds */
static int nv_min_cwnd __read_mostly = 2;	/* NV won't shrink cwnd below this
						 * without losses */
static int nv_cong_dec_mult __read_mostly = 30 * 128 / 100;	/* = 30% (>>7) */
static int nv_ssthresh_factor __read_mostly = 8;	/* = 1.0  (>>3) */
static int nv_rtt_factor __read_mostly = 128;		/* RTT EWMA weight (>>8) */
static int nv_loss_dec_factor __read_mostly = 819;	/* => ~80% (>>10) on loss */
static int nv_cwnd_growth_rate_neg __read_mostly = 8;	/* rounds before speeding
							 * up slowed growth */
static int nv_cwnd_growth_rate_pos __read_mostly;	/* 0 => Reno-like growth */
static int nv_dec_eval_min_calls __read_mostly = 60;	/* min samples before a
							 * cwnd decrease */
static int nv_inc_eval_min_calls __read_mostly = 20;	/* min samples before
							 * allowing growth */
static int nv_ssthresh_eval_min_calls __read_mostly = 30; /* min samples before
							   * setting ssthresh */
static int nv_stop_rtt_cnt __read_mostly = 10;	/* rounds before pausing growth */
static int nv_rtt_min_cnt __read_mostly = 2;	/* min RTT rounds per decision */

/* Only the three knobs most useful for tuning are exposed as parameters. */
module_param(nv_pad, int, 0644);
MODULE_PARM_DESC(nv_pad, "max queued packets allowed in network");
module_param(nv_reset_period, int, 0644);
MODULE_PARM_DESC(nv_reset_period, "nv_min_rtt reset period (secs)");
module_param(nv_min_cwnd, int, 0644);
MODULE_PARM_DESC(nv_min_cwnd, "NV will not decrease cwnd below this value"
		 " without losses");
0080
0081
0082 struct tcpnv {
0083 unsigned long nv_min_rtt_reset_jiffies;
0084
0085 s8 cwnd_growth_factor;
0086
0087 u8 available8;
0088 u16 available16;
0089 u8 nv_allow_cwnd_growth:1,
0090 nv_reset:1,
0091 nv_catchup:1;
0092
0093 u8 nv_eval_call_cnt;
0094 u8 nv_min_cwnd;
0095
0096
0097
0098
0099
0100 u8 nv_rtt_cnt; ;
0101 u32 nv_last_rtt;
0102 u32 nv_min_rtt;
0103 u32 nv_min_rtt_new;
0104 u32 nv_base_rtt;
0105
0106 u32 nv_lower_bound_rtt;
0107
0108
0109 u32 nv_rtt_max_rate;
0110 u32 nv_rtt_start_seq;
0111
0112 u32 nv_last_snd_una;
0113
0114
0115 u32 nv_no_cong_cnt;
0116 };
0117
#define NV_INIT_RTT U32_MAX		/* "infinite" rtt before first sample */
#define NV_MIN_CWND 4			/* floor for ca->nv_min_cwnd */
#define NV_MIN_CWND_GROW 2		/* nv_min_cwnd growth step (TSO probing) */
#define NV_TSO_CWND_BOUND 80		/* cap for ca->nv_min_cwnd */
0122
0123 static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk)
0124 {
0125 struct tcp_sock *tp = tcp_sk(sk);
0126
0127 ca->nv_reset = 0;
0128 ca->nv_no_cong_cnt = 0;
0129 ca->nv_rtt_cnt = 0;
0130 ca->nv_last_rtt = 0;
0131 ca->nv_rtt_max_rate = 0;
0132 ca->nv_rtt_start_seq = tp->snd_una;
0133 ca->nv_eval_call_cnt = 0;
0134 ca->nv_last_snd_una = tp->snd_una;
0135 }
0136
0137 static void tcpnv_init(struct sock *sk)
0138 {
0139 struct tcpnv *ca = inet_csk_ca(sk);
0140 int base_rtt;
0141
0142 tcpnv_reset(ca, sk);
0143
0144
0145
0146
0147
0148
0149 base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT, 0, NULL);
0150 if (base_rtt > 0) {
0151 ca->nv_base_rtt = base_rtt;
0152 ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8;
0153 } else {
0154 ca->nv_base_rtt = 0;
0155 ca->nv_lower_bound_rtt = 0;
0156 }
0157
0158 ca->nv_allow_cwnd_growth = 1;
0159 ca->nv_min_rtt_reset_jiffies = jiffies + 2 * HZ;
0160 ca->nv_min_rtt = NV_INIT_RTT;
0161 ca->nv_min_rtt_new = NV_INIT_RTT;
0162 ca->nv_min_cwnd = NV_MIN_CWND;
0163 ca->nv_catchup = 0;
0164 ca->cwnd_growth_factor = 0;
0165 }
0166
0167
0168
0169
0170 inline u32 nv_get_bounded_rtt(struct tcpnv *ca, u32 val)
0171 {
0172 if (ca->nv_lower_bound_rtt > 0 && val < ca->nv_lower_bound_rtt)
0173 return ca->nv_lower_bound_rtt;
0174 else if (ca->nv_base_rtt > 0 && val > ca->nv_base_rtt)
0175 return ca->nv_base_rtt;
0176 else
0177 return val;
0178 }
0179
0180 static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
0181 {
0182 struct tcp_sock *tp = tcp_sk(sk);
0183 struct tcpnv *ca = inet_csk_ca(sk);
0184 u32 cnt;
0185
0186 if (!tcp_is_cwnd_limited(sk))
0187 return;
0188
0189
0190 if (!ca->nv_allow_cwnd_growth)
0191 return;
0192
0193 if (tcp_in_slow_start(tp)) {
0194 acked = tcp_slow_start(tp, acked);
0195 if (!acked)
0196 return;
0197 }
0198
0199 if (ca->cwnd_growth_factor < 0) {
0200 cnt = tcp_snd_cwnd(tp) << -ca->cwnd_growth_factor;
0201 tcp_cong_avoid_ai(tp, cnt, acked);
0202 } else {
0203 cnt = max(4U, tcp_snd_cwnd(tp) >> ca->cwnd_growth_factor);
0204 tcp_cong_avoid_ai(tp, cnt, acked);
0205 }
0206 }
0207
0208 static u32 tcpnv_recalc_ssthresh(struct sock *sk)
0209 {
0210 const struct tcp_sock *tp = tcp_sk(sk);
0211
0212 return max((tcp_snd_cwnd(tp) * nv_loss_dec_factor) >> 10, 2U);
0213 }
0214
0215 static void tcpnv_state(struct sock *sk, u8 new_state)
0216 {
0217 struct tcpnv *ca = inet_csk_ca(sk);
0218
0219 if (new_state == TCP_CA_Open && ca->nv_reset) {
0220 tcpnv_reset(ca, sk);
0221 } else if (new_state == TCP_CA_Loss || new_state == TCP_CA_CWR ||
0222 new_state == TCP_CA_Recovery) {
0223 ca->nv_reset = 1;
0224 ca->nv_allow_cwnd_growth = 0;
0225 if (new_state == TCP_CA_Loss) {
0226
0227 if (ca->cwnd_growth_factor > 0)
0228 ca->cwnd_growth_factor = 0;
0229
0230 if (nv_cwnd_growth_rate_neg > 0 &&
0231 ca->cwnd_growth_factor > -8)
0232 ca->cwnd_growth_factor--;
0233 }
0234 }
0235 }
0236
0237
0238
/* Do congestion-avoidance calculations for TCP NV.
 * Called for each ACK carrying a valid RTT sample while in Open/Disorder.
 * Tracks the max throughput and min RTT per RTT round; once per round it
 * decides whether the path is congested (decrease cwnd and ssthresh),
 * uncongested (allow/accelerate growth), or inconclusive (do nothing).
 */
static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcpnv *ca = inet_csk_ca(sk);
	unsigned long now = jiffies;
	u64 rate64;
	u32 rate, max_win, cwnd_by_slope;
	u32 avg_rtt;
	u32 bytes_acked = 0;

	/* Some calls are for duplicates without valid timestamps */
	if (sample->rtt_us < 0)
		return;

	/* Only measure in steady states; skip CWR/Recovery/Loss. */
	if (icsk->icsk_ca_state != TCP_CA_Open &&
	    icsk->icsk_ca_state != TCP_CA_Disorder)
		return;

	/* Stop cwnd growth once catch-up mode has restored a workable cwnd. */
	if (ca->nv_catchup && tcp_snd_cwnd(tp) >= nv_min_cwnd) {
		ca->nv_catchup = 0;
		ca->nv_allow_cwnd_growth = 0;
	}

	/* Bytes newly acked since the previous call. */
	bytes_acked = tp->snd_una - ca->nv_last_snd_una;
	ca->nv_last_snd_una = tp->snd_una;

	if (sample->in_flight == 0)
		return;

	/* Exponentially weighted moving average of RTT:
	 * avg = (new * factor + last * (256 - factor)) >> 8
	 */
	if (nv_rtt_factor > 0) {
		if (ca->nv_last_rtt > 0) {
			avg_rtt = (((u64)sample->rtt_us) * nv_rtt_factor +
				   ((u64)ca->nv_last_rtt)
				   * (256 - nv_rtt_factor)) >> 8;
		} else {
			avg_rtt = sample->rtt_us;
			/* First sample: seed min rtt above the sample so
			 * only real measurements can lower it. */
			ca->nv_min_rtt = avg_rtt << 1;
		}
		ca->nv_last_rtt = avg_rtt;
	} else {
		avg_rtt = sample->rtt_us;
	}

	/* rate in 100's bits per second: in_flight * 8 * 10^4 / rtt_us.
	 * Guard against a zero rtt with "?: 1". */
	rate64 = ((u64)sample->in_flight) * 80000;
	do_div(rate64, avg_rtt ?: 1);
	rate = (u32)rate64;

	/* Remember the maximum rate seen during this RTT round; it drives
	 * the cwnd_by_slope estimate below.
	 */
	if (ca->nv_rtt_max_rate < rate)
		ca->nv_rtt_max_rate = rate;

	/* We have a valid sample: bump the count (saturating in u8). */
	if (ca->nv_eval_call_cnt < 255)
		ca->nv_eval_call_cnt++;

	/* Apply BPF-configured lower/upper bounds; only affects min-rtt. */
	avg_rtt = nv_get_bounded_rtt(ca, avg_rtt);

	/* Update active min rtt if necessary. */
	if (avg_rtt < ca->nv_min_rtt)
		ca->nv_min_rtt = avg_rtt;

	/* Update the candidate min rtt for the next period. */
	if (avg_rtt < ca->nv_min_rtt_new)
		ca->nv_min_rtt_new = avg_rtt;

	/* Periodically replace nv_min_rtt with the candidate so path
	 * changes are eventually picked up (a "warm reset"). The period
	 * is randomized to roughly [3/4, 5/4) of nv_reset_period seconds
	 * so flows sharing a bottleneck don't all reset in lock-step:
	 * (384 + rand) / 512 with rand in [0, 255].
	 */
	if (time_after_eq(now, ca->nv_min_rtt_reset_jiffies)) {
		unsigned char rand;

		ca->nv_min_rtt = ca->nv_min_rtt_new;
		ca->nv_min_rtt_new = NV_INIT_RTT;
		get_random_bytes(&rand, 1);
		ca->nv_min_rtt_reset_jiffies =
			now + ((nv_reset_period * (384 + rand) * HZ) >> 9);
		/* Also decay the TSO-probing floor in case the previous
		 * value is no longer accurate.
		 */
		ca->nv_min_cwnd = max(ca->nv_min_cwnd / 2, NV_MIN_CWND);
	}

	/* Once per RTT round (start sequence fully acked) evaluate. */
	if (before(ca->nv_rtt_start_seq, tp->snd_una)) {
		ca->nv_rtt_start_seq = tp->snd_nxt;
		if (ca->nv_rtt_cnt < 0xff)
			/* saturate to fit the u8 field */
			ca->nv_rtt_cnt++;

		/* If this function ran only once this round, the cwnd is
		 * probably too small to produce useful rate samples (often
		 * due to TSO, LRO or interrupt coalescence), so grow
		 * nv_min_cwnd and start a new round immediately.
		 */
		if (ca->nv_eval_call_cnt == 1 &&
		    bytes_acked >= (ca->nv_min_cwnd - 1) * tp->mss_cache &&
		    ca->nv_min_cwnd < (NV_TSO_CWND_BOUND + 1)) {
			ca->nv_min_cwnd = min(ca->nv_min_cwnd
					      + NV_MIN_CWND_GROW,
					      NV_TSO_CWND_BOUND + 1);
			ca->nv_rtt_start_seq = tp->snd_nxt +
				ca->nv_min_cwnd * tp->mss_cache;
			ca->nv_eval_call_cnt = 0;
			ca->nv_allow_cwnd_growth = 1;
			return;
		}

		/* Ideal cwnd for the observed rate:
		 *   slope = 80000 * mss / nv_min_rtt
		 *   cwnd_by_slope = nv_rtt_max_rate / slope
		 * max_win adds nv_pad packets of allowed queueing.
		 */
		cwnd_by_slope = (u32)
			div64_u64(((u64)ca->nv_rtt_max_rate) * ca->nv_min_rtt,
				  80000ULL * tp->mss_cache);
		max_win = cwnd_by_slope + nv_pad;

		/* cwnd > max_win            => congested, decrease
		 * cwnd <= max_win - buffer  => uncongested, may grow
		 * otherwise                 => in-between, leave as-is
		 */
		if (tcp_snd_cwnd(tp) > max_win) {
			/* Congestion suspected; make sure we have enough
			 * evidence before acting:
			 * - at least nv_rtt_min_cnt RTT rounds, and
			 * - enough samples (threshold depends on whether
			 *   ssthresh has been set yet).
			 */
			if (ca->nv_rtt_cnt < nv_rtt_min_cnt) {
				return;
			} else if (tp->snd_ssthresh == TCP_INFINITE_SSTHRESH) {
				if (ca->nv_eval_call_cnt <
				    nv_ssthresh_eval_min_calls)
					return;
				/* enough samples: fall through to decrease */
			} else if (ca->nv_eval_call_cnt <
				   nv_dec_eval_min_calls) {
				/* Not enough samples to decrease yet, but
				 * stop growth after nv_stop_rtt_cnt rounds
				 * of suspected congestion.
				 */
				if (ca->nv_allow_cwnd_growth &&
				    ca->nv_rtt_cnt > nv_stop_rtt_cnt)
					ca->nv_allow_cwnd_growth = 0;
				return;
			}

			/* We are congested: set ssthresh relative to the
			 * ideal window (nv_ssthresh_factor >> 3).
			 */
			ca->nv_allow_cwnd_growth = 0;
			tp->snd_ssthresh =
				(nv_ssthresh_factor * max_win) >> 3;
			if (tcp_snd_cwnd(tp) - max_win > 2) {
				/* gap > 2: multiplicative decrease of the
				 * excess by nv_cong_dec_mult/128, >= 2 pkts
				 */
				int dec;

				dec = max(2U, ((tcp_snd_cwnd(tp) - max_win) *
					       nv_cong_dec_mult) >> 7);
				tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - dec);
			} else if (nv_cong_dec_mult > 0) {
				tcp_snd_cwnd_set(tp, max_win);
			}
			if (ca->cwnd_growth_factor > 0)
				ca->cwnd_growth_factor = 0;
			ca->nv_no_cong_cnt = 0;
		} else if (tcp_snd_cwnd(tp) <= max_win - nv_pad_buffer) {
			/* No congestion: allow growth once enough samples. */
			if (ca->nv_eval_call_cnt < nv_inc_eval_min_calls)
				return;

			ca->nv_allow_cwnd_growth = 1;
			ca->nv_no_cong_cnt++;
			/* After enough consecutive no-congestion rounds,
			 * speed growth back up one notch (first recovering
			 * from negative factors, then going positive if
			 * nv_cwnd_growth_rate_pos enables it).
			 */
			if (ca->cwnd_growth_factor < 0 &&
			    nv_cwnd_growth_rate_neg > 0 &&
			    ca->nv_no_cong_cnt > nv_cwnd_growth_rate_neg) {
				ca->cwnd_growth_factor++;
				ca->nv_no_cong_cnt = 0;
			} else if (ca->cwnd_growth_factor >= 0 &&
				   nv_cwnd_growth_rate_pos > 0 &&
				   ca->nv_no_cong_cnt >
				   nv_cwnd_growth_rate_pos) {
				ca->cwnd_growth_factor++;
				ca->nv_no_cong_cnt = 0;
			}
		} else {
			/* In the dead band: no decision this round. */
			return;
		}

		/* A decision was made: start a fresh measurement round. */
		ca->nv_eval_call_cnt = 0;
		ca->nv_rtt_cnt = 0;
		ca->nv_rtt_max_rate = 0;

		/* Don't let cwnd fall below the nv_min_cwnd parameter
		 * (it was not below before, so if it is now, it is
		 * because NV just decreased it).
		 */
		if (tcp_snd_cwnd(tp) < nv_min_cwnd)
			tcp_snd_cwnd_set(tp, nv_min_cwnd);
	}
}
0451
0452
0453 static size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr,
0454 union tcp_cc_info *info)
0455 {
0456 const struct tcpnv *ca = inet_csk_ca(sk);
0457
0458 if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
0459 info->vegas.tcpv_enabled = 1;
0460 info->vegas.tcpv_rttcnt = ca->nv_rtt_cnt;
0461 info->vegas.tcpv_rtt = ca->nv_last_rtt;
0462 info->vegas.tcpv_minrtt = ca->nv_min_rtt;
0463
0464 *attr = INET_DIAG_VEGASINFO;
0465 return sizeof(struct tcpvegas_info);
0466 }
0467 return 0;
0468 }
0469
/* Congestion-control operations registered under the name "nv".
 * No .ssthresh-undo specialization: plain Reno undo is used.
 */
static struct tcp_congestion_ops tcpnv __read_mostly = {
	.init		= tcpnv_init,
	.ssthresh	= tcpnv_recalc_ssthresh,
	.cong_avoid	= tcpnv_cong_avoid,
	.set_state	= tcpnv_state,
	.undo_cwnd	= tcp_reno_undo_cwnd,
	.pkts_acked	= tcpnv_acked,
	.get_info	= tcpnv_get_info,

	.owner		= THIS_MODULE,
	.name		= "nv",
};
0482
/* Module init: register "nv" with the TCP congestion-control framework. */
static int __init tcpnv_register(void)
{
	/* Private state must fit in the space inet_csk reserves for
	 * congestion-control modules.
	 */
	BUILD_BUG_ON(sizeof(struct tcpnv) > ICSK_CA_PRIV_SIZE);

	return tcp_register_congestion_control(&tcpnv);
}
0489
/* Module exit: remove "nv" from the available congestion controls. */
static void __exit tcpnv_unregister(void)
{
	tcp_unregister_congestion_control(&tcpnv);
}
0494
/* Standard module entry/exit points and metadata. */
module_init(tcpnv_register);
module_exit(tcpnv_unregister);

MODULE_AUTHOR("Lawrence Brakmo");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("TCP NV");
MODULE_VERSION("1.0");