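/*
 * CHOKe packet scheduler: a stateless variant of RED that, under congestion,
 * compares each arriving packet with a randomly chosen queued packet and
 * drops both when they belong to the same flow, penalizing unresponsive
 * flows without keeping per-flow state.
 */
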
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <net/flow_dissector.h>
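
/*
 * CHOKe stateless AQM for fair bandwidth allocation
 * =================================================
 *
 * CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
 * unresponsive flows) is a variant of RED that penalizes misbehaving flows
 * but maintains no flow state. The difference from RED is one additional
 * step during enqueue: if the average queue size is above the minimum
 * threshold, a packet is chosen at random from the queue. If the new packet
 * and the chosen packet belong to the same flow, both are dropped.
 *
 * Reference: R. Pan, B. Prabhakar and K. Psounis, "CHOKe - A Stateless
 * Active Queue Management Scheme for Approximating Fair Bandwidth
 * Allocation", Proceedings of IEEE INFOCOM, 2000.
 */

/* Deliver a fair share of buffer space, but never more than the hard limit. */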
#define CHOKE_MAX_QUEUE (128*1024 - 1)

struct choke_sched_data {
        /* Parameters */
        u32 limit;
        unsigned char flags;

        struct red_parms parms;

        /* Variables */
        struct red_vars vars;
        struct {
                u32 prob_drop;    /* Early probability drops */
                u32 prob_mark;    /* Early probability marks */
                u32 forced_drop;  /* Forced drops, qavg > max_thresh */
                u32 forced_mark;  /* Forced marks, qavg > max_thresh */
                u32 pdrop;        /* Drops due to queue limits */
                u32 other;        /* Drops for other reasons */
                u32 matched;      /* Drops due to flow match */
        } stats;

        unsigned int head;
        unsigned int tail;

        unsigned int tab_mask; /* size - 1 */

        struct sk_buff **tab;
};
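
/* Number of slots in use between head and tail, including holes */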
static unsigned int choke_len(const struct choke_sched_data *q)
{
        return (q->tail - q->head) & q->tab_mask;
}

/* Is ECN marking configured for this queue? */
static int use_ecn(const struct choke_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

/* Should packets over the max threshold be dropped even if ECN is enabled? */
static int use_harddrop(const struct choke_sched_data *q)
{
        return q->flags & TC_RED_HARDDROP;
}
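
/* Move the head pointer forward to skip over holes */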
static void choke_zap_head_holes(struct choke_sched_data *q)
{
        do {
                q->head = (q->head + 1) & q->tab_mask;
                if (q->head == q->tail)
                        break;
        } while (q->tab[q->head] == NULL);
}
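
/* Move the tail pointer backwards to reuse holes */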
static void choke_zap_tail_holes(struct choke_sched_data *q)
{
        do {
                q->tail = (q->tail - 1) & q->tab_mask;
                if (q->head == q->tail)
                        break;
        } while (q->tab[q->tail] == NULL);
}
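
/* Drop the packet at slot idx from the queue array, leaving a "hole" */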
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
                              struct sk_buff **to_free)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb = q->tab[idx];

        q->tab[idx] = NULL;

        if (idx == q->head)
                choke_zap_head_holes(q);
        if (idx == q->tail)
                choke_zap_tail_holes(q);

        qdisc_qstats_backlog_dec(sch, skb);
        qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
        qdisc_drop(skb, sch, to_free);
        --sch->q.qlen;
}

/* Per-packet state kept in the qdisc control block: a cached flow digest */
struct choke_skb_cb {
        u8 keys_valid;
        struct flow_keys_digest keys;
};

static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
{
        qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
        return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}
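
/*
 * Compare the flows of two packets. Returns true only when the dissected
 * flow keys (addresses, ports, protocol) of both packets match.
 */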
static bool choke_match_flow(struct sk_buff *skb1,
                             struct sk_buff *skb2)
{
        struct flow_keys temp;

        if (skb1->protocol != skb2->protocol)
                return false;

        if (!choke_skb_cb(skb1)->keys_valid) {
                choke_skb_cb(skb1)->keys_valid = 1;
                skb_flow_dissect_flow_keys(skb1, &temp, 0);
                make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
        }

        if (!choke_skb_cb(skb2)->keys_valid) {
                choke_skb_cb(skb2)->keys_valid = 1;
                skb_flow_dissect_flow_keys(skb2, &temp, 0);
                make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
        }

        return !memcmp(&choke_skb_cb(skb1)->keys,
                       &choke_skb_cb(skb2)->keys,
                       sizeof(choke_skb_cb(skb1)->keys));
}
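
/*
 * Select a packet at random from the queue. The table may contain holes
 * from earlier removals, so retry a few times; if no packet is found,
 * fall back to the packet at the head.
 */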
static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
                                         unsigned int *pidx)
{
        struct sk_buff *skb;
        int retrys = 3;

        do {
                *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
                skb = q->tab[*pidx];
                if (skb)
                        return skb;
        } while (--retrys > 0);

        return q->tab[*pidx = q->head];
}
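
/*
 * Compare a new packet against a randomly chosen packet already in the
 * queue. Returns true if the flows match and sets *pidx to the chosen slot.
 */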
static bool choke_match_random(const struct choke_sched_data *q,
                               struct sk_buff *nskb,
                               unsigned int *pidx)
{
        struct sk_buff *oskb;

        if (q->head == q->tail)
                return false;

        oskb = choke_peek_random(q, pidx);
        return choke_match_flow(oskb, nskb);
}

static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                         struct sk_buff **to_free)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        const struct red_parms *p = &q->parms;

        choke_skb_cb(skb)->keys_valid = 0;
        /* Compute the average queue size (see RED) */
        q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
        if (red_is_idling(&q->vars))
                red_end_of_idle_period(&q->vars);

        /* Is the queue small? */
        if (q->vars.qavg <= p->qth_min)
                q->vars.qcount = -1;
        else {
                unsigned int idx;

                /* Draw a packet at random from the queue and compare flows */
                if (choke_match_random(q, skb, &idx)) {
                        q->stats.matched++;
                        choke_drop_by_idx(sch, idx, to_free);
                        goto congestion_drop;
                }

                /* Queue is large enough for active probability drop/mark mode */
                if (q->vars.qavg > p->qth_max) {
                        q->vars.qcount = -1;

                        qdisc_qstats_overlimit(sch);
                        if (use_harddrop(q) || !use_ecn(q) ||
                            !INET_ECN_set_ce(skb)) {
                                q->stats.forced_drop++;
                                goto congestion_drop;
                        }

                        q->stats.forced_mark++;
                } else if (++q->vars.qcount) {
                        if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
                                q->vars.qcount = 0;
                                q->vars.qR = red_random(p);

                                qdisc_qstats_overlimit(sch);
                                if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
                                        q->stats.prob_drop++;
                                        goto congestion_drop;
                                }

                                q->stats.prob_mark++;
                        }
                } else
                        q->vars.qR = red_random(p);
        }

        /* Admit the new packet if there is room */
        if (sch->q.qlen < q->limit) {
                q->tab[q->tail] = skb;
                q->tail = (q->tail + 1) & q->tab_mask;
                ++sch->q.qlen;
                qdisc_qstats_backlog_inc(sch, skb);
                return NET_XMIT_SUCCESS;
        }

        q->stats.pdrop++;
        return qdisc_drop(skb, sch, to_free);

congestion_drop:
        qdisc_drop(skb, sch, to_free);
        return NET_XMIT_CN;
}

static struct sk_buff *choke_dequeue(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;

        if (q->head == q->tail) {
                if (!red_is_idling(&q->vars))
                        red_start_of_idle_period(&q->vars);
                return NULL;
        }

        skb = q->tab[q->head];
        q->tab[q->head] = NULL;
        choke_zap_head_holes(q);
        --sch->q.qlen;
        qdisc_qstats_backlog_dec(sch, skb);
        qdisc_bstats_update(sch, skb);

        return skb;
}

static void choke_reset(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        while (q->head != q->tail) {
                struct sk_buff *skb = q->tab[q->head];

                q->head = (q->head + 1) & q->tab_mask;
                if (!skb)
                        continue;
                rtnl_qdisc_drop(skb, sch);
        }

        sch->q.qlen = 0;
        sch->qstats.backlog = 0;
        if (q->tab)
                memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
        q->head = q->tail = 0;
        red_restart(&q->vars);
}

static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
        [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) },
        [TCA_CHOKE_STAB]  = { .len = RED_STAB_SIZE },
        [TCA_CHOKE_MAX_P] = { .type = NLA_U32 },
};

static void choke_free(void *addr)
{
        kvfree(addr);
}
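
/*
 * Apply new parameters from netlink. When the configured limit changes, a
 * new table (rounded up to a power of two) is allocated and queued packets
 * are migrated into it; packets that no longer fit are dropped.
 */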
static int choke_change(struct Qdisc *sch, struct nlattr *opt,
                        struct netlink_ext_ack *extack)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_CHOKE_MAX + 1];
        const struct tc_red_qopt *ctl;
        int err;
        struct sk_buff **old = NULL;
        unsigned int mask;
        u32 max_P;
        u8 *stab;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt,
                                          choke_policy, NULL);
        if (err < 0)
                return err;

        if (tb[TCA_CHOKE_PARMS] == NULL ||
            tb[TCA_CHOKE_STAB] == NULL)
                return -EINVAL;

        max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;

        ctl = nla_data(tb[TCA_CHOKE_PARMS]);
        stab = nla_data(tb[TCA_CHOKE_STAB]);
        if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
                              ctl->Scell_log, stab))
                return -EINVAL;

        if (ctl->limit > CHOKE_MAX_QUEUE)
                return -EINVAL;

        mask = roundup_pow_of_two(ctl->limit + 1) - 1;
        if (mask != q->tab_mask) {
                struct sk_buff **ntab;

                ntab = kvcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
                if (!ntab)
                        return -ENOMEM;

                sch_tree_lock(sch);
                old = q->tab;
                if (old) {
                        unsigned int oqlen = sch->q.qlen, tail = 0;
                        unsigned int dropped = 0;

                        /* Move packets that still fit into the new table,
                         * drop the rest.
                         */
                        while (q->head != q->tail) {
                                struct sk_buff *skb = q->tab[q->head];

                                q->head = (q->head + 1) & q->tab_mask;
                                if (!skb)
                                        continue;
                                if (tail < mask) {
                                        ntab[tail++] = skb;
                                        continue;
                                }
                                dropped += qdisc_pkt_len(skb);
                                qdisc_qstats_backlog_dec(sch, skb);
                                --sch->q.qlen;
                                rtnl_qdisc_drop(skb, sch);
                        }
                        qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
                        q->head = 0;
                        q->tail = tail;
                }

                q->tab_mask = mask;
                q->tab = ntab;
        } else
                sch_tree_lock(sch);

        q->flags = ctl->flags;
        q->limit = ctl->limit;

        red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
                      stab,
                      max_P);
        red_set_vars(&q->vars);

        if (q->head == q->tail)
                red_end_of_idle_period(&q->vars);

        sch_tree_unlock(sch);
        choke_free(old);
        return 0;
}

static int choke_init(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
{
        return choke_change(sch, opt, extack);
}

static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts = NULL;
        struct tc_red_qopt opt = {
                .limit     = q->limit,
                .flags     = q->flags,
                .qth_min   = q->parms.qth_min >> q->parms.Wlog,
                .qth_max   = q->parms.qth_max >> q->parms.Wlog,
                .Wlog      = q->parms.Wlog,
                .Plog      = q->parms.Plog,
                .Scell_log = q->parms.Scell_log,
        };

        opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;

        if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
            nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
                goto nla_put_failure;
        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -EMSGSIZE;
}

static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct tc_choke_xstats st = {
                .early   = q->stats.prob_drop + q->stats.forced_drop,
                .marked  = q->stats.prob_mark + q->stats.forced_mark,
                .pdrop   = q->stats.pdrop,
                .other   = q->stats.other,
                .matched = q->stats.matched,
        };

        return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void choke_destroy(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        choke_free(q->tab);
}

static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        return (q->head != q->tail) ? q->tab[q->head] : NULL;
}

static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
        .id         = "choke",
        .priv_size  = sizeof(struct choke_sched_data),

        .enqueue    = choke_enqueue,
        .dequeue    = choke_dequeue,
        .peek       = choke_peek_head,
        .init       = choke_init,
        .destroy    = choke_destroy,
        .reset      = choke_reset,
        .change     = choke_change,
        .dump       = choke_dump,
        .dump_stats = choke_dump_stats,
        .owner      = THIS_MODULE,
};

static int __init choke_module_init(void)
{
        return register_qdisc(&choke_qdisc_ops);
}

static void __exit choke_module_exit(void)
{
        unregister_qdisc(&choke_qdisc_ops);
}

module_init(choke_module_init)
module_exit(choke_module_exit)

MODULE_LICENSE("GPL");