// SPDX-License-Identifier: GPL-2.0-only
/*
 * net/sched/sch_choke.c    CHOKE scheduler
 *
 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <net/flow_dissector.h>

/*
   CHOKe stateless AQM for fair bandwidth allocation
   =================================================

   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
   unresponsive flows) is a variant of RED that penalizes misbehaving flows
   but maintains no flow state. The difference from RED is an additional step
   during the enqueuing process. If the average queue size is over the
   low threshold (qmin), a packet is chosen at random from the queue.
   If both the new and the chosen packet belong to the same flow, both
   are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
   needs to access packets in the queue randomly. It has a minimal class
   interface to allow overriding the builtin flow classifier with
   filters.

   Sources:
   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
   Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
   IEEE INFOCOM, 2000.

   A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
   Characteristics", IEEE/ACM Transactions on Networking, 2004.

 */
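
/*
 * Illustrative user-space configuration via tc(8); this is a sketch and the
 * parameter values below are arbitrary examples, not recommendations:
 *
 *   tc qdisc add dev eth0 root choke limit 1000 bandwidth 10mbit ecn
 *
 * tc derives the RED-style thresholds and scaling factors (qth_min, qth_max,
 * Wlog, Plog, Scell_log) carried in TCA_CHOKE_PARMS from the limit,
 * bandwidth and average-packet-size values given on the command line.
 */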

/* Upper bound on size of sk_buff table (packets) */
#define CHOKE_MAX_QUEUE (128*1024 - 1)

struct choke_sched_data {
/* Parameters */
    u32              limit;
    unsigned char    flags;

    struct red_parms parms;

/* Variables */
    struct red_vars  vars;
    struct {
        u32 prob_drop;      /* Early probability drops */
        u32 prob_mark;      /* Early probability marks */
        u32 forced_drop;    /* Forced drops, qavg > max_thresh */
        u32 forced_mark;    /* Forced marks, qavg > max_thresh */
        u32 pdrop;          /* Drops due to queue limits */
        u32 other;          /* Drops due to drop() calls */
        u32 matched;        /* Drops due to flow match */
    } stats;

    unsigned int     head;
    unsigned int     tail;

    unsigned int     tab_mask; /* size - 1 */

    struct sk_buff **tab;
};

/* number of elements in queue including holes */
static unsigned int choke_len(const struct choke_sched_data *q)
{
    return (q->tail - q->head) & q->tab_mask;
}

/* Is ECN parameter configured */
static int use_ecn(const struct choke_sched_data *q)
{
    return q->flags & TC_RED_ECN;
}

/* Should packets over max just be dropped (versus marked) */
static int use_harddrop(const struct choke_sched_data *q)
{
    return q->flags & TC_RED_HARDDROP;
}

/* Move head pointer forward to skip over holes */
static void choke_zap_head_holes(struct choke_sched_data *q)
{
    do {
        q->head = (q->head + 1) & q->tab_mask;
        if (q->head == q->tail)
            break;
    } while (q->tab[q->head] == NULL);
}

/* Move tail pointer backwards to reuse holes */
static void choke_zap_tail_holes(struct choke_sched_data *q)
{
    do {
        q->tail = (q->tail - 1) & q->tab_mask;
        if (q->head == q->tail)
            break;
    } while (q->tab[q->tail] == NULL);
}

/* Drop packet from queue array by creating a "hole" */
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
                  struct sk_buff **to_free)
{
    struct choke_sched_data *q = qdisc_priv(sch);
    struct sk_buff *skb = q->tab[idx];

    q->tab[idx] = NULL;

    if (idx == q->head)
        choke_zap_head_holes(q);
    if (idx == q->tail)
        choke_zap_tail_holes(q);

    qdisc_qstats_backlog_dec(sch, skb);
    qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
    qdisc_drop(skb, sch, to_free);
    --sch->q.qlen;
}

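/*
 * Per-skb state kept in the qdisc control block: a digest of the dissected
 * flow keys, computed lazily and cached so a packet is dissected at most
 * once while it sits in the queue.
 */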
struct choke_skb_cb {
    u8  keys_valid;
    struct flow_keys_digest keys;
};

static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
{
    qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
    return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}

/*
 * Compare the flows of two packets.
 * Returns true only if source and destination addresses and ports match;
 * false for special cases.
 */
static bool choke_match_flow(struct sk_buff *skb1,
                 struct sk_buff *skb2)
{
    struct flow_keys temp;

    if (skb1->protocol != skb2->protocol)
        return false;

    if (!choke_skb_cb(skb1)->keys_valid) {
        choke_skb_cb(skb1)->keys_valid = 1;
        skb_flow_dissect_flow_keys(skb1, &temp, 0);
        make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
    }

    if (!choke_skb_cb(skb2)->keys_valid) {
        choke_skb_cb(skb2)->keys_valid = 1;
        skb_flow_dissect_flow_keys(skb2, &temp, 0);
        make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
    }

    return !memcmp(&choke_skb_cb(skb1)->keys,
               &choke_skb_cb(skb2)->keys,
               sizeof(choke_skb_cb(skb1)->keys));
}

/*
 * Select a packet at random from the queue.
 * HACK: since the queue can have holes from previous deletions, retry a few
 *   times to find a random skb; if that fails, give up and return the head.
 * Will return NULL if the queue is empty (q->head == q->tail).
 */
static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
                     unsigned int *pidx)
{
    struct sk_buff *skb;
    int retrys = 3;

    do {
        *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
        skb = q->tab[*pidx];
        if (skb)
            return skb;
    } while (--retrys > 0);

    return q->tab[*pidx = q->head];
}

/*
 * Compare the new packet with a packet drawn at random from the queue.
 * Returns true on a flow match; *pidx is set to the sampled packet's index.
 */
static bool choke_match_random(const struct choke_sched_data *q,
                   struct sk_buff *nskb,
                   unsigned int *pidx)
{
    struct sk_buff *oskb;

    if (q->head == q->tail)
        return false;

    oskb = choke_peek_random(q, pidx);
    return choke_match_flow(oskb, nskb);
}

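/*
 * Enqueue: update the RED average queue length and, once it exceeds
 * qth_min, compare the arriving packet against one drawn at random from
 * the queue; on a flow match both packets are dropped (the CHOKe step),
 * otherwise fall back to normal RED probabilistic marking/dropping.
 */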
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
             struct sk_buff **to_free)
{
    struct choke_sched_data *q = qdisc_priv(sch);
    const struct red_parms *p = &q->parms;

    choke_skb_cb(skb)->keys_valid = 0;
    /* Compute average queue usage (see RED) */
    q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
    if (red_is_idling(&q->vars))
        red_end_of_idle_period(&q->vars);

    /* Is queue small? */
    if (q->vars.qavg <= p->qth_min)
        q->vars.qcount = -1;
    else {
        unsigned int idx;

        /* Draw a packet at random from queue and compare flow */
        if (choke_match_random(q, skb, &idx)) {
            q->stats.matched++;
            choke_drop_by_idx(sch, idx, to_free);
            goto congestion_drop;
        }

        /* Queue is large, always mark/drop */
        if (q->vars.qavg > p->qth_max) {
            q->vars.qcount = -1;

            qdisc_qstats_overlimit(sch);
            if (use_harddrop(q) || !use_ecn(q) ||
                !INET_ECN_set_ce(skb)) {
                q->stats.forced_drop++;
                goto congestion_drop;
            }

            q->stats.forced_mark++;
        } else if (++q->vars.qcount) {
            if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
                q->vars.qcount = 0;
                q->vars.qR = red_random(p);

                qdisc_qstats_overlimit(sch);
                if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
                    q->stats.prob_drop++;
                    goto congestion_drop;
                }

                q->stats.prob_mark++;
            }
        } else
            q->vars.qR = red_random(p);
    }

    /* Admit new packet */
    if (sch->q.qlen < q->limit) {
        q->tab[q->tail] = skb;
        q->tail = (q->tail + 1) & q->tab_mask;
        ++sch->q.qlen;
        qdisc_qstats_backlog_inc(sch, skb);
        return NET_XMIT_SUCCESS;
    }

    q->stats.pdrop++;
    return qdisc_drop(skb, sch, to_free);

congestion_drop:
    qdisc_drop(skb, sch, to_free);
    return NET_XMIT_CN;
}

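/*
 * Dequeue from the head of the circular buffer, skipping any holes left
 * behind by random CHOKe drops. Starts the RED idle period when the queue
 * runs empty.
 */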
static struct sk_buff *choke_dequeue(struct Qdisc *sch)
{
    struct choke_sched_data *q = qdisc_priv(sch);
    struct sk_buff *skb;

    if (q->head == q->tail) {
        if (!red_is_idling(&q->vars))
            red_start_of_idle_period(&q->vars);
        return NULL;
    }

    skb = q->tab[q->head];
    q->tab[q->head] = NULL;
    choke_zap_head_holes(q);
    --sch->q.qlen;
    qdisc_qstats_backlog_dec(sch, skb);
    qdisc_bstats_update(sch, skb);

    return skb;
}

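/* Drop every queued packet, clear the table and restart the RED state. */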
static void choke_reset(struct Qdisc *sch)
{
    struct choke_sched_data *q = qdisc_priv(sch);

    while (q->head != q->tail) {
        struct sk_buff *skb = q->tab[q->head];

        q->head = (q->head + 1) & q->tab_mask;
        if (!skb)
            continue;
        rtnl_qdisc_drop(skb, sch);
    }

    sch->q.qlen = 0;
    sch->qstats.backlog = 0;
    if (q->tab)
        memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
    q->head = q->tail = 0;
    red_restart(&q->vars);
}

static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
    [TCA_CHOKE_PARMS]   = { .len = sizeof(struct tc_red_qopt) },
    [TCA_CHOKE_STAB]    = { .len = RED_STAB_SIZE },
    [TCA_CHOKE_MAX_P]   = { .type = NLA_U32 },
};

static void choke_free(void *addr)
{
    kvfree(addr);
}

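/*
 * Apply a new configuration: validate the RED/CHOKe netlink attributes,
 * size the packet table to the next power of two above the limit (so the
 * index mask works), migrate any queued packets into the new table
 * (dropping the overflow), and install the new RED parameters.
 */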
static int choke_change(struct Qdisc *sch, struct nlattr *opt,
            struct netlink_ext_ack *extack)
{
    struct choke_sched_data *q = qdisc_priv(sch);
    struct nlattr *tb[TCA_CHOKE_MAX + 1];
    const struct tc_red_qopt *ctl;
    int err;
    struct sk_buff **old = NULL;
    unsigned int mask;
    u32 max_P;
    u8 *stab;

    if (opt == NULL)
        return -EINVAL;

    err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt,
                      choke_policy, NULL);
    if (err < 0)
        return err;

    if (tb[TCA_CHOKE_PARMS] == NULL ||
        tb[TCA_CHOKE_STAB] == NULL)
        return -EINVAL;

    max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;

    ctl = nla_data(tb[TCA_CHOKE_PARMS]);
    stab = nla_data(tb[TCA_CHOKE_STAB]);
    if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab))
        return -EINVAL;

    if (ctl->limit > CHOKE_MAX_QUEUE)
        return -EINVAL;

    mask = roundup_pow_of_two(ctl->limit + 1) - 1;
    if (mask != q->tab_mask) {
        struct sk_buff **ntab;

        ntab = kvcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
        if (!ntab)
            return -ENOMEM;

        sch_tree_lock(sch);
        old = q->tab;
        if (old) {
            unsigned int oqlen = sch->q.qlen, tail = 0;
            unsigned int dropped = 0;

            while (q->head != q->tail) {
                struct sk_buff *skb = q->tab[q->head];

                q->head = (q->head + 1) & q->tab_mask;
                if (!skb)
                    continue;
                if (tail < mask) {
                    ntab[tail++] = skb;
                    continue;
                }
                dropped += qdisc_pkt_len(skb);
                qdisc_qstats_backlog_dec(sch, skb);
                --sch->q.qlen;
                rtnl_qdisc_drop(skb, sch);
            }
            qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
            q->head = 0;
            q->tail = tail;
        }

        q->tab_mask = mask;
        q->tab = ntab;
    } else
        sch_tree_lock(sch);

    q->flags = ctl->flags;
    q->limit = ctl->limit;

    red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
              ctl->Plog, ctl->Scell_log,
              stab,
              max_P);
    red_set_vars(&q->vars);

    if (q->head == q->tail)
        red_end_of_idle_period(&q->vars);

    sch_tree_unlock(sch);
    choke_free(old);
    return 0;
}

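/* Initial configuration goes through the same path as later changes. */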
static int choke_init(struct Qdisc *sch, struct nlattr *opt,
              struct netlink_ext_ack *extack)
{
    return choke_change(sch, opt, extack);
}

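/*
 * Dump the current configuration back to user space. qth_min/qth_max are
 * stored scaled up by Wlog (see red_set_parms()), so shift them back down
 * before reporting.
 */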
static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
    struct choke_sched_data *q = qdisc_priv(sch);
    struct nlattr *opts = NULL;
    struct tc_red_qopt opt = {
        .limit      = q->limit,
        .flags      = q->flags,
        .qth_min    = q->parms.qth_min >> q->parms.Wlog,
        .qth_max    = q->parms.qth_max >> q->parms.Wlog,
        .Wlog       = q->parms.Wlog,
        .Plog       = q->parms.Plog,
        .Scell_log  = q->parms.Scell_log,
    };

    opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
    if (opts == NULL)
        goto nla_put_failure;

    if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
        nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
        goto nla_put_failure;
    return nla_nest_end(skb, opts);

nla_put_failure:
    nla_nest_cancel(skb, opts);
    return -EMSGSIZE;
}

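/* Export the drop/mark counters as the qdisc's extended statistics. */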
static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
    struct choke_sched_data *q = qdisc_priv(sch);
    struct tc_choke_xstats st = {
        .early   = q->stats.prob_drop + q->stats.forced_drop,
        .marked  = q->stats.prob_mark + q->stats.forced_mark,
        .pdrop   = q->stats.pdrop,
        .other   = q->stats.other,
        .matched = q->stats.matched,
    };

    return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void choke_destroy(struct Qdisc *sch)
{
    struct choke_sched_data *q = qdisc_priv(sch);

    choke_free(q->tab);
}

static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
    struct choke_sched_data *q = qdisc_priv(sch);

    return (q->head != q->tail) ? q->tab[q->head] : NULL;
}

static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
    .id         =   "choke",
    .priv_size  =   sizeof(struct choke_sched_data),

    .enqueue    =   choke_enqueue,
    .dequeue    =   choke_dequeue,
    .peek       =   choke_peek_head,
    .init       =   choke_init,
    .destroy    =   choke_destroy,
    .reset      =   choke_reset,
    .change     =   choke_change,
    .dump       =   choke_dump,
    .dump_stats =   choke_dump_stats,
    .owner      =   THIS_MODULE,
};

static int __init choke_module_init(void)
{
    return register_qdisc(&choke_qdisc_ops);
}

static void __exit choke_module_exit(void)
{
    unregister_qdisc(&choke_qdisc_ops);
}

module_init(choke_module_init)
module_exit(choke_module_exit)

MODULE_LICENSE("GPL");