// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c  Random Early Detection queue.
 *
 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914: computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*  Parameters, settable by user:
    -----------------------------

    limit       - bytes (must be > qth_max + burst)

    Hard limit on queue length; it should be chosen > qth_max
    to allow packet bursts. This parameter does not
    affect the algorithm's behaviour and can be chosen
    arbitrarily high (well, less than RAM size).
    Really, this limit will never be reached
    if RED works correctly.
 */
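
/* Illustrative example only (values are not recommendations): an
 * operator who picks qth_min = 30000 bytes and qth_max = 90000 bytes
 * might set limit to roughly 4 * qth_max, leaving headroom for bursts
 * while still bounding worst-case queue memory.
 */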

struct red_sched_data {
    u32         limit;      /* HARD maximal queue length */

    unsigned char       flags;
    /* Non-flags in tc_red_qopt.flags. */
    unsigned char       userbits;

    struct timer_list   adapt_timer;
    struct Qdisc        *sch;
    struct red_parms    parms;
    struct red_vars     vars;
    struct red_stats    stats;
    struct Qdisc        *qdisc;
    struct tcf_qevent   qe_early_drop;
    struct tcf_qevent   qe_mark;
};

#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)

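/* Helpers testing the per-qdisc flags. TC_RED_SUPPORTED_FLAGS above is
 * the set accepted from userspace: the historic flags carried in
 * tc_red_qopt.flags plus the newer TC_RED_NODROP bit, which is only
 * settable through the TCA_RED_FLAGS bitfield attribute.
 */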
static inline int red_use_ecn(struct red_sched_data *q)
{
    return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
    return q->flags & TC_RED_HARDDROP;
}

static int red_use_nodrop(struct red_sched_data *q)
{
    return q->flags & TC_RED_NODROP;
}

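/* Enqueue path: update the EWMA of the queue length, then let
 * red_action() decide between queuing the packet unchanged
 * (RED_DONT_MARK), probabilistically marking or dropping it
 * (RED_PROB_MARK), or doing so unconditionally once the average
 * reaches qth_max (RED_HARD_MARK). With ECN enabled, marks become CE
 * marks; the qe_mark/qe_early_drop qevents may consume the skb, in
 * which case NET_XMIT_CN is returned.
 */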
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
               struct sk_buff **to_free)
{
    struct red_sched_data *q = qdisc_priv(sch);
    struct Qdisc *child = q->qdisc;
    int ret;

    q->vars.qavg = red_calc_qavg(&q->parms,
                     &q->vars,
                     child->qstats.backlog);

    if (red_is_idling(&q->vars))
        red_end_of_idle_period(&q->vars);

    switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
    case RED_DONT_MARK:
        break;

    case RED_PROB_MARK:
        qdisc_qstats_overlimit(sch);
        if (!red_use_ecn(q)) {
            q->stats.prob_drop++;
            goto congestion_drop;
        }

        if (INET_ECN_set_ce(skb)) {
            q->stats.prob_mark++;
            skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
            if (!skb)
                return NET_XMIT_CN | ret;
        } else if (!red_use_nodrop(q)) {
            q->stats.prob_drop++;
            goto congestion_drop;
        }

        /* Non-ECT packet in ECN nodrop mode: queue it. */
        break;

    case RED_HARD_MARK:
        qdisc_qstats_overlimit(sch);
        if (red_use_harddrop(q) || !red_use_ecn(q)) {
            q->stats.forced_drop++;
            goto congestion_drop;
        }

        if (INET_ECN_set_ce(skb)) {
            q->stats.forced_mark++;
            skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
            if (!skb)
                return NET_XMIT_CN | ret;
        } else if (!red_use_nodrop(q)) {
            q->stats.forced_drop++;
            goto congestion_drop;
        }

        /* Non-ECT packet in ECN nodrop mode: queue it. */
        break;
    }

    ret = qdisc_enqueue(skb, child, to_free);
    if (likely(ret == NET_XMIT_SUCCESS)) {
        qdisc_qstats_backlog_inc(sch, skb);
        sch->q.qlen++;
    } else if (net_xmit_drop_count(ret)) {
        q->stats.pdrop++;
        qdisc_qstats_drop(sch);
    }
    return ret;

congestion_drop:
    skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
    if (!skb)
        return NET_XMIT_CN | ret;

    qdisc_drop(skb, sch, to_free);
    return NET_XMIT_CN;
}

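/* Dequeue from the child qdisc; when the queue drains empty, record
 * the start of an idle period so that red_calc_qavg() can later
 * account for the time the link sat idle when decaying the average.
 */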
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
    struct sk_buff *skb;
    struct red_sched_data *q = qdisc_priv(sch);
    struct Qdisc *child = q->qdisc;

    skb = child->dequeue(child);
    if (skb) {
        qdisc_bstats_update(sch, skb);
        qdisc_qstats_backlog_dec(sch, skb);
        sch->q.qlen--;
    } else {
        if (!red_is_idling(&q->vars))
            red_start_of_idle_period(&q->vars);
    }
    return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
    struct red_sched_data *q = qdisc_priv(sch);
    struct Qdisc *child = q->qdisc;

    return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
    struct red_sched_data *q = qdisc_priv(sch);

    qdisc_reset(q->qdisc);
    sch->qstats.backlog = 0;
    sch->q.qlen = 0;
    red_restart(&q->vars);
}

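/* Mirror the software configuration into hardware (TC_RED_REPLACE) or
 * tear it down (TC_RED_DESTROY), if the device can offload RED.
 * qth_min/qth_max are kept internally scaled up by Wlog, so they are
 * shifted back down to byte values here.
 */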
static int red_offload(struct Qdisc *sch, bool enable)
{
    struct red_sched_data *q = qdisc_priv(sch);
    struct net_device *dev = qdisc_dev(sch);
    struct tc_red_qopt_offload opt = {
        .handle = sch->handle,
        .parent = sch->parent,
    };

    if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
        return -EOPNOTSUPP;

    if (enable) {
        opt.command = TC_RED_REPLACE;
        opt.set.min = q->parms.qth_min >> q->parms.Wlog;
        opt.set.max = q->parms.qth_max >> q->parms.Wlog;
        opt.set.probability = q->parms.max_P;
        opt.set.limit = q->limit;
        opt.set.is_ecn = red_use_ecn(q);
        opt.set.is_harddrop = red_use_harddrop(q);
        opt.set.is_nodrop = red_use_nodrop(q);
        opt.set.qstats = &sch->qstats;
    } else {
        opt.command = TC_RED_DESTROY;
    }

    return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
    struct red_sched_data *q = qdisc_priv(sch);

    tcf_qevent_destroy(&q->qe_mark, sch);
    tcf_qevent_destroy(&q->qe_early_drop, sch);
    del_timer_sync(&q->adapt_timer);
    red_offload(sch, false);
    qdisc_put(q->qdisc);
}

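/* Netlink policy. strict_start_type makes attributes from
 * TCA_RED_FLAGS onward strictly validated, while the older attributes
 * keep their historic lenient parsing.
 */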
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
    [TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
    [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
    [TCA_RED_STAB]  = { .len = RED_STAB_SIZE },
    [TCA_RED_MAX_P] = { .type = NLA_U32 },
    [TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
    [TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
    [TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};

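/* Common parameter-change path shared by init and change. Takes the
 * qdisc tree lock itself, and allocates a new bfifo child sized to
 * ctl->limit when the limit is non-zero, swapping it in for the old
 * child under the lock.
 */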
static int __red_change(struct Qdisc *sch, struct nlattr **tb,
            struct netlink_ext_ack *extack)
{
    struct Qdisc *old_child = NULL, *child = NULL;
    struct red_sched_data *q = qdisc_priv(sch);
    struct nla_bitfield32 flags_bf;
    struct tc_red_qopt *ctl;
    unsigned char userbits;
    unsigned char flags;
    int err;
    u32 max_P;
    u8 *stab;

    if (tb[TCA_RED_PARMS] == NULL ||
        tb[TCA_RED_STAB] == NULL)
        return -EINVAL;

    max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

    ctl = nla_data(tb[TCA_RED_PARMS]);
    stab = nla_data(tb[TCA_RED_STAB]);
    if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
                  ctl->Scell_log, stab))
        return -EINVAL;

    err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
                tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
                &flags_bf, &userbits, extack);
    if (err)
        return err;

    if (ctl->limit > 0) {
        child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
                     extack);
        if (IS_ERR(child))
            return PTR_ERR(child);

        /* child is fifo, no need to check for noop_qdisc */
        qdisc_hash_add(child, true);
    }

    sch_tree_lock(sch);

    flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
    err = red_validate_flags(flags, extack);
    if (err)
        goto unlock_out;

    q->flags = flags;
    q->userbits = userbits;
    q->limit = ctl->limit;
    if (child) {
        qdisc_tree_flush_backlog(q->qdisc);
        old_child = q->qdisc;
        q->qdisc = child;
    }

    red_set_parms(&q->parms,
              ctl->qth_min, ctl->qth_max, ctl->Wlog,
              ctl->Plog, ctl->Scell_log,
              stab,
              max_P);
    red_set_vars(&q->vars);

    del_timer(&q->adapt_timer);
    if (ctl->flags & TC_RED_ADAPTATIVE)
        mod_timer(&q->adapt_timer, jiffies + HZ/2);

    if (!q->qdisc->q.qlen)
        red_start_of_idle_period(&q->vars);

    sch_tree_unlock(sch);

    red_offload(sch, true);

    if (old_child)
        qdisc_put(old_child);
    return 0;

unlock_out:
    sch_tree_unlock(sch);
    if (child)
        qdisc_put(child);
    return err;
}

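/* Adaptive RED: when TC_RED_ADAPTATIVE is set, this timer fires every
 * HZ/2 jiffies and lets red_adaptative_algo() re-tune max_P under the
 * root qdisc lock. (The "adaptative" spelling is historic UAPI.)
 */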
static inline void red_adaptative_timer(struct timer_list *t)
{
    struct red_sched_data *q = from_timer(q, t, adapt_timer);
    struct Qdisc *sch = q->sch;
    spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

    spin_lock(root_lock);
    red_adaptative_algo(&q->parms, &q->vars);
    mod_timer(&q->adapt_timer, jiffies + HZ/2);
    spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
            struct netlink_ext_ack *extack)
{
    struct red_sched_data *q = qdisc_priv(sch);
    struct nlattr *tb[TCA_RED_MAX + 1];
    int err;

    q->qdisc = &noop_qdisc;
    q->sch = sch;
    timer_setup(&q->adapt_timer, red_adaptative_timer, 0);

    if (!opt)
        return -EINVAL;

    err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
                      extack);
    if (err < 0)
        return err;

    err = __red_change(sch, tb, extack);
    if (err)
        return err;

    err = tcf_qevent_init(&q->qe_early_drop, sch,
                  FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
                  tb[TCA_RED_EARLY_DROP_BLOCK], extack);
    if (err)
        return err;

    return tcf_qevent_init(&q->qe_mark, sch,
                   FLOW_BLOCK_BINDER_TYPE_RED_MARK,
                   tb[TCA_RED_MARK_BLOCK], extack);
}

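/* Runtime reconfiguration. Unlike init, the qevent blocks cannot be
 * replaced here, so first validate that any block indices passed in
 * match the ones bound at init time, then apply the common change
 * path.
 */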
static int red_change(struct Qdisc *sch, struct nlattr *opt,
              struct netlink_ext_ack *extack)
{
    struct red_sched_data *q = qdisc_priv(sch);
    struct nlattr *tb[TCA_RED_MAX + 1];
    int err;

    if (!opt)
        return -EINVAL;

    err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
                      extack);
    if (err < 0)
        return err;

    err = tcf_qevent_validate_change(&q->qe_early_drop,
                     tb[TCA_RED_EARLY_DROP_BLOCK], extack);
    if (err)
        return err;

    err = tcf_qevent_validate_change(&q->qe_mark,
                     tb[TCA_RED_MARK_BLOCK], extack);
    if (err)
        return err;

    return __red_change(sch, tb, extack);
}

static int red_dump_offload_stats(struct Qdisc *sch)
{
    struct tc_red_qopt_offload hw_stats = {
        .command = TC_RED_STATS,
        .handle = sch->handle,
        .parent = sch->parent,
        {
            .stats.bstats = &sch->bstats,
            .stats.qstats = &sch->qstats,
        },
    };

    return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

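/* Dump the configuration back to userspace as a tc_red_qopt, undoing
 * the Wlog scaling of the thresholds just as red_offload() does.
 */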
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
    struct red_sched_data *q = qdisc_priv(sch);
    struct nlattr *opts = NULL;
    struct tc_red_qopt opt = {
        .limit      = q->limit,
        .flags      = (q->flags & TC_RED_HISTORIC_FLAGS) |
                  q->userbits,
        .qth_min    = q->parms.qth_min >> q->parms.Wlog,
        .qth_max    = q->parms.qth_max >> q->parms.Wlog,
        .Wlog       = q->parms.Wlog,
        .Plog       = q->parms.Plog,
        .Scell_log  = q->parms.Scell_log,
    };
    int err;

    err = red_dump_offload_stats(sch);
    if (err)
        goto nla_put_failure;

    opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
    if (opts == NULL)
        goto nla_put_failure;
    if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
        nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
        nla_put_bitfield32(skb, TCA_RED_FLAGS,
                   q->flags, TC_RED_SUPPORTED_FLAGS) ||
        tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
        tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
        goto nla_put_failure;
    return nla_nest_end(skb, opts);

nla_put_failure:
    nla_nest_cancel(skb, opts);
    return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
    struct red_sched_data *q = qdisc_priv(sch);
    struct net_device *dev = qdisc_dev(sch);
    struct tc_red_xstats st = {0};

    if (sch->flags & TCQ_F_OFFLOADED) {
        struct tc_red_qopt_offload hw_stats_request = {
            .command = TC_RED_XSTATS,
            .handle = sch->handle,
            .parent = sch->parent,
            {
                .xstats = &q->stats,
            },
        };
        dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
                          &hw_stats_request);
    }
    st.early = q->stats.prob_drop + q->stats.forced_drop;
    st.pdrop = q->stats.pdrop;
    st.other = q->stats.other;
    st.marked = q->stats.prob_mark + q->stats.forced_mark;

    return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
              struct sk_buff *skb, struct tcmsg *tcm)
{
    struct red_sched_data *q = qdisc_priv(sch);

    tcm->tcm_handle |= TC_H_MIN(1);
    tcm->tcm_info = q->qdisc->handle;
    return 0;
}

static void red_graft_offload(struct Qdisc *sch,
                  struct Qdisc *new, struct Qdisc *old,
                  struct netlink_ext_ack *extack)
{
    struct tc_red_qopt_offload graft_offload = {
        .handle     = sch->handle,
        .parent     = sch->parent,
        .child_handle   = new->handle,
        .command    = TC_RED_GRAFT,
    };

    qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
                   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
             struct Qdisc **old, struct netlink_ext_ack *extack)
{
    struct red_sched_data *q = qdisc_priv(sch);

    if (new == NULL)
        new = &noop_qdisc;

    *old = qdisc_replace(sch, new, &q->qdisc);

    red_graft_offload(sch, new, *old, extack);
    return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
    struct red_sched_data *q = qdisc_priv(sch);
    return q->qdisc;
}

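/* RED exposes exactly one class, binding the child qdisc; any classid
 * resolves to it, matching the TC_H_MIN(1) handle reported by
 * red_dump_class().
 */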
static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
    return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
    if (!walker->stop) {
        if (walker->count >= walker->skip)
            if (walker->fn(sch, 1, walker) < 0) {
                walker->stop = 1;
                return;
            }
        walker->count++;
    }
}

static const struct Qdisc_class_ops red_class_ops = {
    .graft      =   red_graft,
    .leaf       =   red_leaf,
    .find       =   red_find,
    .walk       =   red_walk,
    .dump       =   red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
    .id     =   "red",
    .priv_size  =   sizeof(struct red_sched_data),
    .cl_ops     =   &red_class_ops,
    .enqueue    =   red_enqueue,
    .dequeue    =   red_dequeue,
    .peek       =   red_peek,
    .init       =   red_init,
    .reset      =   red_reset,
    .destroy    =   red_destroy,
    .change     =   red_change,
    .dump       =   red_dump,
    .dump_stats =   red_dump_stats,
    .owner      =   THIS_MODULE,
};

static int __init red_module_init(void)
{
    return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
    unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
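
/* Illustrative userspace configuration (assumes iproute2's tc; the
 * values are examples, not recommendations):
 *
 *   tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *      avpkt 1000 burst 55 ecn adaptative bandwidth 10Mbit
 *
 * tc derives Wlog, Plog, Scell_log and the stab table from these
 * byte-valued parameters before sending TCA_RED_PARMS/TCA_RED_STAB to
 * this module.
 */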