net/sched/sch_codel.c

0001 /*
0002  * Codel - The Controlled-Delay Active Queue Management algorithm
0003  *
0004  *  Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
0005  *  Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
0006  *
0007  *  Implemented on linux by :
0008  *  Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
0009  *  Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
0010  *
0011  * Redistribution and use in source and binary forms, with or without
0012  * modification, are permitted provided that the following conditions
0013  * are met:
0014  * 1. Redistributions of source code must retain the above copyright
0015  *    notice, this list of conditions, and the following disclaimer,
0016  *    without modification.
0017  * 2. Redistributions in binary form must reproduce the above copyright
0018  *    notice, this list of conditions and the following disclaimer in the
0019  *    documentation and/or other materials provided with the distribution.
0020  * 3. The names of the authors may not be used to endorse or promote products
0021  *    derived from this software without specific prior written permission.
0022  *
0023  * Alternatively, provided that this notice is retained in full, this
0024  * software may be distributed under the terms of the GNU General
0025  * Public License ("GPL") version 2, in which case the provisions of the
0026  * GPL apply INSTEAD OF those given above.
0027  *
0028  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
0029  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
0030  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0031  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
0032  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0033  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0034  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
0035  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
0036  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0037  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
0038  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
0039  * DAMAGE.
0040  *
0041  */
0042
0043 #include <linux/module.h>
0044 #include <linux/slab.h>
0045 #include <linux/types.h>
0046 #include <linux/kernel.h>
0047 #include <linux/errno.h>
0048 #include <linux/skbuff.h>
0049 #include <linux/prefetch.h>
0050 #include <net/pkt_sched.h>
0051 #include <net/codel.h>
0052 #include <net/codel_impl.h>
0053 #include <net/codel_qdisc.h>
0054
0055
0056 #define DEFAULT_CODEL_LIMIT 1000
0057
0058 struct codel_sched_data {
0059     struct codel_params params;
0060     struct codel_vars   vars;
0061     struct codel_stats  stats;
0062     u32         drop_overlimit;
0063 };
0064
0065 /* This is the specific function called from codel_dequeue()
0066  * to dequeue a packet from queue. Note: backlog is handled in
0067  * codel, we dont need to reduce it here.
0068  */
0069 static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx)
0070 {
0071     struct Qdisc *sch = ctx;
0072     struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
0073
0074     if (skb) {
0075         sch->qstats.backlog -= qdisc_pkt_len(skb);
0076         prefetch(&skb->end); /* we'll need skb_shinfo() */
0077     }
0078     return skb;
0079 }
0080
0081 static void drop_func(struct sk_buff *skb, void *ctx)
0082 {
0083     struct Qdisc *sch = ctx;
0084
0085     kfree_skb(skb);
0086     qdisc_qstats_drop(sch);
0087 }
0088
0089 static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
0090 {
0091     struct codel_sched_data *q = qdisc_priv(sch);
0092     struct sk_buff *skb;
0093
0094     skb = codel_dequeue(sch, &sch->qstats.backlog, &q->params, &q->vars,
0095                 &q->stats, qdisc_pkt_len, codel_get_enqueue_time,
0096                 drop_func, dequeue_func);
0097
0098     /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
0099      * or HTB crashes. Defer it for next round.
0100      */
0101     if (q->stats.drop_count && sch->q.qlen) {
0102         qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len);
0103         q->stats.drop_count = 0;
0104         q->stats.drop_len = 0;
0105     }
0106     if (skb)
0107         qdisc_bstats_update(sch, skb);
0108     return skb;
0109 }
0110
0111 static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
0112                    struct sk_buff **to_free)
0113 {
0114     struct codel_sched_data *q;
0115
0116     if (likely(qdisc_qlen(sch) < sch->limit)) {
0117         codel_set_enqueue_time(skb);
0118         return qdisc_enqueue_tail(skb, sch);
0119     }
0120     q = qdisc_priv(sch);
0121     q->drop_overlimit++;
0122     return qdisc_drop(skb, sch, to_free);
0123 }
0124
0125 static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
0126     [TCA_CODEL_TARGET]  = { .type = NLA_U32 },
0127     [TCA_CODEL_LIMIT]   = { .type = NLA_U32 },
0128     [TCA_CODEL_INTERVAL]    = { .type = NLA_U32 },
0129     [TCA_CODEL_ECN]     = { .type = NLA_U32 },
0130     [TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 },
0131 };
0132
0133 static int codel_change(struct Qdisc *sch, struct nlattr *opt,
0134             struct netlink_ext_ack *extack)
0135 {
0136     struct codel_sched_data *q = qdisc_priv(sch);
0137     struct nlattr *tb[TCA_CODEL_MAX + 1];
0138     unsigned int qlen, dropped = 0;
0139     int err;
0140
0141     if (!opt)
0142         return -EINVAL;
0143
0144     err = nla_parse_nested_deprecated(tb, TCA_CODEL_MAX, opt,
0145                       codel_policy, NULL);
0146     if (err < 0)
0147         return err;
0148
0149     sch_tree_lock(sch);
0150
0151     if (tb[TCA_CODEL_TARGET]) {
0152         u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]);
0153
0154         q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
0155     }
0156
0157     if (tb[TCA_CODEL_CE_THRESHOLD]) {
0158         u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
0159
0160         q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
0161     }
0162
0163     if (tb[TCA_CODEL_INTERVAL]) {
0164         u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
0165
0166         q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT;
0167     }
0168
0169     if (tb[TCA_CODEL_LIMIT])
0170         sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
0171
0172     if (tb[TCA_CODEL_ECN])
0173         q->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]);
0174
0175     qlen = sch->q.qlen;
0176     while (sch->q.qlen > sch->limit) {
0177         struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
0178
0179         dropped += qdisc_pkt_len(skb);
0180         qdisc_qstats_backlog_dec(sch, skb);
0181         rtnl_qdisc_drop(skb, sch);
0182     }
0183     qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
0184
0185     sch_tree_unlock(sch);
0186     return 0;
0187 }
0188
0189 static int codel_init(struct Qdisc *sch, struct nlattr *opt,
0190               struct netlink_ext_ack *extack)
0191 {
0192     struct codel_sched_data *q = qdisc_priv(sch);
0193
0194     sch->limit = DEFAULT_CODEL_LIMIT;
0195
0196     codel_params_init(&q->params);
0197     codel_vars_init(&q->vars);
0198     codel_stats_init(&q->stats);
0199     q->params.mtu = psched_mtu(qdisc_dev(sch));
0200
0201     if (opt) {
0202         int err = codel_change(sch, opt, extack);
0203
0204         if (err)
0205             return err;
0206     }
0207
0208     if (sch->limit >= 1)
0209         sch->flags |= TCQ_F_CAN_BYPASS;
0210     else
0211         sch->flags &= ~TCQ_F_CAN_BYPASS;
0212
0213     return 0;
0214 }
0215
0216 static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
0217 {
0218     struct codel_sched_data *q = qdisc_priv(sch);
0219     struct nlattr *opts;
0220
0221     opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
0222     if (opts == NULL)
0223         goto nla_put_failure;
0224
0225     if (nla_put_u32(skb, TCA_CODEL_TARGET,
0226             codel_time_to_us(q->params.target)) ||
0227         nla_put_u32(skb, TCA_CODEL_LIMIT,
0228             sch->limit) ||
0229         nla_put_u32(skb, TCA_CODEL_INTERVAL,
0230             codel_time_to_us(q->params.interval)) ||
0231         nla_put_u32(skb, TCA_CODEL_ECN,
0232             q->params.ecn))
0233         goto nla_put_failure;
0234     if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD &&
0235         nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD,
0236             codel_time_to_us(q->params.ce_threshold)))
0237         goto nla_put_failure;
0238     return nla_nest_end(skb, opts);
0239
0240 nla_put_failure:
0241     nla_nest_cancel(skb, opts);
0242     return -1;
0243 }
0244
0245 static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
0246 {
0247     const struct codel_sched_data *q = qdisc_priv(sch);
0248     struct tc_codel_xstats st = {
0249         .maxpacket  = q->stats.maxpacket,
0250         .count      = q->vars.count,
0251         .lastcount  = q->vars.lastcount,
0252         .drop_overlimit = q->drop_overlimit,
0253         .ldelay     = codel_time_to_us(q->vars.ldelay),
0254         .dropping   = q->vars.dropping,
0255         .ecn_mark   = q->stats.ecn_mark,
0256         .ce_mark    = q->stats.ce_mark,
0257     };
0258
0259     if (q->vars.dropping) {
0260         codel_tdiff_t delta = q->vars.drop_next - codel_get_time();
0261
0262         if (delta >= 0)
0263             st.drop_next = codel_time_to_us(delta);
0264         else
0265             st.drop_next = -codel_time_to_us(-delta);
0266     }
0267
0268     return gnet_stats_copy_app(d, &st, sizeof(st));
0269 }
0270
0271 static void codel_reset(struct Qdisc *sch)
0272 {
0273     struct codel_sched_data *q = qdisc_priv(sch);
0274
0275     qdisc_reset_queue(sch);
0276     codel_vars_init(&q->vars);
0277 }
0278
0279 static struct Qdisc_ops codel_qdisc_ops __read_mostly = {
0280     .id     =   "codel",
0281     .priv_size  =   sizeof(struct codel_sched_data),
0282
0283     .enqueue    =   codel_qdisc_enqueue,
0284     .dequeue    =   codel_qdisc_dequeue,
0285     .peek       =   qdisc_peek_dequeued,
0286     .init       =   codel_init,
0287     .reset      =   codel_reset,
0288     .change     =   codel_change,
0289     .dump       =   codel_dump,
0290     .dump_stats =   codel_dump_stats,
0291     .owner      =   THIS_MODULE,
0292 };
0293
0294 static int __init codel_module_init(void)
0295 {
0296     return register_qdisc(&codel_qdisc_ops);
0297 }
0298
0299 static void __exit codel_module_exit(void)
0300 {
0301     unregister_qdisc(&codel_qdisc_ops);
0302 }
0303
0304 module_init(codel_module_init)
0305 module_exit(codel_module_exit)
0306
0307 MODULE_DESCRIPTION("Controlled Delay queue discipline");
0308 MODULE_AUTHOR("Dave Taht");
0309 MODULE_AUTHOR("Eric Dumazet");
0310 MODULE_LICENSE("Dual BSD/GPL");