// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_gred.c Generic Random Early Detection queue.
 *
 * Authors:    J Hadi Salim (hadi@cyberus.ca) 1998-2002
 *
 *             991129: - Bug fix with grio mode
 *                     - a better single AvgQ mode with Grio (WRED)
 *                     - A finer grained VQ dequeue based on suggestion
 *                       from Ren Liu
 *                     - More error checks
 *
 *  For all the glorious comments look at include/net/red.h
 */

#include <linux/slab.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
#include <net/red.h>

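/*
 * Each packet is steered to one of up to MAX_DPs "virtual queues" (VQs,
 * also called DPs) selected by the low bits of skb->tc_index.  MAX_DPs
 * comes from the pkt_sched UAPI header; GRED_VQ_MASK below is simply
 * MAX_DPs - 1 and therefore relies on MAX_DPs being a power of two.
 */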
#define GRED_DEF_PRIO (MAX_DPs / 2)
#define GRED_VQ_MASK (MAX_DPs - 1)

#define GRED_VQ_RED_FLAGS   (TC_RED_ECN | TC_RED_HARDDROP)

struct gred_sched_data;
struct gred_sched;

struct gred_sched_data {
    u32     limit;      /* HARD maximal queue length    */
    u32     DP;     /* the drop parameters */
    u32     red_flags;  /* virtualQ version of red_flags */
    u64     bytesin;    /* bytes seen on virtualQ so far*/
    u32     packetsin;  /* packets seen on virtualQ so far*/
    u32     backlog;    /* bytes on the virtualQ */
    u8      prio;       /* the prio of this vq */

    struct red_parms parms;
    struct red_vars  vars;
    struct red_stats stats;
};

enum {
    GRED_WRED_MODE = 1,
    GRED_RIO_MODE,
};

struct gred_sched {
    struct gred_sched_data *tab[MAX_DPs];
    unsigned long   flags;
    u32     red_flags;
    u32         DPs;
    u32         def;
    struct red_vars wred_set;
    struct tc_gred_qopt_offload *opt;
};

static inline int gred_wred_mode(struct gred_sched *table)
{
    return test_bit(GRED_WRED_MODE, &table->flags);
}

static inline void gred_enable_wred_mode(struct gred_sched *table)
{
    __set_bit(GRED_WRED_MODE, &table->flags);
}

static inline void gred_disable_wred_mode(struct gred_sched *table)
{
    __clear_bit(GRED_WRED_MODE, &table->flags);
}

static inline int gred_rio_mode(struct gred_sched *table)
{
    return test_bit(GRED_RIO_MODE, &table->flags);
}

static inline void gred_enable_rio_mode(struct gred_sched *table)
{
    __set_bit(GRED_RIO_MODE, &table->flags);
}

static inline void gred_disable_rio_mode(struct gred_sched *table)
{
    __clear_bit(GRED_RIO_MODE, &table->flags);
}

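/*
 * WRED mode applies when at least two virtual queues share the same
 * priority: return 1 if such a pair exists so callers can switch the
 * qdisc into WRED (shared average queue) mode.
 */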
static inline int gred_wred_mode_check(struct Qdisc *sch)
{
    struct gred_sched *table = qdisc_priv(sch);
    int i;

    /* Really ugly O(n^2), but it shouldn't be needed too frequently. */
    for (i = 0; i < table->DPs; i++) {
        struct gred_sched_data *q = table->tab[i];
        int n;

        if (q == NULL)
            continue;

        for (n = i + 1; n < table->DPs; n++)
            if (table->tab[n] && table->tab[n]->prio == q->prio)
                return 1;
    }

    return 0;
}

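/*
 * In WRED mode all virtual queues are measured against the shared
 * qdisc backlog; otherwise each VQ is measured against its own.
 */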
static inline unsigned int gred_backlog(struct gred_sched *table,
                    struct gred_sched_data *q,
                    struct Qdisc *sch)
{
    if (gred_wred_mode(table))
        return sch->qstats.backlog;
    else
        return q->backlog;
}

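/*
 * The low bits of skb->tc_index select the virtual queue.  For example,
 * with the usual MAX_DPs of 16 a tc_index of 0x1234 maps to DP 0x4.
 */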
static inline u16 tc_index_to_dp(struct sk_buff *skb)
{
    return skb->tc_index & GRED_VQ_MASK;
}

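/*
 * In WRED mode the RED average queue state is shared by all VQs: these
 * helpers copy it between the per-qdisc wred_set and the VQ currently
 * being processed.
 */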
static inline void gred_load_wred_set(const struct gred_sched *table,
                      struct gred_sched_data *q)
{
    q->vars.qavg = table->wred_set.qavg;
    q->vars.qidlestart = table->wred_set.qidlestart;
}

static inline void gred_store_wred_set(struct gred_sched *table,
                       struct gred_sched_data *q)
{
    table->wred_set.qavg = q->vars.qavg;
    table->wred_set.qidlestart = q->vars.qidlestart;
}

static int gred_use_ecn(struct gred_sched_data *q)
{
    return q->red_flags & TC_RED_ECN;
}

static int gred_use_harddrop(struct gred_sched_data *q)
{
    return q->red_flags & TC_RED_HARDDROP;
}

static bool gred_per_vq_red_flags_used(struct gred_sched *table)
{
    unsigned int i;

    /* Local per-vq flags couldn't have been set unless the global ones are 0 */
    if (table->red_flags)
        return false;
    for (i = 0; i < MAX_DPs; i++)
        if (table->tab[i] && table->tab[i]->red_flags)
            return true;
    return false;
}

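/*
 * Enqueue path: map the skb to a VQ (falling back to the default DP, or
 * passing the packet straight through if no default is configured),
 * update the RED average queue length - shared in WRED mode, with the
 * averages of lower-priority VQs added in RIO mode - and then queue,
 * ECN-mark or drop the packet according to red_action().
 */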
static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch,
            struct sk_buff **to_free)
{
    struct gred_sched_data *q = NULL;
    struct gred_sched *t = qdisc_priv(sch);
    unsigned long qavg = 0;
    u16 dp = tc_index_to_dp(skb);

    if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
        dp = t->def;

        q = t->tab[dp];
        if (!q) {
            /* Pass through packets not assigned to a DP
             * if no default DP has been configured. This
             * allows for DP flows to be left untouched.
             */
            if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <=
                    sch->limit))
                return qdisc_enqueue_tail(skb, sch);
            else
                goto drop;
        }

        /* fix tc_index? --could be controversial but needed for
           requeueing */
        skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp;
    }

    /* sum up all the qaves of prios < ours to get the new qave */
    if (!gred_wred_mode(t) && gred_rio_mode(t)) {
        int i;

        for (i = 0; i < t->DPs; i++) {
            if (t->tab[i] && t->tab[i]->prio < q->prio &&
                !red_is_idling(&t->tab[i]->vars))
                qavg += t->tab[i]->vars.qavg;
        }

    }

    q->packetsin++;
    q->bytesin += qdisc_pkt_len(skb);

    if (gred_wred_mode(t))
        gred_load_wred_set(t, q);

    q->vars.qavg = red_calc_qavg(&q->parms,
                     &q->vars,
                     gred_backlog(t, q, sch));

    if (red_is_idling(&q->vars))
        red_end_of_idle_period(&q->vars);

    if (gred_wred_mode(t))
        gred_store_wred_set(t, q);

    switch (red_action(&q->parms, &q->vars, q->vars.qavg + qavg)) {
    case RED_DONT_MARK:
        break;

    case RED_PROB_MARK:
        qdisc_qstats_overlimit(sch);
        if (!gred_use_ecn(q) || !INET_ECN_set_ce(skb)) {
            q->stats.prob_drop++;
            goto congestion_drop;
        }

        q->stats.prob_mark++;
        break;

    case RED_HARD_MARK:
        qdisc_qstats_overlimit(sch);
        if (gred_use_harddrop(q) || !gred_use_ecn(q) ||
            !INET_ECN_set_ce(skb)) {
            q->stats.forced_drop++;
            goto congestion_drop;
        }
        q->stats.forced_mark++;
        break;
    }

    if (gred_backlog(t, q, sch) + qdisc_pkt_len(skb) <= q->limit) {
        q->backlog += qdisc_pkt_len(skb);
        return qdisc_enqueue_tail(skb, sch);
    }

    q->stats.pdrop++;
drop:
    return qdisc_drop(skb, sch, to_free);

congestion_drop:
    qdisc_drop(skb, sch, to_free);
    return NET_XMIT_CN;
}

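/*
 * Dequeue from the shared FIFO, charge the packet back to the owning
 * VQ's backlog and, when that backlog drains, start the RED idle
 * period (on the shared state in WRED mode, per VQ otherwise).
 */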
static struct sk_buff *gred_dequeue(struct Qdisc *sch)
{
    struct sk_buff *skb;
    struct gred_sched *t = qdisc_priv(sch);

    skb = qdisc_dequeue_head(sch);

    if (skb) {
        struct gred_sched_data *q;
        u16 dp = tc_index_to_dp(skb);

        if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
            net_warn_ratelimited("GRED: Unable to relocate VQ 0x%x after dequeue, screwing up backlog\n",
                         tc_index_to_dp(skb));
        } else {
            q->backlog -= qdisc_pkt_len(skb);

            if (gred_wred_mode(t)) {
                if (!sch->qstats.backlog)
                    red_start_of_idle_period(&t->wred_set);
            } else {
                if (!q->backlog)
                    red_start_of_idle_period(&q->vars);
            }
        }

        return skb;
    }

    return NULL;
}

static void gred_reset(struct Qdisc *sch)
{
    int i;
    struct gred_sched *t = qdisc_priv(sch);

    qdisc_reset_queue(sch);

    for (i = 0; i < t->DPs; i++) {
        struct gred_sched_data *q = t->tab[i];

        if (!q)
            continue;

        red_restart(&q->vars);
        q->backlog = 0;
    }
}

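/*
 * Push the current state to the driver via ndo_setup_tc(), if the
 * device can offload GRED.  For TC_GRED_REPLACE the full per-VQ
 * configuration is copied into table->opt before the call.
 */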
static void gred_offload(struct Qdisc *sch, enum tc_gred_command command)
{
    struct gred_sched *table = qdisc_priv(sch);
    struct net_device *dev = qdisc_dev(sch);
    struct tc_gred_qopt_offload *opt = table->opt;

    if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
        return;

    memset(opt, 0, sizeof(*opt));
    opt->command = command;
    opt->handle = sch->handle;
    opt->parent = sch->parent;

    if (command == TC_GRED_REPLACE) {
        unsigned int i;

        opt->set.grio_on = gred_rio_mode(table);
        opt->set.wred_on = gred_wred_mode(table);
        opt->set.dp_cnt = table->DPs;
        opt->set.dp_def = table->def;

        for (i = 0; i < table->DPs; i++) {
            struct gred_sched_data *q = table->tab[i];

            if (!q)
                continue;
            opt->set.tab[i].present = true;
            opt->set.tab[i].limit = q->limit;
            opt->set.tab[i].prio = q->prio;
            opt->set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
            opt->set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
            opt->set.tab[i].is_ecn = gred_use_ecn(q);
            opt->set.tab[i].is_harddrop = gred_use_harddrop(q);
            opt->set.tab[i].probability = q->parms.max_P;
            opt->set.tab[i].backlog = &q->backlog;
        }
        opt->set.qstats = &sch->qstats;
    }

    dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, opt);
}

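/*
 * Ask the driver for its per-DP counters (TC_GRED_STATS) and fold the
 * returned deltas into the software statistics before dumping them.
 */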
static int gred_offload_dump_stats(struct Qdisc *sch)
{
    struct gred_sched *table = qdisc_priv(sch);
    struct tc_gred_qopt_offload *hw_stats;
    u64 bytes = 0, packets = 0;
    unsigned int i;
    int ret;

    hw_stats = kzalloc(sizeof(*hw_stats), GFP_KERNEL);
    if (!hw_stats)
        return -ENOMEM;

    hw_stats->command = TC_GRED_STATS;
    hw_stats->handle = sch->handle;
    hw_stats->parent = sch->parent;

    for (i = 0; i < MAX_DPs; i++) {
        gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]);
        if (table->tab[i])
            hw_stats->stats.xstats[i] = &table->tab[i]->stats;
    }

    ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats);
    /* Even if the driver returns a failure, adjust the stats - in case
     * offload ended but the driver still wants to adjust the values.
     */
    for (i = 0; i < MAX_DPs; i++) {
        if (!table->tab[i])
            continue;
        table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets);
        table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
        table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog;

        bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
        packets += u64_stats_read(&hw_stats->stats.bstats[i].packets);
        sch->qstats.qlen += hw_stats->stats.qstats[i].qlen;
        sch->qstats.backlog += hw_stats->stats.qstats[i].backlog;
        sch->qstats.drops += hw_stats->stats.qstats[i].drops;
        sch->qstats.requeues += hw_stats->stats.qstats[i].requeues;
        sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits;
    }
    _bstats_update(&sch->bstats, bytes, packets);

    kfree(hw_stats);
    return ret;
}

static inline void gred_destroy_vq(struct gred_sched_data *q)
{
    kfree(q);
}

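/*
 * Handle TCA_GRED_DPS: validate and install the virtual queue count,
 * the default DP and the per-Qdisc RED flags, recompute the RIO/WRED
 * mode, destroy VQs that now fall outside the table and push the new
 * configuration to the hardware.
 */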
static int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps,
                 struct netlink_ext_ack *extack)
{
    struct gred_sched *table = qdisc_priv(sch);
    struct tc_gred_sopt *sopt;
    bool red_flags_changed;
    int i;

    if (!dps)
        return -EINVAL;

    sopt = nla_data(dps);

    if (sopt->DPs > MAX_DPs) {
        NL_SET_ERR_MSG_MOD(extack, "number of virtual queues too high");
        return -EINVAL;
    }
    if (sopt->DPs == 0) {
        NL_SET_ERR_MSG_MOD(extack,
                   "number of virtual queues can't be 0");
        return -EINVAL;
    }
    if (sopt->def_DP >= sopt->DPs) {
        NL_SET_ERR_MSG_MOD(extack, "default virtual queue above virtual queue count");
        return -EINVAL;
    }
    if (sopt->flags && gred_per_vq_red_flags_used(table)) {
        NL_SET_ERR_MSG_MOD(extack, "can't set per-Qdisc RED flags when per-virtual queue flags are used");
        return -EINVAL;
    }

    sch_tree_lock(sch);
    table->DPs = sopt->DPs;
    table->def = sopt->def_DP;
    red_flags_changed = table->red_flags != sopt->flags;
    table->red_flags = sopt->flags;

    /*
     * Every entry point to GRED is synchronized with the above code
     * and the DP is checked against DPs, i.e. shadowed VQs can no
     * longer be found so we can unlock right here.
     */
    sch_tree_unlock(sch);

    if (sopt->grio) {
        gred_enable_rio_mode(table);
        gred_disable_wred_mode(table);
        if (gred_wred_mode_check(sch))
            gred_enable_wred_mode(table);
    } else {
        gred_disable_rio_mode(table);
        gred_disable_wred_mode(table);
    }

    if (red_flags_changed)
        for (i = 0; i < table->DPs; i++)
            if (table->tab[i])
                table->tab[i]->red_flags =
                    table->red_flags & GRED_VQ_RED_FLAGS;

    for (i = table->DPs; i < MAX_DPs; i++) {
        if (table->tab[i]) {
            pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n",
                i);
            gred_destroy_vq(table->tab[i]);
            table->tab[i] = NULL;
        }
    }

    gred_offload(sch, TC_GRED_REPLACE);
    return 0;
}

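/*
 * Configure a single virtual queue from a TCA_GRED_PARMS entry,
 * allocating it from *prealloc if it does not exist yet.  The per-VQ
 * limit is clamped to the qdisc-wide limit.
 */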
static inline int gred_change_vq(struct Qdisc *sch, int dp,
                 struct tc_gred_qopt *ctl, int prio,
                 u8 *stab, u32 max_P,
                 struct gred_sched_data **prealloc,
                 struct netlink_ext_ack *extack)
{
    struct gred_sched *table = qdisc_priv(sch);
    struct gred_sched_data *q = table->tab[dp];

    if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab)) {
        NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters");
        return -EINVAL;
    }

    if (!q) {
        table->tab[dp] = q = *prealloc;
        *prealloc = NULL;
        if (!q)
            return -ENOMEM;
        q->red_flags = table->red_flags & GRED_VQ_RED_FLAGS;
    }

    q->DP = dp;
    q->prio = prio;
    if (ctl->limit > sch->limit)
        q->limit = sch->limit;
    else
        q->limit = ctl->limit;

    if (q->backlog == 0)
        red_end_of_idle_period(&q->vars);

    red_set_parms(&q->parms,
              ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
              ctl->Scell_log, stab, max_P);
    red_set_vars(&q->vars);
    return 0;
}

static const struct nla_policy gred_vq_policy[TCA_GRED_VQ_MAX + 1] = {
    [TCA_GRED_VQ_DP]    = { .type = NLA_U32 },
    [TCA_GRED_VQ_FLAGS] = { .type = NLA_U32 },
};

static const struct nla_policy gred_vqe_policy[TCA_GRED_VQ_ENTRY_MAX + 1] = {
    [TCA_GRED_VQ_ENTRY] = { .type = NLA_NESTED },
};

static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
    [TCA_GRED_PARMS]    = { .len = sizeof(struct tc_gred_qopt) },
    [TCA_GRED_STAB]     = { .len = 256 },
    [TCA_GRED_DPS]      = { .len = sizeof(struct tc_gred_sopt) },
    [TCA_GRED_MAX_P]    = { .type = NLA_U32 },
    [TCA_GRED_LIMIT]    = { .type = NLA_U32 },
    [TCA_GRED_VQ_LIST]  = { .type = NLA_NESTED },
};

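/*
 * Per-VQ attributes arrive as a nested TCA_GRED_VQ_LIST.  The *_apply()
 * helpers below run only after gred_vqs_validate() has checked the
 * list, so the referenced DPs are known to exist.
 */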
static void gred_vq_apply(struct gred_sched *table, const struct nlattr *entry)
{
    struct nlattr *tb[TCA_GRED_VQ_MAX + 1];
    u32 dp;

    nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry,
                    gred_vq_policy, NULL);

    dp = nla_get_u32(tb[TCA_GRED_VQ_DP]);

    if (tb[TCA_GRED_VQ_FLAGS])
        table->tab[dp]->red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]);
}

static void gred_vqs_apply(struct gred_sched *table, struct nlattr *vqs)
{
    const struct nlattr *attr;
    int rem;

    nla_for_each_nested(attr, vqs, rem) {
        switch (nla_type(attr)) {
        case TCA_GRED_VQ_ENTRY:
            gred_vq_apply(table, attr);
            break;
        }
    }
}

static int gred_vq_validate(struct gred_sched *table, u32 cdp,
                const struct nlattr *entry,
                struct netlink_ext_ack *extack)
{
    struct nlattr *tb[TCA_GRED_VQ_MAX + 1];
    int err;
    u32 dp;

    err = nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry,
                      gred_vq_policy, extack);
    if (err < 0)
        return err;

    if (!tb[TCA_GRED_VQ_DP]) {
        NL_SET_ERR_MSG_MOD(extack, "Virtual queue with no index specified");
        return -EINVAL;
    }
    dp = nla_get_u32(tb[TCA_GRED_VQ_DP]);
    if (dp >= table->DPs) {
        NL_SET_ERR_MSG_MOD(extack, "Virtual queue with index out of bounds");
        return -EINVAL;
    }
    if (dp != cdp && !table->tab[dp]) {
        NL_SET_ERR_MSG_MOD(extack, "Virtual queue not yet instantiated");
        return -EINVAL;
    }

    if (tb[TCA_GRED_VQ_FLAGS]) {
        u32 red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]);

        if (table->red_flags && table->red_flags != red_flags) {
            NL_SET_ERR_MSG_MOD(extack, "can't change per-virtual queue RED flags when per-Qdisc flags are used");
            return -EINVAL;
        }
        if (red_flags & ~GRED_VQ_RED_FLAGS) {
            NL_SET_ERR_MSG_MOD(extack,
                       "invalid RED flags specified");
            return -EINVAL;
        }
    }

    return 0;
}

static int gred_vqs_validate(struct gred_sched *table, u32 cdp,
                 struct nlattr *vqs, struct netlink_ext_ack *extack)
{
    const struct nlattr *attr;
    int rem, err;

    err = nla_validate_nested_deprecated(vqs, TCA_GRED_VQ_ENTRY_MAX,
                         gred_vqe_policy, extack);
    if (err < 0)
        return err;

    nla_for_each_nested(attr, vqs, rem) {
        switch (nla_type(attr)) {
        case TCA_GRED_VQ_ENTRY:
            err = gred_vq_validate(table, cdp, attr, extack);
            if (err)
                return err;
            break;
        default:
            NL_SET_ERR_MSG_MOD(extack, "GRED_VQ_LIST can contain only entry attributes");
            return -EINVAL;
        }
    }

    if (rem > 0) {
        NL_SET_ERR_MSG_MOD(extack, "Trailing data after parsing virtual queue list");
        return -EINVAL;
    }

    return 0;
}

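/*
 * Two flavours of configuration arrive here: a table-level change
 * (TCA_GRED_DPS, optionally with TCA_GRED_LIMIT) and a per-VQ change
 * (TCA_GRED_PARMS + TCA_GRED_STAB, optionally with TCA_GRED_VQ_LIST).
 * Supplying TCA_GRED_LIMIT together with the per-VQ attributes is
 * rejected.
 */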
static int gred_change(struct Qdisc *sch, struct nlattr *opt,
               struct netlink_ext_ack *extack)
{
    struct gred_sched *table = qdisc_priv(sch);
    struct tc_gred_qopt *ctl;
    struct nlattr *tb[TCA_GRED_MAX + 1];
    int err, prio = GRED_DEF_PRIO;
    u8 *stab;
    u32 max_P;
    struct gred_sched_data *prealloc;

    if (opt == NULL)
        return -EINVAL;

    err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy,
                      extack);
    if (err < 0)
        return err;

    if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) {
        if (tb[TCA_GRED_LIMIT] != NULL)
            sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
        return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
    }

    if (tb[TCA_GRED_PARMS] == NULL ||
        tb[TCA_GRED_STAB] == NULL ||
        tb[TCA_GRED_LIMIT] != NULL) {
        NL_SET_ERR_MSG_MOD(extack, "can't configure Qdisc and virtual queue at the same time");
        return -EINVAL;
    }

    max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;

    ctl = nla_data(tb[TCA_GRED_PARMS]);
    stab = nla_data(tb[TCA_GRED_STAB]);

    if (ctl->DP >= table->DPs) {
        NL_SET_ERR_MSG_MOD(extack, "virtual queue index above virtual queue count");
        return -EINVAL;
    }

    if (tb[TCA_GRED_VQ_LIST]) {
        err = gred_vqs_validate(table, ctl->DP, tb[TCA_GRED_VQ_LIST],
                    extack);
        if (err)
            return err;
    }

    if (gred_rio_mode(table)) {
        if (ctl->prio == 0) {
            int def_prio = GRED_DEF_PRIO;

            if (table->tab[table->def])
                def_prio = table->tab[table->def]->prio;

            printk(KERN_DEBUG "GRED: DP %u does not have a prio "
                   "setting default to %d\n", ctl->DP, def_prio);

            prio = def_prio;
        } else
            prio = ctl->prio;
    }

    prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
    sch_tree_lock(sch);

    err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc,
                 extack);
    if (err < 0)
        goto err_unlock_free;

    if (tb[TCA_GRED_VQ_LIST])
        gred_vqs_apply(table, tb[TCA_GRED_VQ_LIST]);

    if (gred_rio_mode(table)) {
        gred_disable_wred_mode(table);
        if (gred_wred_mode_check(sch))
            gred_enable_wred_mode(table);
    }

    sch_tree_unlock(sch);
    kfree(prealloc);

    gred_offload(sch, TC_GRED_REPLACE);
    return 0;

err_unlock_free:
    sch_tree_unlock(sch);
    kfree(prealloc);
    return err;
}

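/*
 * Initial setup: only table-level attributes are accepted here;
 * individual virtual queues are configured later via gred_change().
 * Without TCA_GRED_LIMIT the qdisc limit defaults to tx_queue_len
 * MTU-sized packets worth of bytes.
 */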
static int gred_init(struct Qdisc *sch, struct nlattr *opt,
             struct netlink_ext_ack *extack)
{
    struct gred_sched *table = qdisc_priv(sch);
    struct nlattr *tb[TCA_GRED_MAX + 1];
    int err;

    if (!opt)
        return -EINVAL;

    err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy,
                      extack);
    if (err < 0)
        return err;

    if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) {
        NL_SET_ERR_MSG_MOD(extack,
                   "virtual queue configuration can't be specified at initialization time");
        return -EINVAL;
    }

    if (tb[TCA_GRED_LIMIT])
        sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
    else
        sch->limit = qdisc_dev(sch)->tx_queue_len
                     * psched_mtu(qdisc_dev(sch));

    if (qdisc_dev(sch)->netdev_ops->ndo_setup_tc) {
        table->opt = kzalloc(sizeof(*table->opt), GFP_KERNEL);
        if (!table->opt)
            return -ENOMEM;
    }

    return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
}

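/*
 * Dump the configuration and statistics: first the legacy all-in-one
 * TCA_GRED_PARMS array (one tc_gred_qopt per possible DP), then the
 * structured TCA_GRED_VQ_LIST with one nested entry per existing VQ.
 */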
static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
{
    struct gred_sched *table = qdisc_priv(sch);
    struct nlattr *parms, *vqs, *opts = NULL;
    int i;
    u32 max_p[MAX_DPs];
    struct tc_gred_sopt sopt = {
        .DPs    = table->DPs,
        .def_DP = table->def,
        .grio   = gred_rio_mode(table),
        .flags  = table->red_flags,
    };

    if (gred_offload_dump_stats(sch))
        goto nla_put_failure;

    opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
    if (opts == NULL)
        goto nla_put_failure;
    if (nla_put(skb, TCA_GRED_DPS, sizeof(sopt), &sopt))
        goto nla_put_failure;

    for (i = 0; i < MAX_DPs; i++) {
        struct gred_sched_data *q = table->tab[i];

        max_p[i] = q ? q->parms.max_P : 0;
    }
    if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p))
        goto nla_put_failure;

    if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit))
        goto nla_put_failure;

    /* Old style all-in-one dump of VQs */
    parms = nla_nest_start_noflag(skb, TCA_GRED_PARMS);
    if (parms == NULL)
        goto nla_put_failure;

    for (i = 0; i < MAX_DPs; i++) {
        struct gred_sched_data *q = table->tab[i];
        struct tc_gred_qopt opt;
        unsigned long qavg;

        memset(&opt, 0, sizeof(opt));

        if (!q) {
            /* hack -- fix at some point with a proper message.
             * This is how we indicate to tc that there is no VQ
             * at this DP.
             */
            opt.DP = MAX_DPs + i;
            goto append_opt;
        }

        opt.limit   = q->limit;
        opt.DP      = q->DP;
        opt.backlog = gred_backlog(table, q, sch);
        opt.prio    = q->prio;
        opt.qth_min = q->parms.qth_min >> q->parms.Wlog;
        opt.qth_max = q->parms.qth_max >> q->parms.Wlog;
        opt.Wlog    = q->parms.Wlog;
        opt.Plog    = q->parms.Plog;
        opt.Scell_log   = q->parms.Scell_log;
        opt.other   = q->stats.other;
        opt.early   = q->stats.prob_drop;
        opt.forced  = q->stats.forced_drop;
        opt.pdrop   = q->stats.pdrop;
        opt.packets = q->packetsin;
        opt.bytesin = q->bytesin;

        if (gred_wred_mode(table))
            gred_load_wred_set(table, q);

        qavg = red_calc_qavg(&q->parms, &q->vars,
                     q->vars.qavg >> q->parms.Wlog);
        opt.qave = qavg >> q->parms.Wlog;

append_opt:
        if (nla_append(skb, sizeof(opt), &opt) < 0)
            goto nla_put_failure;
    }

    nla_nest_end(skb, parms);

    /* Dump the VQs again, in a more structured way */
    vqs = nla_nest_start_noflag(skb, TCA_GRED_VQ_LIST);
    if (!vqs)
        goto nla_put_failure;

    for (i = 0; i < MAX_DPs; i++) {
        struct gred_sched_data *q = table->tab[i];
        struct nlattr *vq;

        if (!q)
            continue;

        vq = nla_nest_start_noflag(skb, TCA_GRED_VQ_ENTRY);
        if (!vq)
            goto nla_put_failure;

        if (nla_put_u32(skb, TCA_GRED_VQ_DP, q->DP))
            goto nla_put_failure;

        if (nla_put_u32(skb, TCA_GRED_VQ_FLAGS, q->red_flags))
            goto nla_put_failure;

        /* Stats */
        if (nla_put_u64_64bit(skb, TCA_GRED_VQ_STAT_BYTES, q->bytesin,
                      TCA_GRED_VQ_PAD))
            goto nla_put_failure;
        if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PACKETS, q->packetsin))
            goto nla_put_failure;
        if (nla_put_u32(skb, TCA_GRED_VQ_STAT_BACKLOG,
                gred_backlog(table, q, sch)))
            goto nla_put_failure;
        if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_DROP,
                q->stats.prob_drop))
            goto nla_put_failure;
        if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_MARK,
                q->stats.prob_mark))
            goto nla_put_failure;
        if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_DROP,
                q->stats.forced_drop))
            goto nla_put_failure;
        if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_MARK,
                q->stats.forced_mark))
            goto nla_put_failure;
        if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PDROP, q->stats.pdrop))
            goto nla_put_failure;
        if (nla_put_u32(skb, TCA_GRED_VQ_STAT_OTHER, q->stats.other))
            goto nla_put_failure;

        nla_nest_end(skb, vq);
    }
    nla_nest_end(skb, vqs);

    return nla_nest_end(skb, opts);

nla_put_failure:
    nla_nest_cancel(skb, opts);
    return -EMSGSIZE;
}

static void gred_destroy(struct Qdisc *sch)
{
    struct gred_sched *table = qdisc_priv(sch);
    int i;

    for (i = 0; i < table->DPs; i++) {
        if (table->tab[i])
            gred_destroy_vq(table->tab[i]);
    }
    gred_offload(sch, TC_GRED_DESTROY);
    kfree(table->opt);
}

static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
    .id     =   "gred",
    .priv_size  =   sizeof(struct gred_sched),
    .enqueue    =   gred_enqueue,
    .dequeue    =   gred_dequeue,
    .peek       =   qdisc_peek_head,
    .init       =   gred_init,
    .reset      =   gred_reset,
    .destroy    =   gred_destroy,
    .change     =   gred_change,
    .dump       =   gred_dump,
    .owner      =   THIS_MODULE,
};

static int __init gred_module_init(void)
{
    return register_qdisc(&gred_qdisc_ops);
}

static void __exit gred_module_exit(void)
{
    unregister_qdisc(&gred_qdisc_ops);
}

module_init(gred_module_init)
module_exit(gred_module_exit)

MODULE_LICENSE("GPL");