0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
0004  *
0005  * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
0006  */
0007 
0008 #include <linux/module.h>
0009 #include <linux/slab.h>
0010 #include <linux/types.h>
0011 #include <linux/kernel.h>
0012 #include <linux/string.h>
0013 #include <linux/errno.h>
0014 #include <linux/skbuff.h>
0015 #include <net/netlink.h>
0016 #include <net/pkt_sched.h>
0017 #include <net/pkt_cls.h>
0018 
0019 
0020 /*  Class-Based Queueing (CBQ) algorithm.
0021     =======================================
0022 
0023     Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
0024          Management Models for Packet Networks",
0025          IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
0026 
0027          [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
0028 
0029          [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
0030          Parameters", 1996
0031 
0032          [4] Sally Floyd and Michael Speer, "Experimental Results
0033          for Class-Based Queueing", 1998, not published.
0034 
0035     -----------------------------------------------------------------------
0036 
0037     The algorithm skeleton was taken from the NS simulator's cbq.cc.
0038     Anyone checking this code against the LBL version should keep in
0039     mind that ONLY the skeleton was borrowed; the implementation is
0040     different. In particular:
0041 
0042     --- The WRR algorithm is different. Our version looks more
0043     reasonable (I hope) and works when quanta are allowed to be
0044     less than the MTU, which is always the case when real-time classes
0045     have small rates. Note that the statement in [3] is
0046     incomplete: the delay may actually be estimated even if the class's
0047     per-round allotment is less than the MTU. Namely, if the per-round
0048     allotment is W*r_i, and r_1+...+r_k = r < 1, then
0049 
0050     delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
0051 
0052     In the worst case we have the IntServ estimate with D = W*r+k*MTU
0053     and C = MTU*r. The proof (if correct at all) is trivial.
0054 
0055 
0056     --- It seems that cbq-2.0 is not very accurate. At least, I cannot
0057     interpret some places, which look like incorrect translations
0058     from NS. Anyone is welcome to find these differences
0059     and explain to me why I am wrong 8).
0060 
0061     --- Linux has no EOI event, so we cannot estimate the true class
0062     idle time. The workaround is to treat the next dequeue event
0063     as a sign that the previous packet has finished. This is wrong because
0064     of internal device queueing, but on a permanently loaded link it holds.
0065     Moreover, combined with the clock integrator, this scheme comes
0066     very close to an ideal solution.  */
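/* A worked instance of the bound above, using illustrative numbers that
 * are assumptions rather than values from the papers: take MTU = 1500
 * bytes, B = 125000 bytes/s (1 Mbit/s), k = 2 classes with shares
 * r_1 = 0.1 and r_2 = 0.3 (so r = 0.4), and a round size W = 3000 bytes,
 * reading [x] as rounding up. Class 1's per-round allotment is
 * W*r_1 = 300 bytes, well below the MTU, yet the bound still applies:
 *
 *   delay_1 <= ([1500/300]*3000*0.4 + 3000*0.4 + 2*1500)/125000
 *            = (6000 + 1200 + 3000)/125000 s ~= 82 ms
 */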
0067 
0068 struct cbq_sched_data;
0069 
0070 
0071 struct cbq_class {
0072     struct Qdisc_class_common common;
0073     struct cbq_class    *next_alive;    /* next class with backlog in this priority band */
0074 
0075 /* Parameters */
0076     unsigned char       priority;   /* class priority */
0077     unsigned char       priority2;  /* priority to be used after overlimit */
0078     unsigned char       ewma_log;   /* time constant for idle time calculation */
0079 
0080     u32         defmap;
0081 
0082     /* Link-sharing scheduler parameters */
0083     long            maxidle;    /* Class parameters: see below. */
0084     long            offtime;
0085     long            minidle;
0086     u32         avpkt;
0087     struct qdisc_rate_table *R_tab;
0088 
0089     /* General scheduler (WRR) parameters */
0090     long            allot;
0091     long            quantum;    /* Allotment per WRR round */
0092     long            weight;     /* Relative allotment: see below */
0093 
0094     struct Qdisc        *qdisc;     /* Ptr to CBQ discipline */
0095     struct cbq_class    *split;     /* Ptr to split node */
0096     struct cbq_class    *share;     /* Ptr to LS parent in the class tree */
0097     struct cbq_class    *tparent;   /* Ptr to tree parent in the class tree */
0098     struct cbq_class    *borrow;    /* NULL if class is bandwidth limited;
0099                            parent otherwise */
0100     struct cbq_class    *sibling;   /* Sibling chain */
0101     struct cbq_class    *children;  /* Pointer to children chain */
0102 
0103     struct Qdisc        *q;     /* Elementary queueing discipline */
0104 
0105 
0106 /* Variables */
0107     unsigned char       cpriority;  /* Effective priority */
0108     unsigned char       delayed;
0109     unsigned char       level;      /* level of the class in hierarchy:
0110                            0 for leaf classes, and maximal
0111                            level of children + 1 for nodes.
0112                          */
0113 
0114     psched_time_t       last;       /* Last end of service */
0115     psched_time_t       undertime;
0116     long            avgidle;
0117     long            deficit;    /* Saved deficit for WRR */
0118     psched_time_t       penalized;
0119     struct gnet_stats_basic_sync bstats;
0120     struct gnet_stats_queue qstats;
0121     struct net_rate_estimator __rcu *rate_est;
0122     struct tc_cbq_xstats    xstats;
0123 
0124     struct tcf_proto __rcu  *filter_list;
0125     struct tcf_block    *block;
0126 
0127     int         filters;
0128 
0129     struct cbq_class    *defaults[TC_PRIO_MAX + 1];
0130 };
0131 
0132 struct cbq_sched_data {
0133     struct Qdisc_class_hash clhash;         /* Hash table of all classes */
0134     int         nclasses[TC_CBQ_MAXPRIO + 1];
0135     unsigned int        quanta[TC_CBQ_MAXPRIO + 1];
0136 
0137     struct cbq_class    link;
0138 
0139     unsigned int        activemask;
0140     struct cbq_class    *active[TC_CBQ_MAXPRIO + 1];    /* List of all classes
0141                                    with backlog */
0142 
0143 #ifdef CONFIG_NET_CLS_ACT
0144     struct cbq_class    *rx_class;
0145 #endif
0146     struct cbq_class    *tx_class;
0147     struct cbq_class    *tx_borrowed;
0148     int         tx_len;
0149     psched_time_t       now;        /* Cached timestamp */
0150     unsigned int        pmask;
0151 
0152     struct qdisc_watchdog   watchdog;   /* Watchdog timer,
0153                            started when CBQ has
0154                            backlog, but cannot
0155                            transmit just now */
0156     psched_tdiff_t      wd_expires;
0157     int         toplevel;
0158     u32         hgenerator;
0159 };
0160 
0161 
0162 #define L2T(cl, len)    qdisc_l2t((cl)->R_tab, len)
0163 
0164 static inline struct cbq_class *
0165 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
0166 {
0167     struct Qdisc_class_common *clc;
0168 
0169     clc = qdisc_class_find(&q->clhash, classid);
0170     if (clc == NULL)
0171         return NULL;
0172     return container_of(clc, struct cbq_class, common);
0173 }
0174 
0175 #ifdef CONFIG_NET_CLS_ACT
0176 
0177 static struct cbq_class *
0178 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
0179 {
0180     struct cbq_class *cl;
0181 
0182     for (cl = this->tparent; cl; cl = cl->tparent) {
0183         struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
0184 
0185         if (new != NULL && new != this)
0186             return new;
0187     }
0188     return NULL;
0189 }
0190 
0191 #endif
0192 
0193 /* Classify a packet. The procedure is fairly involved, but
0194  * it allows us to combine link sharing and priority scheduling
0195  * transparently.
0196  *
0197  * Namely, you can put link-sharing rules (e.g. route based) at the root
0198  * of the CBQ tree, so that classification resolves to split nodes. Packets
0199  * are then classified by logical priority, or a more specific classifier
0200  * may be attached to the split node.
0201  */
0202 
0203 static struct cbq_class *
0204 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
0205 {
0206     struct cbq_sched_data *q = qdisc_priv(sch);
0207     struct cbq_class *head = &q->link;
0208     struct cbq_class **defmap;
0209     struct cbq_class *cl = NULL;
0210     u32 prio = skb->priority;
0211     struct tcf_proto *fl;
0212     struct tcf_result res;
0213 
0214     /*
0215      *  Step 1. If skb->priority points to one of our classes, use it.
0216      */
0217     if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
0218         (cl = cbq_class_lookup(q, prio)) != NULL)
0219         return cl;
0220 
0221     *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
0222     for (;;) {
0223         int result = 0;
0224         defmap = head->defaults;
0225 
0226         fl = rcu_dereference_bh(head->filter_list);
0227         /*
0228          * Step 2+n. Apply classifier.
0229          */
0230         result = tcf_classify(skb, NULL, fl, &res, true);
0231         if (!fl || result < 0)
0232             goto fallback;
0233 
0234         cl = (void *)res.class;
0235         if (!cl) {
0236             if (TC_H_MAJ(res.classid))
0237                 cl = cbq_class_lookup(q, res.classid);
0238             else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
0239                 cl = defmap[TC_PRIO_BESTEFFORT];
0240 
0241             if (cl == NULL)
0242                 goto fallback;
0243         }
0244         if (cl->level >= head->level)
0245             goto fallback;
0246 #ifdef CONFIG_NET_CLS_ACT
0247         switch (result) {
0248         case TC_ACT_QUEUED:
0249         case TC_ACT_STOLEN:
0250         case TC_ACT_TRAP:
0251             *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
0252             fallthrough;
0253         case TC_ACT_SHOT:
0254             return NULL;
0255         case TC_ACT_RECLASSIFY:
0256             return cbq_reclassify(skb, cl);
0257         }
0258 #endif
0259         if (cl->level == 0)
0260             return cl;
0261 
0262         /*
0263          * Step 3+n. If the classifier selected a link-sharing class,
0264          *     apply that agency's own classifier.
0265          *     Repeat this procedure until we hit a leaf node.
0266          */
0267         head = cl;
0268     }
0269 
0270 fallback:
0271     cl = head;
0272 
0273     /*
0274      * Step 4. No success...
0275      */
0276     if (TC_H_MAJ(prio) == 0 &&
0277         !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
0278         !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
0279         return head;
0280 
0281     return cl;
0282 }
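/* A minimal walk-through of the procedure above, with assumed handles
 * (the qdisc is 1:, 1:1 is a link-sharing node, 1:10 a leaf): if
 * skb->priority already equals the classid of 1:10, step 1 returns that
 * class without consulting any filter. Otherwise the loop starts at
 * &q->link, runs tcf_classify() against that node's filter list and
 * either descends into the returned link-sharing class (1:1, repeating
 * with its own filters and defaults[]) or, when res.class is not set,
 * falls back to defaults[] indexed by the logical priority. The loop
 * stops at the first class with level == 0, i.e. a leaf.
 */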
0283 
0284 /*
0285  * A packet has just been enqueued on an empty class.
0286  * cbq_activate_class adds the class to the tail of the active-class
0287  * list of its priority band.
0288  */
0289 
0290 static inline void cbq_activate_class(struct cbq_class *cl)
0291 {
0292     struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
0293     int prio = cl->cpriority;
0294     struct cbq_class *cl_tail;
0295 
0296     cl_tail = q->active[prio];
0297     q->active[prio] = cl;
0298 
0299     if (cl_tail != NULL) {
0300         cl->next_alive = cl_tail->next_alive;
0301         cl_tail->next_alive = cl;
0302     } else {
0303         cl->next_alive = cl;
0304         q->activemask |= (1<<prio);
0305     }
0306 }
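/* The active list of a band is a circular, singly linked ring threaded
 * through next_alive, and q->active[prio] points at the ring's *tail*,
 * so tail->next_alive is the head where each round begins. A sketch
 * with hypothetical backlogged classes A, B and C in one band:
 *
 *   q->active[prio] --> C --next_alive--> A --> B --> back to C
 *
 * cbq_activate_class() above splices the new class in right after the
 * old tail and then makes it the new tail, which is equivalent to
 * appending it to the end of the round-robin order.
 */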
0307 
0308 /*
0309  * Unlink a class from the active chain.
0310  * Note that the same unlinking is also done directly in the cbq_dequeue*
0311  * path during the round-robin procedure.
0312  */
0313 
0314 static void cbq_deactivate_class(struct cbq_class *this)
0315 {
0316     struct cbq_sched_data *q = qdisc_priv(this->qdisc);
0317     int prio = this->cpriority;
0318     struct cbq_class *cl;
0319     struct cbq_class *cl_prev = q->active[prio];
0320 
0321     do {
0322         cl = cl_prev->next_alive;
0323         if (cl == this) {
0324             cl_prev->next_alive = cl->next_alive;
0325             cl->next_alive = NULL;
0326 
0327             if (cl == q->active[prio]) {
0328                 q->active[prio] = cl_prev;
0329                 if (cl == q->active[prio]) {
0330                     q->active[prio] = NULL;
0331                     q->activemask &= ~(1<<prio);
0332                     return;
0333                 }
0334             }
0335             return;
0336         }
0337     } while ((cl_prev = cl) != q->active[prio]);
0338 }
0339 
0340 static void
0341 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
0342 {
0343     int toplevel = q->toplevel;
0344 
0345     if (toplevel > cl->level) {
0346         psched_time_t now = psched_get_time();
0347 
0348         do {
0349             if (cl->undertime < now) {
0350                 q->toplevel = cl->level;
0351                 return;
0352             }
0353         } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
0354     }
0355 }
0356 
0357 static int
0358 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
0359         struct sk_buff **to_free)
0360 {
0361     struct cbq_sched_data *q = qdisc_priv(sch);
0362     int ret;
0363     struct cbq_class *cl = cbq_classify(skb, sch, &ret);
0364 
0365 #ifdef CONFIG_NET_CLS_ACT
0366     q->rx_class = cl;
0367 #endif
0368     if (cl == NULL) {
0369         if (ret & __NET_XMIT_BYPASS)
0370             qdisc_qstats_drop(sch);
0371         __qdisc_drop(skb, to_free);
0372         return ret;
0373     }
0374 
0375     ret = qdisc_enqueue(skb, cl->q, to_free);
0376     if (ret == NET_XMIT_SUCCESS) {
0377         sch->q.qlen++;
0378         cbq_mark_toplevel(q, cl);
0379         if (!cl->next_alive)
0380             cbq_activate_class(cl);
0381         return ret;
0382     }
0383 
0384     if (net_xmit_drop_count(ret)) {
0385         qdisc_qstats_drop(sch);
0386         cbq_mark_toplevel(q, cl);
0387         cl->qstats.drops++;
0388     }
0389     return ret;
0390 }
0391 
0392 /* Overlimit action: penalize leaf class by adding offtime */
0393 static void cbq_overlimit(struct cbq_class *cl)
0394 {
0395     struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
0396     psched_tdiff_t delay = cl->undertime - q->now;
0397 
0398     if (!cl->delayed) {
0399         delay += cl->offtime;
0400 
0401         /*
0402          * The class goes to sleep, so it will have no
0403          * chance to work off its avgidle. Let's forgive it 8)
0404          *
0405          * BTW cbq-2.0 has a bug in this place:
0406          * apparently they forgot to shift by cl->ewma_log.
0407          */
0408         if (cl->avgidle < 0)
0409             delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
0410         if (cl->avgidle < cl->minidle)
0411             cl->avgidle = cl->minidle;
0412         if (delay <= 0)
0413             delay = 1;
0414         cl->undertime = q->now + delay;
0415 
0416         cl->xstats.overactions++;
0417         cl->delayed = 1;
0418     }
0419     if (q->wd_expires == 0 || q->wd_expires > delay)
0420         q->wd_expires = delay;
0421 
0422     /* Dirty work! We must schedule wakeups based on the
0423      * real available rate, rather than the leaf rate,
0424      * which may be tiny (even zero).
0425      */
0426     if (q->toplevel == TC_CBQ_MAXLEVEL) {
0427         struct cbq_class *b;
0428         psched_tdiff_t base_delay = q->wd_expires;
0429 
0430         for (b = cl->borrow; b; b = b->borrow) {
0431             delay = b->undertime - q->now;
0432             if (delay < base_delay) {
0433                 if (delay <= 0)
0434                     delay = 1;
0435                 base_delay = delay;
0436             }
0437         }
0438 
0439         q->wd_expires = base_delay;
0440     }
0441 }
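/* Small worked example of the penalty arithmetic above (all values are
 * assumed, in psched ticks): with offtime = 1000, avgidle = -64 and
 * ewma_log = 5, the forgiven part of the idle debt is
 *
 *   (-avgidle) - ((-avgidle) >> ewma_log) = 64 - 2 = 62
 *
 * so delay becomes (cl->undertime - q->now) + 1000 - 62 before the
 * final clamp to at least one tick; the ">> ewma_log" here is exactly
 * the shift the comment above says cbq-2.0 forgot.
 */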
0442 
0443 /*
0444  * This is a mission-critical procedure.
0445  *
0446  * We "regenerate" the toplevel cutoff if the transmitting class
0447  * has backlog and is not regulated. This is not part of the
0448  * original CBQ description, but it looks more reasonable.
0449  * It may well be wrong; the question needs further investigation.
0450  */
0451 
0452 static inline void
0453 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
0454             struct cbq_class *borrowed)
0455 {
0456     if (cl && q->toplevel >= borrowed->level) {
0457         if (cl->q->q.qlen > 1) {
0458             do {
0459                 if (borrowed->undertime == PSCHED_PASTPERFECT) {
0460                     q->toplevel = borrowed->level;
0461                     return;
0462                 }
0463             } while ((borrowed = borrowed->borrow) != NULL);
0464         }
0465 #if 0
0466     /* This is not necessary now. Uncommenting it
0467        would save CPU cycles, but decrease fairness.
0468      */
0469         q->toplevel = TC_CBQ_MAXLEVEL;
0470 #endif
0471     }
0472 }
0473 
0474 static void
0475 cbq_update(struct cbq_sched_data *q)
0476 {
0477     struct cbq_class *this = q->tx_class;
0478     struct cbq_class *cl = this;
0479     int len = q->tx_len;
0480     psched_time_t now;
0481 
0482     q->tx_class = NULL;
0483     /* Time integrator. We calculate the end-of-service (EOS) time
0484      * by adding the expected packet transmission time.
0485      */
0486     now = q->now + L2T(&q->link, len);
0487 
0488     for ( ; cl; cl = cl->share) {
0489         long avgidle = cl->avgidle;
0490         long idle;
0491 
0492         _bstats_update(&cl->bstats, len, 1);
0493 
0494         /*
0495          * (now - last) is total time between packet right edges.
0496          * (last_pktlen/rate) is "virtual" busy time, so that
0497          *
0498          *  idle = (now - last) - last_pktlen/rate
0499          */
0500 
0501         idle = now - cl->last;
0502         if ((unsigned long)idle > 128*1024*1024) {
0503             avgidle = cl->maxidle;
0504         } else {
0505             idle -= L2T(cl, len);
0506 
0507         /* true_avgidle := (1-W)*true_avgidle + W*idle,
0508          * where W=2^{-ewma_log}. But cl->avgidle is scaled:
0509          * cl->avgidle == true_avgidle/W,
0510          * hence:
0511          */
0512             avgidle += idle - (avgidle>>cl->ewma_log);
0513         }
0514 
0515         if (avgidle <= 0) {
0516             /* Overlimit or at-limit */
0517 
0518             if (avgidle < cl->minidle)
0519                 avgidle = cl->minidle;
0520 
0521             cl->avgidle = avgidle;
0522 
0523             /* Calculate the expected time when this class
0524              * will be allowed to send again.
0525              * That happens when:
0526              * (1-W)*true_avgidle + W*delay = 0, i.e.
0527              * idle = (1/W - 1)*(-true_avgidle)
0528              * or, in the scaled form,
0529              * idle = (1 - W)*(-cl->avgidle);
0530              */
0531             idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
0532 
0533             /*
0534              * That is not all.
0535              * To maintain the rate allocated to the class,
0536              * we add to undertime the virtual clock time
0537              * needed to complete the transmitted packet.
0538              * (len/phys_bandwidth has already elapsed by
0539              * the time cbq_update runs.)
0540              */
0541 
0542             idle -= L2T(&q->link, len);
0543             idle += L2T(cl, len);
0544 
0545             cl->undertime = now + idle;
0546         } else {
0547             /* Underlimit */
0548 
0549             cl->undertime = PSCHED_PASTPERFECT;
0550             if (avgidle > cl->maxidle)
0551                 cl->avgidle = cl->maxidle;
0552             else
0553                 cl->avgidle = avgidle;
0554         }
0555         if ((s64)(now - cl->last) > 0)
0556             cl->last = now;
0557     }
0558 
0559     cbq_update_toplevel(q, this, q->tx_borrowed);
0560 }
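/* A self-contained sketch of the scaled EWMA update performed above;
 * the helper name and the sample values are illustrative assumptions,
 * kept under #if 0 so they are clearly not part of the scheduler:
 */
#if 0
/* avgidle is stored pre-scaled by 2^ewma_log, exactly like cl->avgidle */
static long cbq_ewma_example(long avgidle, long idle, unsigned char ewma_log)
{
	/* scaled form of true_avgidle' = (1-W)*true_avgidle + W*idle,
	 * with W = 2^-ewma_log and avgidle == true_avgidle/W
	 */
	return avgidle + idle - (avgidle >> ewma_log);
}
/* With ewma_log = 5 (W = 1/32), avgidle = 320 (true value 10) and
 * idle = 64, this returns 320 + 64 - 10 = 374, i.e. a true average of
 * 374/32 = 11.6875 -- the same as (31/32)*10 + (1/32)*64 computed
 * directly, confirming the scaling argument in the comment above.
 */
#endif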
0561 
0562 static inline struct cbq_class *
0563 cbq_under_limit(struct cbq_class *cl)
0564 {
0565     struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
0566     struct cbq_class *this_cl = cl;
0567 
0568     if (cl->tparent == NULL)
0569         return cl;
0570 
0571     if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
0572         cl->delayed = 0;
0573         return cl;
0574     }
0575 
0576     do {
0577         /* This is a very suspicious place. Currently the overlimit
0578          * action is generated for non-bounded classes
0579          * only if the link is completely congested.
0580          * Though this agrees with the ancestor-only paradigm,
0581          * it looks very questionable. In particular,
0582          * it means that this chunk of code will either
0583          * never be called or will strongly amplify
0584          * burstiness. Dangerous and silly, but
0585          * no better solution exists.
0586          */
0587         cl = cl->borrow;
0588         if (!cl) {
0589             this_cl->qstats.overlimits++;
0590             cbq_overlimit(this_cl);
0591             return NULL;
0592         }
0593         if (cl->level > q->toplevel)
0594             return NULL;
0595     } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
0596 
0597     cl->delayed = 0;
0598     return cl;
0599 }
0600 
0601 static inline struct sk_buff *
0602 cbq_dequeue_prio(struct Qdisc *sch, int prio)
0603 {
0604     struct cbq_sched_data *q = qdisc_priv(sch);
0605     struct cbq_class *cl_tail, *cl_prev, *cl;
0606     struct sk_buff *skb;
0607     int deficit;
0608 
0609     cl_tail = cl_prev = q->active[prio];
0610     cl = cl_prev->next_alive;
0611 
0612     do {
0613         deficit = 0;
0614 
0615         /* Start round */
0616         do {
0617             struct cbq_class *borrow = cl;
0618 
0619             if (cl->q->q.qlen &&
0620                 (borrow = cbq_under_limit(cl)) == NULL)
0621                 goto skip_class;
0622 
0623             if (cl->deficit <= 0) {
0624                 /* The class exhausted its allotment for
0625                  * this round. Switch to the next one.
0626                  */
0627                 deficit = 1;
0628                 cl->deficit += cl->quantum;
0629                 goto next_class;
0630             }
0631 
0632             skb = cl->q->dequeue(cl->q);
0633 
0634             /* The class did not give us any skb :-(
0635              * This can happen even if cl->q->q.qlen != 0,
0636              * e.g. if cl->q is a "tbf" qdisc.
0637              */
0638             if (skb == NULL)
0639                 goto skip_class;
0640 
0641             cl->deficit -= qdisc_pkt_len(skb);
0642             q->tx_class = cl;
0643             q->tx_borrowed = borrow;
0644             if (borrow != cl) {
0645 #ifndef CBQ_XSTATS_BORROWS_BYTES
0646                 borrow->xstats.borrows++;
0647                 cl->xstats.borrows++;
0648 #else
0649                 borrow->xstats.borrows += qdisc_pkt_len(skb);
0650                 cl->xstats.borrows += qdisc_pkt_len(skb);
0651 #endif
0652             }
0653             q->tx_len = qdisc_pkt_len(skb);
0654 
0655             if (cl->deficit <= 0) {
0656                 q->active[prio] = cl;
0657                 cl = cl->next_alive;
0658                 cl->deficit += cl->quantum;
0659             }
0660             return skb;
0661 
0662 skip_class:
0663             if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
0664                 /* Class is empty or penalized.
0665                  * Unlink it from active chain.
0666                  */
0667                 cl_prev->next_alive = cl->next_alive;
0668                 cl->next_alive = NULL;
0669 
0670                 /* Did cl_tail point to it? */
0671                 if (cl == cl_tail) {
0672                     /* Repair it! */
0673                     cl_tail = cl_prev;
0674 
0675                     /* Was it the last class in this band? */
0676                     if (cl == cl_tail) {
0677                         /* Kill the band! */
0678                         q->active[prio] = NULL;
0679                         q->activemask &= ~(1<<prio);
0680                         if (cl->q->q.qlen)
0681                             cbq_activate_class(cl);
0682                         return NULL;
0683                     }
0684 
0685                     q->active[prio] = cl_tail;
0686                 }
0687                 if (cl->q->q.qlen)
0688                     cbq_activate_class(cl);
0689 
0690                 cl = cl_prev;
0691             }
0692 
0693 next_class:
0694             cl_prev = cl;
0695             cl = cl->next_alive;
0696         } while (cl_prev != cl_tail);
0697     } while (deficit);
0698 
0699     q->active[prio] = cl_prev;
0700 
0701     return NULL;
0702 }
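/* Deficit accounting in the round-robin loop above, with assumed
 * numbers: a class with quantum = 1500 enters a round with
 * deficit = 1500. A 1000-byte packet leaves deficit = 500, so it may
 * still send; a second 1000-byte packet drives deficit to -500, and the
 * class is rotated to the tail of the band. At the top-of-round check
 * it is then skipped, its deficit being topped up by quantum each time,
 * until the deficit turns positive again (here after one top-up, back
 * to 1000). Because only a positive deficit is required to send, quanta
 * smaller than the MTU still work; such a class simply waits extra
 * rounds between full-size packets, as the header comment promises.
 */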
0703 
0704 static inline struct sk_buff *
0705 cbq_dequeue_1(struct Qdisc *sch)
0706 {
0707     struct cbq_sched_data *q = qdisc_priv(sch);
0708     struct sk_buff *skb;
0709     unsigned int activemask;
0710 
0711     activemask = q->activemask & 0xFF;
0712     while (activemask) {
0713         int prio = ffz(~activemask);
0714         activemask &= ~(1<<prio);
0715         skb = cbq_dequeue_prio(sch, prio);
0716         if (skb)
0717             return skb;
0718     }
0719     return NULL;
0720 }
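/* activemask keeps one bit per priority band that currently has
 * backlogged classes, and ffz(~activemask) yields the index of its
 * lowest set bit, i.e. the numerically smallest (best) non-empty
 * priority. For example, activemask == 0x05 (bands 0 and 2 active)
 * makes the loop above try band 0 first and band 2 only if band 0
 * returned no packet.
 */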
0721 
0722 static struct sk_buff *
0723 cbq_dequeue(struct Qdisc *sch)
0724 {
0725     struct sk_buff *skb;
0726     struct cbq_sched_data *q = qdisc_priv(sch);
0727     psched_time_t now;
0728 
0729     now = psched_get_time();
0730 
0731     if (q->tx_class)
0732         cbq_update(q);
0733 
0734     q->now = now;
0735 
0736     for (;;) {
0737         q->wd_expires = 0;
0738 
0739         skb = cbq_dequeue_1(sch);
0740         if (skb) {
0741             qdisc_bstats_update(sch, skb);
0742             sch->q.qlen--;
0743             return skb;
0744         }
0745 
0746         /* All the classes are overlimit.
0747          *
0748          * This is possible if:
0749          *
0750          * 1. The scheduler is empty.
0751          * 2. The toplevel cutoff inhibited borrowing.
0752          * 3. The root class is overlimit.
0753          *
0754          * Reset conditions 2 and 3 and retry.
0755          *
0756          * Note that NS and cbq-2.0 are buggy here: peeking
0757          * an arbitrary class is appropriate for ancestor-only
0758          * sharing, but not for the toplevel algorithm.
0759          *
0760          * Our version is better but slower, because it requires
0761          * two passes; that is unavoidable with top-level sharing.
0762          */
0763 
0764         if (q->toplevel == TC_CBQ_MAXLEVEL &&
0765             q->link.undertime == PSCHED_PASTPERFECT)
0766             break;
0767 
0768         q->toplevel = TC_CBQ_MAXLEVEL;
0769         q->link.undertime = PSCHED_PASTPERFECT;
0770     }
0771 
0772     /* No packets in the scheduler, or nobody wants to give them to us :-(
0773      * Sigh... start the watchdog timer in the latter case.
0774      */
0775 
0776     if (sch->q.qlen) {
0777         qdisc_qstats_overlimit(sch);
0778         if (q->wd_expires)
0779             qdisc_watchdog_schedule(&q->watchdog,
0780                         now + q->wd_expires);
0781     }
0782     return NULL;
0783 }
0784 
0785 /* CBQ class maintenance routines */
0786 
0787 static void cbq_adjust_levels(struct cbq_class *this)
0788 {
0789     if (this == NULL)
0790         return;
0791 
0792     do {
0793         int level = 0;
0794         struct cbq_class *cl;
0795 
0796         cl = this->children;
0797         if (cl) {
0798             do {
0799                 if (cl->level > level)
0800                     level = cl->level;
0801             } while ((cl = cl->sibling) != this->children);
0802         }
0803         this->level = level + 1;
0804     } while ((this = this->tparent) != NULL);
0805 }
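/* Levels grow from the leaves upward: a leaf class keeps level 0 and
 * each ancestor gets the maximum level of its children plus one. With
 * a hypothetical tree 1:1 -> {1:10, 1:20} where both children are
 * leaves, 1:10 and 1:20 have level 0 and 1:1 has level 1; these are
 * the values the toplevel cutoff in cbq_mark_toplevel() and
 * cbq_under_limit() compares against.
 */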
0806 
0807 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
0808 {
0809     struct cbq_class *cl;
0810     unsigned int h;
0811 
0812     if (q->quanta[prio] == 0)
0813         return;
0814 
0815     for (h = 0; h < q->clhash.hashsize; h++) {
0816         hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
0817             /* BUGGGG... Beware! This expression suffers from
0818              * arithmetic overflow!
0819              */
0820             if (cl->priority == prio) {
0821                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
0822                     q->quanta[prio];
0823             }
0824             if (cl->quantum <= 0 ||
0825                 cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
0826                 pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
0827                     cl->common.classid, cl->quantum);
0828                 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
0829             }
0830         }
0831     }
0832 }
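/* A self-contained sketch of the normalization rule above; the helper
 * name and the sample values are illustrative assumptions, kept under
 * #if 0 so they are clearly not part of the scheduler:
 */
#if 0
static long cbq_quantum_example(long weight, long allot,
				int nclasses, unsigned int weight_sum)
{
	/* same expression as above: weight*allot*nclasses / sum of weights */
	return (weight * allot * nclasses) / weight_sum;
}
/* With two classes in a band, allot = 1514 each and weights 1 and 3,
 * nclasses = 2 and weight_sum = 4, this gives quanta of 757 and 2271:
 * the quanta stay proportional to the weights and the band's total
 * stays close to nclasses*allot. The overflow warning above applies,
 * since the product is evaluated in a plain long.
 */
#endif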
0833 
0834 static void cbq_sync_defmap(struct cbq_class *cl)
0835 {
0836     struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
0837     struct cbq_class *split = cl->split;
0838     unsigned int h;
0839     int i;
0840 
0841     if (split == NULL)
0842         return;
0843 
0844     for (i = 0; i <= TC_PRIO_MAX; i++) {
0845         if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
0846             split->defaults[i] = NULL;
0847     }
0848 
0849     for (i = 0; i <= TC_PRIO_MAX; i++) {
0850         int level = split->level;
0851 
0852         if (split->defaults[i])
0853             continue;
0854 
0855         for (h = 0; h < q->clhash.hashsize; h++) {
0856             struct cbq_class *c;
0857 
0858             hlist_for_each_entry(c, &q->clhash.hash[h],
0859                          common.hnode) {
0860                 if (c->split == split && c->level < level &&
0861                     c->defmap & (1<<i)) {
0862                     split->defaults[i] = c;
0863                     level = c->level;
0864                 }
0865             }
0866         }
0867     }
0868 }
0869 
0870 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
0871 {
0872     struct cbq_class *split = NULL;
0873 
0874     if (splitid == 0) {
0875         split = cl->split;
0876         if (!split)
0877             return;
0878         splitid = split->common.classid;
0879     }
0880 
0881     if (split == NULL || split->common.classid != splitid) {
0882         for (split = cl->tparent; split; split = split->tparent)
0883             if (split->common.classid == splitid)
0884                 break;
0885     }
0886 
0887     if (split == NULL)
0888         return;
0889 
0890     if (cl->split != split) {
0891         cl->defmap = 0;
0892         cbq_sync_defmap(cl);
0893         cl->split = split;
0894         cl->defmap = def & mask;
0895     } else
0896         cl->defmap = (cl->defmap & ~mask) | (def & mask);
0897 
0898     cbq_sync_defmap(cl);
0899 }
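/* Example of the split/defmap machinery (classids are assumed): if
 * class 1:10 is given split 1:1 and a defmap with the
 * TC_PRIO_BESTEFFORT bit set, then a packet that reaches split node
 * 1:1 without a more specific filter match and carries that logical
 * priority falls through to 1:10 via split->defaults[]. When several
 * classes under the same split claim the same priority bit,
 * cbq_sync_defmap() above lets the one with the lowest level win.
 */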
0900 
0901 static void cbq_unlink_class(struct cbq_class *this)
0902 {
0903     struct cbq_class *cl, **clp;
0904     struct cbq_sched_data *q = qdisc_priv(this->qdisc);
0905 
0906     qdisc_class_hash_remove(&q->clhash, &this->common);
0907 
0908     if (this->tparent) {
0909         clp = &this->sibling;
0910         cl = *clp;
0911         do {
0912             if (cl == this) {
0913                 *clp = cl->sibling;
0914                 break;
0915             }
0916             clp = &cl->sibling;
0917         } while ((cl = *clp) != this->sibling);
0918 
0919         if (this->tparent->children == this) {
0920             this->tparent->children = this->sibling;
0921             if (this->sibling == this)
0922                 this->tparent->children = NULL;
0923         }
0924     } else {
0925         WARN_ON(this->sibling != this);
0926     }
0927 }
0928 
0929 static void cbq_link_class(struct cbq_class *this)
0930 {
0931     struct cbq_sched_data *q = qdisc_priv(this->qdisc);
0932     struct cbq_class *parent = this->tparent;
0933 
0934     this->sibling = this;
0935     qdisc_class_hash_insert(&q->clhash, &this->common);
0936 
0937     if (parent == NULL)
0938         return;
0939 
0940     if (parent->children == NULL) {
0941         parent->children = this;
0942     } else {
0943         this->sibling = parent->children->sibling;
0944         parent->children->sibling = this;
0945     }
0946 }
0947 
0948 static void
0949 cbq_reset(struct Qdisc *sch)
0950 {
0951     struct cbq_sched_data *q = qdisc_priv(sch);
0952     struct cbq_class *cl;
0953     int prio;
0954     unsigned int h;
0955 
0956     q->activemask = 0;
0957     q->pmask = 0;
0958     q->tx_class = NULL;
0959     q->tx_borrowed = NULL;
0960     qdisc_watchdog_cancel(&q->watchdog);
0961     q->toplevel = TC_CBQ_MAXLEVEL;
0962     q->now = psched_get_time();
0963 
0964     for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
0965         q->active[prio] = NULL;
0966 
0967     for (h = 0; h < q->clhash.hashsize; h++) {
0968         hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
0969             qdisc_reset(cl->q);
0970 
0971             cl->next_alive = NULL;
0972             cl->undertime = PSCHED_PASTPERFECT;
0973             cl->avgidle = cl->maxidle;
0974             cl->deficit = cl->quantum;
0975             cl->cpriority = cl->priority;
0976         }
0977     }
0978     sch->q.qlen = 0;
0979 }
0980 
0981 
0982 static void cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
0983 {
0984     if (lss->change & TCF_CBQ_LSS_FLAGS) {
0985         cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
0986         cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
0987     }
0988     if (lss->change & TCF_CBQ_LSS_EWMA)
0989         cl->ewma_log = lss->ewma_log;
0990     if (lss->change & TCF_CBQ_LSS_AVPKT)
0991         cl->avpkt = lss->avpkt;
0992     if (lss->change & TCF_CBQ_LSS_MINIDLE)
0993         cl->minidle = -(long)lss->minidle;
0994     if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
0995         cl->maxidle = lss->maxidle;
0996         cl->avgidle = lss->maxidle;
0997     }
0998     if (lss->change & TCF_CBQ_LSS_OFFTIME)
0999         cl->offtime = lss->offtime;
1000 }
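/* The two LSS flags handled above map directly onto the pointers the
 * scheduler walks: TCF_CBQ_LSS_ISOLATED clears cl->share, so the class
 * does not lend spare bandwidth through its parent, and
 * TCF_CBQ_LSS_BOUNDED clears cl->borrow, so an overlimit class never
 * borrows -- cbq_under_limit() then runs off the end of the borrow
 * chain and invokes cbq_overlimit(). A class that is neither isolated
 * nor bounded both lends and borrows via cl->tparent.
 */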
1001 
1002 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1003 {
1004     q->nclasses[cl->priority]--;
1005     q->quanta[cl->priority] -= cl->weight;
1006     cbq_normalize_quanta(q, cl->priority);
1007 }
1008 
1009 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1010 {
1011     q->nclasses[cl->priority]++;
1012     q->quanta[cl->priority] += cl->weight;
1013     cbq_normalize_quanta(q, cl->priority);
1014 }
1015 
1016 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1017 {
1018     struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1019 
1020     if (wrr->allot)
1021         cl->allot = wrr->allot;
1022     if (wrr->weight)
1023         cl->weight = wrr->weight;
1024     if (wrr->priority) {
1025         cl->priority = wrr->priority - 1;
1026         cl->cpriority = cl->priority;
1027         if (cl->priority >= cl->priority2)
1028             cl->priority2 = TC_CBQ_MAXPRIO - 1;
1029     }
1030 
1031     cbq_addprio(q, cl);
1032     return 0;
1033 }
1034 
1035 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1036 {
1037     cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1038     return 0;
1039 }
1040 
1041 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1042     [TCA_CBQ_LSSOPT]    = { .len = sizeof(struct tc_cbq_lssopt) },
1043     [TCA_CBQ_WRROPT]    = { .len = sizeof(struct tc_cbq_wrropt) },
1044     [TCA_CBQ_FOPT]      = { .len = sizeof(struct tc_cbq_fopt) },
1045     [TCA_CBQ_OVL_STRATEGY]  = { .len = sizeof(struct tc_cbq_ovl) },
1046     [TCA_CBQ_RATE]      = { .len = sizeof(struct tc_ratespec) },
1047     [TCA_CBQ_RTAB]      = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1048     [TCA_CBQ_POLICE]    = { .len = sizeof(struct tc_cbq_police) },
1049 };
1050 
1051 static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1],
1052              struct nlattr *opt,
1053              struct netlink_ext_ack *extack)
1054 {
1055     int err;
1056 
1057     if (!opt) {
1058         NL_SET_ERR_MSG(extack, "CBQ options are required for this operation");
1059         return -EINVAL;
1060     }
1061 
1062     err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt,
1063                       cbq_policy, extack);
1064     if (err < 0)
1065         return err;
1066 
1067     if (tb[TCA_CBQ_WRROPT]) {
1068         const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]);
1069 
1070         if (wrr->priority > TC_CBQ_MAXPRIO) {
1071             NL_SET_ERR_MSG(extack, "priority is bigger than TC_CBQ_MAXPRIO");
1072             err = -EINVAL;
1073         }
1074     }
1075     return err;
1076 }
1077 
1078 static int cbq_init(struct Qdisc *sch, struct nlattr *opt,
1079             struct netlink_ext_ack *extack)
1080 {
1081     struct cbq_sched_data *q = qdisc_priv(sch);
1082     struct nlattr *tb[TCA_CBQ_MAX + 1];
1083     struct tc_ratespec *r;
1084     int err;
1085 
1086     qdisc_watchdog_init(&q->watchdog, sch);
1087 
1088     err = cbq_opt_parse(tb, opt, extack);
1089     if (err < 0)
1090         return err;
1091 
1092     if (!tb[TCA_CBQ_RTAB] || !tb[TCA_CBQ_RATE]) {
1093         NL_SET_ERR_MSG(extack, "Rate specification missing or incomplete");
1094         return -EINVAL;
1095     }
1096 
1097     r = nla_data(tb[TCA_CBQ_RATE]);
1098 
1099     q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB], extack);
1100     if (!q->link.R_tab)
1101         return -EINVAL;
1102 
1103     err = tcf_block_get(&q->link.block, &q->link.filter_list, sch, extack);
1104     if (err)
1105         goto put_rtab;
1106 
1107     err = qdisc_class_hash_init(&q->clhash);
1108     if (err < 0)
1109         goto put_block;
1110 
1111     q->link.sibling = &q->link;
1112     q->link.common.classid = sch->handle;
1113     q->link.qdisc = sch;
1114     q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1115                       sch->handle, NULL);
1116     if (!q->link.q)
1117         q->link.q = &noop_qdisc;
1118     else
1119         qdisc_hash_add(q->link.q, true);
1120 
1121     q->link.priority = TC_CBQ_MAXPRIO - 1;
1122     q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1123     q->link.cpriority = TC_CBQ_MAXPRIO - 1;
1124     q->link.allot = psched_mtu(qdisc_dev(sch));
1125     q->link.quantum = q->link.allot;
1126     q->link.weight = q->link.R_tab->rate.rate;
1127 
1128     q->link.ewma_log = TC_CBQ_DEF_EWMA;
1129     q->link.avpkt = q->link.allot/2;
1130     q->link.minidle = -0x7FFFFFFF;
1131 
1132     q->toplevel = TC_CBQ_MAXLEVEL;
1133     q->now = psched_get_time();
1134 
1135     cbq_link_class(&q->link);
1136 
1137     if (tb[TCA_CBQ_LSSOPT])
1138         cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1139 
1140     cbq_addprio(q, &q->link);
1141     return 0;
1142 
1143 put_block:
1144     tcf_block_put(q->link.block);
1145 
1146 put_rtab:
1147     qdisc_put_rtab(q->link.R_tab);
1148     return err;
1149 }
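/* For orientation, a typical iproute2 setup exercising this init path
 * and cbq_change_class() below could look like the following; the
 * option spellings are those of tc-cbq(8), while the device name and
 * the numbers are illustrative assumptions:
 *
 *   tc qdisc add dev eth0 root handle 1: cbq bandwidth 10mbit avpkt 1000
 *   tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 10mbit \
 *      rate 1mbit allot 1514 prio 5 avpkt 1000 bounded
 *
 * The qdisc command supplies TCA_CBQ_RATE/TCA_CBQ_RTAB plus LSS options
 * for q->link; each class command carries the rate, WRR and LSS
 * attributes that cbq_change_class() insists on.
 */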
1150 
1151 static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1152 {
1153     unsigned char *b = skb_tail_pointer(skb);
1154 
1155     if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
1156         goto nla_put_failure;
1157     return skb->len;
1158 
1159 nla_put_failure:
1160     nlmsg_trim(skb, b);
1161     return -1;
1162 }
1163 
1164 static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1165 {
1166     unsigned char *b = skb_tail_pointer(skb);
1167     struct tc_cbq_lssopt opt;
1168 
1169     opt.flags = 0;
1170     if (cl->borrow == NULL)
1171         opt.flags |= TCF_CBQ_LSS_BOUNDED;
1172     if (cl->share == NULL)
1173         opt.flags |= TCF_CBQ_LSS_ISOLATED;
1174     opt.ewma_log = cl->ewma_log;
1175     opt.level = cl->level;
1176     opt.avpkt = cl->avpkt;
1177     opt.maxidle = cl->maxidle;
1178     opt.minidle = (u32)(-cl->minidle);
1179     opt.offtime = cl->offtime;
1180     opt.change = ~0;
1181     if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
1182         goto nla_put_failure;
1183     return skb->len;
1184 
1185 nla_put_failure:
1186     nlmsg_trim(skb, b);
1187     return -1;
1188 }
1189 
1190 static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1191 {
1192     unsigned char *b = skb_tail_pointer(skb);
1193     struct tc_cbq_wrropt opt;
1194 
1195     memset(&opt, 0, sizeof(opt));
1196     opt.flags = 0;
1197     opt.allot = cl->allot;
1198     opt.priority = cl->priority + 1;
1199     opt.cpriority = cl->cpriority + 1;
1200     opt.weight = cl->weight;
1201     if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
1202         goto nla_put_failure;
1203     return skb->len;
1204 
1205 nla_put_failure:
1206     nlmsg_trim(skb, b);
1207     return -1;
1208 }
1209 
1210 static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1211 {
1212     unsigned char *b = skb_tail_pointer(skb);
1213     struct tc_cbq_fopt opt;
1214 
1215     if (cl->split || cl->defmap) {
1216         opt.split = cl->split ? cl->split->common.classid : 0;
1217         opt.defmap = cl->defmap;
1218         opt.defchange = ~0;
1219         if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
1220             goto nla_put_failure;
1221     }
1222     return skb->len;
1223 
1224 nla_put_failure:
1225     nlmsg_trim(skb, b);
1226     return -1;
1227 }
1228 
1229 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1230 {
1231     if (cbq_dump_lss(skb, cl) < 0 ||
1232         cbq_dump_rate(skb, cl) < 0 ||
1233         cbq_dump_wrr(skb, cl) < 0 ||
1234         cbq_dump_fopt(skb, cl) < 0)
1235         return -1;
1236     return 0;
1237 }
1238 
1239 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1240 {
1241     struct cbq_sched_data *q = qdisc_priv(sch);
1242     struct nlattr *nest;
1243 
1244     nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1245     if (nest == NULL)
1246         goto nla_put_failure;
1247     if (cbq_dump_attr(skb, &q->link) < 0)
1248         goto nla_put_failure;
1249     return nla_nest_end(skb, nest);
1250 
1251 nla_put_failure:
1252     nla_nest_cancel(skb, nest);
1253     return -1;
1254 }
1255 
1256 static int
1257 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1258 {
1259     struct cbq_sched_data *q = qdisc_priv(sch);
1260 
1261     q->link.xstats.avgidle = q->link.avgidle;
1262     return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1263 }
1264 
1265 static int
1266 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1267            struct sk_buff *skb, struct tcmsg *tcm)
1268 {
1269     struct cbq_class *cl = (struct cbq_class *)arg;
1270     struct nlattr *nest;
1271 
1272     if (cl->tparent)
1273         tcm->tcm_parent = cl->tparent->common.classid;
1274     else
1275         tcm->tcm_parent = TC_H_ROOT;
1276     tcm->tcm_handle = cl->common.classid;
1277     tcm->tcm_info = cl->q->handle;
1278 
1279     nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1280     if (nest == NULL)
1281         goto nla_put_failure;
1282     if (cbq_dump_attr(skb, cl) < 0)
1283         goto nla_put_failure;
1284     return nla_nest_end(skb, nest);
1285 
1286 nla_put_failure:
1287     nla_nest_cancel(skb, nest);
1288     return -1;
1289 }
1290 
1291 static int
1292 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1293     struct gnet_dump *d)
1294 {
1295     struct cbq_sched_data *q = qdisc_priv(sch);
1296     struct cbq_class *cl = (struct cbq_class *)arg;
1297     __u32 qlen;
1298 
1299     cl->xstats.avgidle = cl->avgidle;
1300     cl->xstats.undertime = 0;
1301     qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog);
1302 
1303     if (cl->undertime != PSCHED_PASTPERFECT)
1304         cl->xstats.undertime = cl->undertime - q->now;
1305 
1306     if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
1307         gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1308         gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
1309         return -1;
1310 
1311     return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1312 }
1313 
1314 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1315              struct Qdisc **old, struct netlink_ext_ack *extack)
1316 {
1317     struct cbq_class *cl = (struct cbq_class *)arg;
1318 
1319     if (new == NULL) {
1320         new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1321                     cl->common.classid, extack);
1322         if (new == NULL)
1323             return -ENOBUFS;
1324     }
1325 
1326     *old = qdisc_replace(sch, new, &cl->q);
1327     return 0;
1328 }
1329 
1330 static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
1331 {
1332     struct cbq_class *cl = (struct cbq_class *)arg;
1333 
1334     return cl->q;
1335 }
1336 
1337 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1338 {
1339     struct cbq_class *cl = (struct cbq_class *)arg;
1340 
1341     cbq_deactivate_class(cl);
1342 }
1343 
1344 static unsigned long cbq_find(struct Qdisc *sch, u32 classid)
1345 {
1346     struct cbq_sched_data *q = qdisc_priv(sch);
1347 
1348     return (unsigned long)cbq_class_lookup(q, classid);
1349 }
1350 
1351 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1352 {
1353     struct cbq_sched_data *q = qdisc_priv(sch);
1354 
1355     WARN_ON(cl->filters);
1356 
1357     tcf_block_put(cl->block);
1358     qdisc_put(cl->q);
1359     qdisc_put_rtab(cl->R_tab);
1360     gen_kill_estimator(&cl->rate_est);
1361     if (cl != &q->link)
1362         kfree(cl);
1363 }
1364 
1365 static void cbq_destroy(struct Qdisc *sch)
1366 {
1367     struct cbq_sched_data *q = qdisc_priv(sch);
1368     struct hlist_node *next;
1369     struct cbq_class *cl;
1370     unsigned int h;
1371 
1372 #ifdef CONFIG_NET_CLS_ACT
1373     q->rx_class = NULL;
1374 #endif
1375     /*
1376      * Filters must be destroyed first because we don't destroy the
1377      * classes from root to leaves, which means that filters can still
1378      * be bound to classes which have already been destroyed. --TGR '04
1379      */
1380     for (h = 0; h < q->clhash.hashsize; h++) {
1381         hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
1382             tcf_block_put(cl->block);
1383             cl->block = NULL;
1384         }
1385     }
1386     for (h = 0; h < q->clhash.hashsize; h++) {
1387         hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
1388                       common.hnode)
1389             cbq_destroy_class(sch, cl);
1390     }
1391     qdisc_class_hash_destroy(&q->clhash);
1392 }
1393 
1394 static int
1395 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1396          unsigned long *arg, struct netlink_ext_ack *extack)
1397 {
1398     int err;
1399     struct cbq_sched_data *q = qdisc_priv(sch);
1400     struct cbq_class *cl = (struct cbq_class *)*arg;
1401     struct nlattr *opt = tca[TCA_OPTIONS];
1402     struct nlattr *tb[TCA_CBQ_MAX + 1];
1403     struct cbq_class *parent;
1404     struct qdisc_rate_table *rtab = NULL;
1405 
1406     err = cbq_opt_parse(tb, opt, extack);
1407     if (err < 0)
1408         return err;
1409 
1410     if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) {
1411         NL_SET_ERR_MSG(extack, "Neither overlimit strategy nor policing attributes can be used for changing class params");
1412         return -EOPNOTSUPP;
1413     }
1414 
1415     if (cl) {
1416         /* Check parent */
1417         if (parentid) {
1418             if (cl->tparent &&
1419                 cl->tparent->common.classid != parentid) {
1420                 NL_SET_ERR_MSG(extack, "Invalid parent id");
1421                 return -EINVAL;
1422             }
1423             if (!cl->tparent && parentid != TC_H_ROOT) {
1424                 NL_SET_ERR_MSG(extack, "Parent must be root");
1425                 return -EINVAL;
1426             }
1427         }
1428 
1429         if (tb[TCA_CBQ_RATE]) {
1430             rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1431                           tb[TCA_CBQ_RTAB], extack);
1432             if (rtab == NULL)
1433                 return -EINVAL;
1434         }
1435 
1436         if (tca[TCA_RATE]) {
1437             err = gen_replace_estimator(&cl->bstats, NULL,
1438                             &cl->rate_est,
1439                             NULL,
1440                             true,
1441                             tca[TCA_RATE]);
1442             if (err) {
1443                 NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator");
1444                 qdisc_put_rtab(rtab);
1445                 return err;
1446             }
1447         }
1448 
1449         /* Change class parameters */
1450         sch_tree_lock(sch);
1451 
1452         if (cl->next_alive != NULL)
1453             cbq_deactivate_class(cl);
1454 
1455         if (rtab) {
1456             qdisc_put_rtab(cl->R_tab);
1457             cl->R_tab = rtab;
1458         }
1459 
1460         if (tb[TCA_CBQ_LSSOPT])
1461             cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1462 
1463         if (tb[TCA_CBQ_WRROPT]) {
1464             cbq_rmprio(q, cl);
1465             cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1466         }
1467 
1468         if (tb[TCA_CBQ_FOPT])
1469             cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1470 
1471         if (cl->q->q.qlen)
1472             cbq_activate_class(cl);
1473 
1474         sch_tree_unlock(sch);
1475 
1476         return 0;
1477     }
1478 
1479     if (parentid == TC_H_ROOT)
1480         return -EINVAL;
1481 
1482     if (!tb[TCA_CBQ_WRROPT] || !tb[TCA_CBQ_RATE] || !tb[TCA_CBQ_LSSOPT]) {
1483         NL_SET_ERR_MSG(extack, "The WRR, rate and link sharing attributes MUST all be specified");
1484         return -EINVAL;
1485     }
1486 
1487     rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB],
1488                   extack);
1489     if (rtab == NULL)
1490         return -EINVAL;
1491 
1492     if (classid) {
1493         err = -EINVAL;
1494         if (TC_H_MAJ(classid ^ sch->handle) ||
1495             cbq_class_lookup(q, classid)) {
1496             NL_SET_ERR_MSG(extack, "Specified class not found");
1497             goto failure;
1498         }
1499     } else {
1500         int i;
1501         classid = TC_H_MAKE(sch->handle, 0x8000);
1502 
1503         for (i = 0; i < 0x8000; i++) {
1504             if (++q->hgenerator >= 0x8000)
1505                 q->hgenerator = 1;
1506             if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1507                 break;
1508         }
1509         err = -ENOSR;
1510         if (i >= 0x8000) {
1511             NL_SET_ERR_MSG(extack, "Unable to generate classid");
1512             goto failure;
1513         }
1514         classid = classid|q->hgenerator;
1515     }
1516 
1517     parent = &q->link;
1518     if (parentid) {
1519         parent = cbq_class_lookup(q, parentid);
1520         err = -EINVAL;
1521         if (!parent) {
1522             NL_SET_ERR_MSG(extack, "Failed to find parentid");
1523             goto failure;
1524         }
1525     }
1526 
1527     err = -ENOBUFS;
1528     cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1529     if (cl == NULL)
1530         goto failure;
1531 
1532     gnet_stats_basic_sync_init(&cl->bstats);
1533     err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1534     if (err) {
1535         kfree(cl);
1536         goto failure;
1537     }
1538 
1539     if (tca[TCA_RATE]) {
1540         err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
1541                     NULL, true, tca[TCA_RATE]);
1542         if (err) {
1543             NL_SET_ERR_MSG(extack, "Couldn't create new estimator");
1544             tcf_block_put(cl->block);
1545             kfree(cl);
1546             goto failure;
1547         }
1548     }
1549 
1550     cl->R_tab = rtab;
1551     rtab = NULL;
1552     cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid,
1553                   NULL);
1554     if (!cl->q)
1555         cl->q = &noop_qdisc;
1556     else
1557         qdisc_hash_add(cl->q, true);
1558 
1559     cl->common.classid = classid;
1560     cl->tparent = parent;
1561     cl->qdisc = sch;
1562     cl->allot = parent->allot;
1563     cl->quantum = cl->allot;
1564     cl->weight = cl->R_tab->rate.rate;
1565 
1566     sch_tree_lock(sch);
1567     cbq_link_class(cl);
1568     cl->borrow = cl->tparent;
1569     if (cl->tparent != &q->link)
1570         cl->share = cl->tparent;
1571     cbq_adjust_levels(parent);
1572     cl->minidle = -0x7FFFFFFF;
1573     cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1574     cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1575     if (cl->ewma_log == 0)
1576         cl->ewma_log = q->link.ewma_log;
1577     if (cl->maxidle == 0)
1578         cl->maxidle = q->link.maxidle;
1579     if (cl->avpkt == 0)
1580         cl->avpkt = q->link.avpkt;
1581     if (tb[TCA_CBQ_FOPT])
1582         cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1583     sch_tree_unlock(sch);
1584 
1585     qdisc_class_hash_grow(sch, &q->clhash);
1586 
1587     *arg = (unsigned long)cl;
1588     return 0;
1589 
1590 failure:
1591     qdisc_put_rtab(rtab);
1592     return err;
1593 }
1594 
1595 static int cbq_delete(struct Qdisc *sch, unsigned long arg,
1596               struct netlink_ext_ack *extack)
1597 {
1598     struct cbq_sched_data *q = qdisc_priv(sch);
1599     struct cbq_class *cl = (struct cbq_class *)arg;
1600 
1601     if (cl->filters || cl->children || cl == &q->link)
1602         return -EBUSY;
1603 
1604     sch_tree_lock(sch);
1605 
1606     qdisc_purge_queue(cl->q);
1607 
1608     if (cl->next_alive)
1609         cbq_deactivate_class(cl);
1610 
1611     if (q->tx_borrowed == cl)
1612         q->tx_borrowed = q->tx_class;
1613     if (q->tx_class == cl) {
1614         q->tx_class = NULL;
1615         q->tx_borrowed = NULL;
1616     }
1617 #ifdef CONFIG_NET_CLS_ACT
1618     if (q->rx_class == cl)
1619         q->rx_class = NULL;
1620 #endif
1621 
1622     cbq_unlink_class(cl);
1623     cbq_adjust_levels(cl->tparent);
1624     cl->defmap = 0;
1625     cbq_sync_defmap(cl);
1626 
1627     cbq_rmprio(q, cl);
1628     sch_tree_unlock(sch);
1629 
1630     cbq_destroy_class(sch, cl);
1631     return 0;
1632 }
1633 
1634 static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg,
1635                        struct netlink_ext_ack *extack)
1636 {
1637     struct cbq_sched_data *q = qdisc_priv(sch);
1638     struct cbq_class *cl = (struct cbq_class *)arg;
1639 
1640     if (cl == NULL)
1641         cl = &q->link;
1642 
1643     return cl->block;
1644 }
1645 
1646 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1647                      u32 classid)
1648 {
1649     struct cbq_sched_data *q = qdisc_priv(sch);
1650     struct cbq_class *p = (struct cbq_class *)parent;
1651     struct cbq_class *cl = cbq_class_lookup(q, classid);
1652 
1653     if (cl) {
1654         if (p && p->level <= cl->level)
1655             return 0;
1656         cl->filters++;
1657         return (unsigned long)cl;
1658     }
1659     return 0;
1660 }
1661 
1662 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1663 {
1664     struct cbq_class *cl = (struct cbq_class *)arg;
1665 
1666     cl->filters--;
1667 }
1668 
1669 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1670 {
1671     struct cbq_sched_data *q = qdisc_priv(sch);
1672     struct cbq_class *cl;
1673     unsigned int h;
1674 
1675     if (arg->stop)
1676         return;
1677 
1678     for (h = 0; h < q->clhash.hashsize; h++) {
1679         hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
1680             if (arg->count < arg->skip) {
1681                 arg->count++;
1682                 continue;
1683             }
1684             if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1685                 arg->stop = 1;
1686                 return;
1687             }
1688             arg->count++;
1689         }
1690     }
1691 }
1692 
1693 static const struct Qdisc_class_ops cbq_class_ops = {
1694     .graft      =   cbq_graft,
1695     .leaf       =   cbq_leaf,
1696     .qlen_notify    =   cbq_qlen_notify,
1697     .find       =   cbq_find,
1698     .change     =   cbq_change_class,
1699     .delete     =   cbq_delete,
1700     .walk       =   cbq_walk,
1701     .tcf_block  =   cbq_tcf_block,
1702     .bind_tcf   =   cbq_bind_filter,
1703     .unbind_tcf =   cbq_unbind_filter,
1704     .dump       =   cbq_dump_class,
1705     .dump_stats =   cbq_dump_class_stats,
1706 };
1707 
1708 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
1709     .next       =   NULL,
1710     .cl_ops     =   &cbq_class_ops,
1711     .id     =   "cbq",
1712     .priv_size  =   sizeof(struct cbq_sched_data),
1713     .enqueue    =   cbq_enqueue,
1714     .dequeue    =   cbq_dequeue,
1715     .peek       =   qdisc_peek_dequeued,
1716     .init       =   cbq_init,
1717     .reset      =   cbq_reset,
1718     .destroy    =   cbq_destroy,
1719     .change     =   NULL,
1720     .dump       =   cbq_dump,
1721     .dump_stats =   cbq_dump_stats,
1722     .owner      =   THIS_MODULE,
1723 };
1724 
1725 static int __init cbq_module_init(void)
1726 {
1727     return register_qdisc(&cbq_qdisc_ops);
1728 }
1729 static void __exit cbq_module_exit(void)
1730 {
1731     unregister_qdisc(&cbq_qdisc_ops);
1732 }
1733 module_init(cbq_module_init)
1734 module_exit(cbq_module_exit)
1735 MODULE_LICENSE("GPL");