0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
0004  *
0005  * Authors: Martin Devera, <devik@cdi.cz>
0006  *
0007  * Credits (in time order) for older HTB versions:
0008  *              Stef Coene <stef.coene@docum.org>
0009  *          HTB support at LARTC mailing list
0010  *      Ondrej Kraus, <krauso@barr.cz>
0011  *          found missing INIT_QDISC(htb)
0012  *      Vladimir Smelhaus, Aamer Akhter, Bert Hubert
0013  *          helped a lot to locate nasty class stall bug
0014  *      Andi Kleen, Jamal Hadi, Bert Hubert
0015  *          code review and helpful comments on shaping
0016  *      Tomasz Wrona, <tw@eter.tym.pl>
0017  *          created test case so that I was able to fix nasty bug
0018  *      Wilfried Weissmann
0019  *          spotted bug in dequeue code and helped with fix
0020  *      Jiri Fojtasek
0021  *          fixed requeue routine
0022  *      and many others. thanks.
0023  */
0024 #include <linux/module.h>
0025 #include <linux/moduleparam.h>
0026 #include <linux/types.h>
0027 #include <linux/kernel.h>
0028 #include <linux/string.h>
0029 #include <linux/errno.h>
0030 #include <linux/skbuff.h>
0031 #include <linux/list.h>
0032 #include <linux/compiler.h>
0033 #include <linux/rbtree.h>
0034 #include <linux/workqueue.h>
0035 #include <linux/slab.h>
0036 #include <net/netlink.h>
0037 #include <net/sch_generic.h>
0038 #include <net/pkt_sched.h>
0039 #include <net/pkt_cls.h>
0040 
0041 /* HTB algorithm.
0042     Author: devik@cdi.cz
0043     ========================================================================
0044     HTB is like TBF with multiple classes. It is also similar to CBQ because
0045     it allows a priority to be assigned to each class in the hierarchy.
0046     In fact it is another implementation of Floyd's formal sharing.
0047 
0048     Levels:
0049     Each class is assigned a level. Leaves ALWAYS have level 0 and root
0050     classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
0051     one less than their parent.
0052 */
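/*
 * Editorial sketch (standalone userspace, not part of sch_htb.c) of the level
 * rule stated above.  "toy_class" is a hypothetical stand-in for struct
 * htb_class; the qdisc itself counts as level TC_HTB_MAXDEPTH, so a class
 * attached directly to the qdisc ends up at TC_HTB_MAXDEPTH-1, every interior
 * class sits one level below its parent, and leaves are always level 0.
 */
#include <assert.h>

#define TOY_HTB_MAXDEPTH 8      /* value of TC_HTB_MAXDEPTH in <linux/pkt_sched.h> */

struct toy_class {
        int level;
        struct toy_class *parent;
};

/* level of an interior (non-leaf) class: one below its parent, with the
 * qdisc itself counting as TOY_HTB_MAXDEPTH */
static int toy_interior_level(const struct toy_class *cl)
{
        return (cl->parent ? cl->parent->level : TOY_HTB_MAXDEPTH) - 1;
}

int main(void)
{
        struct toy_class root  = { .level = 0, .parent = NULL };
        struct toy_class inner = { .level = 0, .parent = &root };

        root.level  = toy_interior_level(&root);    /* 7 */
        inner.level = toy_interior_level(&inner);   /* 6 */
        assert(root.level == TOY_HTB_MAXDEPTH - 1);
        assert(inner.level == root.level - 1);
        return 0;                                   /* leaves stay at level 0 */
}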
0053 
0054 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
0055 #define HTB_VER 0x30011     /* the major number must match the version supplied by TC */
0056 
0057 #if HTB_VER >> 16 != TC_HTB_PROTOVER
0058 #error "Mismatched sch_htb.c and pkt_sch.h"
0059 #endif
0060 
0061 /* Module parameter and sysfs export */
0062 module_param    (htb_hysteresis, int, 0640);
0063 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
0064 
0065 static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
0066 module_param(htb_rate_est, int, 0640);
0067 MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
0068 
0069 /* used internally to keep the status of a single class */
0070 enum htb_cmode {
0071     HTB_CANT_SEND,      /* class can't send and can't borrow */
0072     HTB_MAY_BORROW,     /* class can't send but may borrow */
0073     HTB_CAN_SEND        /* class can send */
0074 };
0075 
0076 struct htb_prio {
0077     union {
0078         struct rb_root  row;
0079         struct rb_root  feed;
0080     };
0081     struct rb_node  *ptr;
0082     /* When a class changes from state 1->2 and disconnects from
0083      * its parent's feed, we lose the ptr value and start from the
0084      * first child again. Here we store the classid of the
0085      * last valid ptr (used when ptr is NULL).
0086      */
0087     u32     last_ptr_id;
0088 };
0089 
0090 /* interior & leaf nodes; props specific to leaves are marked L:
0091  * To reduce false sharing, place mostly read fields at beginning,
0092  * and mostly written ones at the end.
0093  */
0094 struct htb_class {
0095     struct Qdisc_class_common common;
0096     struct psched_ratecfg   rate;
0097     struct psched_ratecfg   ceil;
0098     s64         buffer, cbuffer;/* token bucket depth/rate */
0099     s64         mbuffer;    /* max wait time */
0100     u32         prio;       /* these two are used only by leaves... */
0101     int         quantum;    /* but stored for parent-to-leaf return */
0102 
0103     struct tcf_proto __rcu  *filter_list;   /* class attached filters */
0104     struct tcf_block    *block;
0105     int         filter_cnt;
0106 
0107     int         level;      /* our level (see above) */
0108     unsigned int        children;
0109     struct htb_class    *parent;    /* parent class */
0110 
0111     struct net_rate_estimator __rcu *rate_est;
0112 
0113     /*
0114      * Written often fields
0115      */
0116     struct gnet_stats_basic_sync bstats;
0117     struct gnet_stats_basic_sync bstats_bias;
0118     struct tc_htb_xstats    xstats; /* our special stats */
0119 
0120     /* token bucket parameters */
0121     s64         tokens, ctokens;/* current number of tokens */
0122     s64         t_c;        /* checkpoint time */
0123 
0124     union {
0125         struct htb_class_leaf {
0126             int     deficit[TC_HTB_MAXDEPTH];
0127             struct Qdisc    *q;
0128             struct netdev_queue *offload_queue;
0129         } leaf;
0130         struct htb_class_inner {
0131             struct htb_prio clprio[TC_HTB_NUMPRIO];
0132         } inner;
0133     };
0134     s64         pq_key;
0135 
0136     int         prio_activity;  /* for which prios are we active */
0137     enum htb_cmode      cmode;      /* current mode of the class */
0138     struct rb_node      pq_node;    /* node for event queue */
0139     struct rb_node      node[TC_HTB_NUMPRIO];   /* node for self or feed tree */
0140 
0141     unsigned int drops ____cacheline_aligned_in_smp;
0142     unsigned int        overlimits;
0143 };
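/*
 * Editorial illustration (standalone, not part of the kernel build) of the
 * read-mostly vs. write-often split noted in the comment above struct
 * htb_class: fields the fast path only reads come first, frequently written
 * counters go last and start on their own cache line so writers do not keep
 * invalidating the readers' line.  All "toy_" names are hypothetical.
 */
#include <stddef.h>
#include <stdint.h>

#define TOY_CACHELINE 64

struct toy_class_layout {
        /* read-mostly configuration */
        uint64_t rate_bps;
        uint64_t ceil_bps;
        int      quantum;

        /* write-often statistics, pushed onto a fresh cache line */
        _Alignas(TOY_CACHELINE) uint64_t bytes;
        uint64_t packets;
        unsigned int drops;
};

/* compile-time check that the hot counters really begin cache-line aligned */
_Static_assert(offsetof(struct toy_class_layout, bytes) % TOY_CACHELINE == 0,
               "stats must start on a cache line boundary");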
0144 
0145 struct htb_level {
0146     struct rb_root  wait_pq;
0147     struct htb_prio hprio[TC_HTB_NUMPRIO];
0148 };
0149 
0150 struct htb_sched {
0151     struct Qdisc_class_hash clhash;
0152     int         defcls;     /* class where unclassified flows go to */
0153     int         rate2quantum;   /* quant = rate / rate2quantum */
0154 
0155     /* filters for qdisc itself */
0156     struct tcf_proto __rcu  *filter_list;
0157     struct tcf_block    *block;
0158 
0159 #define HTB_WARN_TOOMANYEVENTS  0x1
0160     unsigned int        warned; /* only one warning */
0161     int         direct_qlen;
0162     struct work_struct  work;
0163 
0164     /* non shaped skbs; let them go directly thru */
0165     struct qdisc_skb_head   direct_queue;
0166     u32         direct_pkts;
0167     u32         overlimits;
0168 
0169     struct qdisc_watchdog   watchdog;
0170 
0171     s64         now;    /* cached dequeue time */
0172 
0173     /* time of nearest event per level (row) */
0174     s64         near_ev_cache[TC_HTB_MAXDEPTH];
0175 
0176     int         row_mask[TC_HTB_MAXDEPTH];
0177 
0178     struct htb_level    hlevel[TC_HTB_MAXDEPTH];
0179 
0180     struct Qdisc        **direct_qdiscs;
0181     unsigned int            num_direct_qdiscs;
0182 
0183     bool            offload;
0184 };
0185 
0186 /* find class in global hash table using given handle */
0187 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
0188 {
0189     struct htb_sched *q = qdisc_priv(sch);
0190     struct Qdisc_class_common *clc;
0191 
0192     clc = qdisc_class_find(&q->clhash, handle);
0193     if (clc == NULL)
0194         return NULL;
0195     return container_of(clc, struct htb_class, common);
0196 }
0197 
0198 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
0199 {
0200     return (unsigned long)htb_find(handle, sch);
0201 }
0202 /**
0203  * htb_classify - classify a packet into a class
0204  *
0205  * It returns NULL if the packet should be dropped or -1 if the packet
0206  * should be passed directly thru. In all other cases a leaf class is returned.
0207  * We allow direct class selection by classid in skb->priority. Then we examine
0208  * filters in the qdisc and in inner nodes (if a higher filter points to an
0209  * inner node). If we end up with classid MAJOR:0 we enqueue the skb into the
0210  * special internal fifo (direct). These packets then go directly thru. If we
0211  * still have no valid leaf we try to use the MAJOR:default leaf. If that is
0212  * still unsuccessful, we finish and return the direct queue.
0213  */
0214 #define HTB_DIRECT ((struct htb_class *)-1L)
0215 
0216 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
0217                       int *qerr)
0218 {
0219     struct htb_sched *q = qdisc_priv(sch);
0220     struct htb_class *cl;
0221     struct tcf_result res;
0222     struct tcf_proto *tcf;
0223     int result;
0224 
0225     /* allow to select class by setting skb->priority to valid classid;
0226      * note that nfmark can be used too by attaching filter fw with no
0227      * rules in it
0228      */
0229     if (skb->priority == sch->handle)
0230         return HTB_DIRECT;  /* X:0 (direct flow) selected */
0231     cl = htb_find(skb->priority, sch);
0232     if (cl) {
0233         if (cl->level == 0)
0234             return cl;
0235         /* Start with inner filter chain if a non-leaf class is selected */
0236         tcf = rcu_dereference_bh(cl->filter_list);
0237     } else {
0238         tcf = rcu_dereference_bh(q->filter_list);
0239     }
0240 
0241     *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
0242     while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
0243 #ifdef CONFIG_NET_CLS_ACT
0244         switch (result) {
0245         case TC_ACT_QUEUED:
0246         case TC_ACT_STOLEN:
0247         case TC_ACT_TRAP:
0248             *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
0249             fallthrough;
0250         case TC_ACT_SHOT:
0251             return NULL;
0252         }
0253 #endif
0254         cl = (void *)res.class;
0255         if (!cl) {
0256             if (res.classid == sch->handle)
0257                 return HTB_DIRECT;  /* X:0 (direct flow) */
0258             cl = htb_find(res.classid, sch);
0259             if (!cl)
0260                 break;  /* filter selected invalid classid */
0261         }
0262         if (!cl->level)
0263             return cl;  /* we hit leaf; return it */
0264 
0265         /* we have got inner class; apply inner filter chain */
0266         tcf = rcu_dereference_bh(cl->filter_list);
0267     }
0268     /* classification failed; try to use default class */
0269     cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
0270     if (!cl || cl->level)
0271         return HTB_DIRECT;  /* bad default .. this is safe bet */
0272     return cl;
0273 }
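/*
 * Editorial illustration (standalone userspace, not part of sch_htb.c) of the
 * 32-bit classid handling used by htb_classify() above: the top 16 bits carry
 * the qdisc major handle, the low 16 bits the class minor, and the
 * default-class lookup combines the qdisc's major with q->defcls.  The TOY_*
 * macros mirror TC_H_MAJ/TC_H_MIN/TC_H_MAKE from the UAPI headers.
 */
#include <stdio.h>
#include <stdint.h>

#define TOY_H_MAJ(h)         ((h) & 0xFFFF0000U)
#define TOY_H_MIN(h)         ((h) & 0x0000FFFFU)
#define TOY_H_MAKE(maj, min) (TOY_H_MAJ(maj) | TOY_H_MIN(min))

int main(void)
{
        uint32_t sch_handle = 0x00010000U;  /* qdisc handle "1:" */
        uint32_t defcls     = 0x30U;        /* default class minor (q->defcls) */
        uint32_t classid    = TOY_H_MAKE(sch_handle, defcls);

        /* prints "default class 1:30" */
        printf("default class %x:%x\n",
               (unsigned)(TOY_H_MAJ(classid) >> 16),
               (unsigned)TOY_H_MIN(classid));
        return 0;
}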
0274 
0275 /**
0276  * htb_add_to_id_tree - adds class to the round robin list
0277  * @root: the root of the tree
0278  * @cl: the class to add
0279  * @prio: the given prio in the class
0280  *
0281  * The routine adds the class to the list (actually a tree) sorted by classid.
0282  * Make sure that the class is not already on such a list for the given prio.
0283  */
0284 static void htb_add_to_id_tree(struct rb_root *root,
0285                    struct htb_class *cl, int prio)
0286 {
0287     struct rb_node **p = &root->rb_node, *parent = NULL;
0288 
0289     while (*p) {
0290         struct htb_class *c;
0291         parent = *p;
0292         c = rb_entry(parent, struct htb_class, node[prio]);
0293 
0294         if (cl->common.classid > c->common.classid)
0295             p = &parent->rb_right;
0296         else
0297             p = &parent->rb_left;
0298     }
0299     rb_link_node(&cl->node[prio], parent, p);
0300     rb_insert_color(&cl->node[prio], root);
0301 }
0302 
0303 /**
0304  * htb_add_to_wait_tree - adds class to the event queue with delay
0305  * @q: the priority event queue
0306  * @cl: the class to add
0307  * @delay: delay in nanoseconds
0308  *
0309  * The class is added to the priority event queue to indicate that the class
0310  * will change its mode at time cl->pq_key. Make sure that the class is not
0311  * already in the queue.
0312  */
0313 static void htb_add_to_wait_tree(struct htb_sched *q,
0314                  struct htb_class *cl, s64 delay)
0315 {
0316     struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
0317 
0318     cl->pq_key = q->now + delay;
0319     if (cl->pq_key == q->now)
0320         cl->pq_key++;
0321 
0322     /* update the nearest event cache */
0323     if (q->near_ev_cache[cl->level] > cl->pq_key)
0324         q->near_ev_cache[cl->level] = cl->pq_key;
0325 
0326     while (*p) {
0327         struct htb_class *c;
0328         parent = *p;
0329         c = rb_entry(parent, struct htb_class, pq_node);
0330         if (cl->pq_key >= c->pq_key)
0331             p = &parent->rb_right;
0332         else
0333             p = &parent->rb_left;
0334     }
0335     rb_link_node(&cl->pq_node, parent, p);
0336     rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
0337 }
0338 
0339 /**
0340  * htb_next_rb_node - advances *n to the next node in the binary tree
0341  * @n: the current node in the binary tree
0342  *
0343  * When we are past the last key, *n is set to NULL.
0344  * Average complexity is 2 steps per call.
0345  */
0346 static inline void htb_next_rb_node(struct rb_node **n)
0347 {
0348     *n = rb_next(*n);
0349 }
0350 
0351 /**
0352  * htb_add_class_to_row - add class to its row
0353  * @q: the priority event queue
0354  * @cl: the class to add
0355  * @mask: bitmap of the priorities within the class
0356  *
0357  * The class is added to the row at the priorities marked in mask.
0358  * It does nothing if mask == 0.
0359  */
0360 static inline void htb_add_class_to_row(struct htb_sched *q,
0361                     struct htb_class *cl, int mask)
0362 {
0363     q->row_mask[cl->level] |= mask;
0364     while (mask) {
0365         int prio = ffz(~mask);
0366         mask &= ~(1 << prio);
0367         htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
0368     }
0369 }
0370 
0371 /* If this triggers, it is a bug in this code, but it need not be fatal */
0372 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
0373 {
0374     if (RB_EMPTY_NODE(rb)) {
0375         WARN_ON(1);
0376     } else {
0377         rb_erase(rb, root);
0378         RB_CLEAR_NODE(rb);
0379     }
0380 }
0381 
0382 
0383 /**
0384  * htb_remove_class_from_row - removes class from its row
0385  * @q: the priority event queue
0386  * @cl: the class to remove
0387  * @mask: bitmap of the priorities within the class
0388  *
0389  * The class is removed from the row at the priorities marked in mask.
0390  * It does nothing if mask == 0.
0391  */
0392 static inline void htb_remove_class_from_row(struct htb_sched *q,
0393                          struct htb_class *cl, int mask)
0394 {
0395     int m = 0;
0396     struct htb_level *hlevel = &q->hlevel[cl->level];
0397 
0398     while (mask) {
0399         int prio = ffz(~mask);
0400         struct htb_prio *hprio = &hlevel->hprio[prio];
0401 
0402         mask &= ~(1 << prio);
0403         if (hprio->ptr == cl->node + prio)
0404             htb_next_rb_node(&hprio->ptr);
0405 
0406         htb_safe_rb_erase(cl->node + prio, &hprio->row);
0407         if (!hprio->row.rb_node)
0408             m |= 1 << prio;
0409     }
0410     q->row_mask[cl->level] &= ~m;
0411 }
0412 
0413 /**
0414  * htb_activate_prios - creates an active class's feed chain
0415  * @q: the priority event queue
0416  * @cl: the class to activate
0417  *
0418  * The class is connected to ancestors and/or appropriate rows
0419  * for the priorities it is participating in. cl->cmode must be the new
0420  * (activated) mode. It does nothing if cl->prio_activity == 0.
0421  */
0422 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
0423 {
0424     struct htb_class *p = cl->parent;
0425     long m, mask = cl->prio_activity;
0426 
0427     while (cl->cmode == HTB_MAY_BORROW && p && mask) {
0428         m = mask;
0429         while (m) {
0430             int prio = ffz(~m);
0431             m &= ~(1 << prio);
0432 
0433             if (p->inner.clprio[prio].feed.rb_node)
0434                 /* parent already has its feed in use, so
0435                  * reset the bit in mask as the parent is already ok
0436                  */
0437                 mask &= ~(1 << prio);
0438 
0439             htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
0440         }
0441         p->prio_activity |= mask;
0442         cl = p;
0443         p = cl->parent;
0444 
0445     }
0446     if (cl->cmode == HTB_CAN_SEND && mask)
0447         htb_add_class_to_row(q, cl, mask);
0448 }
0449 
0450 /**
0451  * htb_deactivate_prios - removes the class from the feed chain
0452  * @q: the priority event queue
0453  * @cl: the class to deactivate
0454  *
0455  * cl->cmode must represent the old mode (before deactivation). It does
0456  * nothing if cl->prio_activity == 0. The class is removed from all feed
0457  * chains and rows.
0458  */
0459 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
0460 {
0461     struct htb_class *p = cl->parent;
0462     long m, mask = cl->prio_activity;
0463 
0464     while (cl->cmode == HTB_MAY_BORROW && p && mask) {
0465         m = mask;
0466         mask = 0;
0467         while (m) {
0468             int prio = ffz(~m);
0469             m &= ~(1 << prio);
0470 
0471             if (p->inner.clprio[prio].ptr == cl->node + prio) {
0472                 /* we are removing a child which the parent feed
0473                  * points to - forget the pointer but remember the
0474                  * classid
0475                  */
0476                 p->inner.clprio[prio].last_ptr_id = cl->common.classid;
0477                 p->inner.clprio[prio].ptr = NULL;
0478             }
0479 
0480             htb_safe_rb_erase(cl->node + prio,
0481                       &p->inner.clprio[prio].feed);
0482 
0483             if (!p->inner.clprio[prio].feed.rb_node)
0484                 mask |= 1 << prio;
0485         }
0486 
0487         p->prio_activity &= ~mask;
0488         cl = p;
0489         p = cl->parent;
0490 
0491     }
0492     if (cl->cmode == HTB_CAN_SEND && mask)
0493         htb_remove_class_from_row(q, cl, mask);
0494 }
0495 
0496 static inline s64 htb_lowater(const struct htb_class *cl)
0497 {
0498     if (htb_hysteresis)
0499         return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
0500     else
0501         return 0;
0502 }
0503 static inline s64 htb_hiwater(const struct htb_class *cl)
0504 {
0505     if (htb_hysteresis)
0506         return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
0507     else
0508         return 0;
0509 }
0510 
0511 
0512 /**
0513  * htb_class_mode - computes and returns current class mode
0514  * @cl: the target class
0515  * @diff: time difference in nanoseconds
0516  *
0517  * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
0518  * is not HTB_CAN_SEND then *diff is updated to the time difference
0519  * from now to the time when cl will change its state.
0520  * It is also worth noting that the class mode doesn't change simply
0521  * at cl->{c,}tokens == 0; rather, there can be hysteresis over the
0522  * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
0523  * mode transitions per time unit. The speed gain is about 1/6.
0524  */
0525 static inline enum htb_cmode
0526 htb_class_mode(struct htb_class *cl, s64 *diff)
0527 {
0528     s64 toks;
0529 
0530     if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
0531         *diff = -toks;
0532         return HTB_CANT_SEND;
0533     }
0534 
0535     if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
0536         return HTB_CAN_SEND;
0537 
0538     *diff = -toks;
0539     return HTB_MAY_BORROW;
0540 }
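/*
 * Editorial illustration (standalone, not part of the kernel source) of the
 * three-way mode decision in htb_class_mode() above, with hysteresis disabled
 * (lowater and hiwater both 0, the default since htb_hysteresis = 0) and the
 * elapsed-time diff already folded into the token counts.  The token values
 * below are hypothetical nanoseconds' worth of transmission time.
 */
#include <stdio.h>

enum toy_cmode { TOY_CANT_SEND, TOY_MAY_BORROW, TOY_CAN_SEND };

static enum toy_cmode toy_class_mode(long long tokens, long long ctokens)
{
        if (ctokens < 0)          /* ceil bucket overdrawn: must not send at all */
                return TOY_CANT_SEND;
        if (tokens >= 0)          /* own rate bucket non-negative: may send */
                return TOY_CAN_SEND;
        return TOY_MAY_BORROW;    /* ceil ok but own rate exhausted: borrow */
}

int main(void)
{
        /* own rate overdrawn but ceil still positive -> borrow from parent */
        printf("%d\n", toy_class_mode(-5000, 10000) == TOY_MAY_BORROW);
        /* ceil overdrawn -> cannot send regardless of own tokens */
        printf("%d\n", toy_class_mode(10000, -1) == TOY_CANT_SEND);
        return 0;
}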
0541 
0542 /**
0543  * htb_change_class_mode - changes a class's mode
0544  * @q: the priority event queue
0545  * @cl: the target class
0546  * @diff: time difference in nanoseconds
0547  *
0548  * This should be the only way to change a class's mode under normal
0549  * circumstances. The routine will update the feed list linkage, change the
0550  * mode and add the class to the wait event queue if appropriate. The new mode
0551  * should be different from the old one and cl->pq_key has to be valid if
0552  * changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
0553  */
0554 static void
0555 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
0556 {
0557     enum htb_cmode new_mode = htb_class_mode(cl, diff);
0558 
0559     if (new_mode == cl->cmode)
0560         return;
0561 
0562     if (new_mode == HTB_CANT_SEND) {
0563         cl->overlimits++;
0564         q->overlimits++;
0565     }
0566 
0567     if (cl->prio_activity) {    /* not necessary: speed optimization */
0568         if (cl->cmode != HTB_CANT_SEND)
0569             htb_deactivate_prios(q, cl);
0570         cl->cmode = new_mode;
0571         if (new_mode != HTB_CANT_SEND)
0572             htb_activate_prios(q, cl);
0573     } else
0574         cl->cmode = new_mode;
0575 }
0576 
0577 /**
0578  * htb_activate - inserts leaf cl into appropriate active feeds
0579  * @q: the priority event queue
0580  * @cl: the target class
0581  *
0582  * The routine learns the (new) priority of the leaf and activates the
0583  * feed chain for that prio. It can safely be called on an already
0584  * active leaf.
0585  */
0586 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
0587 {
0588     WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
0589 
0590     if (!cl->prio_activity) {
0591         cl->prio_activity = 1 << cl->prio;
0592         htb_activate_prios(q, cl);
0593     }
0594 }
0595 
0596 /**
0597  * htb_deactivate - remove leaf cl from active feeds
0598  * @q: the priority event queue
0599  * @cl: the target class
0600  *
0601  * Make sure that the leaf is active. In other words, it can't be called
0602  * with a non-active leaf.
0603  */
0604 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
0605 {
0606     WARN_ON(!cl->prio_activity);
0607 
0608     htb_deactivate_prios(q, cl);
0609     cl->prio_activity = 0;
0610 }
0611 
0612 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
0613                struct sk_buff **to_free)
0614 {
0615     int ret;
0616     unsigned int len = qdisc_pkt_len(skb);
0617     struct htb_sched *q = qdisc_priv(sch);
0618     struct htb_class *cl = htb_classify(skb, sch, &ret);
0619 
0620     if (cl == HTB_DIRECT) {
0621         /* enqueue to helper queue */
0622         if (q->direct_queue.qlen < q->direct_qlen) {
0623             __qdisc_enqueue_tail(skb, &q->direct_queue);
0624             q->direct_pkts++;
0625         } else {
0626             return qdisc_drop(skb, sch, to_free);
0627         }
0628 #ifdef CONFIG_NET_CLS_ACT
0629     } else if (!cl) {
0630         if (ret & __NET_XMIT_BYPASS)
0631             qdisc_qstats_drop(sch);
0632         __qdisc_drop(skb, to_free);
0633         return ret;
0634 #endif
0635     } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
0636                     to_free)) != NET_XMIT_SUCCESS) {
0637         if (net_xmit_drop_count(ret)) {
0638             qdisc_qstats_drop(sch);
0639             cl->drops++;
0640         }
0641         return ret;
0642     } else {
0643         htb_activate(q, cl);
0644     }
0645 
0646     sch->qstats.backlog += len;
0647     sch->q.qlen++;
0648     return NET_XMIT_SUCCESS;
0649 }
0650 
0651 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
0652 {
0653     s64 toks = diff + cl->tokens;
0654 
0655     if (toks > cl->buffer)
0656         toks = cl->buffer;
0657     toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
0658     if (toks <= -cl->mbuffer)
0659         toks = 1 - cl->mbuffer;
0660 
0661     cl->tokens = toks;
0662 }
0663 
0664 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
0665 {
0666     s64 toks = diff + cl->ctokens;
0667 
0668     if (toks > cl->cbuffer)
0669         toks = cl->cbuffer;
0670     toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
0671     if (toks <= -cl->mbuffer)
0672         toks = 1 - cl->mbuffer;
0673 
0674     cl->ctokens = toks;
0675 }
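/*
 * Editorial illustration (standalone userspace, not part of sch_htb.c) of the
 * token arithmetic in htb_accnt_tokens()/htb_accnt_ctokens(): tokens are kept
 * as nanoseconds' worth of transmission time, a dequeued packet costs its wire
 * time at the configured rate (a rough stand-in for psched_l2t_ns()), and the
 * balance is clamped to the range [1 - mbuffer, buffer].  The numbers below
 * are hypothetical (1500-byte packet at 1 Mbit/s).
 */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000LL

/* bytes -> nanoseconds of wire time at rate_bps bits per second */
static int64_t toy_l2t_ns(uint64_t rate_bps, unsigned int bytes)
{
        return (int64_t)bytes * 8 * NSEC_PER_SEC / (int64_t)rate_bps;
}

static int64_t toy_account(int64_t tokens, int64_t diff, int64_t buffer,
                           int64_t mbuffer, uint64_t rate_bps, unsigned int bytes)
{
        int64_t toks = diff + tokens;

        if (toks > buffer)                 /* can't save up more than one burst */
                toks = buffer;
        toks -= toy_l2t_ns(rate_bps, bytes);
        if (toks <= -mbuffer)              /* don't remember an unbounded debt */
                toks = 1 - mbuffer;
        return toks;
}

int main(void)
{
        /* 1500 bytes at 1 Mbit/s costs 12 ms of tokens (12,000,000 ns) */
        printf("%lld\n", (long long)toy_l2t_ns(1000000, 1500));
        /* full 10 ms buffer, no elapsed time: balance goes 10 ms - 12 ms = -2 ms */
        printf("%lld\n", (long long)toy_account(10000000, 0, 10000000,
                                                60000000, 1000000, 1500));
        return 0;
}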
0676 
0677 /**
0678  * htb_charge_class - charges amount "bytes" to leaf and ancestors
0679  * @q: the priority event queue
0680  * @cl: the class to start iterating from
0681  * @level: the minimum level to account
0682  * @skb: the socket buffer
0683  *
0684  * The routine assumes that a packet "bytes" long was dequeued from leaf cl
0685  * borrowing from "level". It accounts bytes to the ceil leaky bucket for the
0686  * leaf and all ancestors, and to the rate bucket for ancestors at levels
0687  * "level" and higher. It also handles a possible change of mode resulting
0688  * from the update. Note that the mode can also increase here (MAY_BORROW to
0689  * CAN_SEND) because we can use a more precise clock than the event queue here.
0690  * In such a case we remove the class from the event queue first.
0691  */
0692 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
0693                  int level, struct sk_buff *skb)
0694 {
0695     int bytes = qdisc_pkt_len(skb);
0696     enum htb_cmode old_mode;
0697     s64 diff;
0698 
0699     while (cl) {
0700         diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
0701         if (cl->level >= level) {
0702             if (cl->level == level)
0703                 cl->xstats.lends++;
0704             htb_accnt_tokens(cl, bytes, diff);
0705         } else {
0706             cl->xstats.borrows++;
0707             cl->tokens += diff; /* we moved t_c; update tokens */
0708         }
0709         htb_accnt_ctokens(cl, bytes, diff);
0710         cl->t_c = q->now;
0711 
0712         old_mode = cl->cmode;
0713         diff = 0;
0714         htb_change_class_mode(q, cl, &diff);
0715         if (old_mode != cl->cmode) {
0716             if (old_mode != HTB_CAN_SEND)
0717                 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
0718             if (cl->cmode != HTB_CAN_SEND)
0719                 htb_add_to_wait_tree(q, cl, diff);
0720         }
0721 
0722         /* update basic stats except for leaves which are already updated */
0723         if (cl->level)
0724             bstats_update(&cl->bstats, skb);
0725 
0726         cl = cl->parent;
0727     }
0728 }
0729 
0730 /**
0731  * htb_do_events - make mode changes to classes at the level
0732  * @q: the priority event queue
0733  * @level: which wait_pq in 'q->hlevel'
0734  * @start: start jiffies
0735  *
0736  * Scans event queue for pending events and applies them. Returns time of
0737  * next pending event (0 for no event in pq, q->now for too many events).
0738  * Note: Only events with cl->pq_key <= q->now are applied.
0739  */
0740 static s64 htb_do_events(struct htb_sched *q, const int level,
0741              unsigned long start)
0742 {
0743     /* don't run for longer than 2 jiffies; 2 is used instead of
0744      * 1 to simplify things when jiffy is going to be incremented
0745      * too soon
0746      */
0747     unsigned long stop_at = start + 2;
0748     struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
0749 
0750     while (time_before(jiffies, stop_at)) {
0751         struct htb_class *cl;
0752         s64 diff;
0753         struct rb_node *p = rb_first(wait_pq);
0754 
0755         if (!p)
0756             return 0;
0757 
0758         cl = rb_entry(p, struct htb_class, pq_node);
0759         if (cl->pq_key > q->now)
0760             return cl->pq_key;
0761 
0762         htb_safe_rb_erase(p, wait_pq);
0763         diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
0764         htb_change_class_mode(q, cl, &diff);
0765         if (cl->cmode != HTB_CAN_SEND)
0766             htb_add_to_wait_tree(q, cl, diff);
0767     }
0768 
0769     /* too much load - let's continue after a break for scheduling */
0770     if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
0771         pr_warn("htb: too many events!\n");
0772         q->warned |= HTB_WARN_TOOMANYEVENTS;
0773     }
0774 
0775     return q->now;
0776 }
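/*
 * Editorial illustration (standalone, not part of the kernel build) of the
 * wrap-safe time comparison behind the two-jiffy budget in htb_do_events():
 * time_before() boils down to a signed difference, so the check keeps working
 * even when the jiffies counter wraps around.  The toy_* names are hypothetical.
 */
#include <stdio.h>

typedef unsigned long toy_jiffies_t;

static int toy_time_before(toy_jiffies_t a, toy_jiffies_t b)
{
        return (long)(a - b) < 0;       /* same idiom as the kernel's time_before() */
}

int main(void)
{
        toy_jiffies_t start = (toy_jiffies_t)-1;   /* just before wraparound */
        toy_jiffies_t stop_at = start + 2;         /* wraps around to 1 */

        printf("%d\n", toy_time_before(start, stop_at));      /* 1: still in budget */
        printf("%d\n", toy_time_before(start + 3, stop_at));  /* 0: budget exhausted */
        return 0;
}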
0777 
0778 /* Returns class->node+prio from the id-tree where the class's id is >= id.
0779  * NULL if no such one exists.
0780  */
0781 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
0782                           u32 id)
0783 {
0784     struct rb_node *r = NULL;
0785     while (n) {
0786         struct htb_class *cl =
0787             rb_entry(n, struct htb_class, node[prio]);
0788 
0789         if (id > cl->common.classid) {
0790             n = n->rb_right;
0791         } else if (id < cl->common.classid) {
0792             r = n;
0793             n = n->rb_left;
0794         } else {
0795             return n;
0796         }
0797     }
0798     return r;
0799 }
0800 
0801 /**
0802  * htb_lookup_leaf - returns next leaf class in DRR order
0803  * @hprio: the per-prio row/feed to search
0804  * @prio: which prio within the class
0805  *
0806  * Find the leaf that the current feed pointer points to.
0807  */
0808 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
0809 {
0810     int i;
0811     struct {
0812         struct rb_node *root;
0813         struct rb_node **pptr;
0814         u32 *pid;
0815     } stk[TC_HTB_MAXDEPTH], *sp = stk;
0816 
0817     BUG_ON(!hprio->row.rb_node);
0818     sp->root = hprio->row.rb_node;
0819     sp->pptr = &hprio->ptr;
0820     sp->pid = &hprio->last_ptr_id;
0821 
0822     for (i = 0; i < 65535; i++) {
0823         if (!*sp->pptr && *sp->pid) {
0824             /* ptr was invalidated but id is valid - try to recover
0825              * the original or next ptr
0826              */
0827             *sp->pptr =
0828                 htb_id_find_next_upper(prio, sp->root, *sp->pid);
0829         }
0830         *sp->pid = 0;   /* ptr is valid now, so remove this hint as it
0831                  * can become out of date quickly
0832                  */
0833         if (!*sp->pptr) {   /* we are at right end; rewind & go up */
0834             *sp->pptr = sp->root;
0835             while ((*sp->pptr)->rb_left)
0836                 *sp->pptr = (*sp->pptr)->rb_left;
0837             if (sp > stk) {
0838                 sp--;
0839                 if (!*sp->pptr) {
0840                     WARN_ON(1);
0841                     return NULL;
0842                 }
0843                 htb_next_rb_node(sp->pptr);
0844             }
0845         } else {
0846             struct htb_class *cl;
0847             struct htb_prio *clp;
0848 
0849             cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
0850             if (!cl->level)
0851                 return cl;
0852             clp = &cl->inner.clprio[prio];
0853             (++sp)->root = clp->feed.rb_node;
0854             sp->pptr = &clp->ptr;
0855             sp->pid = &clp->last_ptr_id;
0856         }
0857     }
0858     WARN_ON(1);
0859     return NULL;
0860 }
0861 
0862 /* dequeues packet at given priority and level; call only if
0863  * you are sure that there is an active class at prio/level
0864  */
0865 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
0866                     const int level)
0867 {
0868     struct sk_buff *skb = NULL;
0869     struct htb_class *cl, *start;
0870     struct htb_level *hlevel = &q->hlevel[level];
0871     struct htb_prio *hprio = &hlevel->hprio[prio];
0872 
0873     /* look initial class up in the row */
0874     start = cl = htb_lookup_leaf(hprio, prio);
0875 
0876     do {
0877 next:
0878         if (unlikely(!cl))
0879             return NULL;
0880 
0881         /* the class can be empty - it is unlikely but can be true if the
0882          * leaf qdisc drops packets in its enqueue routine or if someone used
0883          * the graft operation on the leaf since the last dequeue;
0884          * simply deactivate and skip such a class
0885          */
0886         if (unlikely(cl->leaf.q->q.qlen == 0)) {
0887             struct htb_class *next;
0888             htb_deactivate(q, cl);
0889 
0890             /* row/level might become empty */
0891             if ((q->row_mask[level] & (1 << prio)) == 0)
0892                 return NULL;
0893 
0894             next = htb_lookup_leaf(hprio, prio);
0895 
0896             if (cl == start)    /* fix start if we just deleted it */
0897                 start = next;
0898             cl = next;
0899             goto next;
0900         }
0901 
0902         skb = cl->leaf.q->dequeue(cl->leaf.q);
0903         if (likely(skb != NULL))
0904             break;
0905 
0906         qdisc_warn_nonwc("htb", cl->leaf.q);
0907         htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
0908                      &q->hlevel[0].hprio[prio].ptr);
0909         cl = htb_lookup_leaf(hprio, prio);
0910 
0911     } while (cl != start);
0912 
0913     if (likely(skb != NULL)) {
0914         bstats_update(&cl->bstats, skb);
0915         cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
0916         if (cl->leaf.deficit[level] < 0) {
0917             cl->leaf.deficit[level] += cl->quantum;
0918             htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
0919                          &q->hlevel[0].hprio[prio].ptr);
0920         }
0921         /* this used to be after charge_class but this constellation
0922          * gives us slightly better performance
0923          */
0924         if (!cl->leaf.q->q.qlen)
0925             htb_deactivate(q, cl);
0926         htb_charge_class(q, cl, level, skb);
0927     }
0928     return skb;
0929 }
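/*
 * Editorial illustration (standalone, not part of the kernel source) of the
 * deficit round robin step at the end of htb_dequeue_tree(): each leaf keeps
 * a per-level deficit, every dequeued packet is charged against it, and only
 * when the deficit goes negative does the leaf top it up by its quantum and
 * yield the round-robin pointer to the next class.
 */
#include <stdio.h>
#include <stdbool.h>

struct toy_leaf {
        int deficit;
        int quantum;
};

/* returns true when the round-robin pointer should advance to the next leaf */
static bool toy_drr_charge(struct toy_leaf *leaf, int pkt_len)
{
        leaf->deficit -= pkt_len;
        if (leaf->deficit < 0) {
                leaf->deficit += leaf->quantum;
                return true;            /* this leaf has used up its turn */
        }
        return false;                   /* same leaf keeps the pointer */
}

int main(void)
{
        struct toy_leaf leaf = { .deficit = 0, .quantum = 1500 };

        /* the first 1000-byte packet drives the deficit negative (advance),
         * the refilled 500-byte credit then covers the next 400-byte packet */
        printf("%d\n", toy_drr_charge(&leaf, 1000));  /* 1 (advance) */
        printf("%d\n", toy_drr_charge(&leaf, 400));   /* 0 (stay)    */
        return 0;
}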
0930 
0931 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
0932 {
0933     struct sk_buff *skb;
0934     struct htb_sched *q = qdisc_priv(sch);
0935     int level;
0936     s64 next_event;
0937     unsigned long start_at;
0938 
0939     /* try to dequeue direct packets as high prio (!) to minimize cpu work */
0940     skb = __qdisc_dequeue_head(&q->direct_queue);
0941     if (skb != NULL) {
0942 ok:
0943         qdisc_bstats_update(sch, skb);
0944         qdisc_qstats_backlog_dec(sch, skb);
0945         sch->q.qlen--;
0946         return skb;
0947     }
0948 
0949     if (!sch->q.qlen)
0950         goto fin;
0951     q->now = ktime_get_ns();
0952     start_at = jiffies;
0953 
0954     next_event = q->now + 5LLU * NSEC_PER_SEC;
0955 
0956     for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
0957         /* common case optimization - skip event handler quickly */
0958         int m;
0959         s64 event = q->near_ev_cache[level];
0960 
0961         if (q->now >= event) {
0962             event = htb_do_events(q, level, start_at);
0963             if (!event)
0964                 event = q->now + NSEC_PER_SEC;
0965             q->near_ev_cache[level] = event;
0966         }
0967 
0968         if (next_event > event)
0969             next_event = event;
0970 
0971         m = ~q->row_mask[level];
0972         while (m != (int)(-1)) {
0973             int prio = ffz(m);
0974 
0975             m |= 1 << prio;
0976             skb = htb_dequeue_tree(q, prio, level);
0977             if (likely(skb != NULL))
0978                 goto ok;
0979         }
0980     }
0981     if (likely(next_event > q->now))
0982         qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
0983     else
0984         schedule_work(&q->work);
0985 fin:
0986     return skb;
0987 }
0988 
0989 /* reset all classes */
0990 /* always called under BH & queue lock */
0991 static void htb_reset(struct Qdisc *sch)
0992 {
0993     struct htb_sched *q = qdisc_priv(sch);
0994     struct htb_class *cl;
0995     unsigned int i;
0996 
0997     for (i = 0; i < q->clhash.hashsize; i++) {
0998         hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
0999             if (cl->level)
1000                 memset(&cl->inner, 0, sizeof(cl->inner));
1001             else {
1002                 if (cl->leaf.q && !q->offload)
1003                     qdisc_reset(cl->leaf.q);
1004             }
1005             cl->prio_activity = 0;
1006             cl->cmode = HTB_CAN_SEND;
1007         }
1008     }
1009     qdisc_watchdog_cancel(&q->watchdog);
1010     __qdisc_reset_queue(&q->direct_queue);
1011     sch->q.qlen = 0;
1012     sch->qstats.backlog = 0;
1013     memset(q->hlevel, 0, sizeof(q->hlevel));
1014     memset(q->row_mask, 0, sizeof(q->row_mask));
1015 }
1016 
1017 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1018     [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
1019     [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
1020     [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1021     [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1022     [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1023     [TCA_HTB_RATE64] = { .type = NLA_U64 },
1024     [TCA_HTB_CEIL64] = { .type = NLA_U64 },
1025     [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
1026 };
1027 
1028 static void htb_work_func(struct work_struct *work)
1029 {
1030     struct htb_sched *q = container_of(work, struct htb_sched, work);
1031     struct Qdisc *sch = q->watchdog.qdisc;
1032 
1033     rcu_read_lock();
1034     __netif_schedule(qdisc_root(sch));
1035     rcu_read_unlock();
1036 }
1037 
1038 static void htb_set_lockdep_class_child(struct Qdisc *q)
1039 {
1040     static struct lock_class_key child_key;
1041 
1042     lockdep_set_class(qdisc_lock(q), &child_key);
1043 }
1044 
1045 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1046 {
1047     return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1048 }
1049 
1050 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1051             struct netlink_ext_ack *extack)
1052 {
1053     struct net_device *dev = qdisc_dev(sch);
1054     struct tc_htb_qopt_offload offload_opt;
1055     struct htb_sched *q = qdisc_priv(sch);
1056     struct nlattr *tb[TCA_HTB_MAX + 1];
1057     struct tc_htb_glob *gopt;
1058     unsigned int ntx;
1059     bool offload;
1060     int err;
1061 
1062     qdisc_watchdog_init(&q->watchdog, sch);
1063     INIT_WORK(&q->work, htb_work_func);
1064 
1065     if (!opt)
1066         return -EINVAL;
1067 
1068     err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1069     if (err)
1070         return err;
1071 
1072     err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1073                       NULL);
1074     if (err < 0)
1075         return err;
1076 
1077     if (!tb[TCA_HTB_INIT])
1078         return -EINVAL;
1079 
1080     gopt = nla_data(tb[TCA_HTB_INIT]);
1081     if (gopt->version != HTB_VER >> 16)
1082         return -EINVAL;
1083 
1084     offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1085 
1086     if (offload) {
1087         if (sch->parent != TC_H_ROOT) {
1088             NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload");
1089             return -EOPNOTSUPP;
1090         }
1091 
1092         if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) {
1093             NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on");
1094             return -EOPNOTSUPP;
1095         }
1096 
1097         q->num_direct_qdiscs = dev->real_num_tx_queues;
1098         q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1099                        sizeof(*q->direct_qdiscs),
1100                        GFP_KERNEL);
1101         if (!q->direct_qdiscs)
1102             return -ENOMEM;
1103     }
1104 
1105     err = qdisc_class_hash_init(&q->clhash);
1106     if (err < 0)
1107         goto err_free_direct_qdiscs;
1108 
1109     qdisc_skb_head_init(&q->direct_queue);
1110 
1111     if (tb[TCA_HTB_DIRECT_QLEN])
1112         q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1113     else
1114         q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1115 
1116     if ((q->rate2quantum = gopt->rate2quantum) < 1)
1117         q->rate2quantum = 1;
1118     q->defcls = gopt->defcls;
1119 
1120     if (!offload)
1121         return 0;
1122 
1123     for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1124         struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1125         struct Qdisc *qdisc;
1126 
1127         qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1128                       TC_H_MAKE(sch->handle, 0), extack);
1129         if (!qdisc) {
1130             err = -ENOMEM;
1131             goto err_free_qdiscs;
1132         }
1133 
1134         htb_set_lockdep_class_child(qdisc);
1135         q->direct_qdiscs[ntx] = qdisc;
1136         qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1137     }
1138 
1139     sch->flags |= TCQ_F_MQROOT;
1140 
1141     offload_opt = (struct tc_htb_qopt_offload) {
1142         .command = TC_HTB_CREATE,
1143         .parent_classid = TC_H_MAJ(sch->handle) >> 16,
1144         .classid = TC_H_MIN(q->defcls),
1145         .extack = extack,
1146     };
1147     err = htb_offload(dev, &offload_opt);
1148     if (err)
1149         goto err_free_qdiscs;
1150 
1151     /* Defer this assignment, so that htb_destroy skips offload-related
1152      * parts (especially calling ndo_setup_tc) on errors.
1153      */
1154     q->offload = true;
1155 
1156     return 0;
1157 
1158 err_free_qdiscs:
1159     for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
1160          ntx++)
1161         qdisc_put(q->direct_qdiscs[ntx]);
1162 
1163     qdisc_class_hash_destroy(&q->clhash);
1164     /* Prevent use-after-free and double-free when htb_destroy gets called.
1165      */
1166     q->clhash.hash = NULL;
1167     q->clhash.hashsize = 0;
1168 
1169 err_free_direct_qdiscs:
1170     kfree(q->direct_qdiscs);
1171     q->direct_qdiscs = NULL;
1172     return err;
1173 }
1174 
1175 static void htb_attach_offload(struct Qdisc *sch)
1176 {
1177     struct net_device *dev = qdisc_dev(sch);
1178     struct htb_sched *q = qdisc_priv(sch);
1179     unsigned int ntx;
1180 
1181     for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1182         struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1183 
1184         old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1185         qdisc_put(old);
1186         qdisc_hash_add(qdisc, false);
1187     }
1188     for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1189         struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1190         struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1191 
1192         qdisc_put(old);
1193     }
1194 
1195     kfree(q->direct_qdiscs);
1196     q->direct_qdiscs = NULL;
1197 }
1198 
1199 static void htb_attach_software(struct Qdisc *sch)
1200 {
1201     struct net_device *dev = qdisc_dev(sch);
1202     unsigned int ntx;
1203 
1204     /* Resemble qdisc_graft behavior. */
1205     for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1206         struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1207         struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1208 
1209         qdisc_refcount_inc(sch);
1210 
1211         qdisc_put(old);
1212     }
1213 }
1214 
1215 static void htb_attach(struct Qdisc *sch)
1216 {
1217     struct htb_sched *q = qdisc_priv(sch);
1218 
1219     if (q->offload)
1220         htb_attach_offload(sch);
1221     else
1222         htb_attach_software(sch);
1223 }
1224 
1225 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1226 {
1227     struct htb_sched *q = qdisc_priv(sch);
1228     struct nlattr *nest;
1229     struct tc_htb_glob gopt;
1230 
1231     if (q->offload)
1232         sch->flags |= TCQ_F_OFFLOADED;
1233     else
1234         sch->flags &= ~TCQ_F_OFFLOADED;
1235 
1236     sch->qstats.overlimits = q->overlimits;
1237     /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1238      * no change can happen to the qdisc parameters.
1239      */
1240 
1241     gopt.direct_pkts = q->direct_pkts;
1242     gopt.version = HTB_VER;
1243     gopt.rate2quantum = q->rate2quantum;
1244     gopt.defcls = q->defcls;
1245     gopt.debug = 0;
1246 
1247     nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1248     if (nest == NULL)
1249         goto nla_put_failure;
1250     if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1251         nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1252         goto nla_put_failure;
1253     if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1254         goto nla_put_failure;
1255 
1256     return nla_nest_end(skb, nest);
1257 
1258 nla_put_failure:
1259     nla_nest_cancel(skb, nest);
1260     return -1;
1261 }
1262 
1263 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1264               struct sk_buff *skb, struct tcmsg *tcm)
1265 {
1266     struct htb_class *cl = (struct htb_class *)arg;
1267     struct htb_sched *q = qdisc_priv(sch);
1268     struct nlattr *nest;
1269     struct tc_htb_opt opt;
1270 
1271     /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1272      * no change can happen to the class parameters.
1273      */
1274     tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1275     tcm->tcm_handle = cl->common.classid;
1276     if (!cl->level && cl->leaf.q)
1277         tcm->tcm_info = cl->leaf.q->handle;
1278 
1279     nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1280     if (nest == NULL)
1281         goto nla_put_failure;
1282 
1283     memset(&opt, 0, sizeof(opt));
1284 
1285     psched_ratecfg_getrate(&opt.rate, &cl->rate);
1286     opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1287     psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1288     opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1289     opt.quantum = cl->quantum;
1290     opt.prio = cl->prio;
1291     opt.level = cl->level;
1292     if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1293         goto nla_put_failure;
1294     if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1295         goto nla_put_failure;
1296     if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1297         nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1298                   TCA_HTB_PAD))
1299         goto nla_put_failure;
1300     if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1301         nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1302                   TCA_HTB_PAD))
1303         goto nla_put_failure;
1304 
1305     return nla_nest_end(skb, nest);
1306 
1307 nla_put_failure:
1308     nla_nest_cancel(skb, nest);
1309     return -1;
1310 }
1311 
1312 static void htb_offload_aggregate_stats(struct htb_sched *q,
1313                     struct htb_class *cl)
1314 {
1315     u64 bytes = 0, packets = 0;
1316     struct htb_class *c;
1317     unsigned int i;
1318 
1319     gnet_stats_basic_sync_init(&cl->bstats);
1320 
1321     for (i = 0; i < q->clhash.hashsize; i++) {
1322         hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1323             struct htb_class *p = c;
1324 
1325             while (p && p->level < cl->level)
1326                 p = p->parent;
1327 
1328             if (p != cl)
1329                 continue;
1330 
1331             bytes += u64_stats_read(&c->bstats_bias.bytes);
1332             packets += u64_stats_read(&c->bstats_bias.packets);
1333             if (c->level == 0) {
1334                 bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
1335                 packets += u64_stats_read(&c->leaf.q->bstats.packets);
1336             }
1337         }
1338     }
1339     _bstats_update(&cl->bstats, bytes, packets);
1340 }
1341 
1342 static int
1343 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1344 {
1345     struct htb_class *cl = (struct htb_class *)arg;
1346     struct htb_sched *q = qdisc_priv(sch);
1347     struct gnet_stats_queue qs = {
1348         .drops = cl->drops,
1349         .overlimits = cl->overlimits,
1350     };
1351     __u32 qlen = 0;
1352 
1353     if (!cl->level && cl->leaf.q)
1354         qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1355 
1356     cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1357                     INT_MIN, INT_MAX);
1358     cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1359                      INT_MIN, INT_MAX);
1360 
1361     if (q->offload) {
1362         if (!cl->level) {
1363             if (cl->leaf.q)
1364                 cl->bstats = cl->leaf.q->bstats;
1365             else
1366                 gnet_stats_basic_sync_init(&cl->bstats);
1367             _bstats_update(&cl->bstats,
1368                        u64_stats_read(&cl->bstats_bias.bytes),
1369                        u64_stats_read(&cl->bstats_bias.packets));
1370         } else {
1371             htb_offload_aggregate_stats(q, cl);
1372         }
1373     }
1374 
1375     if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
1376         gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1377         gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1378         return -1;
1379 
1380     return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1381 }
1382 
1383 static struct netdev_queue *
1384 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1385 {
1386     struct net_device *dev = qdisc_dev(sch);
1387     struct tc_htb_qopt_offload offload_opt;
1388     struct htb_sched *q = qdisc_priv(sch);
1389     int err;
1390 
1391     if (!q->offload)
1392         return sch->dev_queue;
1393 
1394     offload_opt = (struct tc_htb_qopt_offload) {
1395         .command = TC_HTB_LEAF_QUERY_QUEUE,
1396         .classid = TC_H_MIN(tcm->tcm_parent),
1397     };
1398     err = htb_offload(dev, &offload_opt);
1399     if (err || offload_opt.qid >= dev->num_tx_queues)
1400         return NULL;
1401     return netdev_get_tx_queue(dev, offload_opt.qid);
1402 }
1403 
1404 static struct Qdisc *
1405 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1406 {
1407     struct net_device *dev = dev_queue->dev;
1408     struct Qdisc *old_q;
1409 
1410     if (dev->flags & IFF_UP)
1411         dev_deactivate(dev);
1412     old_q = dev_graft_qdisc(dev_queue, new_q);
1413     if (new_q)
1414         new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1415     if (dev->flags & IFF_UP)
1416         dev_activate(dev);
1417 
1418     return old_q;
1419 }
1420 
1421 static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
1422 {
1423     struct netdev_queue *queue;
1424 
1425     queue = cl->leaf.offload_queue;
1426     if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
1427         WARN_ON(cl->leaf.q->dev_queue != queue);
1428 
1429     return queue;
1430 }
1431 
1432 static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
1433                    struct htb_class *cl_new, bool destroying)
1434 {
1435     struct netdev_queue *queue_old, *queue_new;
1436     struct net_device *dev = qdisc_dev(sch);
1437 
1438     queue_old = htb_offload_get_queue(cl_old);
1439     queue_new = htb_offload_get_queue(cl_new);
1440 
1441     if (!destroying) {
1442         struct Qdisc *qdisc;
1443 
1444         if (dev->flags & IFF_UP)
1445             dev_deactivate(dev);
1446         qdisc = dev_graft_qdisc(queue_old, NULL);
1447         WARN_ON(qdisc != cl_old->leaf.q);
1448     }
1449 
1450     if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
1451         cl_old->leaf.q->dev_queue = queue_new;
1452     cl_old->leaf.offload_queue = queue_new;
1453 
1454     if (!destroying) {
1455         struct Qdisc *qdisc;
1456 
1457         qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
1458         if (dev->flags & IFF_UP)
1459             dev_activate(dev);
1460         WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1461     }
1462 }
1463 
1464 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1465              struct Qdisc **old, struct netlink_ext_ack *extack)
1466 {
1467     struct netdev_queue *dev_queue = sch->dev_queue;
1468     struct htb_class *cl = (struct htb_class *)arg;
1469     struct htb_sched *q = qdisc_priv(sch);
1470     struct Qdisc *old_q;
1471 
1472     if (cl->level)
1473         return -EINVAL;
1474 
1475     if (q->offload)
1476         dev_queue = htb_offload_get_queue(cl);
1477 
1478     if (!new) {
1479         new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1480                     cl->common.classid, extack);
1481         if (!new)
1482             return -ENOBUFS;
1483     }
1484 
1485     if (q->offload) {
1486         htb_set_lockdep_class_child(new);
1487         /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1488         qdisc_refcount_inc(new);
1489         old_q = htb_graft_helper(dev_queue, new);
1490     }
1491 
1492     *old = qdisc_replace(sch, new, &cl->leaf.q);
1493 
1494     if (q->offload) {
1495         WARN_ON(old_q != *old);
1496         qdisc_put(old_q);
1497     }
1498 
1499     return 0;
1500 }
1501 
1502 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1503 {
1504     struct htb_class *cl = (struct htb_class *)arg;
1505     return !cl->level ? cl->leaf.q : NULL;
1506 }
1507 
1508 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1509 {
1510     struct htb_class *cl = (struct htb_class *)arg;
1511 
1512     htb_deactivate(qdisc_priv(sch), cl);
1513 }
1514 
1515 static inline int htb_parent_last_child(struct htb_class *cl)
1516 {
1517     if (!cl->parent)
1518         /* the root class */
1519         return 0;
1520     if (cl->parent->children > 1)
1521         /* not the last child */
1522         return 0;
1523     return 1;
1524 }
1525 
1526 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1527                    struct Qdisc *new_q)
1528 {
1529     struct htb_sched *q = qdisc_priv(sch);
1530     struct htb_class *parent = cl->parent;
1531 
1532     WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1533 
1534     if (parent->cmode != HTB_CAN_SEND)
1535         htb_safe_rb_erase(&parent->pq_node,
1536                   &q->hlevel[parent->level].wait_pq);
1537 
1538     parent->level = 0;
1539     memset(&parent->inner, 0, sizeof(parent->inner));
1540     parent->leaf.q = new_q ? new_q : &noop_qdisc;
1541     parent->tokens = parent->buffer;
1542     parent->ctokens = parent->cbuffer;
1543     parent->t_c = ktime_get_ns();
1544     parent->cmode = HTB_CAN_SEND;
1545     if (q->offload)
1546         parent->leaf.offload_queue = cl->leaf.offload_queue;
1547 }
1548 
1549 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1550                        struct netdev_queue *dev_queue,
1551                        struct Qdisc *new_q)
1552 {
1553     struct Qdisc *old_q;
1554 
1555     /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1556     if (new_q)
1557         qdisc_refcount_inc(new_q);
1558     old_q = htb_graft_helper(dev_queue, new_q);
1559     WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1560 }
1561 
1562 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1563                      bool last_child, bool destroying,
1564                      struct netlink_ext_ack *extack)
1565 {
1566     struct tc_htb_qopt_offload offload_opt;
1567     struct netdev_queue *dev_queue;
1568     struct Qdisc *q = cl->leaf.q;
1569     struct Qdisc *old = NULL;
1570     int err;
1571 
1572     if (cl->level)
1573         return -EINVAL;
1574 
1575     WARN_ON(!q);
1576     dev_queue = htb_offload_get_queue(cl);
1577     old = htb_graft_helper(dev_queue, NULL);
1578     if (destroying)
1579         /* Before HTB is destroyed, the kernel grafts noop_qdisc to
1580          * all queues.
1581          */
1582         WARN_ON(!(old->flags & TCQ_F_BUILTIN));
1583     else
1584         WARN_ON(old != q);
1585 
1586     if (cl->parent) {
1587         _bstats_update(&cl->parent->bstats_bias,
1588                    u64_stats_read(&q->bstats.bytes),
1589                    u64_stats_read(&q->bstats.packets));
1590     }
1591 
1592     offload_opt = (struct tc_htb_qopt_offload) {
1593         .command = !last_child ? TC_HTB_LEAF_DEL :
1594                destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1595                TC_HTB_LEAF_DEL_LAST,
1596         .classid = cl->common.classid,
1597         .extack = extack,
1598     };
1599     err = htb_offload(qdisc_dev(sch), &offload_opt);
1600 
1601     if (!err || destroying)
1602         qdisc_put(old);
1603     else
1604         htb_graft_helper(dev_queue, old);
1605 
1606     if (last_child)
1607         return err;
1608 
1609     if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
1610         u32 classid = TC_H_MAJ(sch->handle) |
1611                   TC_H_MIN(offload_opt.classid);
1612         struct htb_class *moved_cl = htb_find(classid, sch);
1613 
1614         htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
1615     }
1616 
1617     return err;
1618 }
1619 
1620 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1621 {
1622     if (!cl->level) {
1623         WARN_ON(!cl->leaf.q);
1624         qdisc_put(cl->leaf.q);
1625     }
1626     gen_kill_estimator(&cl->rate_est);
1627     tcf_block_put(cl->block);
1628     kfree(cl);
1629 }
1630 
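/* Tear down the whole qdisc: stop the deferred work and the watchdog,
 * release all filter blocks, destroy every class (leaves first in the
 * offload case, so that parents can be converted back to leaves) and
 * finally tell the driver to destroy its offload state and drop the
 * direct qdiscs.
 */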
1631 static void htb_destroy(struct Qdisc *sch)
1632 {
1633     struct net_device *dev = qdisc_dev(sch);
1634     struct tc_htb_qopt_offload offload_opt;
1635     struct htb_sched *q = qdisc_priv(sch);
1636     struct hlist_node *next;
1637     bool nonempty, changed;
1638     struct htb_class *cl;
1639     unsigned int i;
1640 
1641     cancel_work_sync(&q->work);
1642     qdisc_watchdog_cancel(&q->watchdog);
1643     /* This call used to come after the htb_destroy_class() calls below,
1644      * and surprisingly it worked in 2.4. But it must precede them
1645      * because filters need their target class alive to be able to
1646      * call unbind_filter on it (without an Oops).
1647      */
1648     tcf_block_put(q->block);
1649 
1650     for (i = 0; i < q->clhash.hashsize; i++) {
1651         hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1652             tcf_block_put(cl->block);
1653             cl->block = NULL;
1654         }
1655     }
1656 
1657     do {
1658         nonempty = false;
1659         changed = false;
1660         for (i = 0; i < q->clhash.hashsize; i++) {
1661             hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1662                           common.hnode) {
1663                 bool last_child;
1664 
1665                 if (!q->offload) {
1666                     htb_destroy_class(sch, cl);
1667                     continue;
1668                 }
1669 
1670                 nonempty = true;
1671 
1672                 if (cl->level)
1673                     continue;
1674 
1675                 changed = true;
1676 
1677                 last_child = htb_parent_last_child(cl);
1678                 htb_destroy_class_offload(sch, cl, last_child,
1679                               true, NULL);
1680                 qdisc_class_hash_remove(&q->clhash,
1681                             &cl->common);
1682                 if (cl->parent)
1683                     cl->parent->children--;
1684                 if (last_child)
1685                     htb_parent_to_leaf(sch, cl, NULL);
1686                 htb_destroy_class(sch, cl);
1687             }
1688         }
1689     } while (changed);
1690     WARN_ON(nonempty);
1691 
1692     qdisc_class_hash_destroy(&q->clhash);
1693     __qdisc_reset_queue(&q->direct_queue);
1694 
1695     if (!q->offload)
1696         return;
1697 
1698     offload_opt = (struct tc_htb_qopt_offload) {
1699         .command = TC_HTB_DESTROY,
1700     };
1701     htb_offload(dev, &offload_opt);
1702 
1703     if (!q->direct_qdiscs)
1704         return;
1705     for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1706         qdisc_put(q->direct_qdiscs[i]);
1707     kfree(q->direct_qdiscs);
1708 }
1709 
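/* Delete a single class. Classes that still have children or filters
 * bound to them cannot be removed. If the class is the last child of
 * its parent, a default pfifo qdisc is created so that the parent can
 * be turned back into a leaf; with offload enabled the hardware queue
 * is released first via htb_destroy_class_offload().
 */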
1710 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1711               struct netlink_ext_ack *extack)
1712 {
1713     struct htb_sched *q = qdisc_priv(sch);
1714     struct htb_class *cl = (struct htb_class *)arg;
1715     struct Qdisc *new_q = NULL;
1716     int last_child = 0;
1717     int err;
1718 
1719     /* TODO: why don't we allow deleting a subtree? references? does the
1720      * tc subsystem guarantee that in htb_destroy it holds no class
1721      * refs, so that we can remove children safely there?
1722      */
1723     if (cl->children || cl->filter_cnt)
1724         return -EBUSY;
1725 
1726     if (!cl->level && htb_parent_last_child(cl))
1727         last_child = 1;
1728 
1729     if (q->offload) {
1730         err = htb_destroy_class_offload(sch, cl, last_child, false,
1731                         extack);
1732         if (err)
1733             return err;
1734     }
1735 
1736     if (last_child) {
1737         struct netdev_queue *dev_queue = sch->dev_queue;
1738 
1739         if (q->offload)
1740             dev_queue = htb_offload_get_queue(cl);
1741 
1742         new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1743                       cl->parent->common.classid,
1744                       NULL);
1745         if (q->offload) {
1746             if (new_q)
1747                 htb_set_lockdep_class_child(new_q);
1748             htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1749         }
1750     }
1751 
1752     sch_tree_lock(sch);
1753 
1754     if (!cl->level)
1755         qdisc_purge_queue(cl->leaf.q);
1756 
1757     /* delete from hash and active; remainder in destroy_class */
1758     qdisc_class_hash_remove(&q->clhash, &cl->common);
1759     if (cl->parent)
1760         cl->parent->children--;
1761 
1762     if (cl->prio_activity)
1763         htb_deactivate(q, cl);
1764 
1765     if (cl->cmode != HTB_CAN_SEND)
1766         htb_safe_rb_erase(&cl->pq_node,
1767                   &q->hlevel[cl->level].wait_pq);
1768 
1769     if (last_child)
1770         htb_parent_to_leaf(sch, cl, new_q);
1771 
1772     sch_tree_unlock(sch);
1773 
1774     htb_destroy_class(sch, cl);
1775     return 0;
1776 }
1777 
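/* Create a new class or change an existing one. For a new class
 * (*arg == 0) this allocates the class, optionally sets up a rate
 * estimator, creates a default pfifo leaf qdisc and, if needed,
 * converts the parent from a leaf into an inner node; for an existing
 * class it may replace the estimator and notify the offload. In both
 * cases rate, ceil, quantum and buffers are recomputed under the qdisc
 * tree lock.
 */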
1778 static int htb_change_class(struct Qdisc *sch, u32 classid,
1779                 u32 parentid, struct nlattr **tca,
1780                 unsigned long *arg, struct netlink_ext_ack *extack)
1781 {
1782     int err = -EINVAL;
1783     struct htb_sched *q = qdisc_priv(sch);
1784     struct htb_class *cl = (struct htb_class *)*arg, *parent;
1785     struct tc_htb_qopt_offload offload_opt;
1786     struct nlattr *opt = tca[TCA_OPTIONS];
1787     struct nlattr *tb[TCA_HTB_MAX + 1];
1788     struct Qdisc *parent_qdisc = NULL;
1789     struct netdev_queue *dev_queue;
1790     struct tc_htb_opt *hopt;
1791     u64 rate64, ceil64;
1792     int warn = 0;
1793 
1794     /* extract all subattrs from opt attr */
1795     if (!opt)
1796         goto failure;
1797 
1798     err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1799                       NULL);
1800     if (err < 0)
1801         goto failure;
1802 
1803     err = -EINVAL;
1804     if (tb[TCA_HTB_PARMS] == NULL)
1805         goto failure;
1806 
1807     parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1808 
1809     hopt = nla_data(tb[TCA_HTB_PARMS]);
1810     if (!hopt->rate.rate || !hopt->ceil.rate)
1811         goto failure;
1812 
1813     if (q->offload) {
1814         /* Options not supported by the offload. */
1815         if (hopt->rate.overhead || hopt->ceil.overhead) {
1816             NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
1817             goto failure;
1818         }
1819         if (hopt->rate.mpu || hopt->ceil.mpu) {
1820             NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
1821             goto failure;
1822         }
1823         if (hopt->quantum) {
1824             NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
1825             goto failure;
1826         }
1827         if (hopt->prio) {
1828             NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
1829             goto failure;
1830         }
1831     }
1832 
1833     /* Keep backward compatibility with rate_table based iproute2 tc */
1834     if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1835         qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1836                           NULL));
1837 
1838     if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1839         qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1840                           NULL));
1841 
1842     rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1843     ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1844 
1845     if (!cl) {      /* new class */
1846         struct net_device *dev = qdisc_dev(sch);
1847         struct Qdisc *new_q, *old_q;
1848         int prio;
1849         struct {
1850             struct nlattr       nla;
1851             struct gnet_estimator   opt;
1852         } est = {
1853             .nla = {
1854                 .nla_len    = nla_attr_size(sizeof(est.opt)),
1855                 .nla_type   = TCA_RATE,
1856             },
1857             .opt = {
1858                 /* 4s interval, 16s averaging constant */
1859                 .interval   = 2,
1860                 .ewma_log   = 2,
1861             },
1862         };
1863 
1864         /* check for valid classid */
1865         if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1866             htb_find(classid, sch))
1867             goto failure;
1868 
1869         /* check maximal depth */
1870         if (parent && parent->parent && parent->parent->level < 2) {
1871             pr_err("htb: tree is too deep\n");
1872             goto failure;
1873         }
1874         err = -ENOBUFS;
1875         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1876         if (!cl)
1877             goto failure;
1878 
1879         gnet_stats_basic_sync_init(&cl->bstats);
1880         gnet_stats_basic_sync_init(&cl->bstats_bias);
1881 
1882         err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1883         if (err) {
1884             kfree(cl);
1885             goto failure;
1886         }
1887         if (htb_rate_est || tca[TCA_RATE]) {
1888             err = gen_new_estimator(&cl->bstats, NULL,
1889                         &cl->rate_est,
1890                         NULL,
1891                         true,
1892                         tca[TCA_RATE] ? : &est.nla);
1893             if (err)
1894                 goto err_block_put;
1895         }
1896 
1897         cl->children = 0;
1898         RB_CLEAR_NODE(&cl->pq_node);
1899 
1900         for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1901             RB_CLEAR_NODE(&cl->node[prio]);
1902 
1903         cl->common.classid = classid;
1904 
1905         /* Make sure nothing interrupts us between two
1906          * ndo_setup_tc calls.
1907          */
1908         ASSERT_RTNL();
1909 
1910         /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1911          * which can't be used inside of sch_tree_lock
1912          * -- thanks to Karlis Peisenieks
1913          */
1914         if (!q->offload) {
1915             dev_queue = sch->dev_queue;
1916         } else if (!(parent && !parent->level)) {
1917             /* Assign a dev_queue to this classid. */
1918             offload_opt = (struct tc_htb_qopt_offload) {
1919                 .command = TC_HTB_LEAF_ALLOC_QUEUE,
1920                 .classid = cl->common.classid,
1921                 .parent_classid = parent ?
1922                     TC_H_MIN(parent->common.classid) :
1923                     TC_HTB_CLASSID_ROOT,
1924                 .rate = max_t(u64, hopt->rate.rate, rate64),
1925                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1926                 .extack = extack,
1927             };
1928             err = htb_offload(dev, &offload_opt);
1929             if (err) {
1930                 pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1931                        err);
1932                 goto err_kill_estimator;
1933             }
1934             dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1935         } else { /* First child. */
1936             dev_queue = htb_offload_get_queue(parent);
1937             old_q = htb_graft_helper(dev_queue, NULL);
1938             WARN_ON(old_q != parent->leaf.q);
1939             offload_opt = (struct tc_htb_qopt_offload) {
1940                 .command = TC_HTB_LEAF_TO_INNER,
1941                 .classid = cl->common.classid,
1942                 .parent_classid =
1943                     TC_H_MIN(parent->common.classid),
1944                 .rate = max_t(u64, hopt->rate.rate, rate64),
1945                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1946                 .extack = extack,
1947             };
1948             err = htb_offload(dev, &offload_opt);
1949             if (err) {
1950                 pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1951                        err);
1952                 htb_graft_helper(dev_queue, old_q);
1953                 goto err_kill_estimator;
1954             }
1955             _bstats_update(&parent->bstats_bias,
1956                        u64_stats_read(&old_q->bstats.bytes),
1957                        u64_stats_read(&old_q->bstats.packets));
1958             qdisc_put(old_q);
1959         }
1960         new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1961                       classid, NULL);
1962         if (q->offload) {
1963             if (new_q) {
1964                 htb_set_lockdep_class_child(new_q);
1965                 /* One ref for cl->leaf.q, the other for
1966                  * dev_queue->qdisc.
1967                  */
1968                 qdisc_refcount_inc(new_q);
1969             }
1970             old_q = htb_graft_helper(dev_queue, new_q);
1971             /* No qdisc_put needed. */
1972             WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1973         }
1974         sch_tree_lock(sch);
1975         if (parent && !parent->level) {
1976             /* turn parent into inner node */
1977             qdisc_purge_queue(parent->leaf.q);
1978             parent_qdisc = parent->leaf.q;
1979             if (parent->prio_activity)
1980                 htb_deactivate(q, parent);
1981 
1982             /* remove from evt list because of level change */
1983             if (parent->cmode != HTB_CAN_SEND) {
1984                 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1985                 parent->cmode = HTB_CAN_SEND;
1986             }
1987             parent->level = (parent->parent ? parent->parent->level
1988                      : TC_HTB_MAXDEPTH) - 1;
1989             memset(&parent->inner, 0, sizeof(parent->inner));
1990         }
1991 
1992         /* the leaf (i.e. this class) needs an elementary qdisc */
1993         cl->leaf.q = new_q ? new_q : &noop_qdisc;
1994         if (q->offload)
1995             cl->leaf.offload_queue = dev_queue;
1996 
1997         cl->parent = parent;
1998 
1999         /* set class to be in HTB_CAN_SEND state */
2000         cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
2001         cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
2002         cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */
2003         cl->t_c = ktime_get_ns();
2004         cl->cmode = HTB_CAN_SEND;
2005 
2006         /* attach to the hash list and parent's family */
2007         qdisc_class_hash_insert(&q->clhash, &cl->common);
2008         if (parent)
2009             parent->children++;
2010         if (cl->leaf.q != &noop_qdisc)
2011             qdisc_hash_add(cl->leaf.q, true);
2012     } else {
2013         if (tca[TCA_RATE]) {
2014             err = gen_replace_estimator(&cl->bstats, NULL,
2015                             &cl->rate_est,
2016                             NULL,
2017                             true,
2018                             tca[TCA_RATE]);
2019             if (err)
2020                 return err;
2021         }
2022 
2023         if (q->offload) {
2024             struct net_device *dev = qdisc_dev(sch);
2025 
2026             offload_opt = (struct tc_htb_qopt_offload) {
2027                 .command = TC_HTB_NODE_MODIFY,
2028                 .classid = cl->common.classid,
2029                 .rate = max_t(u64, hopt->rate.rate, rate64),
2030                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
2031                 .extack = extack,
2032             };
2033             err = htb_offload(dev, &offload_opt);
2034             if (err)
2035                 /* The estimator was replaced, and rollback may fail
2036                  * as well, so we don't try to recover it, and
2037                  * the estimator won't work properly with the
2038                  * offload anyway, because bstats are updated
2039                  * only when the stats are queried.
2040                  */
2041                 return err;
2042         }
2043 
2044         sch_tree_lock(sch);
2045     }
2046 
2047     psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
2048     psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
2049 
2050     /* there used to be a nasty bug here: we have to check that the
2051      * node is really a leaf before changing cl->leaf!
2052      */
2053     if (!cl->level) {
2054         u64 quantum = cl->rate.rate_bytes_ps;
2055 
2056         do_div(quantum, q->rate2quantum);
2057         cl->quantum = min_t(u64, quantum, INT_MAX);
2058 
2059         if (!hopt->quantum && cl->quantum < 1000) {
2060             warn = -1;
2061             cl->quantum = 1000;
2062         }
2063         if (!hopt->quantum && cl->quantum > 200000) {
2064             warn = 1;
2065             cl->quantum = 200000;
2066         }
2067         if (hopt->quantum)
2068             cl->quantum = hopt->quantum;
2069         if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
2070             cl->prio = TC_HTB_NUMPRIO - 1;
2071     }
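    /* Worked example (illustrative only, assuming iproute2's usual r2q
     * default of 10): a rate of 100mbit is 12,500,000 bytes/s, so
     * quantum = 12,500,000 / 10 = 1,250,000. Without an explicit
     * quantum this exceeds 200000, so it is clamped and the
     * "Consider r2q change" warning below is printed.
     */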
2072 
2073     cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
2074     cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
2075 
2076     sch_tree_unlock(sch);
2077     qdisc_put(parent_qdisc);
2078 
2079     if (warn)
2080         pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
2081                 cl->common.classid, (warn == -1 ? "small" : "big"));
2082 
2083     qdisc_class_hash_grow(sch, &q->clhash);
2084 
2085     *arg = (unsigned long)cl;
2086     return 0;
2087 
2088 err_kill_estimator:
2089     gen_kill_estimator(&cl->rate_est);
2090 err_block_put:
2091     tcf_block_put(cl->block);
2092     kfree(cl);
2093 failure:
2094     return err;
2095 }
2096 
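/* Return the filter block of the given class, or the qdisc-level block
 * when no class is specified.
 */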
2097 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2098                        struct netlink_ext_ack *extack)
2099 {
2100     struct htb_sched *q = qdisc_priv(sch);
2101     struct htb_class *cl = (struct htb_class *)arg;
2102 
2103     return cl ? cl->block : q->block;
2104 }
2105 
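/* Called when a filter is bound to a class; the per-class filter_cnt
 * prevents htb_delete() from removing a class that still has filters
 * pointing at it (see the -EBUSY check there).
 */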
2106 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2107                      u32 classid)
2108 {
2109     struct htb_class *cl = htb_find(classid, sch);
2110 
2111     /*if (cl && !cl->level) return 0;
2112      * The line above used to be there to prevent attaching filters to
2113      * leaves. But at least the tc_index filter uses this just to get the
2114      * class for other reasons, so we have to allow it.
2115      * ----
2116      * 19.6.2002 As Werner explained it is ok - binding a filter is just
2117      * another way to "lock" the class - unlike "get", this lock can
2118      * be broken by the class during destroy IIUC.
2119      */
2120     if (cl)
2121         cl->filter_cnt++;
2122     return (unsigned long)cl;
2123 }
2124 
2125 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2126 {
2127     struct htb_class *cl = (struct htb_class *)arg;
2128 
2129     if (cl)
2130         cl->filter_cnt--;
2131 }
2132 
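/* Iterate over all classes for dumping, honouring the skip/count/stop
 * protocol of struct qdisc_walker.
 */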
2133 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2134 {
2135     struct htb_sched *q = qdisc_priv(sch);
2136     struct htb_class *cl;
2137     unsigned int i;
2138 
2139     if (arg->stop)
2140         return;
2141 
2142     for (i = 0; i < q->clhash.hashsize; i++) {
2143         hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2144             if (arg->count < arg->skip) {
2145                 arg->count++;
2146                 continue;
2147             }
2148             if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2149                 arg->stop = 1;
2150                 return;
2151             }
2152             arg->count++;
2153         }
2154     }
2155 }
2156 
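/* Rough sketch of how these operations are typically exercised from
 * userspace (interface name, handles and rates are arbitrary examples):
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20
 *   tc class add dev eth0 parent 1: classid 1:1 htb rate 100mbit ceil 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:10 htb rate 30mbit ceil 100mbit
 *   tc filter add dev eth0 parent 1: protocol ip u32 \
 *           match ip dport 80 0xffff flowid 1:10
 *   tc class del dev eth0 parent 1:1 classid 1:10
 *
 * "class add"/"class change" end up in htb_change_class(), "class del"
 * in htb_delete(), attaching the filter goes through htb_tcf_block()
 * and htb_bind_filter(), and class dumps walk the hash via htb_walk().
 */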
2157 static const struct Qdisc_class_ops htb_class_ops = {
2158     .select_queue   =   htb_select_queue,
2159     .graft      =   htb_graft,
2160     .leaf       =   htb_leaf,
2161     .qlen_notify    =   htb_qlen_notify,
2162     .find       =   htb_search,
2163     .change     =   htb_change_class,
2164     .delete     =   htb_delete,
2165     .walk       =   htb_walk,
2166     .tcf_block  =   htb_tcf_block,
2167     .bind_tcf   =   htb_bind_filter,
2168     .unbind_tcf =   htb_unbind_filter,
2169     .dump       =   htb_dump_class,
2170     .dump_stats =   htb_dump_class_stats,
2171 };
2172 
2173 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2174     .cl_ops     =   &htb_class_ops,
2175     .id     =   "htb",
2176     .priv_size  =   sizeof(struct htb_sched),
2177     .enqueue    =   htb_enqueue,
2178     .dequeue    =   htb_dequeue,
2179     .peek       =   qdisc_peek_dequeued,
2180     .init       =   htb_init,
2181     .attach     =   htb_attach,
2182     .reset      =   htb_reset,
2183     .destroy    =   htb_destroy,
2184     .dump       =   htb_dump,
2185     .owner      =   THIS_MODULE,
2186 };
2187 
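/* Module entry points: register the HTB qdisc ops with the packet
 * scheduler core on load and unregister them on unload.
 */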
2188 static int __init htb_module_init(void)
2189 {
2190     return register_qdisc(&htb_qdisc_ops);
2191 }
2192 static void __exit htb_module_exit(void)
2193 {
2194     unregister_qdisc(&htb_qdisc_ops);
2195 }
2196 
2197 module_init(htb_module_init)
2198 module_exit(htb_module_exit)
2199 MODULE_LICENSE("GPL");