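// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_htb.c	Hierarchical Token Bucket (HTB) packet scheduler,
 *			feed tree version.
 *
 * Author:	Martin Devera, <devik@cdi.cz>
 */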
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
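
/* HTB algorithm.
 *
 * HTB shapes traffic with a hierarchy of classes: each class gets a
 * guaranteed rate and a ceiling, and a class that exceeds its rate may
 * borrow unused bandwidth from its ancestors up to the ceiling.  Leaf
 * classes own a real qdisc (pfifo by default); packets are mapped to
 * leaves by tc filters, by skb->priority, or by the default class.
 *
 * Typical setup (illustrative only):
 *	tc qdisc add dev eth0 root handle 1: htb default 10
 *	tc class add dev eth0 parent 1: classid 1:10 htb rate 1mbit ceil 2mbit
 */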
static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011	/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

module_param(htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");

static int htb_rate_est = 0; /* htb classes have a default rate estimator */
module_param(htb_rate_est, int, 0640);
MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");

/* used internally to keep status of a single class */
enum htb_cmode {
        HTB_CANT_SEND,          /* class can't send and can't borrow */
        HTB_MAY_BORROW,         /* class can't send but may borrow */
        HTB_CAN_SEND            /* class can send */
};

struct htb_prio {
        union {
                struct rb_root row;
                struct rb_root feed;
        };
        struct rb_node *ptr;
        /* When class changes from state 1->2 and disconnects from
         * parent's feed then we lost ptr value and start from the
         * first child again. Here we store classid of the
         * last valid ptr (used when ptr is NULL).
         */
        u32 last_ptr_id;
};
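
/* Interior and leaf nodes; leaf-specific state lives in the "leaf" member of
 * the union below, inner-node state in the "inner" member.
 */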
struct htb_class {
        struct Qdisc_class_common common;
        struct psched_ratecfg rate;
        struct psched_ratecfg ceil;
        s64 buffer, cbuffer;            /* token bucket depth/rate */
        s64 mbuffer;                    /* max wait time */
        u32 prio;                       /* these two are used only by leaves... */
        int quantum;                    /* ...but stored for parent-to-leaf return */

        struct tcf_proto __rcu *filter_list;    /* class attached filters */
        struct tcf_block *block;
        int filter_cnt;

        int level;                      /* our level (leaves have level 0) */
        unsigned int children;
        struct htb_class *parent;       /* parent class */

        struct net_rate_estimator __rcu *rate_est;

        /* written often in the fast path */
        struct gnet_stats_basic_sync bstats;
        struct gnet_stats_basic_sync bstats_bias;
        struct tc_htb_xstats xstats;    /* our special stats */

        /* token bucket parameters */
        s64 tokens, ctokens;            /* current number of tokens */
        s64 t_c;                        /* checkpoint time */

        union {
                struct htb_class_leaf {
                        int deficit[TC_HTB_MAXDEPTH];
                        struct Qdisc *q;
                        struct netdev_queue *offload_queue;
                } leaf;
                struct htb_class_inner {
                        struct htb_prio clprio[TC_HTB_NUMPRIO];
                } inner;
        };
        s64 pq_key;

        int prio_activity;              /* for which prios are we active */
        enum htb_cmode cmode;           /* current mode of the class */
        struct rb_node pq_node;         /* node for event queue */
        struct rb_node node[TC_HTB_NUMPRIO];    /* node for self or feed tree */

        unsigned int drops ____cacheline_aligned_in_smp;
        unsigned int overlimits;
};

struct htb_level {
        struct rb_root wait_pq;
        struct htb_prio hprio[TC_HTB_NUMPRIO];
};

struct htb_sched {
        struct Qdisc_class_hash clhash;
        int defcls;                     /* class where unclassified flows go to */
        int rate2quantum;               /* quant = rate / rate2quantum */

        /* filters for qdisc itself */
        struct tcf_proto __rcu *filter_list;
        struct tcf_block *block;

#define HTB_WARN_TOOMANYEVENTS 0x1
        unsigned int warned;            /* only one warning */
        int direct_qlen;
        struct work_struct work;

        /* non shaped skbs; let them go directly thru */
        struct qdisc_skb_head direct_queue;
        u32 direct_pkts;
        u32 overlimits;

        struct qdisc_watchdog watchdog;

        s64 now;                        /* cached dequeue time */

        /* time of nearest event per level (row) */
        s64 near_ev_cache[TC_HTB_MAXDEPTH];

        int row_mask[TC_HTB_MAXDEPTH];

        struct htb_level hlevel[TC_HTB_MAXDEPTH];

        struct Qdisc **direct_qdiscs;
        unsigned int num_direct_qdiscs;

        bool offload;
};

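/* find class in the qdisc's hash table using the given handle */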
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
        struct htb_sched *q = qdisc_priv(sch);
        struct Qdisc_class_common *clc;

        clc = qdisc_class_find(&q->clhash, handle);
        if (clc == NULL)
                return NULL;
        return container_of(clc, struct htb_class, common);
}

static unsigned long htb_search(struct Qdisc *sch, u32 handle)
{
        return (unsigned long)htb_find(handle, sch);
}

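/**
 * htb_classify - classify a packet into a class
 *
 * Returns the leaf class the packet should be enqueued to, HTB_DIRECT if the
 * packet should bypass shaping via the direct queue, or NULL when a filter
 * action drops or steals the packet (*qerr is set accordingly).
 * skb->priority is tried first: if it equals the qdisc handle the packet goes
 * direct, if it names an existing leaf class that class is used; otherwise
 * the attached filters and finally the default class decide.
 */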
#define HTB_DIRECT ((struct htb_class *)-1L)

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
                                      int *qerr)
{
        struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl;
        struct tcf_result res;
        struct tcf_proto *tcf;
        int result;

        /* allow to select class by setting skb->priority to valid classid;
         * note that nfmark can be used too by attaching filter fw with no
         * rules in it
         */
        if (skb->priority == sch->handle)
                return HTB_DIRECT;      /* X:0 (direct flow) selected */
        cl = htb_find(skb->priority, sch);
        if (cl) {
                if (cl->level == 0)
                        return cl;
                /* start with inner filter chain if a non-leaf class is selected */
                tcf = rcu_dereference_bh(cl->filter_list);
        } else {
                tcf = rcu_dereference_bh(q->filter_list);
        }

        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
        while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
                switch (result) {
                case TC_ACT_QUEUED:
                case TC_ACT_STOLEN:
                case TC_ACT_TRAP:
                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
                        fallthrough;
                case TC_ACT_SHOT:
                        return NULL;
                }
#endif
                cl = (void *)res.class;
                if (!cl) {
                        if (res.classid == sch->handle)
                                return HTB_DIRECT;      /* X:0 (direct flow) */
                        cl = htb_find(res.classid, sch);
                        if (!cl)
                                break;  /* filter selected invalid classid */
                }
                if (!cl->level)
                        return cl;      /* we hit leaf; return it */

                /* we have got inner class; apply inner filter chain */
                tcf = rcu_dereference_bh(cl->filter_list);
        }
        /* classification failed; try to use default class */
        cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
        if (!cl || cl->level)
                return HTB_DIRECT;      /* bad default .. this is safe bet */
        return cl;
}

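/**
 * htb_add_to_id_tree - add class to the round robin list
 * @root: the root of the tree
 * @cl: the class to add
 * @prio: the priority (the per-prio tree is keyed by classid)
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */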
static void htb_add_to_id_tree(struct rb_root *root,
                               struct htb_class *cl, int prio)
{
        struct rb_node **p = &root->rb_node, *parent = NULL;

        while (*p) {
                struct htb_class *c;
                parent = *p;
                c = rb_entry(parent, struct htb_class, node[prio]);

                if (cl->common.classid > c->common.classid)
                        p = &parent->rb_right;
                else
                        p = &parent->rb_left;
        }
        rb_link_node(&cl->node[prio], parent, p);
        rb_insert_color(&cl->node[prio], root);
}

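/**
 * htb_add_to_wait_tree - add class to the event queue with delay
 * @q: the priority event queue
 * @cl: the class to add
 * @delay: delay in nanoseconds
 *
 * The class is added to the priority event queue to indicate that the class
 * will change its mode in cl->pq_key ns. Make sure that the class is not
 * already in the event queue.
 */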
static void htb_add_to_wait_tree(struct htb_sched *q,
                                 struct htb_class *cl, s64 delay)
{
        struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;

        cl->pq_key = q->now + delay;
        if (cl->pq_key == q->now)
                cl->pq_key++;

        /* update the nearest event cache */
        if (q->near_ev_cache[cl->level] > cl->pq_key)
                q->near_ev_cache[cl->level] = cl->pq_key;

        while (*p) {
                struct htb_class *c;
                parent = *p;
                c = rb_entry(parent, struct htb_class, pq_node);
                if (cl->pq_key >= c->pq_key)
                        p = &parent->rb_right;
                else
                        p = &parent->rb_left;
        }
        rb_link_node(&cl->pq_node, parent, p);
        rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
}

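/**
 * htb_next_rb_node - finds next node in binary tree
 * @n: the current node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */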
static inline void htb_next_rb_node(struct rb_node **n)
{
        *n = rb_next(*n);
}

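/**
 * htb_add_class_to_row - add class to its row
 * @q: the priority event queue
 * @cl: the class to add
 * @mask: the given priorities in class in bitmap
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */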
static inline void htb_add_class_to_row(struct htb_sched *q,
                                        struct htb_class *cl, int mask)
{
        q->row_mask[cl->level] |= mask;
        while (mask) {
                int prio = ffz(~mask);
                mask &= ~(1 << prio);
                htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
        }
}

/* If this triggers, it is a bug. */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
        if (RB_EMPTY_NODE(rb)) {
                WARN_ON(1);
        } else {
                rb_erase(rb, root);
                RB_CLEAR_NODE(rb);
        }
}

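/**
 * htb_remove_class_from_row - removes class from its row
 * @q: the priority event queue
 * @cl: the class to remove
 * @mask: the given priorities in class in bitmap
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */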
static inline void htb_remove_class_from_row(struct htb_sched *q,
                                             struct htb_class *cl, int mask)
{
        int m = 0;
        struct htb_level *hlevel = &q->hlevel[cl->level];

        while (mask) {
                int prio = ffz(~mask);
                struct htb_prio *hprio = &hlevel->hprio[prio];

                mask &= ~(1 << prio);
                if (hprio->ptr == cl->node + prio)
                        htb_next_rb_node(&hprio->ptr);

                htb_safe_rb_erase(cl->node + prio, &hprio->row);
                if (!hprio->row.rb_node)
                        m |= 1 << prio;
        }
        q->row_mask[cl->level] &= ~m;
}

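/**
 * htb_activate_prios - creates active class's feed chain
 * @q: the priority event queue
 * @cl: the class to activate
 *
 * The class is connected to ancestors and/or appropriate rows
 * for priorities it is participating on. cl->cmode must be new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */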
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
        struct htb_class *p = cl->parent;
        long m, mask = cl->prio_activity;

        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
                m = mask;
                while (m) {
                        int prio = ffz(~m);
                        m &= ~(1 << prio);

                        if (p->inner.clprio[prio].feed.rb_node)
                                /* parent already has its feed in use so that
                                 * reset bit in mask as parent is already ok
                                 */
                                mask &= ~(1 << prio);

                        htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
                }
                p->prio_activity |= mask;
                cl = p;
                p = cl->parent;
        }
        if (cl->cmode == HTB_CAN_SEND && mask)
                htb_add_class_to_row(q, cl, mask);
}

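/**
 * htb_deactivate_prios - remove class from feed chain
 * @q: the priority event queue
 * @cl: the class to deactivate
 *
 * cl->cmode must represent old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */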
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
        struct htb_class *p = cl->parent;
        long m, mask = cl->prio_activity;

        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
                m = mask;
                mask = 0;
                while (m) {
                        int prio = ffz(~m);
                        m &= ~(1 << prio);

                        if (p->inner.clprio[prio].ptr == cl->node + prio) {
                                /* we are removing child which is pointed to from
                                 * parent feed - forget the pointer but remember
                                 * classid
                                 */
                                p->inner.clprio[prio].last_ptr_id = cl->common.classid;
                                p->inner.clprio[prio].ptr = NULL;
                        }

                        htb_safe_rb_erase(cl->node + prio,
                                          &p->inner.clprio[prio].feed);

                        if (!p->inner.clprio[prio].feed.rb_node)
                                mask |= 1 << prio;
                }

                p->prio_activity &= ~mask;
                cl = p;
                p = cl->parent;
        }
        if (cl->cmode == HTB_CAN_SEND && mask)
                htb_remove_class_from_row(q, cl, mask);
}

static inline s64 htb_lowater(const struct htb_class *cl)
{
        if (htb_hysteresis)
                return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
        else
                return 0;
}

static inline s64 htb_hiwater(const struct htb_class *cl)
{
        if (htb_hysteresis)
                return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
        else
                return 0;
}

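/**
 * htb_class_mode - computes and returns current class mode
 * @cl: the target class
 * @diff: time elapsed since the last token update, in ns (capped at mbuffer)
 *
 * Computes cl's mode at time cl->t_c + diff and returns it. If the mode is
 * not HTB_CAN_SEND, *diff is overwritten with the token deficit, i.e. how
 * long until the class changes state. Note that the mode does not flip
 * exactly at {c,}tokens == 0: with htb_hysteresis there is a hysteresis of
 * 0 .. -cl->{c,}buffer to limit the number of mode transitions per time unit.
 */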
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, s64 *diff)
{
        s64 toks;

        if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
                *diff = -toks;
                return HTB_CANT_SEND;
        }

        if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
                return HTB_CAN_SEND;

        *diff = -toks;
        return HTB_MAY_BORROW;
}

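/**
 * htb_change_class_mode - changes class's mode
 * @q: the priority event queue
 * @cl: the target class
 * @diff: time elapsed since the last token update, in ns
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. It updates the feed list linkage and the mode itself;
 * the caller is responsible for moving the class on the wait queue when
 * the new mode is not HTB_CAN_SEND.
 */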
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
{
        enum htb_cmode new_mode = htb_class_mode(cl, diff);

        if (new_mode == cl->cmode)
                return;

        if (new_mode == HTB_CANT_SEND) {
                cl->overlimits++;
                q->overlimits++;
        }

        if (cl->prio_activity) {        /* not necessary: speed optimization */
                if (cl->cmode != HTB_CANT_SEND)
                        htb_deactivate_prios(q, cl);
                cl->cmode = new_mode;
                if (new_mode != HTB_CANT_SEND)
                        htb_activate_prios(q, cl);
        } else
                cl->cmode = new_mode;
}

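/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 * @q: the priority event queue
 * @cl: the target class
 *
 * Routine learns (new) priority of leaf and activates feed chain
 * for the prio. It can be called on already active leaf safely.
 */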
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
        WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);

        if (!cl->prio_activity) {
                cl->prio_activity = 1 << cl->prio;
                htb_activate_prios(q, cl);
        }
}

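/**
 * htb_deactivate - remove leaf cl from active feeds
 * @q: the priority event queue
 * @cl: the target class
 *
 * Make sure that the leaf is active; this must not be called
 * on a non-active leaf.
 */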
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
        WARN_ON(!cl->prio_activity);

        htb_deactivate_prios(q, cl);
        cl->prio_activity = 0;
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                       struct sk_buff **to_free)
{
        int ret;
        unsigned int len = qdisc_pkt_len(skb);
        struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl = htb_classify(skb, sch, &ret);

        if (cl == HTB_DIRECT) {
                /* enqueue to helper queue */
                if (q->direct_queue.qlen < q->direct_qlen) {
                        __qdisc_enqueue_tail(skb, &q->direct_queue);
                        q->direct_pkts++;
                } else {
                        return qdisc_drop(skb, sch, to_free);
                }
#ifdef CONFIG_NET_CLS_ACT
        } else if (!cl) {
                if (ret & __NET_XMIT_BYPASS)
                        qdisc_qstats_drop(sch);
                __qdisc_drop(skb, to_free);
                return ret;
#endif
        } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
                                        to_free)) != NET_XMIT_SUCCESS) {
                if (net_xmit_drop_count(ret)) {
                        qdisc_qstats_drop(sch);
                        cl->drops++;
                }
                return ret;
        } else {
                htb_activate(q, cl);
        }

        sch->qstats.backlog += len;
        sch->q.qlen++;
        return NET_XMIT_SUCCESS;
}

static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
{
        s64 toks = diff + cl->tokens;

        if (toks > cl->buffer)
                toks = cl->buffer;
        toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
        if (toks <= -cl->mbuffer)
                toks = 1 - cl->mbuffer;

        cl->tokens = toks;
}

static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
{
        s64 toks = diff + cl->ctokens;

        if (toks > cl->cbuffer)
                toks = cl->cbuffer;
        toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
        if (toks <= -cl->mbuffer)
                toks = 1 - cl->mbuffer;

        cl->ctokens = toks;
}

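/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 * @q: the priority event queue
 * @cl: the class to start iterate
 * @level: the minimum level to account
 * @skb: the socket buffer
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use more precise clock that event queue here.
 * In such case we remove class from event queue first.
 */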
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
                             int level, struct sk_buff *skb)
{
        int bytes = qdisc_pkt_len(skb);
        enum htb_cmode old_mode;
        s64 diff;

        while (cl) {
                diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
                if (cl->level >= level) {
                        if (cl->level == level)
                                cl->xstats.lends++;
                        htb_accnt_tokens(cl, bytes, diff);
                } else {
                        cl->xstats.borrows++;
                        cl->tokens += diff;     /* we moved t_c; update tokens */
                }
                htb_accnt_ctokens(cl, bytes, diff);
                cl->t_c = q->now;

                old_mode = cl->cmode;
                diff = 0;
                htb_change_class_mode(q, cl, &diff);
                if (old_mode != cl->cmode) {
                        if (old_mode != HTB_CAN_SEND)
                                htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
                        if (cl->cmode != HTB_CAN_SEND)
                                htb_add_to_wait_tree(q, cl, diff);
                }

                /* update basic stats except for leaves which are already updated */
                if (cl->level)
                        bstats_update(&cl->bstats, skb);

                cl = cl->parent;
        }
}

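/**
 * htb_do_events - make mode changes to classes at the level
 * @q: the priority event queue
 * @level: which wait_pq in 'q->hlevel'
 * @start: start jiffies
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq, q->now for too many events).
 * Note: Applied are events whose have cl->pq_key <= q->now.
 */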
static s64 htb_do_events(struct htb_sched *q, const int level,
                         unsigned long start)
{
        /* don't run for longer than 2 jiffies; 2 is used instead of
         * 1 to simplify things when jiffy is going to be incremented
         * too soon
         */
        unsigned long stop_at = start + 2;
        struct rb_root *wait_pq = &q->hlevel[level].wait_pq;

        while (time_before(jiffies, stop_at)) {
                struct htb_class *cl;
                s64 diff;
                struct rb_node *p = rb_first(wait_pq);

                if (!p)
                        return 0;

                cl = rb_entry(p, struct htb_class, pq_node);
                if (cl->pq_key > q->now)
                        return cl->pq_key;

                htb_safe_rb_erase(p, wait_pq);
                diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
                htb_change_class_mode(q, cl, &diff);
                if (cl->cmode != HTB_CAN_SEND)
                        htb_add_to_wait_tree(q, cl, diff);
        }

        /* too much load - let's continue after a break for scheduling */
        if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
                pr_warn("htb: too many events!\n");
                q->warned |= HTB_WARN_TOOMANYEVENTS;
        }

        return q->now;
}

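/* Return the node from the per-prio id-tree whose classid is the smallest one
 * that is >= id, or NULL if no such node exists.
 */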
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
                                              u32 id)
{
        struct rb_node *r = NULL;
        while (n) {
                struct htb_class *cl =
                        rb_entry(n, struct htb_class, node[prio]);

                if (id > cl->common.classid) {
                        n = n->rb_right;
                } else if (id < cl->common.classid) {
                        r = n;
                        n = n->rb_left;
                } else {
                        return n;
                }
        }
        return r;
}

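/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 * @hprio: the htb_prio holding the row/feed tree and current DRR pointer
 * @prio: which prio in class
 *
 * Find leaf where current feed pointers points to.
 */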
static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
{
        int i;
        struct {
                struct rb_node *root;
                struct rb_node **pptr;
                u32 *pid;
        } stk[TC_HTB_MAXDEPTH], *sp = stk;

        BUG_ON(!hprio->row.rb_node);
        sp->root = hprio->row.rb_node;
        sp->pptr = &hprio->ptr;
        sp->pid = &hprio->last_ptr_id;

        for (i = 0; i < 65535; i++) {
                if (!*sp->pptr && *sp->pid) {
                        /* ptr was invalidated but id is valid - try to recover
                         * the original or next ptr
                         */
                        *sp->pptr =
                                htb_id_find_next_upper(prio, sp->root, *sp->pid);
                }
                *sp->pid = 0;   /* ptr is valid now so that remove this hint as it
                                 * can become out of date quickly
                                 */
                if (!*sp->pptr) {       /* we are at right end; rewind & go up */
                        *sp->pptr = sp->root;
                        while ((*sp->pptr)->rb_left)
                                *sp->pptr = (*sp->pptr)->rb_left;
                        if (sp > stk) {
                                sp--;
                                if (!*sp->pptr) {
                                        WARN_ON(1);
                                        return NULL;
                                }
                                htb_next_rb_node(sp->pptr);
                        }
                } else {
                        struct htb_class *cl;
                        struct htb_prio *clp;

                        cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
                        if (!cl->level)
                                return cl;
                        clp = &cl->inner.clprio[prio];
                        (++sp)->root = clp->feed.rb_node;
                        sp->pptr = &clp->ptr;
                        sp->pid = &clp->last_ptr_id;
                }
        }
        WARN_ON(1);
        return NULL;
}

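/* dequeues packet at given priority and level; call only if
 * you are sure that there is active class at prio/level
 */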
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
                                        const int level)
{
        struct sk_buff *skb = NULL;
        struct htb_class *cl, *start;
        struct htb_level *hlevel = &q->hlevel[level];
        struct htb_prio *hprio = &hlevel->hprio[prio];

        /* look initial class up in the row */
        start = cl = htb_lookup_leaf(hprio, prio);

        do {
next:
                if (unlikely(!cl))
                        return NULL;

                /* class can be empty - it is unlikely but can be true if leaf
                 * qdisc drops packets in enqueue routine or if someone used
                 * graft operation on the leaf since last dequeue;
                 * simply deactivate and skip such class
                 */
                if (unlikely(cl->leaf.q->q.qlen == 0)) {
                        struct htb_class *next;
                        htb_deactivate(q, cl);

                        /* row/level might become empty */
                        if ((q->row_mask[level] & (1 << prio)) == 0)
                                return NULL;

                        next = htb_lookup_leaf(hprio, prio);

                        if (cl == start)        /* fix start if we just deleted it */
                                start = next;
                        cl = next;
                        goto next;
                }

                skb = cl->leaf.q->dequeue(cl->leaf.q);
                if (likely(skb != NULL))
                        break;

                qdisc_warn_nonwc("htb", cl->leaf.q);
                htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
                                         &q->hlevel[0].hprio[prio].ptr);
                cl = htb_lookup_leaf(hprio, prio);

        } while (cl != start);

        if (likely(skb != NULL)) {
                bstats_update(&cl->bstats, skb);
                cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
                if (cl->leaf.deficit[level] < 0) {
                        cl->leaf.deficit[level] += cl->quantum;
                        htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
                                                 &q->hlevel[0].hprio[prio].ptr);
                }
                /* this used to be after charge_class but this constellation
                 * gives us slightly better performance
                 */
                if (!cl->leaf.q->q.qlen)
                        htb_deactivate(q, cl);
                htb_charge_class(q, cl, level, skb);
        }
        return skb;
}

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
        struct sk_buff *skb;
        struct htb_sched *q = qdisc_priv(sch);
        int level;
        s64 next_event;
        unsigned long start_at;

        /* try to dequeue direct packets as high prio (!) to minimize cpu work */
        skb = __qdisc_dequeue_head(&q->direct_queue);
        if (skb != NULL) {
ok:
                qdisc_bstats_update(sch, skb);
                qdisc_qstats_backlog_dec(sch, skb);
                sch->q.qlen--;
                return skb;
        }

        if (!sch->q.qlen)
                goto fin;
        q->now = ktime_get_ns();
        start_at = jiffies;

        next_event = q->now + 5LLU * NSEC_PER_SEC;

        for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
                /* common case optimization - skip event handler quickly */
                int m;
                s64 event = q->near_ev_cache[level];

                if (q->now >= event) {
                        event = htb_do_events(q, level, start_at);
                        if (!event)
                                event = q->now + NSEC_PER_SEC;
                        q->near_ev_cache[level] = event;
                }

                if (next_event > event)
                        next_event = event;

                m = ~q->row_mask[level];
                while (m != (int)(-1)) {
                        int prio = ffz(m);

                        m |= 1 << prio;
                        skb = htb_dequeue_tree(q, prio, level);
                        if (likely(skb != NULL))
                                goto ok;
                }
        }
        if (likely(next_event > q->now))
                qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
        else
                schedule_work(&q->work);
fin:
        return skb;
}

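/* Reset all classes; always called under BH and the qdisc lock. */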
static void htb_reset(struct Qdisc *sch)
{
        struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl;
        unsigned int i;

        for (i = 0; i < q->clhash.hashsize; i++) {
                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
                        if (cl->level)
                                memset(&cl->inner, 0, sizeof(cl->inner));
                        else {
                                if (cl->leaf.q && !q->offload)
                                        qdisc_reset(cl->leaf.q);
                        }
                        cl->prio_activity = 0;
                        cl->cmode = HTB_CAN_SEND;
                }
        }
        qdisc_watchdog_cancel(&q->watchdog);
        __qdisc_reset_queue(&q->direct_queue);
        sch->q.qlen = 0;
        sch->qstats.backlog = 0;
        memset(q->hlevel, 0, sizeof(q->hlevel));
        memset(q->row_mask, 0, sizeof(q->row_mask));
}

static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
        [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
        [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
        [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
        [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
        [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
        [TCA_HTB_RATE64] = { .type = NLA_U64 },
        [TCA_HTB_CEIL64] = { .type = NLA_U64 },
        [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
};

static void htb_work_func(struct work_struct *work)
{
        struct htb_sched *q = container_of(work, struct htb_sched, work);
        struct Qdisc *sch = q->watchdog.qdisc;

        rcu_read_lock();
        __netif_schedule(qdisc_root(sch));
        rcu_read_unlock();
}

static void htb_set_lockdep_class_child(struct Qdisc *q)
{
        static struct lock_class_key child_key;

        lockdep_set_class(qdisc_lock(q), &child_key);
}

static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
{
        return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
}

static int htb_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct net_device *dev = qdisc_dev(sch);
        struct tc_htb_qopt_offload offload_opt;
        struct htb_sched *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_HTB_MAX + 1];
        struct tc_htb_glob *gopt;
        unsigned int ntx;
        bool offload;
        int err;

        qdisc_watchdog_init(&q->watchdog, sch);
        INIT_WORK(&q->work, htb_work_func);

        if (!opt)
                return -EINVAL;

        err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
        if (err)
                return err;

        err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
                                          NULL);
        if (err < 0)
                return err;

        if (!tb[TCA_HTB_INIT])
                return -EINVAL;

        gopt = nla_data(tb[TCA_HTB_INIT]);
        if (gopt->version != HTB_VER >> 16)
                return -EINVAL;

        offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);

        if (offload) {
                if (sch->parent != TC_H_ROOT) {
                        NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload");
                        return -EOPNOTSUPP;
                }

                if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) {
                        NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on");
                        return -EOPNOTSUPP;
                }

                q->num_direct_qdiscs = dev->real_num_tx_queues;
                q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
                                           sizeof(*q->direct_qdiscs),
                                           GFP_KERNEL);
                if (!q->direct_qdiscs)
                        return -ENOMEM;
        }

        err = qdisc_class_hash_init(&q->clhash);
        if (err < 0)
                goto err_free_direct_qdiscs;

        qdisc_skb_head_init(&q->direct_queue);

        if (tb[TCA_HTB_DIRECT_QLEN])
                q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
        else
                q->direct_qlen = qdisc_dev(sch)->tx_queue_len;

        if ((q->rate2quantum = gopt->rate2quantum) < 1)
                q->rate2quantum = 1;
        q->defcls = gopt->defcls;

        if (!offload)
                return 0;

        for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
                struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
                struct Qdisc *qdisc;

                qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
                                          TC_H_MAKE(sch->handle, 0), extack);
                if (!qdisc) {
                        err = -ENOMEM;
                        goto err_free_qdiscs;
                }

                htb_set_lockdep_class_child(qdisc);
                q->direct_qdiscs[ntx] = qdisc;
                qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
        }

        sch->flags |= TCQ_F_MQROOT;

        offload_opt = (struct tc_htb_qopt_offload) {
                .command = TC_HTB_CREATE,
                .parent_classid = TC_H_MAJ(sch->handle) >> 16,
                .classid = TC_H_MIN(q->defcls),
                .extack = extack,
        };
        err = htb_offload(dev, &offload_opt);
        if (err)
                goto err_free_qdiscs;

        /* Defer this assignment, so that htb_destroy skips offload-related
         * parts (especially calling ndo_setup_tc) on errors.
         */
        q->offload = true;

        return 0;

err_free_qdiscs:
        for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
             ntx++)
                qdisc_put(q->direct_qdiscs[ntx]);

        qdisc_class_hash_destroy(&q->clhash);
        /* Prevent use-after-free and double-free when htb_destroy gets called.
         */
        q->clhash.hash = NULL;
        q->clhash.hashsize = 0;

err_free_direct_qdiscs:
        kfree(q->direct_qdiscs);
        q->direct_qdiscs = NULL;
        return err;
}

static void htb_attach_offload(struct Qdisc *sch)
{
        struct net_device *dev = qdisc_dev(sch);
        struct htb_sched *q = qdisc_priv(sch);
        unsigned int ntx;

        for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
                struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];

                old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
                qdisc_put(old);
                qdisc_hash_add(qdisc, false);
        }
        for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
                struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
                struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);

                qdisc_put(old);
        }

        kfree(q->direct_qdiscs);
        q->direct_qdiscs = NULL;
}

static void htb_attach_software(struct Qdisc *sch)
{
        struct net_device *dev = qdisc_dev(sch);
        unsigned int ntx;

        /* Resemble qdisc_graft behavior. */
        for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
                struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
                struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);

                qdisc_refcount_inc(sch);

                qdisc_put(old);
        }
}

static void htb_attach(struct Qdisc *sch)
{
        struct htb_sched *q = qdisc_priv(sch);

        if (q->offload)
                htb_attach_offload(sch);
        else
                htb_attach_software(sch);
}

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct htb_sched *q = qdisc_priv(sch);
        struct nlattr *nest;
        struct tc_htb_glob gopt;

        if (q->offload)
                sch->flags |= TCQ_F_OFFLOADED;
        else
                sch->flags &= ~TCQ_F_OFFLOADED;

        sch->qstats.overlimits = q->overlimits;
        /* It's safe to not acquire qdisc lock. As we hold RTNL,
         * no change can happen on the qdisc parameters.
         */

        gopt.direct_pkts = q->direct_pkts;
        gopt.version = HTB_VER;
        gopt.rate2quantum = q->rate2quantum;
        gopt.defcls = q->defcls;
        gopt.debug = 0;

        nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;
        if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
            nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
                goto nla_put_failure;
        if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
                goto nla_put_failure;

        return nla_nest_end(skb, nest);

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        struct htb_class *cl = (struct htb_class *)arg;
        struct htb_sched *q = qdisc_priv(sch);
        struct nlattr *nest;
        struct tc_htb_opt opt;

        /* It's safe to not acquire qdisc lock. As we hold RTNL,
         * no change can happen on the class parameters.
         */
        tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
        tcm->tcm_handle = cl->common.classid;
        if (!cl->level && cl->leaf.q)
                tcm->tcm_info = cl->leaf.q->handle;

        nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;

        memset(&opt, 0, sizeof(opt));

        psched_ratecfg_getrate(&opt.rate, &cl->rate);
        opt.buffer = PSCHED_NS2TICKS(cl->buffer);
        psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
        opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
        opt.quantum = cl->quantum;
        opt.prio = cl->prio;
        opt.level = cl->level;
        if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
                goto nla_put_failure;
        if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
                goto nla_put_failure;
        if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
            nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
                              TCA_HTB_PAD))
                goto nla_put_failure;
        if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
            nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
                              TCA_HTB_PAD))
                goto nla_put_failure;

        return nla_nest_end(skb, nest);

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}

static void htb_offload_aggregate_stats(struct htb_sched *q,
                                        struct htb_class *cl)
{
        u64 bytes = 0, packets = 0;
        struct htb_class *c;
        unsigned int i;

        gnet_stats_basic_sync_init(&cl->bstats);

        for (i = 0; i < q->clhash.hashsize; i++) {
                hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
                        struct htb_class *p = c;

                        while (p && p->level < cl->level)
                                p = p->parent;

                        if (p != cl)
                                continue;

                        bytes += u64_stats_read(&c->bstats_bias.bytes);
                        packets += u64_stats_read(&c->bstats_bias.packets);
                        if (c->level == 0) {
                                bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
                                packets += u64_stats_read(&c->leaf.q->bstats.packets);
                        }
                }
        }
        _bstats_update(&cl->bstats, bytes, packets);
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
        struct htb_class *cl = (struct htb_class *)arg;
        struct htb_sched *q = qdisc_priv(sch);
        struct gnet_stats_queue qs = {
                .drops = cl->drops,
                .overlimits = cl->overlimits,
        };
        __u32 qlen = 0;

        if (!cl->level && cl->leaf.q)
                qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);

        cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
                                    INT_MIN, INT_MAX);
        cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
                                     INT_MIN, INT_MAX);

        if (q->offload) {
                if (!cl->level) {
                        if (cl->leaf.q)
                                cl->bstats = cl->leaf.q->bstats;
                        else
                                gnet_stats_basic_sync_init(&cl->bstats);
                        _bstats_update(&cl->bstats,
                                       u64_stats_read(&cl->bstats_bias.bytes),
                                       u64_stats_read(&cl->bstats_bias.packets));
                } else {
                        htb_offload_aggregate_stats(q, cl);
                }
        }

        if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
            gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
            gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
                return -1;

        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static struct netdev_queue *
htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
{
        struct net_device *dev = qdisc_dev(sch);
        struct tc_htb_qopt_offload offload_opt;
        struct htb_sched *q = qdisc_priv(sch);
        int err;

        if (!q->offload)
                return sch->dev_queue;

        offload_opt = (struct tc_htb_qopt_offload) {
                .command = TC_HTB_LEAF_QUERY_QUEUE,
                .classid = TC_H_MIN(tcm->tcm_parent),
        };
        err = htb_offload(dev, &offload_opt);
        if (err || offload_opt.qid >= dev->num_tx_queues)
                return NULL;
        return netdev_get_tx_queue(dev, offload_opt.qid);
}

static struct Qdisc *
htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
{
        struct net_device *dev = dev_queue->dev;
        struct Qdisc *old_q;

        if (dev->flags & IFF_UP)
                dev_deactivate(dev);
        old_q = dev_graft_qdisc(dev_queue, new_q);
        if (new_q)
                new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
        if (dev->flags & IFF_UP)
                dev_activate(dev);

        return old_q;
}

static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
{
        struct netdev_queue *queue;

        queue = cl->leaf.offload_queue;
        if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
                WARN_ON(cl->leaf.q->dev_queue != queue);

        return queue;
}

static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
                                   struct htb_class *cl_new, bool destroying)
{
        struct netdev_queue *queue_old, *queue_new;
        struct net_device *dev = qdisc_dev(sch);

        queue_old = htb_offload_get_queue(cl_old);
        queue_new = htb_offload_get_queue(cl_new);

        if (!destroying) {
                struct Qdisc *qdisc;

                if (dev->flags & IFF_UP)
                        dev_deactivate(dev);
                qdisc = dev_graft_qdisc(queue_old, NULL);
                WARN_ON(qdisc != cl_old->leaf.q);
        }

        if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
                cl_old->leaf.q->dev_queue = queue_new;
        cl_old->leaf.offload_queue = queue_new;

        if (!destroying) {
                struct Qdisc *qdisc;

                qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
                if (dev->flags & IFF_UP)
                        dev_activate(dev);
                WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
        }
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old, struct netlink_ext_ack *extack)
{
        struct netdev_queue *dev_queue = sch->dev_queue;
        struct htb_class *cl = (struct htb_class *)arg;
        struct htb_sched *q = qdisc_priv(sch);
        struct Qdisc *old_q;

        if (cl->level)
                return -EINVAL;

        if (q->offload)
                dev_queue = htb_offload_get_queue(cl);

        if (!new) {
                new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
                                        cl->common.classid, extack);
                if (!new)
                        return -ENOBUFS;
        }

        if (q->offload) {
                htb_set_lockdep_class_child(new);
                /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
                qdisc_refcount_inc(new);
                old_q = htb_graft_helper(dev_queue, new);
        }

        *old = qdisc_replace(sch, new, &cl->leaf.q);

        if (q->offload) {
                WARN_ON(old_q != *old);
                qdisc_put(old_q);
        }

        return 0;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct htb_class *cl = (struct htb_class *)arg;
        return !cl->level ? cl->leaf.q : NULL;
}

static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
        struct htb_class *cl = (struct htb_class *)arg;

        htb_deactivate(qdisc_priv(sch), cl);
}

static inline int htb_parent_last_child(struct htb_class *cl)
{
        if (!cl->parent)
                /* the root class */
                return 0;
        if (cl->parent->children > 1)
                /* not the last child */
                return 0;
        return 1;
}

static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
                               struct Qdisc *new_q)
{
        struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *parent = cl->parent;

        WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);

        if (parent->cmode != HTB_CAN_SEND)
                htb_safe_rb_erase(&parent->pq_node,
                                  &q->hlevel[parent->level].wait_pq);

        parent->level = 0;
        memset(&parent->inner, 0, sizeof(parent->inner));
        parent->leaf.q = new_q ? new_q : &noop_qdisc;
        parent->tokens = parent->buffer;
        parent->ctokens = parent->cbuffer;
        parent->t_c = ktime_get_ns();
        parent->cmode = HTB_CAN_SEND;
        if (q->offload)
                parent->leaf.offload_queue = cl->leaf.offload_queue;
}

static void htb_parent_to_leaf_offload(struct Qdisc *sch,
                                       struct netdev_queue *dev_queue,
                                       struct Qdisc *new_q)
{
        struct Qdisc *old_q;

        /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
        if (new_q)
                qdisc_refcount_inc(new_q);
        old_q = htb_graft_helper(dev_queue, new_q);
        WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
}

static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
                                     bool last_child, bool destroying,
                                     struct netlink_ext_ack *extack)
{
        struct tc_htb_qopt_offload offload_opt;
        struct netdev_queue *dev_queue;
        struct Qdisc *q = cl->leaf.q;
        struct Qdisc *old = NULL;
        int err;

        if (cl->level)
                return -EINVAL;

        WARN_ON(!q);
        dev_queue = htb_offload_get_queue(cl);
        old = htb_graft_helper(dev_queue, NULL);
        if (destroying)
                /* Before HTB is destroyed, the kernel grafts noop_qdisc to
                 * all queues.
                 */
                WARN_ON(!(old->flags & TCQ_F_BUILTIN));
        else
                WARN_ON(old != q);

        if (cl->parent) {
                _bstats_update(&cl->parent->bstats_bias,
                               u64_stats_read(&q->bstats.bytes),
                               u64_stats_read(&q->bstats.packets));
        }

        offload_opt = (struct tc_htb_qopt_offload) {
                .command = !last_child ? TC_HTB_LEAF_DEL :
                           destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
                           TC_HTB_LEAF_DEL_LAST,
                .classid = cl->common.classid,
                .extack = extack,
        };
        err = htb_offload(qdisc_dev(sch), &offload_opt);

        if (!err || destroying)
                qdisc_put(old);
        else
                htb_graft_helper(dev_queue, old);

        if (last_child)
                return err;

        if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
                u32 classid = TC_H_MAJ(sch->handle) |
                              TC_H_MIN(offload_opt.classid);
                struct htb_class *moved_cl = htb_find(classid, sch);

                htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
        }

        return err;
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
        if (!cl->level) {
                WARN_ON(!cl->leaf.q);
                qdisc_put(cl->leaf.q);
        }
        gen_kill_estimator(&cl->rate_est);
        tcf_block_put(cl->block);
        kfree(cl);
}

static void htb_destroy(struct Qdisc *sch)
{
        struct net_device *dev = qdisc_dev(sch);
        struct tc_htb_qopt_offload offload_opt;
        struct htb_sched *q = qdisc_priv(sch);
        struct hlist_node *next;
        bool nonempty, changed;
        struct htb_class *cl;
        unsigned int i;

        cancel_work_sync(&q->work);
        qdisc_watchdog_cancel(&q->watchdog);
        /* Filters must go before the classes: a filter needs its target
         * class alive to be able to call unbind_filter on it (without
         * an Oops).
         */
        tcf_block_put(q->block);

        for (i = 0; i < q->clhash.hashsize; i++) {
                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
                        tcf_block_put(cl->block);
                        cl->block = NULL;
                }
        }

        do {
                nonempty = false;
                changed = false;
                for (i = 0; i < q->clhash.hashsize; i++) {
                        hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
                                                  common.hnode) {
                                bool last_child;

                                if (!q->offload) {
                                        htb_destroy_class(sch, cl);
                                        continue;
                                }

                                nonempty = true;

                                if (cl->level)
                                        continue;

                                changed = true;

                                last_child = htb_parent_last_child(cl);
                                htb_destroy_class_offload(sch, cl, last_child,
                                                          true, NULL);
                                qdisc_class_hash_remove(&q->clhash,
                                                        &cl->common);
                                if (cl->parent)
                                        cl->parent->children--;
                                if (last_child)
                                        htb_parent_to_leaf(sch, cl, NULL);
                                htb_destroy_class(sch, cl);
                        }
                }
        } while (changed);
        WARN_ON(nonempty);

        qdisc_class_hash_destroy(&q->clhash);
        __qdisc_reset_queue(&q->direct_queue);

        if (!q->offload)
                return;

        offload_opt = (struct tc_htb_qopt_offload) {
                .command = TC_HTB_DESTROY,
        };
        htb_offload(dev, &offload_opt);

        if (!q->direct_qdiscs)
                return;
        for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
                qdisc_put(q->direct_qdiscs[i]);
        kfree(q->direct_qdiscs);
}

static int htb_delete(struct Qdisc *sch, unsigned long arg,
                      struct netlink_ext_ack *extack)
{
        struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl = (struct htb_class *)arg;
        struct Qdisc *new_q = NULL;
        int last_child = 0;
        int err;

        /* TODO: why don't we allow deleting a whole subtree?  References?
         * Does the tc subsystem guarantee that htb_destroy runs with no
         * outstanding class refs, so that children could be removed safely?
         */
        if (cl->children || cl->filter_cnt)
                return -EBUSY;

        if (!cl->level && htb_parent_last_child(cl))
                last_child = 1;

        if (q->offload) {
                err = htb_destroy_class_offload(sch, cl, last_child, false,
                                                extack);
                if (err)
                        return err;
        }

        if (last_child) {
                struct netdev_queue *dev_queue = sch->dev_queue;

                if (q->offload)
                        dev_queue = htb_offload_get_queue(cl);

                new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
                                          cl->parent->common.classid,
                                          NULL);
                if (q->offload) {
                        if (new_q)
                                htb_set_lockdep_class_child(new_q);
                        htb_parent_to_leaf_offload(sch, dev_queue, new_q);
                }
        }

        sch_tree_lock(sch);

        if (!cl->level)
                qdisc_purge_queue(cl->leaf.q);

        /* delete from hash and active; remainder in destroy_class */
        qdisc_class_hash_remove(&q->clhash, &cl->common);
        if (cl->parent)
                cl->parent->children--;

        if (cl->prio_activity)
                htb_deactivate(q, cl);

        if (cl->cmode != HTB_CAN_SEND)
                htb_safe_rb_erase(&cl->pq_node,
                                  &q->hlevel[cl->level].wait_pq);

        if (last_child)
                htb_parent_to_leaf(sch, cl, new_q);

        sch_tree_unlock(sch);

        htb_destroy_class(sch, cl);
        return 0;
}

static int htb_change_class(struct Qdisc *sch, u32 classid,
                            u32 parentid, struct nlattr **tca,
                            unsigned long *arg, struct netlink_ext_ack *extack)
{
        int err = -EINVAL;
        struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl = (struct htb_class *)*arg, *parent;
        struct tc_htb_qopt_offload offload_opt;
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_HTB_MAX + 1];
        struct Qdisc *parent_qdisc = NULL;
        struct netdev_queue *dev_queue;
        struct tc_htb_opt *hopt;
        u64 rate64, ceil64;
        int warn = 0;

        if (!opt)
                goto failure;

        err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
                                          NULL);
        if (err < 0)
                goto failure;

        err = -EINVAL;
        if (tb[TCA_HTB_PARMS] == NULL)
                goto failure;

        parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

        hopt = nla_data(tb[TCA_HTB_PARMS]);
        if (!hopt->rate.rate || !hopt->ceil.rate)
                goto failure;

        if (q->offload) {
                /* Options not supported by the offload. */
                if (hopt->rate.overhead || hopt->ceil.overhead) {
                        NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
                        goto failure;
                }
                if (hopt->rate.mpu || hopt->ceil.mpu) {
                        NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
                        goto failure;
                }
                if (hopt->quantum) {
                        NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
                        goto failure;
                }
                if (hopt->prio) {
                        NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
                        goto failure;
                }
        }

        /* Keeping backward compatible with rate_table based iproute2 tc */
        if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
                qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
                                              NULL));

        if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
                qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
                                              NULL));

        rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
        ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;

        if (!cl) {              /* new class */
                struct net_device *dev = qdisc_dev(sch);
                struct Qdisc *new_q, *old_q;
                int prio;
                struct {
                        struct nlattr nla;
                        struct gnet_estimator opt;
                } est = {
                        .nla = {
                                .nla_len = nla_attr_size(sizeof(est.opt)),
                                .nla_type = TCA_RATE,
                        },
                        .opt = {
                                /* 4s interval, 16s averaging constant */
                                .interval = 2,
                                .ewma_log = 2,
                        },
                };

                /* check for valid classid */
                if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
                    htb_find(classid, sch))
                        goto failure;

                /* check maximal depth */
                if (parent && parent->parent && parent->parent->level < 2) {
                        pr_err("htb: tree is too deep\n");
                        goto failure;
                }
                err = -ENOBUFS;
                cl = kzalloc(sizeof(*cl), GFP_KERNEL);
                if (!cl)
                        goto failure;

                gnet_stats_basic_sync_init(&cl->bstats);
                gnet_stats_basic_sync_init(&cl->bstats_bias);

                err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
                if (err) {
                        kfree(cl);
                        goto failure;
                }
                if (htb_rate_est || tca[TCA_RATE]) {
                        err = gen_new_estimator(&cl->bstats, NULL,
                                                &cl->rate_est,
                                                NULL,
                                                true,
                                                tca[TCA_RATE] ? : &est.nla);
                        if (err)
                                goto err_block_put;
                }

                cl->children = 0;
                RB_CLEAR_NODE(&cl->pq_node);

                for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
                        RB_CLEAR_NODE(&cl->node[prio]);

                cl->common.classid = classid;

                ASSERT_RTNL();

                /* Create the leaf qdisc early because it uses GFP_KERNEL
                 * allocations, which cannot be done under sch_tree_lock().
                 */
                if (!q->offload) {
                        dev_queue = sch->dev_queue;
                } else if (!(parent && !parent->level)) {
                        /* Assign a dev_queue to this classid. */
                        offload_opt = (struct tc_htb_qopt_offload) {
                                .command = TC_HTB_LEAF_ALLOC_QUEUE,
                                .classid = cl->common.classid,
                                .parent_classid = parent ?
                                        TC_H_MIN(parent->common.classid) :
                                        TC_HTB_CLASSID_ROOT,
                                .rate = max_t(u64, hopt->rate.rate, rate64),
                                .ceil = max_t(u64, hopt->ceil.rate, ceil64),
                                .extack = extack,
                        };
                        err = htb_offload(dev, &offload_opt);
                        if (err) {
                                pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
                                       err);
                                goto err_kill_estimator;
                        }
                        dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
                } else { /* First child. */
                        dev_queue = htb_offload_get_queue(parent);
                        old_q = htb_graft_helper(dev_queue, NULL);
                        WARN_ON(old_q != parent->leaf.q);
                        offload_opt = (struct tc_htb_qopt_offload) {
                                .command = TC_HTB_LEAF_TO_INNER,
                                .classid = cl->common.classid,
                                .parent_classid =
                                        TC_H_MIN(parent->common.classid),
                                .rate = max_t(u64, hopt->rate.rate, rate64),
                                .ceil = max_t(u64, hopt->ceil.rate, ceil64),
                                .extack = extack,
                        };
                        err = htb_offload(dev, &offload_opt);
                        if (err) {
                                pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
                                       err);
                                htb_graft_helper(dev_queue, old_q);
                                goto err_kill_estimator;
                        }
                        _bstats_update(&parent->bstats_bias,
                                       u64_stats_read(&old_q->bstats.bytes),
                                       u64_stats_read(&old_q->bstats.packets));
                        qdisc_put(old_q);
                }
                new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
                                          classid, NULL);
                if (q->offload) {
                        if (new_q) {
                                htb_set_lockdep_class_child(new_q);
                                /* One ref for cl->leaf.q, the other for
                                 * dev_queue->qdisc.
                                 */
                                qdisc_refcount_inc(new_q);
                        }
                        old_q = htb_graft_helper(dev_queue, new_q);
                        /* The old qdisc is the builtin noop; no qdisc_put needed. */
                        WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
                }
                sch_tree_lock(sch);
                if (parent && !parent->level) {
                        /* turn parent into inner node */
                        qdisc_purge_queue(parent->leaf.q);
                        parent_qdisc = parent->leaf.q;
                        if (parent->prio_activity)
                                htb_deactivate(q, parent);

                        /* remove from evt list because of level change */
                        if (parent->cmode != HTB_CAN_SEND) {
                                htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
                                parent->cmode = HTB_CAN_SEND;
                        }
                        parent->level = (parent->parent ? parent->parent->level
                                         : TC_HTB_MAXDEPTH) - 1;
                        memset(&parent->inner, 0, sizeof(parent->inner));
                }

                /* leaf (we) needs elementary qdisc */
                cl->leaf.q = new_q ? new_q : &noop_qdisc;
                if (q->offload)
                        cl->leaf.offload_queue = dev_queue;

                cl->parent = parent;

                /* set class to be in HTB_CAN_SEND state */
                cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
                cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
                cl->mbuffer = 60ULL * NSEC_PER_SEC;     /* 1min */
                cl->t_c = ktime_get_ns();
                cl->cmode = HTB_CAN_SEND;

                /* attach to the hash list and parent's family */
                qdisc_class_hash_insert(&q->clhash, &cl->common);
                if (parent)
                        parent->children++;
                if (cl->leaf.q != &noop_qdisc)
                        qdisc_hash_add(cl->leaf.q, true);
        } else {
                if (tca[TCA_RATE]) {
                        err = gen_replace_estimator(&cl->bstats, NULL,
                                                    &cl->rate_est,
                                                    NULL,
                                                    true,
                                                    tca[TCA_RATE]);
                        if (err)
                                return err;
                }

                if (q->offload) {
                        struct net_device *dev = qdisc_dev(sch);

                        offload_opt = (struct tc_htb_qopt_offload) {
                                .command = TC_HTB_NODE_MODIFY,
                                .classid = cl->common.classid,
                                .rate = max_t(u64, hopt->rate.rate, rate64),
                                .ceil = max_t(u64, hopt->ceil.rate, ceil64),
                                .extack = extack,
                        };
                        err = htb_offload(dev, &offload_opt);
                        if (err)
                                /* Estimator was replaced, and rollback may fail
                                 * as well, so we don't try to recover it, and
                                 * the estimator won't work properly with the
                                 * offload anyway, because bstats are updated
                                 * only when the stats are queried.
                                 */
                                return err;
                }

                sch_tree_lock(sch);
        }

        psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
        psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);

        /* it used to be a nasty bug here, we have to check that node
         * is really leaf before changing cl->leaf !
         */
        if (!cl->level) {
                u64 quantum = cl->rate.rate_bytes_ps;

                do_div(quantum, q->rate2quantum);
                cl->quantum = min_t(u64, quantum, INT_MAX);

                if (!hopt->quantum && cl->quantum < 1000) {
                        warn = -1;
                        cl->quantum = 1000;
                }
                if (!hopt->quantum && cl->quantum > 200000) {
                        warn = 1;
                        cl->quantum = 200000;
                }
                if (hopt->quantum)
                        cl->quantum = hopt->quantum;
                if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
                        cl->prio = TC_HTB_NUMPRIO - 1;
        }

        cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
        cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);

        sch_tree_unlock(sch);
        qdisc_put(parent_qdisc);

        if (warn)
                pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
                        cl->common.classid, (warn == -1 ? "small" : "big"));

        qdisc_class_hash_grow(sch, &q->clhash);

        *arg = (unsigned long)cl;
        return 0;

err_kill_estimator:
        gen_kill_estimator(&cl->rate_est);
err_block_put:
        tcf_block_put(cl->block);
        kfree(cl);
failure:
        return err;
}

static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
                                       struct netlink_ext_ack *extack)
{
        struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl = (struct htb_class *)arg;

        return cl ? cl->block : q->block;
}

static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
                                     u32 classid)
{
        struct htb_class *cl = htb_find(classid, sch);

        /* Binding filters to non-leaf classes is intentionally allowed
         * (e.g. the tc_index filter uses the bind call just to look the
         * class up).  The filter count only acts as a "lock": a class
         * with bound filters cannot be deleted, see htb_delete().
         */
        if (cl)
                cl->filter_cnt++;
        return (unsigned long)cl;
}

static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
        struct htb_class *cl = (struct htb_class *)arg;

        if (cl)
                cl->filter_cnt--;
}

static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
        struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl;
        unsigned int i;

        if (arg->stop)
                return;

        for (i = 0; i < q->clhash.hashsize; i++) {
                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
                        if (arg->count < arg->skip) {
                                arg->count++;
                                continue;
                        }
                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
                                arg->stop = 1;
                                return;
                        }
                        arg->count++;
                }
        }
}

static const struct Qdisc_class_ops htb_class_ops = {
        .select_queue   = htb_select_queue,
        .graft          = htb_graft,
        .leaf           = htb_leaf,
        .qlen_notify    = htb_qlen_notify,
        .find           = htb_search,
        .change         = htb_change_class,
        .delete         = htb_delete,
        .walk           = htb_walk,
        .tcf_block      = htb_tcf_block,
        .bind_tcf       = htb_bind_filter,
        .unbind_tcf     = htb_unbind_filter,
        .dump           = htb_dump_class,
        .dump_stats     = htb_dump_class_stats,
};

static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
        .cl_ops         = &htb_class_ops,
        .id             = "htb",
        .priv_size      = sizeof(struct htb_sched),
        .enqueue        = htb_enqueue,
        .dequeue        = htb_dequeue,
        .peek           = qdisc_peek_dequeued,
        .init           = htb_init,
        .attach         = htb_attach,
        .reset          = htb_reset,
        .destroy        = htb_destroy,
        .dump           = htb_dump,
        .owner          = THIS_MODULE,
};

static int __init htb_module_init(void)
{
        return register_qdisc(&htb_qdisc_ops);
}

static void __exit htb_module_exit(void)
{
        unregister_qdisc(&htb_qdisc_ops);
}

module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");