/*
 * net/sched/sch_qfq.c         Quick Fair Queueing Plus Scheduler.
 *
 * Implements the QFQ+ variant of the Quick Fair Queueing scheduler;
 * see the overview comment below for the algorithm and its sources.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/pkt_sched.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/*  Quick Fair Queueing Plus
    ========================

    QFQ+ divides classes into aggregates of at most QFQ_MAX_AGG_CLASSES
    classes. Each aggregate is timestamped with a virtual start time S
    and a virtual finish time F, and scheduled according to its
    timestamps. S and F are computed as a function of a system virtual
    time function V. The classes within each aggregate are instead
    scheduled with DRR.

    To speed up operations, QFQ+ also divides aggregates into a limited
    number of groups. Which group an aggregate belongs to depends on the
    ratio between the maximum packet length for its classes and the
    weight of the aggregate. Groups have their own S and F. In the end,
    QFQ+ schedules groups, then aggregates within groups, then classes
    within aggregates with DRR.

    Each group is identified by a slot shift: the timestamps of the
    aggregates in a group are rounded down to multiples of
    (1 << slot_shift), and the aggregates are kept in a circular array
    of QFQ_MAX_SLOTS buckets (struct qfq_group). Four bitmaps, one bit
    per group, track the scheduling state of each group:

	ER  eligible (S <= V) and ready: not blocked by any group with
	    an earlier finish time;
	IR  ineligible (S > V) but otherwise ready;
	EB  eligible but blocked by a group in ER;
	IB  ineligible and blocked.

    Groups become eligible as V advances (qfq_make_eligible) and are
    unblocked when the group blocking them leaves ER
    (qfq_unblock_groups).

    Sources:
    [1] Paolo Valente, "Reducing the Execution Time of Fair-Queueing
	Schedulers".
    [2] Fabio Checconi, Luigi Rizzo, and Paolo Valente: "QFQ: Efficient
	Packet Scheduling with Tight Bandwidth Distribution Guarantees".
 */
#define QFQ_MAX_SLOTS	32	/* Max slots in a group's bucket list. */

/*
 * Timestamps (S, F) and the system virtual time V are computed in
 * fixed-point arithmetic with FRAC_BITS fractional bits.
 *
 * QFQ_MAX_INDEX is the maximum group index (the state bitmaps need
 * one bit per group), and QFQ_MAX_WSHIFT is the maximum power of two
 * supported as a class weight.
 */
#define QFQ_MAX_INDEX		24
#define QFQ_MAX_WSHIFT		10

#define QFQ_MAX_WEIGHT		(1<<QFQ_MAX_WSHIFT) /* see qfq_slot_insert */
#define QFQ_MAX_WSUM		(64*QFQ_MAX_WEIGHT)

#define FRAC_BITS		30	/* fixed point arithmetic */
#define ONE_FP			(1UL << FRAC_BITS)

#define QFQ_MTU_SHIFT		16	/* to support TSO/GSO */
#define QFQ_MIN_LMAX		512	/* see qfq_slot_insert */

#define QFQ_MAX_AGG_CLASSES	8 /* max num classes per aggregate allowed */

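/*
 * Illustrative numbers, derived from the constants above (not from the
 * original sources): QFQ_MAX_WEIGHT = 1 << 10 = 1024, QFQ_MAX_WSUM =
 * 64 * 1024 = 65536, and ONE_FP = 1 << 30. An aggregate whose class
 * weights sum to w gets inv_w = ONE_FP / w, so e.g. w = 4 yields
 * inv_w = 1 << 28, and serving L bytes advances the aggregate's finish
 * time by L * inv_w fixed-point units.
 */
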
/*
 * Possible group states. These values are used as indexes for the
 * bitmaps array of struct qfq_sched.
 */
enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };

struct qfq_group;

struct qfq_aggregate;

struct qfq_class {
	struct Qdisc_class_common common;

	unsigned int filter_cnt;

	struct gnet_stats_basic_sync bstats;
	struct gnet_stats_queue qstats;
	struct net_rate_estimator __rcu *rate_est;
	struct Qdisc *qdisc;
	struct list_head alist;		/* Link for active-classes list. */
	struct qfq_aggregate *agg;	/* Parent aggregate. */
	int deficit;			/* DRR deficit counter. */
};

struct qfq_aggregate {
	struct hlist_node next;	/* Link for the slot list. */
	u64 S, F;		/* group timestamps borders */

	/* In principle we would want the index, log_2(lmax/weight),
	 * but the scheduler only ever uses the group pointer, so we
	 * store it here directly.
	 */
	struct qfq_group *grp;

	/* these are copied from the flowset. */
	u32	class_weight; /* Weight of each class in this aggregate. */
	/* Max pkt size for the classes in this aggregate, DRR quantum. */
	int	lmax;

	u32	inv_w;	    /* ONE_FP/(sum of weights of classes in aggr.). */
	u32	budgetmax;  /* Max budget for this aggregate. */
	u32	initial_budget, budget;     /* Initial and current budget. */

	int		  num_classes;	/* Number of classes in this aggr. */
	struct list_head  active;	/* DRR queue of active classes. */

	struct hlist_node nonfull_next;	/* See nonfull_aggs in qfq_sched. */
};

struct qfq_group {
	u64 S, F;			/* group timestamps (approx). */
	unsigned int slot_shift;	/* Slot shift. */
	unsigned int index;		/* Group index. */
	unsigned int front;		/* Index of the front slot. */
	unsigned long full_slots;	/* non-empty slots */

	/* Array of RR lists of active aggregates. */
	struct hlist_head slots[QFQ_MAX_SLOTS];
};

struct qfq_sched {
	struct tcf_proto __rcu *filter_list;
	struct tcf_block	*block;
	struct Qdisc_class_hash clhash;

	u64			oldV, V;	/* Precise virtual times. */
	struct qfq_aggregate	*in_serv_agg;	/* Aggregate being served. */
	u32			wsum;		/* weight sum */
	u32			iwsum;		/* inverse weight sum */

	unsigned long bitmaps[QFQ_MAX_STATE];	    /* Group bitmaps. */
	struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
	u32 min_slot_shift;	/* Index of the group-0 bit in the bitmaps. */

	u32 max_agg_classes;		/* Max number of classes per aggr. */
	struct hlist_head nonfull_aggs; /* Aggs with room for more classes. */
};

/*
 * Possible reasons why the timestamps of an aggregate are updated
 * enqueue: the aggregate switches from idle to active and must be
 *	    scheduled for service
 * requeue: the aggregate finishes its budget, so it stops being served
 *	    and must be rescheduled for service
 */
enum update_reason {enqueue, requeue};

static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct qfq_class, common);
}

static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
	[TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
	[TCA_QFQ_LMAX] = { .type = NLA_U32 },
};

/*
 * Calculate a flow index, given its weight and maximum packet length.
 * index = log_2(maxlen/weight) but we need to apply the scaling.
 * This is used only once at flow creation.
 */
static int qfq_calc_index(u32 inv_w, unsigned int maxlen, u32 min_slot_shift)
{
	u64 slot_size = (u64)maxlen * inv_w;
	unsigned long size_map;
	int index = 0;

	size_map = slot_size >> min_slot_shift;
	if (!size_map)
		goto out;

	index = __fls(size_map) + 1;	/* basically a log_2 */
	index -= !(slot_size - (1ULL << (index + min_slot_shift - 1)));

	if (index < 0)
		index = 0;
out:
	pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n",
		 (unsigned long) ONE_FP/inv_w, maxlen, index);

	return index;
}

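/*
 * Worked example (illustrative, assuming min_slot_shift = 25, the
 * value obtained with the default 8 classes per aggregate): for a
 * weight of 1, inv_w = ONE_FP = 1 << 30; with maxlen = 512 = 1 << 9,
 * slot_size = 1 << 39 and size_map = 1 << 14, so __fls(size_map) + 1
 * = 15. Since slot_size is an exact power of two, the correction
 * subtracts 1, giving index 14.
 */
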
static void qfq_deactivate_agg(struct qfq_sched *, struct qfq_aggregate *);
static void qfq_activate_agg(struct qfq_sched *, struct qfq_aggregate *,
			     enum update_reason);

static void qfq_init_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
			 u32 lmax, u32 weight)
{
	INIT_LIST_HEAD(&agg->active);
	hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);

	agg->lmax = lmax;
	agg->class_weight = weight;
}

static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q,
					  u32 lmax, u32 weight)
{
	struct qfq_aggregate *agg;

	hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next)
		if (agg->lmax == lmax && agg->class_weight == weight)
			return agg;

	return NULL;
}

/* Update aggregate as a function of the new number of classes. */
static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
			   int new_num_classes)
{
	u32 new_agg_weight;

	if (new_num_classes == q->max_agg_classes)
		hlist_del_init(&agg->nonfull_next);

	if (agg->num_classes > new_num_classes &&
	    new_num_classes == q->max_agg_classes - 1) /* agg no more full */
		hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);

	/* The next assignment may let
	 * agg->initial_budget > agg->budgetmax
	 * hold, we deal with it in any case in qfq_slot_insert.
	 */
	agg->budgetmax = new_num_classes * agg->lmax;
	new_agg_weight = agg->class_weight * new_num_classes;
	agg->inv_w = ONE_FP/new_agg_weight;

	if (agg->grp == NULL) { /* agg being initialized, pick a group */
		int i = qfq_calc_index(agg->inv_w, agg->budgetmax,
				       q->min_slot_shift);
		agg->grp = &q->groups[i];
	}

	q->wsum +=
		(int) agg->class_weight * (new_num_classes - agg->num_classes);
	q->iwsum = ONE_FP / q->wsum;

	agg->num_classes = new_num_classes;
}

/* Add class to aggregate. */
static void qfq_add_to_agg(struct qfq_sched *q,
			   struct qfq_aggregate *agg,
			   struct qfq_class *cl)
{
	cl->agg = agg;

	qfq_update_agg(q, agg, agg->num_classes+1);
	if (cl->qdisc->q.qlen > 0) { /* adding an active class */
		list_add_tail(&cl->alist, &agg->active);
		if (list_first_entry(&agg->active, struct qfq_class, alist) ==
		    cl && q->in_serv_agg != agg) /* agg was inactive */
			qfq_activate_agg(q, agg, enqueue); /* schedule agg */
	}
}

static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *);

static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
{
	hlist_del_init(&agg->nonfull_next); /* remove from unfull list */
	q->wsum -= agg->class_weight;
	if (q->wsum != 0)
		q->iwsum = ONE_FP / q->wsum;

	if (q->in_serv_agg == agg)
		q->in_serv_agg = qfq_choose_next_agg(q);
	kfree(agg);
}

/* Deschedule class from within its parent aggregate. */
static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
{
	struct qfq_aggregate *agg = cl->agg;

	list_del(&cl->alist); /* remove from RR queue of the aggregate */
	if (list_empty(&agg->active)) /* agg is now inactive */
		qfq_deactivate_agg(q, agg);
}

/* Remove class from its parent aggregate. */
static void qfq_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
{
	struct qfq_aggregate *agg = cl->agg;

	cl->agg = NULL;
	if (agg->num_classes == 1) { /* agg being emptied, destroy it */
		qfq_destroy_agg(q, agg);
		return;
	}
	qfq_update_agg(q, agg, agg->num_classes-1);
}

/* Deschedule class and remove it from its parent aggregate. */
static void qfq_deact_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
{
	if (cl->qdisc->q.qlen > 0) /* class is active */
		qfq_deactivate_class(q, cl);

	qfq_rm_from_agg(q, cl);
}

/* Move class to a new aggregate, matching the new class weight and/or
 * lmax.
 */
static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
			  u32 lmax)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);

	if (new_agg == NULL) { /* create new aggregate */
		new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
		if (new_agg == NULL)
			return -ENOBUFS;
		qfq_init_agg(q, new_agg, lmax, weight);
	}
	qfq_deact_rm_from_agg(q, cl);
	qfq_add_to_agg(q, new_agg, cl);

	return 0;
}

static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
			    struct nlattr **tca, unsigned long *arg,
			    struct netlink_ext_ack *extack)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl = (struct qfq_class *)*arg;
	bool existing = false;
	struct nlattr *tb[TCA_QFQ_MAX + 1];
	struct qfq_aggregate *new_agg = NULL;
	u32 weight, lmax, inv_w;
	int err;
	int delta_w;

	if (tca[TCA_OPTIONS] == NULL) {
		pr_notice("qfq: no options\n");
		return -EINVAL;
	}

	err = nla_parse_nested_deprecated(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS],
					  qfq_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_QFQ_WEIGHT]) {
		weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
		if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
			pr_notice("qfq: invalid weight %u\n", weight);
			return -EINVAL;
		}
	} else
		weight = 1;

	if (tb[TCA_QFQ_LMAX]) {
		lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
		if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
			pr_notice("qfq: invalid max length %u\n", lmax);
			return -EINVAL;
		}
	} else
		lmax = psched_mtu(qdisc_dev(sch));

	inv_w = ONE_FP / weight;
	weight = ONE_FP / inv_w;

	if (cl != NULL &&
	    lmax == cl->agg->lmax &&
	    weight == cl->agg->class_weight)
		return 0; /* nothing to change */

	delta_w = weight - (cl ? cl->agg->class_weight : 0);

	if (q->wsum + delta_w > QFQ_MAX_WSUM) {
		pr_notice("qfq: total weight out of range (%d + %u)\n",
			  delta_w, q->wsum);
		return -EINVAL;
	}

	if (cl != NULL) { /* modify existing class */
		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, NULL,
						    &cl->rate_est,
						    NULL,
						    true,
						    tca[TCA_RATE]);
			if (err)
				return err;
		}
		existing = true;
		goto set_change_agg;
	}

	/* create and init new class */
	cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
	if (cl == NULL)
		return -ENOBUFS;

	gnet_stats_basic_sync_init(&cl->bstats);
	cl->common.classid = classid;
	cl->deficit = lmax;

	cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
				      classid, NULL);
	if (cl->qdisc == NULL)
		cl->qdisc = &noop_qdisc;

	if (tca[TCA_RATE]) {
		err = gen_new_estimator(&cl->bstats, NULL,
					&cl->rate_est,
					NULL,
					true,
					tca[TCA_RATE]);
		if (err)
			goto destroy_class;
	}

	if (cl->qdisc != &noop_qdisc)
		qdisc_hash_add(cl->qdisc, true);

set_change_agg:
	sch_tree_lock(sch);
	new_agg = qfq_find_agg(q, lmax, weight);
	if (new_agg == NULL) { /* create new aggregate */
		sch_tree_unlock(sch);
		new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL);
		if (new_agg == NULL) {
			err = -ENOBUFS;
			gen_kill_estimator(&cl->rate_est);
			goto destroy_class;
		}
		sch_tree_lock(sch);
		qfq_init_agg(q, new_agg, lmax, weight);
	}
	if (existing)
		qfq_deact_rm_from_agg(q, cl);
	else
		qdisc_class_hash_insert(&q->clhash, &cl->common);
	qfq_add_to_agg(q, new_agg, cl);
	sch_tree_unlock(sch);
	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

destroy_class:
	qdisc_put(cl->qdisc);
	kfree(cl);
	return err;
}

static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
{
	struct qfq_sched *q = qdisc_priv(sch);

	qfq_rm_from_agg(q, cl);
	gen_kill_estimator(&cl->rate_est);
	qdisc_put(cl->qdisc);
	kfree(cl);
}

static int qfq_delete_class(struct Qdisc *sch, unsigned long arg,
			    struct netlink_ext_ack *extack)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl = (struct qfq_class *)arg;

	if (cl->filter_cnt > 0)
		return -EBUSY;

	sch_tree_lock(sch);

	qdisc_purge_queue(cl->qdisc);
	qdisc_class_hash_remove(&q->clhash, &cl->common);

	sch_tree_unlock(sch);

	qfq_destroy_class(sch, cl);
	return 0;
}

static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid)
{
	return (unsigned long)qfq_find_class(sch, classid);
}

static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl,
				       struct netlink_ext_ack *extack)
{
	struct qfq_sched *q = qdisc_priv(sch);

	if (cl)
		return NULL;

	return q->block;
}

static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
				  u32 classid)
{
	struct qfq_class *cl = qfq_find_class(sch, classid);

	if (cl != NULL)
		cl->filter_cnt++;

	return (unsigned long)cl;
}

static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct qfq_class *cl = (struct qfq_class *)arg;

	cl->filter_cnt--;
}

static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
			   struct Qdisc *new, struct Qdisc **old,
			   struct netlink_ext_ack *extack)
{
	struct qfq_class *cl = (struct qfq_class *)arg;

	if (new == NULL) {
		new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
					cl->common.classid, NULL);
		if (new == NULL)
			new = &noop_qdisc;
	}

	*old = qdisc_replace(sch, new, &cl->qdisc);
	return 0;
}

static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct qfq_class *cl = (struct qfq_class *)arg;

	return cl->qdisc;
}

static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct qfq_class *cl = (struct qfq_class *)arg;
	struct nlattr *nest;

	tcm->tcm_parent	= TC_H_ROOT;
	tcm->tcm_handle	= cl->common.classid;
	tcm->tcm_info	= cl->qdisc->handle;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
	    nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
				struct gnet_dump *d)
{
	struct qfq_class *cl = (struct qfq_class *)arg;
	struct tc_qfq_stats xstats;

	memset(&xstats, 0, sizeof(xstats));

	xstats.weight = cl->agg->class_weight;
	xstats.lmax = cl->agg->lmax;

	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    qdisc_qstats_copy(d, cl->qdisc) < 0)
		return -1;

	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}

static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	struct tcf_result res;
	struct tcf_proto *fl;
	int result;

	if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
		pr_debug("qfq_classify: found %d\n", skb->priority);
		cl = qfq_find_class(sch, skb->priority);
		if (cl != NULL)
			return cl;
	}

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	fl = rcu_dereference_bh(q->filter_list);
	result = tcf_classify(skb, NULL, fl, &res, false);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
		case TC_ACT_TRAP:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			fallthrough;
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		cl = (struct qfq_class *)res.class;
		if (cl == NULL)
			cl = qfq_find_class(sch, res.classid);
		return cl;
	}

	return NULL;
}

/* Generic comparison function, handling wraparound. */
static inline int qfq_gt(u64 a, u64 b)
{
	return (s64)(a - b) > 0;
}

/* Round a precise timestamp to its slotted value. */
static inline u64 qfq_round_down(u64 ts, unsigned int shift)
{
	return ts & ~((1ULL << shift) - 1);
}

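/*
 * Illustrative examples (not from the original sources): with shift =
 * 8, qfq_round_down(0x12345, 8) clears the low 8 bits and returns
 * 0x12300. qfq_gt() compares timestamps modulo 2^64, so even after V
 * wraps around, qfq_gt(1, ~0ULL) is still true because
 * (s64)(1 - ~0ULL) = 2 > 0.
 */
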
/* return the pointer to the group with lowest index in the bitmap */
static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
					unsigned long bitmap)
{
	int index = __ffs(bitmap);
	return &q->groups[index];
}

static inline unsigned long mask_from(unsigned long bitmap, int from)
{
	return bitmap & ~((1UL << from) - 1);
}

/*
 * The state computation relies on ER=0, IR=1, EB=2, IB=3
 * First compute eligibility comparing grp->S, q->V,
 * then check if someone is blocking us and possibly add EB
 */
static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
{
	/* if S > V we are not eligible */
	unsigned int state = qfq_gt(grp->S, q->V);
	unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
	struct qfq_group *next;

	if (mask) {
		next = qfq_ffs(q, mask);
		if (qfq_gt(grp->F, next->F))
			state |= EB;
	}

	return state;
}

/*
 * Move the groups selected by mask from the src state bitmap to the
 * dst state bitmap. The caller must guarantee src != dst.
 */
static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
				   int src, int dst)
{
	q->bitmaps[dst] |= q->bitmaps[src] & mask;
	q->bitmaps[src] &= ~mask;
}

static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
{
	unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
	struct qfq_group *next;

	if (mask) {
		next = qfq_ffs(q, mask);
		if (!qfq_gt(next->F, old_F))
			return;
	}

	mask = (1UL << index) - 1;
	qfq_move_groups(q, mask, EB, ER);
	qfq_move_groups(q, mask, IB, IR);
}

/*
 * A group becomes eligible when V crosses a boundary of its slot
 * size. Every time the slotted value of V changes, the groups whose
 * boundary was crossed are exactly those selected by the highest bit
 * that flipped between the old and the new slotted V.
 */
static void qfq_make_eligible(struct qfq_sched *q)
{
	unsigned long vslot = q->V >> q->min_slot_shift;
	unsigned long old_vslot = q->oldV >> q->min_slot_shift;

	if (vslot != old_vslot) {
		unsigned long mask;
		int last_flip_pos = fls(vslot ^ old_vslot);

		if (last_flip_pos > 31) /* higher than the number of groups */
			mask = ~0UL;
		else
			mask = (1UL << last_flip_pos) - 1;

		qfq_move_groups(q, mask, IR, ER);
		qfq_move_groups(q, mask, IB, EB);
	}
}

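/*
 * Illustrative example: if old_vslot = 0b0110 and vslot = 0b1010,
 * then vslot ^ old_vslot = 0b1100 and fls() returns 4, so mask =
 * 0b1111 and groups 0-3 move from the ineligible bitmaps (IR, IB) to
 * the corresponding eligible ones (ER, EB).
 */
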
/*
 * As long as the weights and lmax values of the classes do not
 * change, the slot index computed in qfq_slot_insert stays within the
 * bucket list: with the constants above it is theoretically bounded by
 * 2 + QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) *
 * (QFQ_MAX_WEIGHT/QFQ_MAX_WSUM) = 2 + 8 * 128 * 1/64 = 18,
 * which is below QFQ_MAX_SLOTS - 2. When a class weight is increased
 * or its lmax is decreased (through qfq_change_class, or implicitly on
 * enqueue of an oversized packet), however, the aggregate may move to
 * a group with a smaller slot size, and the computed slot index may
 * exceed QFQ_MAX_SLOTS - 2. In that case the timestamps of the
 * aggregate are decreased so that it fits in the last usable slot,
 * which may grant it slightly earlier service than its exact
 * timestamps would, but preserves a consistent schedule.
 */
static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg,
			    u64 roundedS)
{
	u64 slot = (roundedS - grp->S) >> grp->slot_shift;
	unsigned int i; /* slot index in the bucket ring */

	if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
		u64 deltaS = roundedS - grp->S -
			((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
		agg->S -= deltaS;
		agg->F -= deltaS;
		slot = QFQ_MAX_SLOTS - 2;
	}

	i = (grp->front + slot) % QFQ_MAX_SLOTS;

	hlist_add_head(&agg->next, &grp->slots[i]);
	__set_bit(slot, &grp->full_slots);
}
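
/*
 * Illustrative example: with slot_shift = 25, grp->S = 0 and
 * roundedS = 3 << 25, the aggregate goes into logical slot 3, i.e.
 * bucket (grp->front + 3) % QFQ_MAX_SLOTS, and bit 3 of full_slots is
 * set.
 */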

/* Return the first aggregate in the front bucket of the group. */
static struct qfq_aggregate *qfq_slot_head(struct qfq_group *grp)
{
	return hlist_entry(grp->slots[grp->front].first,
			   struct qfq_aggregate, next);
}

/*
 * Remove the head aggregate from the front bucket of the group.
 */
static void qfq_front_slot_remove(struct qfq_group *grp)
{
	struct qfq_aggregate *agg = qfq_slot_head(grp);

	BUG_ON(!agg);
	hlist_del(&agg->next);
	if (hlist_empty(&grp->slots[grp->front]))
		__clear_bit(0, &grp->full_slots);
}

/*
 * Returns the first aggregate in the first non-empty bucket of the
 * group. As a side effect, adjusts the bucket list so the first
 * non-empty bucket is at position 0 in full_slots.
 */
static struct qfq_aggregate *qfq_slot_scan(struct qfq_group *grp)
{
	unsigned int i;

	pr_debug("qfq slot_scan: grp %u full %#lx\n",
		 grp->index, grp->full_slots);

	if (grp->full_slots == 0)
		return NULL;

	i = __ffs(grp->full_slots);  /* zero based */
	if (i > 0) {
		grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
		grp->full_slots >>= i;
	}

	return qfq_slot_head(grp);
}

/*
 * Adjust the bucket list so that the start time of the group can
 * decrease to roundedS: instead of moving the aggregates, move the
 * front index back (modulo QFQ_MAX_SLOTS) and shift the mask of
 * occupied slots up, since __ffs() is used to find the first
 * non-empty slot.
 */
static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
{
	unsigned int i = (grp->S - roundedS) >> grp->slot_shift;

	grp->full_slots <<= i;
	grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
}

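/*
 * Illustrative example: if grp->S - roundedS = 2 << slot_shift, the
 * front index moves back by two buckets and full_slots is shifted
 * left by two, so an aggregate that used to sit in logical slot 0 is
 * now found in logical slot 2, in the same physical bucket as before.
 */
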
static void qfq_update_eligible(struct qfq_sched *q)
{
	struct qfq_group *grp;
	unsigned long ineligible;

	ineligible = q->bitmaps[IR] | q->bitmaps[IB];
	if (ineligible) {
		if (!q->bitmaps[ER]) {
			grp = qfq_ffs(q, ineligible);
			if (qfq_gt(grp->S, q->V))
				q->V = grp->S;
		}
		qfq_make_eligible(q);
	}
}

/* Dequeue head packet of the head class in the DRR queue of the aggregate. */
static void agg_dequeue(struct qfq_aggregate *agg,
			struct qfq_class *cl, unsigned int len)
{
	qdisc_dequeue_peeked(cl->qdisc);

	cl->deficit -= (int) len;

	if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */
		list_del(&cl->alist);
	else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
		cl->deficit += agg->lmax;
		list_move_tail(&cl->alist, &agg->active);
	}
}

static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
					   struct qfq_class **cl,
					   unsigned int *len)
{
	struct sk_buff *skb;

	*cl = list_first_entry(&agg->active, struct qfq_class, alist);
	skb = (*cl)->qdisc->ops->peek((*cl)->qdisc);
	if (skb == NULL)
		WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
	else
		*len = qdisc_pkt_len(skb);

	return skb;
}

/* Update F according to the actual service received by the aggregate. */
static inline void charge_actual_service(struct qfq_aggregate *agg)
{
	/* Compute the service received by the aggregate, taking into
	 * account that, after decreasing the number of classes in
	 * agg, it may happen that
	 * agg->initial_budget - agg->budget > agg->budgetmax
	 */
	u32 service_received = min(agg->budgetmax,
				   agg->initial_budget - agg->budget);

	agg->F = agg->S + (u64)service_received * agg->inv_w;
}

/* Assign a reasonable start time for a new aggregate in group i.
 * Admissible values for \hat(F) are multiples of \sigma_i
 * no greater than V+\sigma_i . Larger values mean that
 * we had a wraparound so we consider the timestamp to be stale.
 *
 * If F is not stale and F >= V then we set S = F.
 * Otherwise we should assign S = V, but this may violate
 * the ordering in EB (see [2]). So, if we have groups in ER,
 * set S to the F_j of the first group j which would be blocking us.
 * We are guaranteed not to move S backward because
 * otherwise our group i would still be blocked.
 */
static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg)
{
	unsigned long mask;
	u64 limit, roundedF;
	int slot_shift = agg->grp->slot_shift;

	roundedF = qfq_round_down(agg->F, slot_shift);
	limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);

	if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) {
		/* timestamp was stale */
		mask = mask_from(q->bitmaps[ER], agg->grp->index);
		if (mask) {
			struct qfq_group *next = qfq_ffs(q, mask);
			if (qfq_gt(roundedF, next->F)) {
				if (qfq_gt(limit, next->F))
					agg->S = next->F;
				else /* preserve timestamp correctness */
					agg->S = limit;
				return;
			}
		}
		agg->S = q->V;
	} else  /* timestamp is not stale */
		agg->S = agg->F;
}

/* Update the timestamps of agg before scheduling/rescheduling it for
 * service. In particular, assign to agg->F its maximum possible
 * value, i.e., the virtual time that the aggregate would have if it
 * always had enough budget.
 */
static inline void
qfq_update_agg_ts(struct qfq_sched *q,
		  struct qfq_aggregate *agg, enum update_reason reason)
{
	if (reason != requeue)
		qfq_update_start(q, agg);
	else /* just charge agg for the service received */
		agg->S = agg->F;

	agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w;
}

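/*
 * Illustrative example: an aggregate of two classes with lmax = 2048
 * and class weight 1 has budgetmax = 4096 and inv_w = ONE_FP / 2 =
 * 1 << 29, so each (re)scheduling sets F = S + 4096 * 2^29: the span
 * of virtual time the aggregate would need to consume its whole
 * budget at its reserved rate.
 */
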
static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg);

static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_aggregate *in_serv_agg = q->in_serv_agg;
	struct qfq_class *cl;
	struct sk_buff *skb = NULL;
	/* next-packet len, 0 means no more active classes in in-service agg */
	unsigned int len = 0;

	if (in_serv_agg == NULL) /* no aggregate to serve */
		return NULL;

	if (!list_empty(&in_serv_agg->active))
		skb = qfq_peek_skb(in_serv_agg, &cl, &len);

	/*
	 * If there are no active classes in the in-service aggregate,
	 * or if the aggregate has not enough budget to serve its next
	 * packet, then choose the next aggregate to serve.
	 */
	if (len == 0 || in_serv_agg->budget < len) {
		charge_actual_service(in_serv_agg);

		/* recharge the budget of the aggregate */
		in_serv_agg->initial_budget = in_serv_agg->budget =
			in_serv_agg->budgetmax;

		if (!list_empty(&in_serv_agg->active)) {
			/*
			 * Still active: reschedule for
			 * service. Possible optimization: if no other
			 * aggregate is active, then there is no point
			 * in rescheduling this aggregate, and we can
			 * just keep it as the in-service one. This
			 * should be however a corner case, and to
			 * handle it, we would need to maintain an
			 * extra num_active_aggs field.
			 */
			qfq_update_agg_ts(q, in_serv_agg, requeue);
			qfq_schedule_agg(q, in_serv_agg);
		} else if (sch->q.qlen == 0) { /* no aggregate to serve */
			q->in_serv_agg = NULL;
			return NULL;
		}

		/*
		 * If we get here, there are other aggregates queued:
		 * choose the new aggregate to serve.
		 */
		in_serv_agg = q->in_serv_agg = qfq_choose_next_agg(q);
		skb = qfq_peek_skb(in_serv_agg, &cl, &len);
	}
	if (!skb)
		return NULL;

	qdisc_qstats_backlog_dec(sch, skb);
	sch->q.qlen--;
	qdisc_bstats_update(sch, skb);

	agg_dequeue(in_serv_agg, cl, len);
	/* If lmax is lowered, through qfq_change_class, for a class
	 * owning pending packets with larger size than the new value
	 * of lmax, then the following condition may hold.
	 */
	if (unlikely(in_serv_agg->budget < len))
		in_serv_agg->budget = 0;
	else
		in_serv_agg->budget -= len;

	q->V += (u64)len * q->iwsum;
	pr_debug("qfq dequeue: len %u F %lld now %lld\n",
		 len, (unsigned long long) in_serv_agg->F,
		 (unsigned long long) q->V);

	return skb;
}

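/*
 * Illustrative example of the virtual-time update above: with a total
 * weight sum of 4, iwsum = ONE_FP / 4 = 1 << 28, so dequeueing a
 * 1500-byte packet advances V by 1500 * 2^28 fixed-point units, i.e.
 * by len/wsum in real units, as the QFQ invariants require.
 */
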
static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q)
{
	struct qfq_group *grp;
	struct qfq_aggregate *agg, *new_front_agg;
	u64 old_F;

	qfq_update_eligible(q);
	q->oldV = q->V;

	if (!q->bitmaps[ER])
		return NULL;

	grp = qfq_ffs(q, q->bitmaps[ER]);
	old_F = grp->F;

	agg = qfq_slot_head(grp);

	/* agg starts to be served, remove it from schedule */
	qfq_front_slot_remove(grp);

	new_front_agg = qfq_slot_scan(grp);

	if (new_front_agg == NULL) /* group is now inactive, remove from ER */
		__clear_bit(grp->index, &q->bitmaps[ER]);
	else {
		u64 roundedS = qfq_round_down(new_front_agg->S,
					      grp->slot_shift);
		unsigned int s;

		if (grp->S == roundedS)
			return agg; /* timestamps unchanged, nothing to do */
		grp->S = roundedS;
		grp->F = roundedS + (2ULL << grp->slot_shift);
		__clear_bit(grp->index, &q->bitmaps[ER]);
		s = qfq_calc_state(q, grp);
		__set_bit(grp->index, &q->bitmaps[s]);
	}

	qfq_unblock_groups(q, grp->index, old_F);

	return agg;
}

static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	unsigned int len = qdisc_pkt_len(skb), gso_segs;
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	struct qfq_aggregate *agg;
	int err = 0;
	bool first;

	cl = qfq_classify(skb, sch, &err);
	if (cl == NULL) {
		if (err & __NET_XMIT_BYPASS)
			qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return err;
	}
	pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);

	if (unlikely(cl->agg->lmax < len)) {
		pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
			 cl->agg->lmax, len, cl->common.classid);
		err = qfq_change_agg(sch, cl, cl->agg->class_weight, len);
		if (err) {
			cl->qstats.drops++;
			return qdisc_drop(skb, sch, to_free);
		}
	}

	gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
	first = !cl->qdisc->q.qlen;
	err = qdisc_enqueue(skb, cl->qdisc, to_free);
	if (unlikely(err != NET_XMIT_SUCCESS)) {
		pr_debug("qfq_enqueue: enqueue failed %d\n", err);
		if (net_xmit_drop_count(err)) {
			cl->qstats.drops++;
			qdisc_qstats_drop(sch);
		}
		return err;
	}

	_bstats_update(&cl->bstats, len, gso_segs);
	sch->qstats.backlog += len;
	++sch->q.qlen;

	agg = cl->agg;
	/* if the queue was not empty, then done here */
	if (!first) {
		if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) &&
		    list_first_entry(&agg->active, struct qfq_class, alist)
		    == cl && cl->deficit < len)
			list_move_tail(&cl->alist, &agg->active);

		return err;
	}

	/* schedule class for service within the aggregate */
	cl->deficit = agg->lmax;
	list_add_tail(&cl->alist, &agg->active);

	if (list_first_entry(&agg->active, struct qfq_class, alist) != cl ||
	    q->in_serv_agg == agg)
		return err; /* non-empty or in service, nothing else to do */

	qfq_activate_agg(q, agg, enqueue);

	return err;
}

/*
 * Schedule aggregate according to its timestamps.
 */
static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
{
	struct qfq_group *grp = agg->grp;
	u64 roundedS;
	int s;

	roundedS = qfq_round_down(agg->S, grp->slot_shift);

	/*
	 * Insert agg in the correct bucket.
	 * If agg->S >= grp->S we don't need to adjust the
	 * bucket list and simply go to the insertion phase.
	 * Otherwise grp->S is decreasing, we must make room
	 * in the bucket list, and also recompute the group state.
	 * Finally, if there were no flows in this group and nobody
	 * was in ER make sure to adjust V.
	 */
	if (grp->full_slots) {
		if (!qfq_gt(grp->S, agg->S))
			goto skip_update;

		/* create a slot for this agg->S */
		qfq_slot_rotate(grp, roundedS);
		/* group was surely ineligible, remove */
		__clear_bit(grp->index, &q->bitmaps[IR]);
		__clear_bit(grp->index, &q->bitmaps[IB]);
	} else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) &&
		   q->in_serv_agg == NULL)
		q->V = roundedS;

	grp->S = roundedS;
	grp->F = roundedS + (2ULL << grp->slot_shift);
	s = qfq_calc_state(q, grp);
	__set_bit(grp->index, &q->bitmaps[s]);

	pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n",
		 s, q->bitmaps[s],
		 (unsigned long long) agg->S,
		 (unsigned long long) agg->F,
		 (unsigned long long) q->V);

skip_update:
	qfq_slot_insert(grp, agg, roundedS);
}

/* Update agg ts and schedule agg for service */
static void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
			     enum update_reason reason)
{
	agg->initial_budget = agg->budget = agg->budgetmax; /* recharge budg. */

	qfq_update_agg_ts(q, agg, reason);
	if (q->in_serv_agg == NULL) { /* no aggr. in service or scheduled */
		q->in_serv_agg = agg; /* start serving this aggregate */
		 /* update V: to be in service, agg must be eligible */
		q->oldV = q->V = agg->S;
	} else if (agg != q->in_serv_agg)
		qfq_schedule_agg(q, agg);
}

static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
			    struct qfq_aggregate *agg)
{
	unsigned int i, offset;
	u64 roundedS;

	roundedS = qfq_round_down(agg->S, grp->slot_shift);
	offset = (roundedS - grp->S) >> grp->slot_shift;

	i = (grp->front + offset) % QFQ_MAX_SLOTS;

	hlist_del(&agg->next);
	if (hlist_empty(&grp->slots[i]))
		__clear_bit(offset, &grp->full_slots);
}

/*
 * Called to forcibly deschedule an aggregate. If the aggregate is
 * not in the front bucket, or if the latter has other aggregates in
 * the front bucket, we can simply remove the aggregate with no other
 * side effects.
 * Otherwise we must propagate the event up.
 */
static void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
{
	struct qfq_group *grp = agg->grp;
	unsigned long mask;
	u64 roundedS;
	int s;

	if (agg == q->in_serv_agg) {
		charge_actual_service(agg);
		q->in_serv_agg = qfq_choose_next_agg(q);
		return;
	}

	agg->F = agg->S;
	qfq_slot_remove(q, grp, agg);

	if (!grp->full_slots) {
		__clear_bit(grp->index, &q->bitmaps[IR]);
		__clear_bit(grp->index, &q->bitmaps[EB]);
		__clear_bit(grp->index, &q->bitmaps[IB]);

		if (test_bit(grp->index, &q->bitmaps[ER]) &&
		    !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
			mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
			if (mask)
				mask = ~((1UL << __fls(mask)) - 1);
			else
				mask = ~0UL;
			qfq_move_groups(q, mask, EB, ER);
			qfq_move_groups(q, mask, IB, IR);
		}
		__clear_bit(grp->index, &q->bitmaps[ER]);
	} else if (hlist_empty(&grp->slots[grp->front])) {
		agg = qfq_slot_scan(grp);
		roundedS = qfq_round_down(agg->S, grp->slot_shift);
		if (grp->S != roundedS) {
			__clear_bit(grp->index, &q->bitmaps[ER]);
			__clear_bit(grp->index, &q->bitmaps[IR]);
			__clear_bit(grp->index, &q->bitmaps[EB]);
			__clear_bit(grp->index, &q->bitmaps[IB]);
			grp->S = roundedS;
			grp->F = roundedS + (2ULL << grp->slot_shift);
			s = qfq_calc_state(q, grp);
			__set_bit(grp->index, &q->bitmaps[s]);
		}
	}
}

static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl = (struct qfq_class *)arg;

	qfq_deactivate_class(q, cl);
}

static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
			  struct netlink_ext_ack *extack)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_group *grp;
	int i, j, err;
	u32 max_cl_shift, maxbudg_shift, max_classes;

	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
	if (err)
		return err;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		return err;

	max_classes = min_t(u64, (u64)qdisc_dev(sch)->tx_queue_len + 1,
			    QFQ_MAX_AGG_CLASSES);
	/* max_cl_shift = floor(log_2(max_classes)) */
	max_cl_shift = __fls(max_classes);
	q->max_agg_classes = 1<<max_cl_shift;

	/* maxbudg_shift = log2(max_len * max_classes_per_agg) */
	maxbudg_shift = QFQ_MTU_SHIFT + max_cl_shift;
	q->min_slot_shift = FRAC_BITS + maxbudg_shift - QFQ_MAX_INDEX;

	for (i = 0; i <= QFQ_MAX_INDEX; i++) {
		grp = &q->groups[i];
		grp->index = i;
		grp->slot_shift = q->min_slot_shift + i;
		for (j = 0; j < QFQ_MAX_SLOTS; j++)
			INIT_HLIST_HEAD(&grp->slots[j]);
	}

	INIT_HLIST_HEAD(&q->nonfull_aggs);

	return 0;
}

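/*
 * Illustrative numbers: on a device whose tx_queue_len is at least 7,
 * max_classes = QFQ_MAX_AGG_CLASSES = 8, so max_cl_shift = 3,
 * maxbudg_shift = 16 + 3 = 19, and min_slot_shift = 30 + 19 - 24 =
 * 25. Group i then uses slot_shift = 25 + i, for i in 0..24.
 */
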
static void qfq_reset_qdisc(struct Qdisc *sch)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (cl->qdisc->q.qlen > 0)
				qfq_deactivate_class(q, cl);

			qdisc_reset(cl->qdisc);
		}
	}
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
}

static void qfq_destroy_qdisc(struct Qdisc *sch)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	struct hlist_node *next;
	unsigned int i;

	tcf_block_put(q->block);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
					  common.hnode) {
			qfq_destroy_class(sch, cl);
		}
	}
	qdisc_class_hash_destroy(&q->clhash);
}

static const struct Qdisc_class_ops qfq_class_ops = {
	.change		= qfq_change_class,
	.delete		= qfq_delete_class,
	.find		= qfq_search_class,
	.tcf_block	= qfq_tcf_block,
	.bind_tcf	= qfq_bind_tcf,
	.unbind_tcf	= qfq_unbind_tcf,
	.graft		= qfq_graft_class,
	.leaf		= qfq_class_leaf,
	.qlen_notify	= qfq_qlen_notify,
	.dump		= qfq_dump_class,
	.dump_stats	= qfq_dump_class_stats,
	.walk		= qfq_walk,
};

static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
	.cl_ops		= &qfq_class_ops,
	.id		= "qfq",
	.priv_size	= sizeof(struct qfq_sched),
	.enqueue	= qfq_enqueue,
	.dequeue	= qfq_dequeue,
	.peek		= qdisc_peek_dequeued,
	.init		= qfq_init_qdisc,
	.reset		= qfq_reset_qdisc,
	.destroy	= qfq_destroy_qdisc,
	.owner		= THIS_MODULE,
};

static int __init qfq_init(void)
{
	return register_qdisc(&qfq_qdisc_ops);
}

static void __exit qfq_exit(void)
{
	unregister_qdisc(&qfq_qdisc_ops);
}

module_init(qfq_init);
module_exit(qfq_exit);
MODULE_LICENSE("GPL");
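
/*
 * Example usage from user space (illustrative sketch; assumes the
 * iproute2 tc utility with qfq support, and device eth0):
 *
 *   tc qdisc add dev eth0 root handle 1: qfq
 *   tc class add dev eth0 parent 1: classid 1:1 qfq weight 10 maxpkt 1500
 *   tc filter add dev eth0 parent 1: protocol ip u32 \
 *	match ip dport 80 0xffff flowid 1:1
 *
 * Packets matching the filter are then scheduled through class 1:1,
 * which gets ten times the bandwidth share of a weight-1 class.
 */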