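// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_tbf.c - Token Bucket Filter queueing discipline.
 */
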
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
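
/* Token Bucket Filter (TBF).
 *
 * A flow conforms to TBF with rate R and depth (burst) B if, over any
 * interval t_i..t_f, it transmits at most B + R * (t_f - t_i) bytes.
 * Tokens accumulate with time up to the bucket depth; each departing
 * packet consumes tokens proportional to its length, and when the
 * bucket runs dry the qdisc throttles and arms a watchdog timer to
 * wake up once enough tokens have accrued.
 *
 * An optional second bucket with peak rate P and depth "mtu" bounds
 * bursts on a shorter time scale; it is only meaningful with P > R
 * and B > mtu.  Both buckets are tracked in nanoseconds of accumulated
 * time rather than bytes: a packet of len bytes costs
 * psched_l2t_ns(rate, len) nanoseconds of tokens.
 *
 * TBF is classful: packets are stored in a single inner qdisc (by
 * default a bfifo of "limit" bytes).  The limit parameter is kept for
 * backward compatibility only and stops being effective once the
 * inner qdisc is replaced.
 */
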
struct tbf_sched_data {
/* Parameters */
        u32             limit;          /* Maximal length of backlog: bytes */
        u32             max_size;       /* Largest packet passed through unsegmented */
        s64             buffer;         /* Token bucket depth, in ns of accumulated time */
        s64             mtu;            /* Peak bucket depth, in ns */
        struct psched_ratecfg rate;
        struct psched_ratecfg peak;

/* Variables */
        s64             tokens;         /* Current number of rate tokens, in ns */
        s64             ptokens;        /* Current number of peak tokens, in ns */
        s64             t_c;            /* Time check-point of the last update */
        struct Qdisc    *qdisc;         /* Inner qdisc, default - bfifo queue */
        struct qdisc_watchdog watchdog; /* Watchdog timer to leave throttled state */
};
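
/* Time to length: how many bytes can be sent at rate r in the given
 * number of nanoseconds.  The inverse of psched_l2t_ns().
 */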
static u64 psched_ns_t2l(const struct psched_ratecfg *r,
                         u64 time_in_ns)
{
        /* The formula is:
         * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
         */
        u64 len = time_in_ns * r->rate_bytes_ps;

        do_div(len, NSEC_PER_SEC);

        if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
                /* A 53-byte ATM cell carries only 48 bytes of payload */
                do_div(len, 53);
                len = len * 48;
        }

        if (len > r->overhead)
                len -= r->overhead;
        else
                len = 0;

        return len;
}
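
/* Hardware offload plumbing: replicate configuration, statistics and
 * teardown to drivers that implement TC_SETUP_QDISC_TBF.
 */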
static void tbf_offload_change(struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_tbf_qopt_offload qopt;

        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return;

        qopt.command = TC_TBF_REPLACE;
        qopt.handle = sch->handle;
        qopt.parent = sch->parent;
        qopt.replace_params.rate = q->rate;
        qopt.replace_params.max_size = q->max_size;
        qopt.replace_params.qstats = &sch->qstats;

        dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
}

static void tbf_offload_destroy(struct Qdisc *sch)
{
        struct net_device *dev = qdisc_dev(sch);
        struct tc_tbf_qopt_offload qopt;

        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return;

        qopt.command = TC_TBF_DESTROY;
        qopt.handle = sch->handle;
        qopt.parent = sch->parent;
        dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
}

static int tbf_offload_dump(struct Qdisc *sch)
{
        struct tc_tbf_qopt_offload qopt;

        qopt.command = TC_TBF_STATS;
        qopt.handle = sch->handle;
        qopt.parent = sch->parent;
        qopt.stats.bstats = &sch->bstats;
        qopt.stats.qstats = &sch->qstats;

        return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
}

static void tbf_offload_graft(struct Qdisc *sch, struct Qdisc *new,
                              struct Qdisc *old, struct netlink_ext_ack *extack)
{
        struct tc_tbf_qopt_offload graft_offload = {
                .handle         = sch->handle,
                .parent         = sch->parent,
                .child_handle   = new->handle,
                .command        = TC_TBF_GRAFT,
        };

        qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
                                   TC_SETUP_QDISC_TBF, &graft_offload, extack);
}
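
/* GSO packet is too big: segment it so that each resulting segment
 * fits within max_size and can be paced by the bucket individually.
 */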
static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
                       struct sk_buff **to_free)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct sk_buff *segs, *nskb;
        netdev_features_t features = netif_skb_features(skb);
        unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
        int ret, nb;

        segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

        if (IS_ERR_OR_NULL(segs))
                return qdisc_drop(skb, sch, to_free);

        nb = 0;
        skb_list_walk_safe(segs, segs, nskb) {
                skb_mark_not_on_list(segs);
                qdisc_skb_cb(segs)->pkt_len = segs->len;
                len += segs->len;
                ret = qdisc_enqueue(segs, q->qdisc, to_free);
                if (ret != NET_XMIT_SUCCESS) {
                        if (net_xmit_drop_count(ret))
                                qdisc_qstats_drop(sch);
                } else {
                        nb++;
                }
        }
        sch->q.qlen += nb;
        /* One skb became nb segments: fix up the parents' counters */
        if (nb > 1)
                qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
        consume_skb(skb);
        return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}
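
/* Enqueue into the inner qdisc; packets larger than max_size are
 * segmented if they are GSO, dropped otherwise.
 */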
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                       struct sk_buff **to_free)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        unsigned int len = qdisc_pkt_len(skb);
        int ret;

        if (qdisc_pkt_len(skb) > q->max_size) {
                if (skb_is_gso(skb) &&
                    skb_gso_validate_mac_len(skb, q->max_size))
                        return tbf_segment(skb, sch, to_free);
                return qdisc_drop(skb, sch, to_free);
        }
        ret = qdisc_enqueue(skb, q->qdisc, to_free);
        if (ret != NET_XMIT_SUCCESS) {
                if (net_xmit_drop_count(ret))
                        qdisc_qstats_drop(sch);
                return ret;
        }

        sch->qstats.backlog += len;
        sch->q.qlen++;
        return NET_XMIT_SUCCESS;
}

static bool tbf_peak_present(const struct tbf_sched_data *q)
{
        return q->peak.rate_bytes_ps;
}
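
/* Dequeue the head packet if both buckets hold enough tokens for it;
 * otherwise leave it queued and arm the watchdog for the time at
 * which the deficit will have been replenished.
 */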
static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;

        skb = q->qdisc->ops->peek(q->qdisc);

        if (skb) {
                s64 now;
                s64 toks;
                s64 ptoks = 0;
                unsigned int len = qdisc_pkt_len(skb);

                now = ktime_get_ns();
                toks = min_t(s64, now - q->t_c, q->buffer);

                if (tbf_peak_present(q)) {
                        ptoks = toks + q->ptokens;
                        if (ptoks > q->mtu)
                                ptoks = q->mtu;
                        ptoks -= (s64) psched_l2t_ns(&q->peak, len);
                }
                toks += q->tokens;
                if (toks > q->buffer)
                        toks = q->buffer;
                toks -= (s64) psched_l2t_ns(&q->rate, len);

                /* Both token counts are non-negative iff the OR of
                 * their sign bits is clear */
                if ((toks|ptoks) >= 0) {
                        skb = qdisc_dequeue_peeked(q->qdisc);
                        if (unlikely(!skb))
                                return NULL;

                        q->t_c = now;
                        q->tokens = toks;
                        q->ptokens = ptoks;
                        qdisc_qstats_backlog_dec(sch, skb);
                        sch->q.qlen--;
                        qdisc_bstats_update(sch, skb);
                        return skb;
                }

                qdisc_watchdog_schedule_ns(&q->watchdog,
                                           now + max_t(long, -toks, -ptoks));

                /* Maybe a shorter packet further back in the queue
                 * could be sent now.  Tempting, but wrong in principle:
                 * packets must not be reordered here.  Splitting the
                 * traffic into independent subflows and scheduling
                 * those is the province of fair-queueing algorithms.
                 */
                qdisc_qstats_overlimit(sch);
        }
        return NULL;
}

static void tbf_reset(struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        qdisc_reset(q->qdisc);
        sch->qstats.backlog = 0;
        sch->q.qlen = 0;
        q->t_c = ktime_get_ns();
        q->tokens = q->buffer;
        q->ptokens = q->mtu;
        qdisc_watchdog_cancel(&q->watchdog);
}

static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
        [TCA_TBF_PARMS]   = { .len = sizeof(struct tc_tbf_qopt) },
        [TCA_TBF_RTAB]    = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
        [TCA_TBF_PTAB]    = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
        [TCA_TBF_RATE64]  = { .type = NLA_U64 },
        [TCA_TBF_PRATE64] = { .type = NLA_U64 },
        [TCA_TBF_BURST]   = { .type = NLA_U32 },
        [TCA_TBF_PBURST]  = { .type = NLA_U32 },
};
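
/* Parse the netlink configuration, derive rate/peak token parameters,
 * create or resize the inner bfifo, and commit the new state under
 * the qdisc tree lock.
 */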
static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
{
        int err;
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_TBF_MAX + 1];
        struct tc_tbf_qopt *qopt;
        struct Qdisc *child = NULL;
        struct Qdisc *old = NULL;
        struct psched_ratecfg rate;
        struct psched_ratecfg peak;
        u64 max_size;
        s64 buffer, mtu;
        u64 rate64 = 0, prate64 = 0;

        err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy,
                                          NULL);
        if (err < 0)
                return err;

        err = -EINVAL;
        if (tb[TCA_TBF_PARMS] == NULL)
                goto done;

        qopt = nla_data(tb[TCA_TBF_PARMS]);
        if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
                qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
                                              tb[TCA_TBF_RTAB],
                                              NULL));

        if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
                qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
                                              tb[TCA_TBF_PTAB],
                                              NULL));

        buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
        mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);

        if (tb[TCA_TBF_RATE64])
                rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
        psched_ratecfg_precompute(&rate, &qopt->rate, rate64);

        if (tb[TCA_TBF_BURST]) {
                max_size = nla_get_u32(tb[TCA_TBF_BURST]);
                buffer = psched_l2t_ns(&rate, max_size);
        } else {
                max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
        }

        if (qopt->peakrate.rate) {
                if (tb[TCA_TBF_PRATE64])
                        prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
                psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
                if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
                        pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equal to rate %llu!\n",
                                            peak.rate_bytes_ps, rate.rate_bytes_ps);
                        err = -EINVAL;
                        goto done;
                }

                if (tb[TCA_TBF_PBURST]) {
                        u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);

                        max_size = min_t(u32, max_size, pburst);
                        mtu = psched_l2t_ns(&peak, pburst);
                } else {
                        max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
                }
        } else {
                memset(&peak, 0, sizeof(peak));
        }

        if (max_size < psched_mtu(qdisc_dev(sch)))
                pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u)!\n",
                                    max_size, qdisc_dev(sch)->name,
                                    psched_mtu(qdisc_dev(sch)));

        if (!max_size) {
                err = -EINVAL;
                goto done;
        }

        if (q->qdisc != &noop_qdisc) {
                err = fifo_set_limit(q->qdisc, qopt->limit);
                if (err)
                        goto done;
        } else if (qopt->limit > 0) {
                child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit,
                                         extack);
                if (IS_ERR(child)) {
                        err = PTR_ERR(child);
                        goto done;
                }

                /* child is fifo, no need to check for noop_qdisc */
                qdisc_hash_add(child, true);
        }

        sch_tree_lock(sch);
        if (child) {
                qdisc_tree_flush_backlog(q->qdisc);
                old = q->qdisc;
                q->qdisc = child;
        }
        q->limit = qopt->limit;
        if (tb[TCA_TBF_PBURST])
                q->mtu = mtu;
        else
                q->mtu = PSCHED_TICKS2NS(qopt->mtu);
        q->max_size = max_size;
        if (tb[TCA_TBF_BURST])
                q->buffer = buffer;
        else
                q->buffer = PSCHED_TICKS2NS(qopt->buffer);
        q->tokens = q->buffer;
        q->ptokens = q->mtu;

        memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
        memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));

        sch_tree_unlock(sch);
        qdisc_put(old);
        err = 0;

        tbf_offload_change(sch);
done:
        return err;
}

static int tbf_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        qdisc_watchdog_init(&q->watchdog, sch);
        q->qdisc = &noop_qdisc;

        if (!opt)
                return -EINVAL;

        q->t_c = ktime_get_ns();

        return tbf_change(sch, opt, extack);
}

static void tbf_destroy(struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        qdisc_watchdog_cancel(&q->watchdog);
        tbf_offload_destroy(sch);
        qdisc_put(q->qdisc);
}

static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct nlattr *nest;
        struct tc_tbf_qopt opt;
        int err;

        err = tbf_offload_dump(sch);
        if (err)
                return err;

        nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;

        opt.limit = q->limit;
        psched_ratecfg_getrate(&opt.rate, &q->rate);
        if (tbf_peak_present(q))
                psched_ratecfg_getrate(&opt.peakrate, &q->peak);
        else
                memset(&opt.peakrate, 0, sizeof(opt.peakrate));
        opt.mtu = PSCHED_NS2TICKS(q->mtu);
        opt.buffer = PSCHED_NS2TICKS(q->buffer);
        if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
                goto nla_put_failure;
        if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
            nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps,
                              TCA_TBF_PAD))
                goto nla_put_failure;
        if (tbf_peak_present(q) &&
            q->peak.rate_bytes_ps >= (1ULL << 32) &&
            nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps,
                              TCA_TBF_PAD))
                goto nla_put_failure;

        return nla_nest_end(skb, nest);

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}
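
/* TBF is classful but exposes exactly one class, minor 1, which holds
 * the inner qdisc.
 */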
static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        tcm->tcm_handle |= TC_H_MIN(1);
        tcm->tcm_info = q->qdisc->handle;

        return 0;
}

static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old, struct netlink_ext_ack *extack)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        if (new == NULL)
                new = &noop_qdisc;

        *old = qdisc_replace(sch, new, &q->qdisc);

        tbf_offload_graft(sch, new, *old, extack);
        return 0;
}

static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        return q->qdisc;
}

static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
{
        return 1;
}

static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
        if (!walker->stop) {
                if (walker->count >= walker->skip)
                        if (walker->fn(sch, 1, walker) < 0) {
                                walker->stop = 1;
                                return;
                        }
                walker->count++;
        }
}

static const struct Qdisc_class_ops tbf_class_ops = {
        .graft          = tbf_graft,
        .leaf           = tbf_leaf,
        .find           = tbf_find,
        .walk           = tbf_walk,
        .dump           = tbf_dump_class,
};

static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
        .next           = NULL,
        .cl_ops         = &tbf_class_ops,
        .id             = "tbf",
        .priv_size      = sizeof(struct tbf_sched_data),
        .enqueue        = tbf_enqueue,
        .dequeue        = tbf_dequeue,
        .peek           = qdisc_peek_dequeued,
        .init           = tbf_init,
        .reset          = tbf_reset,
        .destroy        = tbf_destroy,
        .change         = tbf_change,
        .dump           = tbf_dump,
        .owner          = THIS_MODULE,
};
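
/* Illustrative user-space configuration via tc (values are only an
 * example):
 *
 *   tc qdisc add dev eth0 root tbf rate 1mbit burst 32k latency 70ms
 *
 * rate, burst and latency map onto the rate, buffer and limit
 * parameters handled in tbf_change().
 */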
static int __init tbf_module_init(void)
{
        return register_qdisc(&tbf_qdisc_ops);
}

static void __exit tbf_module_exit(void)
{
        unregister_qdisc(&tbf_qdisc_ops);
}
module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");