// SPDX-License-Identifier: GPL-2.0
/* net/sched/sch_cbs.c	Credit Based Shaper
 *
 * Authors:	Vinicius Costa Gomes <vinicius.gomes@intel.com>
 */

/* Credit Based Shaper (CBS)
 * =========================
 *
 * This is a simple rate-limiting shaper aimed at TSN applications on
 * systems with known traffic workloads.
 *
 * Its algorithm is defined by the IEEE 802.1Q-2014 Specification,
 * Section 8.6.8.2, and explained in more detail in the Annex L of the
 * same specification.
 *
 * There are four tunables to be considered:
 *
 *	'idleslope': Idleslope is the rate of credits that is
 *	accumulated (in kilobits per second) when there is at least
 *	one packet waiting for transmission. Packets are transmitted
 *	when the current value of credits is equal or greater than
 *	zero. When there is no packet to be transmitted the amount of
 *	credits is set to zero. This is the main tunable of the CBS
 *	algorithm.
 *
 *	'sendslope': Sendslope is the rate of credits that is depleted
 *	(it should be a negative number of kilobits per second) when a
 *	transmission is occurring. It can be calculated as follows
 *	(IEEE 802.1Q-2014 Section 8.6.8.2 item g):
 *
 *	sendslope = idleslope - port_transmit_rate
 *
 *	'hicredit': Hicredit defines the maximum amount of credits (in
 *	bytes) that can be accumulated. Hicredit depends on the
 *	characteristics of interfering traffic, 'max_interference_size'
 *	is the maximum size of any burst of traffic that can delay the
 *	transmission of a frame that is available for transmission for
 *	this traffic class (IEEE 802.1Q-2014 Annex L, Equation L-3):
 *
 *	hicredit = max_interference_size * (idleslope / port_transmit_rate)
 *
 *	'locredit': Locredit is the minimum amount of credits that can
 *	be reached. It is a function of the traffic flowing through
 *	this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2):
 *
 *	locredit = max_frame_size * (sendslope / port_transmit_rate)
 */
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>

static LIST_HEAD(cbs_list);
static DEFINE_SPINLOCK(cbs_list_lock);

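/* Number of bytes per second in one kilobit per second. */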
#define BYTES_PER_KBIT (1000LL / 8)

struct cbs_sched_data {
	bool offload;
	int queue;
	atomic64_t port_rate; /* in bytes/s */
	s64 last; /* timestamp in ns */
	s64 credits; /* in bytes */
	s32 locredit; /* in bytes */
	s32 hicredit; /* in bytes */
	s64 sendslope; /* in bytes/s */
	s64 idleslope; /* in bytes/s */
	struct qdisc_watchdog watchdog;
	int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free);
	struct sk_buff *(*dequeue)(struct Qdisc *sch);
	struct Qdisc *qdisc;
	struct list_head cbs_list;
};

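/* Enqueue into the child qdisc; on success, also account the packet in
 * this qdisc's backlog and queue length.
 */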
static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			     struct Qdisc *child,
			     struct sk_buff **to_free)
{
	unsigned int len = qdisc_pkt_len(skb);
	int err;

	err = child->ops->enqueue(skb, child, to_free);
	if (err != NET_XMIT_SUCCESS)
		return err;

	sch->qstats.backlog += len;
	sch->q.qlen++;

	return NET_XMIT_SUCCESS;
}

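/* In offload mode the NIC implements the shaping, so enqueue is a plain
 * pass-through to the child qdisc.
 */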
static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch,
			       struct sk_buff **to_free)
{
	struct cbs_sched_data *q = qdisc_priv(sch);
	struct Qdisc *qdisc = q->qdisc;

	return cbs_child_enqueue(skb, sch, qdisc, to_free);
}

static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch,
			    struct sk_buff **to_free)
{
	struct cbs_sched_data *q = qdisc_priv(sch);
	struct Qdisc *qdisc = q->qdisc;

	if (sch->q.qlen == 0 && q->credits > 0) {
		/* We need to stop accumulating credits when there are
		 * no enqueued packets and q->credits is positive.
		 */
		q->credits = 0;
		q->last = ktime_get_ns();
	}

	return cbs_child_enqueue(skb, sch, qdisc, to_free);
}

static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct cbs_sched_data *q = qdisc_priv(sch);

	return q->enqueue(skb, sch, to_free);
}

/* timediff is in ns, slope is in bytes/s */
static s64 timediff_to_credits(s64 timediff, s64 slope)
{
	return div64_s64(timediff * slope, NSEC_PER_SEC);
}

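/* Time in ns until 'credits' (a negative value) climbs back to zero
 * while accumulating at 'slope' bytes/s.
 */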
static s64 delay_from_credits(s64 credits, s64 slope)
{
	if (unlikely(slope == 0))
		return S64_MAX;

	return div64_s64(-credits * NSEC_PER_SEC, slope);
}

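/* Credit variation (in bytes) caused by sending 'len' bytes at
 * 'port_rate' while credits change at 'slope' bytes/s.
 */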
static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate)
{
	if (unlikely(port_rate == 0))
		return S64_MAX;

	return div64_s64(len * slope, port_rate);
}

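/* Dequeue from the child qdisc and update this qdisc's backlog, byte
 * and packet counters to match.
 */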
static struct sk_buff *cbs_child_dequeue(struct Qdisc *sch, struct Qdisc *child)
{
	struct sk_buff *skb;

	skb = child->ops->dequeue(child);
	if (!skb)
		return NULL;

	qdisc_qstats_backlog_dec(sch, skb);
	qdisc_bstats_update(sch, skb);
	sch->q.qlen--;

	return skb;
}

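/* Software implementation of the CBS algorithm: replenish credits at
 * idleslope while the queue waits, release a packet only once credits
 * are non-negative, then charge its transmission time at sendslope.
 */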
static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
{
	struct cbs_sched_data *q = qdisc_priv(sch);
	struct Qdisc *qdisc = q->qdisc;
	s64 now = ktime_get_ns();
	struct sk_buff *skb;
	s64 credits;
	int len;

	/* The previous packet is still being sent */
	if (now < q->last) {
		qdisc_watchdog_schedule_ns(&q->watchdog, q->last);
		return NULL;
	}
	if (q->credits < 0) {
		credits = timediff_to_credits(now - q->last, q->idleslope);

		credits = q->credits + credits;
		q->credits = min_t(s64, credits, q->hicredit);

		if (q->credits < 0) {
			s64 delay;

			delay = delay_from_credits(q->credits, q->idleslope);
			qdisc_watchdog_schedule_ns(&q->watchdog, now + delay);

			q->last = now;

			return NULL;
		}
	}
	skb = cbs_child_dequeue(sch, qdisc);
	if (!skb)
		return NULL;

	len = qdisc_pkt_len(skb);

	/* As sendslope is a negative number, this will decrease the
	 * amount of q->credits.
	 */
	credits = credits_from_len(len, q->sendslope,
				   atomic64_read(&q->port_rate));
	credits += q->credits;

	q->credits = max_t(s64, credits, q->locredit);
	/* Estimate of the transmission of the last byte of the packet in ns */
	if (unlikely(atomic64_read(&q->port_rate) == 0))
		q->last = now;
	else
		q->last = now + div64_s64(len * NSEC_PER_SEC,
					  atomic64_read(&q->port_rate));

	return skb;
}

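/* In offload mode the NIC does the shaping, so dequeue is a plain
 * pass-through to the child qdisc.
 */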
static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch)
{
	struct cbs_sched_data *q = qdisc_priv(sch);
	struct Qdisc *qdisc = q->qdisc;

	return cbs_child_dequeue(sch, qdisc);
}

static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
{
	struct cbs_sched_data *q = qdisc_priv(sch);

	return q->dequeue(sch);
}

static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = {
	[TCA_CBS_PARMS]	= { .len = sizeof(struct tc_cbs_qopt) },
};

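/* Switch back to the software path and, if the driver supports it, tell
 * the hardware to stop shaping this queue.
 */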
static void cbs_disable_offload(struct net_device *dev,
				struct cbs_sched_data *q)
{
	struct tc_cbs_qopt_offload cbs = { };
	const struct net_device_ops *ops;
	int err;

	if (!q->offload)
		return;

	q->enqueue = cbs_enqueue_soft;
	q->dequeue = cbs_dequeue_soft;

	ops = dev->netdev_ops;
	if (!ops->ndo_setup_tc)
		return;

	cbs.queue = q->queue;
	cbs.enable = 0;

	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
	if (err < 0)
		pr_warn("Couldn't disable CBS offload for queue %d\n",
			cbs.queue);
}

static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
			      const struct tc_cbs_qopt *opt,
			      struct netlink_ext_ack *extack)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_cbs_qopt_offload cbs = { };
	int err;

	if (!ops->ndo_setup_tc) {
		NL_SET_ERR_MSG(extack, "Specified device does not support cbs offload");
		return -EOPNOTSUPP;
	}

	cbs.queue = q->queue;

	cbs.enable = 1;
	cbs.hicredit = opt->hicredit;
	cbs.locredit = opt->locredit;
	cbs.idleslope = opt->idleslope;
	cbs.sendslope = opt->sendslope;

	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Specified device failed to setup cbs hardware offload");
		return err;
	}

	q->enqueue = cbs_enqueue_offload;
	q->dequeue = cbs_dequeue_offload;

	return 0;
}

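/* Cache the device's link speed as a rate in bytes/s; if the speed
 * cannot be determined, fall back to 10Mbit/s (SPEED_10).
 */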
static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q)
{
	struct ethtool_link_ksettings ecmd;
	int speed = SPEED_10;
	s64 port_rate;
	int err;

	err = __ethtool_get_link_ksettings(dev, &ecmd);
	if (err < 0)
		goto skip;

	if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN)
		speed = ecmd.base.speed;

skip:
	port_rate = speed * 1000 * BYTES_PER_KBIT;

	atomic64_set(&q->port_rate, port_rate);
	netdev_dbg(dev, "cbs: set %s's port_rate to: %lld, linkspeed: %d\n",
		   dev->name, (long long)atomic64_read(&q->port_rate),
		   ecmd.base.speed);
}

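/* Link speed can change at runtime; find the cbs instance attached to
 * this device, if any, and refresh its cached port_rate.
 */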
static int cbs_dev_notifier(struct notifier_block *nb, unsigned long event,
			    void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct cbs_sched_data *q;
	struct net_device *qdev;
	bool found = false;

	ASSERT_RTNL();

	if (event != NETDEV_UP && event != NETDEV_CHANGE)
		return NOTIFY_DONE;

	spin_lock(&cbs_list_lock);
	list_for_each_entry(q, &cbs_list, cbs_list) {
		qdev = qdisc_dev(q->qdisc);
		if (qdev == dev) {
			found = true;
			break;
		}
	}
	spin_unlock(&cbs_list_lock);

	if (found)
		cbs_set_port_rate(dev, q);

	return NOTIFY_DONE;
}

static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct cbs_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct nlattr *tb[TCA_CBS_MAX + 1];
	struct tc_cbs_qopt *qopt;
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_CBS_MAX, opt, cbs_policy,
					  extack);
	if (err < 0)
		return err;

	if (!tb[TCA_CBS_PARMS]) {
		NL_SET_ERR_MSG(extack, "Missing CBS parameters which are mandatory");
		return -EINVAL;
	}

	qopt = nla_data(tb[TCA_CBS_PARMS]);

	if (!qopt->offload) {
		cbs_set_port_rate(dev, q);
		cbs_disable_offload(dev, q);
	} else {
		err = cbs_enable_offload(dev, q, qopt, extack);
		if (err < 0)
			return err;
	}

	/* Everything went OK, save the parameters used. */
	q->hicredit = qopt->hicredit;
	q->locredit = qopt->locredit;
	q->idleslope = qopt->idleslope * BYTES_PER_KBIT;
	q->sendslope = qopt->sendslope * BYTES_PER_KBIT;
	q->offload = qopt->offload;

	return 0;
}

static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct cbs_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);

	if (!opt) {
		NL_SET_ERR_MSG(extack, "Missing CBS qdisc options which are mandatory");
		return -EINVAL;
	}

	q->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
				     sch->handle, extack);
	if (!q->qdisc)
		return -ENOMEM;

	spin_lock(&cbs_list_lock);
	list_add(&q->cbs_list, &cbs_list);
	spin_unlock(&cbs_list_lock);

	qdisc_hash_add(q->qdisc, false);

	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);

	q->enqueue = cbs_enqueue_soft;
	q->dequeue = cbs_dequeue_soft;

	qdisc_watchdog_init(&q->watchdog, sch);

	return cbs_change(sch, opt, extack);
}

static void cbs_destroy(struct Qdisc *sch)
{
	struct cbs_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);

	/* Nothing to do if we couldn't create the underlying qdisc */
	if (!q->qdisc)
		return;

	qdisc_watchdog_cancel(&q->watchdog);
	cbs_disable_offload(dev, q);

	spin_lock(&cbs_list_lock);
	list_del(&q->cbs_list);
	spin_unlock(&cbs_list_lock);

	qdisc_put(q->qdisc);
}

static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbs_sched_data *q = qdisc_priv(sch);
	struct tc_cbs_qopt opt = { };
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (!nest)
		goto nla_put_failure;

	opt.hicredit = q->hicredit;
	opt.locredit = q->locredit;
	opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT);
	opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT);
	opt.offload = q->offload;

	if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int cbs_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbs_sched_data *q = qdisc_priv(sch);

	if (cl != 1 || !q->qdisc)
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int cbs_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct cbs_sched_data *q = qdisc_priv(sch);

	if (!new) {
		new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
					sch->handle, NULL);
		if (!new)
			new = &noop_qdisc;
	}

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *cbs_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbs_sched_data *q = qdisc_priv(sch);

	return q->qdisc;
}

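/* cbs exposes a single, always-present class (minor 1) that holds the
 * child qdisc.
 */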
static unsigned long cbs_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void cbs_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip) {
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		}
		walker->count++;
	}
}

static const struct Qdisc_class_ops cbs_class_ops = {
	.graft		= cbs_graft,
	.leaf		= cbs_leaf,
	.find		= cbs_find,
	.walk		= cbs_walk,
	.dump		= cbs_dump_class,
};

static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
	.id		= "cbs",
	.cl_ops		= &cbs_class_ops,
	.priv_size	= sizeof(struct cbs_sched_data),
	.enqueue	= cbs_enqueue,
	.dequeue	= cbs_dequeue,
	.peek		= qdisc_peek_dequeued,
	.init		= cbs_init,
	.reset		= qdisc_reset_queue,
	.destroy	= cbs_destroy,
	.change		= cbs_change,
	.dump		= cbs_dump,
	.owner		= THIS_MODULE,
};

static struct notifier_block cbs_device_notifier = {
	.notifier_call = cbs_dev_notifier,
};

static int __init cbs_module_init(void)
{
	int err;

	err = register_netdevice_notifier(&cbs_device_notifier);
	if (err)
		return err;

	err = register_qdisc(&cbs_qdisc_ops);
	if (err)
		unregister_netdevice_notifier(&cbs_device_notifier);

	return err;
}

static void __exit cbs_module_exit(void)
{
	unregister_qdisc(&cbs_qdisc_ops);
	unregister_netdevice_notifier(&cbs_device_notifier);
}
module_init(cbs_module_init)
module_exit(cbs_module_exit)
MODULE_LICENSE("GPL");