Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
0003  *
0004  * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
0005  */
0006 
0007 #include <linux/module.h>
0008 #include <linux/types.h>
0009 #include <linux/kernel.h>
0010 #include <linux/slab.h>
0011 #include <linux/string.h>
0012 #include <linux/errno.h>
0013 #include <linux/if_arp.h>
0014 #include <linux/netdevice.h>
0015 #include <linux/init.h>
0016 #include <linux/skbuff.h>
0017 #include <linux/moduleparam.h>
0018 #include <net/dst.h>
0019 #include <net/neighbour.h>
0020 #include <net/pkt_sched.h>
0021 
0022 /*
0023    How to set it up.
0024    ----------------
0025 
0026    After loading this module you will find a new device teqlN
0027    and new qdisc with the same name. To join a slave to the equalizer
0028    you should just set this qdisc on a device f.e.
0029 
0030    # tc qdisc add dev eth0 root teql0
0031    # tc qdisc add dev eth1 root teql0
0032 
0033    That's all. Full PnP 8)
0034 
0035    Applicability.
0036    --------------
0037 
0038    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
0039       signal and generate EOI events. If you want to equalize virtual devices
0040       like tunnels, use a normal eql device.
0041    2. This device puts no limitations on physical slave characteristics
0042       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
0043       Certainly, large difference in link speeds will make the resulting
0044       equalized link unusable, because of huge packet reordering.
0045       I estimate an upper useful difference as ~10 times.
0046    3. If the slave requires address resolution, only protocols using
0047       neighbour cache (IPv4/IPv6) will work over the equalized link.
0048       Other protocols are still allowed to use the slave device directly,
0049       which will not break load balancing, though native slave
0050       traffic will have the highest priority.  */
0051 
/* Per-master state.  Each teqlN net_device embeds its own Qdisc_ops so
 * that the qdisc type and the master device share a name (see teql_init). */
struct teql_master {
    struct Qdisc_ops qops;          /* per-master qdisc ops; id == dev name */
    struct net_device *dev;         /* the teqlN master device itself */
    struct Qdisc *slaves;           /* circular list of slave qdiscs (NEXT_SLAVE) */
    struct list_head master_list;   /* link in module-global master_dev_list */
    unsigned long   tx_bytes;       /* counters reported by teql_master_stats64() */
    unsigned long   tx_packets;
    unsigned long   tx_errors;
    unsigned long   tx_dropped;
};
0062 
/* Private data of one slave teql qdisc instance. */
struct teql_sched_data {
    struct Qdisc *next;     /* next slave in the master's circular list */
    struct teql_master *m;  /* owning master; NULL if init failed early */
    struct sk_buff_head q;  /* local queue of skbs awaiting the master */
};
0068 
/* Walk the circular slave list threaded through each slave's private data. */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Link-type flag bits the master inherits from the intersection of its slaves. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
0072 
0073 /* "teql*" qdisc routines */
0074 
0075 static int
0076 teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
0077 {
0078     struct net_device *dev = qdisc_dev(sch);
0079     struct teql_sched_data *q = qdisc_priv(sch);
0080 
0081     if (q->q.qlen < dev->tx_queue_len) {
0082         __skb_queue_tail(&q->q, skb);
0083         return NET_XMIT_SUCCESS;
0084     }
0085 
0086     return qdisc_drop(skb, sch, to_free);
0087 }
0088 
/* Dequeue from this slave's local queue.  When the queue is empty, make
 * this slave the master's next round-robin candidate and wake the master
 * so teql_master_xmit() can feed it again. */
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
    struct teql_sched_data *dat = qdisc_priv(sch);
    struct netdev_queue *dat_queue;
    struct sk_buff *skb;
    struct Qdisc *q;

    skb = __skb_dequeue(&dat->q);
    dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
    q = rcu_dereference_bh(dat_queue->qdisc);

    if (skb == NULL) {
        struct net_device *m = qdisc_dev(q);
        if (m) {
            /* Restart the rotor here and let the master transmit. */
            dat->m->slaves = sch;
            netif_wake_queue(m);
        }
    } else {
        qdisc_bstats_update(sch, skb);
    }
    /* Advertise the local backlog plus the master root qdisc's backlog. */
    sch->q.qlen = dat->q.qlen + q->q.qlen;
    return skb;
}
0113 
static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
    /* teql is meant to be used as root qdisc; peeking is not supported,
     * so always report an empty queue. */
    return NULL;
}
0120 
0121 static void
0122 teql_reset(struct Qdisc *sch)
0123 {
0124     struct teql_sched_data *dat = qdisc_priv(sch);
0125 
0126     skb_queue_purge(&dat->q);
0127     sch->q.qlen = 0;
0128 }
0129 
/* Unlink this slave qdisc from its master's circular list.  If it was the
 * last slave, detach the list entirely and reset the master's root qdisc
 * under the root lock. */
static void
teql_destroy(struct Qdisc *sch)
{
    struct Qdisc *q, *prev;
    struct teql_sched_data *dat = qdisc_priv(sch);
    struct teql_master *master = dat->m;

    /* init may have failed before dat->m was assigned. */
    if (!master)
        return;

    prev = master->slaves;
    if (prev) {
        do {
            q = NEXT_SLAVE(prev);
            if (q == sch) {
                /* Found ourselves: splice out of the ring. */
                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
                if (q == master->slaves) {
                    master->slaves = NEXT_SLAVE(q);
                    /* Still pointing at ourselves => we were the
                     * only slave left; empty the ring. */
                    if (q == master->slaves) {
                        struct netdev_queue *txq;
                        spinlock_t *root_lock;

                        txq = netdev_get_tx_queue(master->dev, 0);
                        master->slaves = NULL;

                        root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
                        spin_lock_bh(root_lock);
                        qdisc_reset(rtnl_dereference(txq->qdisc));
                        spin_unlock_bh(root_lock);
                    }
                }
                skb_queue_purge(&dat->q);
                break;
            }

        } while ((prev = q) != master->slaves);
    }
}
0168 
/* Attach @sch as a new slave of its master device; invoked for
 * "tc qdisc add dev <slave> root teqlN".  The master's MTU and link-type
 * flags are kept at the intersection of all slaves' capabilities. */
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
               struct netlink_ext_ack *extack)
{
    struct net_device *dev = qdisc_dev(sch);
    struct teql_master *m = (struct teql_master *)sch->ops;
    struct teql_sched_data *q = qdisc_priv(sch);

    /* The master must have room for the slave's link-layer header. */
    if (dev->hard_header_len > m->dev->hard_header_len)
        return -EINVAL;

    /* Refuse to enslave the master to itself. */
    if (m->dev == dev)
        return -ELOOP;

    q->m = m;

    skb_queue_head_init(&q->q);

    if (m->slaves) {
        if (m->dev->flags & IFF_UP) {
            /* Master is up: a new slave must not weaken any capability
             * the master already advertises, nor lower its MTU. */
            if ((m->dev->flags & IFF_POINTOPOINT &&
                 !(dev->flags & IFF_POINTOPOINT)) ||
                (m->dev->flags & IFF_BROADCAST &&
                 !(dev->flags & IFF_BROADCAST)) ||
                (m->dev->flags & IFF_MULTICAST &&
                 !(dev->flags & IFF_MULTICAST)) ||
                dev->mtu < m->dev->mtu)
                return -EINVAL;
        } else {
            /* Master is down: shrink it to the intersection with
             * the new slave. */
            if (!(dev->flags&IFF_POINTOPOINT))
                m->dev->flags &= ~IFF_POINTOPOINT;
            if (!(dev->flags&IFF_BROADCAST))
                m->dev->flags &= ~IFF_BROADCAST;
            if (!(dev->flags&IFF_MULTICAST))
                m->dev->flags &= ~IFF_MULTICAST;
            if (dev->mtu < m->dev->mtu)
                m->dev->mtu = dev->mtu;
        }
        /* Insert into the circular list right after the current head. */
        q->next = NEXT_SLAVE(m->slaves);
        NEXT_SLAVE(m->slaves) = sch;
    } else {
        /* First slave defines the master's MTU and link-type flags. */
        q->next = sch;
        m->slaves = sch;
        m->dev->mtu = dev->mtu;
        m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
    }
    return 0;
}
0216 
0217 
0218 static int
0219 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
0220            struct net_device *dev, struct netdev_queue *txq,
0221            struct dst_entry *dst)
0222 {
0223     struct neighbour *n;
0224     int err = 0;
0225 
0226     n = dst_neigh_lookup_skb(dst, skb);
0227     if (!n)
0228         return -ENOENT;
0229 
0230     if (dst->dev != dev) {
0231         struct neighbour *mn;
0232 
0233         mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
0234         neigh_release(n);
0235         if (IS_ERR(mn))
0236             return PTR_ERR(mn);
0237         n = mn;
0238     }
0239 
0240     if (neigh_event_send(n, skb_res) == 0) {
0241         int err;
0242         char haddr[MAX_ADDR_LEN];
0243 
0244         neigh_ha_snapshot(haddr, n, dev);
0245         err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
0246                       haddr, NULL, skb->len);
0247 
0248         if (err < 0)
0249             err = -EINVAL;
0250     } else {
0251         err = (skb_res == NULL) ? -EAGAIN : 1;
0252     }
0253     neigh_release(n);
0254     return err;
0255 }
0256 
/* Front-end for __teql_resolve(): decide whether header resolution is
 * needed for @skb on slave @dev and run it under rcu_read_lock().
 * Returns 0 (transmit as-is), 1 (skb consumed, pending resolution),
 * or a negative errno. */
static inline int teql_resolve(struct sk_buff *skb,
                   struct sk_buff *skb_res,
                   struct net_device *dev,
                   struct netdev_queue *txq)
{
    struct dst_entry *dst = skb_dst(skb);
    int res;

    /* Slave qdisc was detached (replaced by noop): device unusable. */
    if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
        return -ENODEV;

    /* No link-layer header needed, or nothing to resolve against. */
    if (!dev->header_ops || !dst)
        return 0;

    rcu_read_lock();
    res = __teql_resolve(skb, skb_res, dev, txq, dst);
    rcu_read_unlock();

    return res;
}
0277 
/* ndo_start_xmit for the master: transmit @skb over the slaves in
 * round-robin order starting at master->slaves (the rotor).  A second
 * pass with skb_res set lets the neighbour layer queue the skb when
 * no slave could resolve it on the first pass. */
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
    struct teql_master *master = netdev_priv(dev);
    struct Qdisc *start, *q;
    int busy;
    int nores;
    int subq = skb_get_queue_mapping(skb);
    struct sk_buff *skb_res = NULL;

    start = master->slaves;

restart:
    nores = 0;
    busy = 0;

    q = start;
    if (!q)
        goto drop;

    do {
        struct net_device *slave = qdisc_dev(q);
        struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

        /* Skip slaves whose active qdisc is no longer this one. */
        if (slave_txq->qdisc_sleeping != q)
            continue;
        if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
            !netif_running(slave)) {
            busy = 1;
            continue;
        }

        switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
        case 0:
            /* Header built: try to hand the skb to the slave. */
            if (__netif_tx_trylock(slave_txq)) {
                unsigned int length = qdisc_pkt_len(skb);

                if (!netif_xmit_frozen_or_stopped(slave_txq) &&
                    netdev_start_xmit(skb, slave, slave_txq, false) ==
                    NETDEV_TX_OK) {
                    __netif_tx_unlock(slave_txq);
                    /* Success: advance the rotor past this slave. */
                    master->slaves = NEXT_SLAVE(q);
                    netif_wake_queue(dev);
                    master->tx_packets++;
                    master->tx_bytes += length;
                    return NETDEV_TX_OK;
                }
                __netif_tx_unlock(slave_txq);
            }
            if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
                busy = 1;
            break;
        case 1:
            /* skb handed to the neighbour layer; nothing to free. */
            master->slaves = NEXT_SLAVE(q);
            return NETDEV_TX_OK;
        default:
            nores = 1;
            break;
        }
        /* Strip any partially-built header before trying the next slave. */
        __skb_pull(skb, skb_network_offset(skb));
    } while ((q = NEXT_SLAVE(q)) != start);

    if (nores && skb_res == NULL) {
        /* Second pass: allow neighbour code to consume the skb. */
        skb_res = skb;
        goto restart;
    }

    if (busy) {
        netif_stop_queue(dev);
        return NETDEV_TX_BUSY;
    }
    master->tx_errors++;

drop:
    master->tx_dropped++;
    dev_kfree_skb(skb);
    return NETDEV_TX_OK;
}
0355 
0356 static int teql_master_open(struct net_device *dev)
0357 {
0358     struct Qdisc *q;
0359     struct teql_master *m = netdev_priv(dev);
0360     int mtu = 0xFFFE;
0361     unsigned int flags = IFF_NOARP | IFF_MULTICAST;
0362 
0363     if (m->slaves == NULL)
0364         return -EUNATCH;
0365 
0366     flags = FMASK;
0367 
0368     q = m->slaves;
0369     do {
0370         struct net_device *slave = qdisc_dev(q);
0371 
0372         if (slave == NULL)
0373             return -EUNATCH;
0374 
0375         if (slave->mtu < mtu)
0376             mtu = slave->mtu;
0377         if (slave->hard_header_len > LL_MAX_HEADER)
0378             return -EINVAL;
0379 
0380         /* If all the slaves are BROADCAST, master is BROADCAST
0381            If all the slaves are PtP, master is PtP
0382            Otherwise, master is NBMA.
0383          */
0384         if (!(slave->flags&IFF_POINTOPOINT))
0385             flags &= ~IFF_POINTOPOINT;
0386         if (!(slave->flags&IFF_BROADCAST))
0387             flags &= ~IFF_BROADCAST;
0388         if (!(slave->flags&IFF_MULTICAST))
0389             flags &= ~IFF_MULTICAST;
0390     } while ((q = NEXT_SLAVE(q)) != m->slaves);
0391 
0392     m->dev->mtu = mtu;
0393     m->dev->flags = (m->dev->flags&~FMASK) | flags;
0394     netif_start_queue(m->dev);
0395     return 0;
0396 }
0397 
/* ndo_stop: just stop the master's queue; slaves remain attached. */
static int teql_master_close(struct net_device *dev)
{
    netif_stop_queue(dev);
    return 0;
}
0403 
/* ndo_get_stats64: report the counters maintained by teql_master_xmit(). */
static void teql_master_stats64(struct net_device *dev,
                struct rtnl_link_stats64 *stats)
{
    struct teql_master *m = netdev_priv(dev);

    stats->tx_packets   = m->tx_packets;
    stats->tx_bytes     = m->tx_bytes;
    stats->tx_errors    = m->tx_errors;
    stats->tx_dropped   = m->tx_dropped;
}
0414 
0415 static int teql_master_mtu(struct net_device *dev, int new_mtu)
0416 {
0417     struct teql_master *m = netdev_priv(dev);
0418     struct Qdisc *q;
0419 
0420     q = m->slaves;
0421     if (q) {
0422         do {
0423             if (new_mtu > qdisc_dev(q)->mtu)
0424                 return -EINVAL;
0425         } while ((q = NEXT_SLAVE(q)) != m->slaves);
0426     }
0427 
0428     dev->mtu = new_mtu;
0429     return 0;
0430 }
0431 
/* Master net_device ops; teql has no receive path, only TX scheduling. */
static const struct net_device_ops teql_netdev_ops = {
    .ndo_open   = teql_master_open,
    .ndo_stop   = teql_master_close,
    .ndo_start_xmit = teql_master_xmit,
    .ndo_get_stats64 = teql_master_stats64,
    .ndo_change_mtu = teql_master_mtu,
};
0439 
/* alloc_netdev() setup callback for each teqlN master: wire the embedded
 * per-master Qdisc_ops to the teql routines and set device defaults. */
static __init void teql_master_setup(struct net_device *dev)
{
    struct teql_master *master = netdev_priv(dev);
    struct Qdisc_ops *ops = &master->qops;

    master->dev = dev;
    ops->priv_size  = sizeof(struct teql_sched_data);

    ops->enqueue    =   teql_enqueue;
    ops->dequeue    =   teql_dequeue;
    ops->peek   =   teql_peek;
    ops->init   =   teql_qdisc_init;
    ops->reset  =   teql_reset;
    ops->destroy    =   teql_destroy;
    ops->owner  =   THIS_MODULE;

    dev->netdev_ops =       &teql_netdev_ops;
    dev->type       = ARPHRD_VOID;  /* no specific hardware type */
    dev->mtu        = 1500;
    dev->min_mtu        = 68;
    dev->max_mtu        = 65535;
    dev->tx_queue_len   = 100;
    dev->flags      = IFF_NOARP;    /* resolution is done per-slave */
    dev->hard_header_len    = LL_MAX_HEADER;
    netif_keep_dst(dev);            /* teql_resolve() needs skb_dst() */
}
0466 
static LIST_HEAD(master_dev_list);  /* all registered teql masters */
static int max_equalizers = 1;      /* number of teqlN devices to create at load */
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
0471 
0472 static int __init teql_init(void)
0473 {
0474     int i;
0475     int err = -ENODEV;
0476 
0477     for (i = 0; i < max_equalizers; i++) {
0478         struct net_device *dev;
0479         struct teql_master *master;
0480 
0481         dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
0482                    NET_NAME_UNKNOWN, teql_master_setup);
0483         if (!dev) {
0484             err = -ENOMEM;
0485             break;
0486         }
0487 
0488         if ((err = register_netdev(dev))) {
0489             free_netdev(dev);
0490             break;
0491         }
0492 
0493         master = netdev_priv(dev);
0494 
0495         strlcpy(master->qops.id, dev->name, IFNAMSIZ);
0496         err = register_qdisc(&master->qops);
0497 
0498         if (err) {
0499             unregister_netdev(dev);
0500             free_netdev(dev);
0501             break;
0502         }
0503 
0504         list_add_tail(&master->master_list, &master_dev_list);
0505     }
0506     return i ? 0 : err;
0507 }
0508 
/* Module exit: tear down every master created by teql_init(). */
static void __exit teql_exit(void)
{
    struct teql_master *master, *nxt;

    list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

        list_del(&master->master_list);

        /* Unregister the qdisc type first so no new slaves can attach,
         * then remove and free the master device. */
        unregister_qdisc(&master->qops);
        unregister_netdev(master->dev);
        free_netdev(master->dev);
    }
}
0522 
/* Module entry/exit points and license. */
module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");