Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /* drivers/net/ifb.c:
0003 
0004     The purpose of this driver is to provide a device that allows
0005     for sharing of resources:
0006 
0007     1) qdiscs/policies that are per device as opposed to system wide.
0008     ifb allows for a device which can be redirected to thus providing
0009     an impression of sharing.
0010 
0011     2) Allows for queueing incoming traffic for shaping instead of
0012     dropping.
0013 
0014     The original concept is based on what is known as the IMQ
0015     driver initially written by Martin Devera, later rewritten
0016     by Patrick McHardy and then maintained by Andre Correa.
0017 
0018     You need the tc action  mirror or redirect to feed this device
0019     packets.
0020 
0021 
0022     Authors:    Jamal Hadi Salim (2005)
0023 
0024 */
0025 
0026 
0027 #include <linux/module.h>
0028 #include <linux/kernel.h>
0029 #include <linux/netdevice.h>
0030 #include <linux/ethtool.h>
0031 #include <linux/etherdevice.h>
0032 #include <linux/init.h>
0033 #include <linux/interrupt.h>
0034 #include <linux/moduleparam.h>
0035 #include <linux/netfilter_netdev.h>
0036 #include <net/pkt_sched.h>
0037 #include <net/net_namespace.h>
0038 
0039 #define TX_Q_LIMIT    32
0040 
/* Per-queue packet/byte counters.  The u64_stats_sync seqcount makes
 * 64-bit reads consistent on 32-bit SMP hosts.
 */
struct ifb_q_stats {
	u64 packets;
	u64 bytes;
	struct u64_stats_sync	sync;
};
0046 
/* Per-tx-queue state; one instance per netdev tx queue, cacheline
 * aligned to avoid false sharing between queues.
 */
struct ifb_q_private {
	struct net_device	*dev;		/* owning ifb device */
	struct tasklet_struct	ifb_tasklet;	/* deferred forwarding work */
	int			tasklet_pending; /* nonzero while the tasklet is scheduled */
	int			txqnum;		/* index of the tx queue we serve */
	struct sk_buff_head	rq;		/* filled by ifb_xmit() under the txq lock */
	struct sk_buff_head	tq;		/* private working list of the tasklet */
	struct ifb_q_stats	rx_stats;	/* packets accepted by ifb_xmit() */
	struct ifb_q_stats	tx_stats;	/* packets re-injected by the tasklet */
} ____cacheline_aligned_in_smp;
0057 
/* Device private data: array of one ifb_q_private per tx queue,
 * allocated in ifb_dev_init() and freed in ifb_dev_free().
 */
struct ifb_dev_private {
	struct ifb_q_private *tx_private;
};
0061 
/* For ethtool stats: name plus byte offset of a counter within
 * struct ifb_q_stats.
 */
struct ifb_q_stats_desc {
	char	desc[ETH_GSTRING_LEN];
	size_t	offset;
};
0067 
#define IFB_Q_STAT(m)	offsetof(struct ifb_q_stats, m)

/* Counters exported per queue via ethtool -S. */
static const struct ifb_q_stats_desc ifb_q_stats_desc[] = {
	{ "packets",	IFB_Q_STAT(packets) },
	{ "bytes",	IFB_Q_STAT(bytes) },
};

#define IFB_Q_STATS_LEN	ARRAY_SIZE(ifb_q_stats_desc)

/* Forward declarations for the netdev_ops table below. */
static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev);
static int ifb_open(struct net_device *dev);
static int ifb_close(struct net_device *dev);
0080 
/* Account one packet of @len bytes into @stats under its stats seqcount. */
static void ifb_update_q_stats(struct ifb_q_stats *stats, int len)
{
	u64_stats_update_begin(&stats->sync);
	stats->packets++;
	stats->bytes += len;
	u64_stats_update_end(&stats->sync);
}
0088 
/* Per-queue worker: drains packets queued by ifb_xmit() and re-injects
 * them into the stack.
 *
 * rq is the shared list filled by ifb_xmit() under the tx queue lock;
 * tq is a private working list only this tasklet touches, so the main
 * loop can run without holding the lock.
 */
static void ifb_ri_tasklet(struct tasklet_struct *t)
{
	struct ifb_q_private *txp = from_tasklet(txp, t, ifb_tasklet);
	struct netdev_queue *txq;
	struct sk_buff *skb;

	txq = netdev_get_tx_queue(txp->dev, txp->txqnum);
	skb = skb_peek(&txp->tq);
	if (!skb) {
		/* Working list empty: grab everything ifb_xmit() queued.
		 * If the lock is contended, just retry later.
		 */
		if (!__netif_tx_trylock(txq))
			goto resched;
		skb_queue_splice_tail_init(&txp->rq, &txp->tq);
		__netif_tx_unlock(txq);
	}

	while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
		/* Skip tc and netfilter to prevent redirection loop. */
		skb->redirected = 0;
#ifdef CONFIG_NET_CLS_ACT
		skb->tc_skip_classify = 1;
#endif
		nf_skip_egress(skb, true);

		ifb_update_q_stats(&txp->tx_stats, skb->len);

		rcu_read_lock();
		/* Hand the skb back to the device it was redirected from. */
		skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif);
		if (!skb->dev) {
			/* Original device is gone: drop this packet and,
			 * if more are pending, finish them via reschedule.
			 */
			rcu_read_unlock();
			dev_kfree_skb(skb);
			txp->dev->stats.tx_dropped++;
			if (skb_queue_len(&txp->tq) != 0)
				goto resched;
			break;
		}
		rcu_read_unlock();
		skb->skb_iif = txp->dev->ifindex;

		if (!skb->from_ingress) {
			dev_queue_xmit(skb);
		} else {
			/* Came from ingress: strip the MAC header (and its
			 * checksum contribution) before re-receiving.
			 */
			skb_pull_rcsum(skb, skb->mac_len);
			netif_receive_skb(skb);
		}
	}

	if (__netif_tx_trylock(txq)) {
		skb = skb_peek(&txp->rq);
		if (!skb) {
			/* Fully drained: let ifb_xmit() schedule us again
			 * and restart the queue if it was stopped.
			 */
			txp->tasklet_pending = 0;
			if (netif_tx_queue_stopped(txq))
				netif_tx_wake_queue(txq);
		} else {
			__netif_tx_unlock(txq);
			goto resched;
		}
		__netif_tx_unlock(txq);
	} else {
resched:
		txp->tasklet_pending = 1;
		tasklet_schedule(&txp->ifb_tasklet);
	}

}
0153 
0154 static void ifb_stats64(struct net_device *dev,
0155             struct rtnl_link_stats64 *stats)
0156 {
0157     struct ifb_dev_private *dp = netdev_priv(dev);
0158     struct ifb_q_private *txp = dp->tx_private;
0159     unsigned int start;
0160     u64 packets, bytes;
0161     int i;
0162 
0163     for (i = 0; i < dev->num_tx_queues; i++,txp++) {
0164         do {
0165             start = u64_stats_fetch_begin_irq(&txp->rx_stats.sync);
0166             packets = txp->rx_stats.packets;
0167             bytes = txp->rx_stats.bytes;
0168         } while (u64_stats_fetch_retry_irq(&txp->rx_stats.sync, start));
0169         stats->rx_packets += packets;
0170         stats->rx_bytes += bytes;
0171 
0172         do {
0173             start = u64_stats_fetch_begin_irq(&txp->tx_stats.sync);
0174             packets = txp->tx_stats.packets;
0175             bytes = txp->tx_stats.bytes;
0176         } while (u64_stats_fetch_retry_irq(&txp->tx_stats.sync, start));
0177         stats->tx_packets += packets;
0178         stats->tx_bytes += bytes;
0179     }
0180     stats->rx_dropped = dev->stats.rx_dropped;
0181     stats->tx_dropped = dev->stats.tx_dropped;
0182 }
0183 
0184 static int ifb_dev_init(struct net_device *dev)
0185 {
0186     struct ifb_dev_private *dp = netdev_priv(dev);
0187     struct ifb_q_private *txp;
0188     int i;
0189 
0190     txp = kcalloc(dev->num_tx_queues, sizeof(*txp), GFP_KERNEL);
0191     if (!txp)
0192         return -ENOMEM;
0193     dp->tx_private = txp;
0194     for (i = 0; i < dev->num_tx_queues; i++,txp++) {
0195         txp->txqnum = i;
0196         txp->dev = dev;
0197         __skb_queue_head_init(&txp->rq);
0198         __skb_queue_head_init(&txp->tq);
0199         u64_stats_init(&txp->rx_stats.sync);
0200         u64_stats_init(&txp->tx_stats.sync);
0201         tasklet_setup(&txp->ifb_tasklet, ifb_ri_tasklet);
0202         netif_tx_start_queue(netdev_get_tx_queue(dev, i));
0203     }
0204     return 0;
0205 }
0206 
0207 static void ifb_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
0208 {
0209     u8 *p = buf;
0210     int i, j;
0211 
0212     switch (stringset) {
0213     case ETH_SS_STATS:
0214         for (i = 0; i < dev->real_num_rx_queues; i++)
0215             for (j = 0; j < IFB_Q_STATS_LEN; j++)
0216                 ethtool_sprintf(&p, "rx_queue_%u_%.18s",
0217                         i, ifb_q_stats_desc[j].desc);
0218 
0219         for (i = 0; i < dev->real_num_tx_queues; i++)
0220             for (j = 0; j < IFB_Q_STATS_LEN; j++)
0221                 ethtool_sprintf(&p, "tx_queue_%u_%.18s",
0222                         i, ifb_q_stats_desc[j].desc);
0223 
0224         break;
0225     }
0226 }
0227 
0228 static int ifb_get_sset_count(struct net_device *dev, int sset)
0229 {
0230     switch (sset) {
0231     case ETH_SS_STATS:
0232         return IFB_Q_STATS_LEN * (dev->real_num_rx_queues +
0233                       dev->real_num_tx_queues);
0234     default:
0235         return -EOPNOTSUPP;
0236     }
0237 }
0238 
0239 static void ifb_fill_stats_data(u64 **data,
0240                 struct ifb_q_stats *q_stats)
0241 {
0242     void *stats_base = (void *)q_stats;
0243     unsigned int start;
0244     size_t offset;
0245     int j;
0246 
0247     do {
0248         start = u64_stats_fetch_begin_irq(&q_stats->sync);
0249         for (j = 0; j < IFB_Q_STATS_LEN; j++) {
0250             offset = ifb_q_stats_desc[j].offset;
0251             (*data)[j] = *(u64 *)(stats_base + offset);
0252         }
0253     } while (u64_stats_fetch_retry_irq(&q_stats->sync, start));
0254 
0255     *data += IFB_Q_STATS_LEN;
0256 }
0257 
0258 static void ifb_get_ethtool_stats(struct net_device *dev,
0259                   struct ethtool_stats *stats, u64 *data)
0260 {
0261     struct ifb_dev_private *dp = netdev_priv(dev);
0262     struct ifb_q_private *txp;
0263     int i;
0264 
0265     for (i = 0; i < dev->real_num_rx_queues; i++) {
0266         txp = dp->tx_private + i;
0267         ifb_fill_stats_data(&data, &txp->rx_stats);
0268     }
0269 
0270     for (i = 0; i < dev->real_num_tx_queues; i++) {
0271         txp = dp->tx_private + i;
0272         ifb_fill_stats_data(&data, &txp->tx_stats);
0273     }
0274 }
0275 
/* Netdev callbacks; ifb_dev_init allocates the per-queue state before
 * the device can be opened.
 */
static const struct net_device_ops ifb_netdev_ops = {
	.ndo_open	= ifb_open,
	.ndo_stop	= ifb_close,
	.ndo_get_stats64 = ifb_stats64,
	.ndo_start_xmit	= ifb_xmit,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_init	= ifb_dev_init,
};
0284 
/* ethtool callbacks: per-queue statistics reporting only. */
static const struct ethtool_ops ifb_ethtool_ops = {
	.get_strings		= ifb_get_strings,
	.get_sset_count		= ifb_get_sset_count,
	.get_ethtool_stats	= ifb_get_ethtool_stats,
};
0290 
/* Feature flags ifb advertises.  Packets are only queued and
 * re-injected here, so these are claimed without any hardware backing.
 */
#define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG  | NETIF_F_FRAGLIST	| \
		      NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL	| \
		      NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX		| \
		      NETIF_F_HW_VLAN_STAG_TX)
0295 
0296 static void ifb_dev_free(struct net_device *dev)
0297 {
0298     struct ifb_dev_private *dp = netdev_priv(dev);
0299     struct ifb_q_private *txp = dp->tx_private;
0300     int i;
0301 
0302     for (i = 0; i < dev->num_tx_queues; i++,txp++) {
0303         tasklet_kill(&txp->ifb_tasklet);
0304         __skb_queue_purge(&txp->rq);
0305         __skb_queue_purge(&txp->tq);
0306     }
0307     kfree(dp->tx_private);
0308 }
0309 
/* rtnl ->setup hook: initialise an ifb netdev with ethernet-generic
 * defaults plus ifb-specific flags and offload features.
 */
static void ifb_setup(struct net_device *dev)
{
	/* Initialize the device structure. */
	dev->netdev_ops = &ifb_netdev_ops;
	dev->ethtool_ops = &ifb_ethtool_ops;

	/* Fill in device structure with ethernet-generic values. */
	ether_setup(dev);
	dev->tx_queue_len = TX_Q_LIMIT;

	dev->features |= IFB_FEATURES;
	dev->hw_features |= dev->features;
	dev->hw_enc_features |= dev->features;
	/* Don't propagate the VLAN tx-tagging flags to vlan_features. */
	dev->vlan_features |= IFB_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX |
					       NETIF_F_HW_VLAN_STAG_TX);

	dev->flags |= IFF_NOARP;
	dev->flags &= ~IFF_MULTICAST;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	netif_keep_dst(dev);
	eth_hw_addr_random(dev);
	/* Core frees the netdev; ifb_dev_free() runs first as destructor. */
	dev->needs_free_netdev = true;
	dev->priv_destructor = ifb_dev_free;

	/* NOTE(review): zero min/max MTU appears to opt out of the core's
	 * MTU range checking — confirm against dev_validate_mtu().
	 */
	dev->min_mtu = 0;
	dev->max_mtu = 0;
}
0337 
/* ndo_start_xmit: packets redirected to the ifb device arrive here.
 * They are queued on the per-queue rq list (the caller holds the tx
 * queue lock) and the tasklet is kicked to process them later.
 */
static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb);

	/* Counted as received even if dropped just below. */
	ifb_update_q_stats(&txp->rx_stats, skb->len);

	/* Only packets that were redirected here and remember their
	 * original ifindex can be bounced back; drop anything else.
	 */
	if (!skb->redirected || !skb->skb_iif) {
		dev_kfree_skb(skb);
		dev->stats.rx_dropped++;
		return NETDEV_TX_OK;
	}

	/* Apply backpressure once the backlog reaches tx_queue_len. */
	if (skb_queue_len(&txp->rq) >= dev->tx_queue_len)
		netif_tx_stop_queue(netdev_get_tx_queue(dev, txp->txqnum));

	__skb_queue_tail(&txp->rq, skb);
	if (!txp->tasklet_pending) {
		txp->tasklet_pending = 1;
		tasklet_schedule(&txp->ifb_tasklet);
	}

	return NETDEV_TX_OK;
}
0362 
/* ndo_stop: stop all tx queues so no further packets are queued to us. */
static int ifb_close(struct net_device *dev)
{
	netif_tx_stop_all_queues(dev);
	return 0;
}
0368 
/* ndo_open: (re)start all tx queues; per-queue state already exists
 * from ifb_dev_init().
 */
static int ifb_open(struct net_device *dev)
{
	netif_tx_start_all_queues(dev);
	return 0;
}
0374 
0375 static int ifb_validate(struct nlattr *tb[], struct nlattr *data[],
0376             struct netlink_ext_ack *extack)
0377 {
0378     if (tb[IFLA_ADDRESS]) {
0379         if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
0380             return -EINVAL;
0381         if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
0382             return -EADDRNOTAVAIL;
0383     }
0384     return 0;
0385 }
0386 
/* rtnl_link ops: enables "ip link add ... type ifb". */
static struct rtnl_link_ops ifb_link_ops __read_mostly = {
	.kind		= "ifb",
	.priv_size	= sizeof(struct ifb_dev_private),
	.setup		= ifb_setup,
	.validate	= ifb_validate,
};
0393 
/* Number of ifb devices to be set up by this module.
 * Note that these legacy devices have one queue.
 * Prefer something like : ip link add ifb10 numtxqueues 8 type ifb
 * (perm 0: the parameter is not visible/writable via sysfs.)
 */
static int numifbs = 2;
module_param(numifbs, int, 0);
MODULE_PARM_DESC(numifbs, "Number of ifb devices");
0401 
0402 static int __init ifb_init_one(int index)
0403 {
0404     struct net_device *dev_ifb;
0405     int err;
0406 
0407     dev_ifb = alloc_netdev(sizeof(struct ifb_dev_private), "ifb%d",
0408                    NET_NAME_UNKNOWN, ifb_setup);
0409 
0410     if (!dev_ifb)
0411         return -ENOMEM;
0412 
0413     dev_ifb->rtnl_link_ops = &ifb_link_ops;
0414     err = register_netdevice(dev_ifb);
0415     if (err < 0)
0416         goto err;
0417 
0418     return 0;
0419 
0420 err:
0421     free_netdev(dev_ifb);
0422     return err;
0423 }
0424 
/* Module init: register the link ops and create numifbs legacy devices.
 *
 * NOTE(review): pernet_ops_rwsem is taken before rtnl_lock here,
 * presumably to keep lock ordering consistent with the netns setup
 * paths while using the __rtnl_link_register/__rtnl_link_unregister
 * variants that expect RTNL held — confirm against current core code.
 */
static int __init ifb_init_module(void)
{
	int i, err;

	down_write(&pernet_ops_rwsem);
	rtnl_lock();
	err = __rtnl_link_register(&ifb_link_ops);
	if (err < 0)
		goto out;

	/* Create the initial devices; stop at the first failure. */
	for (i = 0; i < numifbs && !err; i++) {
		err = ifb_init_one(i);
		cond_resched();
	}
	/* On failure, undo the registration (devices created so far are
	 * torn down through the link-ops unregister path).
	 */
	if (err)
		__rtnl_link_unregister(&ifb_link_ops);

out:
	rtnl_unlock();
	up_write(&pernet_ops_rwsem);

	return err;
}
0448 
/* Module exit: unregistering the link ops also tears down all devices
 * of this kind.
 */
static void __exit ifb_cleanup_module(void)
{
	rtnl_link_unregister(&ifb_link_ops);
}
0453 
module_init(ifb_init_module);
module_exit(ifb_cleanup_module);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jamal Hadi Salim");
/* Lets rtnetlink autoload this module for "type ifb" link requests. */
MODULE_ALIAS_RTNL_LINK("ifb");