// SPDX-License-Identifier: GPL-2.0-or-later
/* drivers/net/ifb.c:

	The purpose of this driver is to provide a device that allows
	for sharing of resources:

	1) qdiscs/policies that are per device as opposed to system wide.
	ifb allows for a device which can be redirected to thus providing
	an impedance matcher kind of setup as this device can be used both
	on ingress and egress

	2) Allows for queueing incoming traffic for shaping instead of
	dropping.

	The original concept is based on what is known as the IMQ
	driver initially written by Martin Devera, later rewritten
	by Patrick McHardy and then maintained by Andre Correa.

	You need the tc action mirror or redirect to feed this device
	packets.

	Authors:	Jamal Hadi Salim (2005)

*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/moduleparam.h>
#include <linux/netfilter_netdev.h>
#include <net/pkt_sched.h>
#include <net/net_namespace.h>

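/* Per-queue backlog cap: ifb_setup() copies this into
 * dev->tx_queue_len, and ifb_xmit() stops the tx queue once its rq
 * backlog reaches that length.
 */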
#define TX_Q_LIMIT 32

struct ifb_q_stats {
	u64 packets;
	u64 bytes;
	struct u64_stats_sync sync;
};

struct ifb_q_private {
	struct net_device *dev;
	struct tasklet_struct ifb_tasklet;
	int tasklet_pending;
	int txqnum;
	struct sk_buff_head rq;
	struct sk_buff_head tq;
	struct ifb_q_stats rx_stats;
	struct ifb_q_stats tx_stats;
} ____cacheline_aligned_in_smp;

struct ifb_dev_private {
	struct ifb_q_private *tx_private;
};

/* For ethtool stats. */
struct ifb_q_stats_desc {
	char	desc[ETH_GSTRING_LEN];
	size_t	offset;
};

#define IFB_Q_STAT(m)	offsetof(struct ifb_q_stats, m)

static const struct ifb_q_stats_desc ifb_q_stats_desc[] = {
	{ "packets",	IFB_Q_STAT(packets) },
	{ "bytes",	IFB_Q_STAT(bytes) },
};

#define IFB_Q_STATS_LEN	ARRAY_SIZE(ifb_q_stats_desc)

static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev);
static int ifb_open(struct net_device *dev);
static int ifb_close(struct net_device *dev);

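/* Bump one queue's counters. The u64_stats_sync seqcount lets readers
 * fetch the 64-bit packets/bytes pair without tearing on 32-bit SMP;
 * on 64-bit builds it compiles away.
 */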
static void ifb_update_q_stats(struct ifb_q_stats *stats, int len)
{
	u64_stats_update_begin(&stats->sync);
	stats->packets++;
	stats->bytes += len;
	u64_stats_update_end(&stats->sync);
}

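/* Per-queue worker. ifb_xmit() appends redirected skbs to rq under the
 * tx queue lock; here we splice rq into the private tq (taking the lock
 * only for the splice) and then drain tq lock-free. Each skb is handed
 * back to its original device (skb->skb_iif): via dev_queue_xmit() if
 * it was redirected on egress, via netif_receive_skb() if it came from
 * ingress.
 */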
static void ifb_ri_tasklet(struct tasklet_struct *t)
{
	struct ifb_q_private *txp = from_tasklet(txp, t, ifb_tasklet);
	struct netdev_queue *txq;
	struct sk_buff *skb;

	txq = netdev_get_tx_queue(txp->dev, txp->txqnum);
	skb = skb_peek(&txp->tq);
	if (!skb) {
		if (!__netif_tx_trylock(txq))
			goto resched;
		skb_queue_splice_tail_init(&txp->rq, &txp->tq);
		__netif_tx_unlock(txq);
	}

	while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
		/* Skip tc and netfilter to prevent redirection loop. */
		skb->redirected = 0;
#ifdef CONFIG_NET_CLS_ACT
		skb->tc_skip_classify = 1;
#endif
		nf_skip_egress(skb, true);

		ifb_update_q_stats(&txp->tx_stats, skb->len);

		rcu_read_lock();
		skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif);
		if (!skb->dev) {
			rcu_read_unlock();
			dev_kfree_skb(skb);
			txp->dev->stats.tx_dropped++;
			if (skb_queue_len(&txp->tq) != 0)
				goto resched;
			break;
		}
		rcu_read_unlock();
		skb->skb_iif = txp->dev->ifindex;

		if (!skb->from_ingress) {
			dev_queue_xmit(skb);
		} else {
			skb_pull_rcsum(skb, skb->mac_len);
			netif_receive_skb(skb);
		}
	}

	if (__netif_tx_trylock(txq)) {
		skb = skb_peek(&txp->rq);
		if (!skb) {
			txp->tasklet_pending = 0;
			if (netif_tx_queue_stopped(txq))
				netif_tx_wake_queue(txq);
		} else {
			__netif_tx_unlock(txq);
			goto resched;
		}
		__netif_tx_unlock(txq);
	} else {
resched:
		txp->tasklet_pending = 1;
		tasklet_schedule(&txp->ifb_tasklet);
	}
}

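/* Aggregate the per-queue counters into rtnl_link_stats64. The
 * fetch_begin/fetch_retry pair rereads a queue's counters if a writer
 * updated them mid-read, so each packets/bytes pair stays consistent.
 */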
static void ifb_stats64(struct net_device *dev,
			struct rtnl_link_stats64 *stats)
{
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *txp = dp->tx_private;
	unsigned int start;
	u64 packets, bytes;
	int i;

	for (i = 0; i < dev->num_tx_queues; i++, txp++) {
		do {
			start = u64_stats_fetch_begin_irq(&txp->rx_stats.sync);
			packets = txp->rx_stats.packets;
			bytes = txp->rx_stats.bytes;
		} while (u64_stats_fetch_retry_irq(&txp->rx_stats.sync, start));
		stats->rx_packets += packets;
		stats->rx_bytes += bytes;

		do {
			start = u64_stats_fetch_begin_irq(&txp->tx_stats.sync);
			packets = txp->tx_stats.packets;
			bytes = txp->tx_stats.bytes;
		} while (u64_stats_fetch_retry_irq(&txp->tx_stats.sync, start));
		stats->tx_packets += packets;
		stats->tx_bytes += bytes;
	}
	stats->rx_dropped = dev->stats.rx_dropped;
	stats->tx_dropped = dev->stats.tx_dropped;
}

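/* Allocate one ifb_q_private per tx queue and initialize its skb
 * queues, stats seqcounts and tasklet. The tx queues are marked
 * started so ifb_xmit() can run as soon as the device is brought up.
 */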
static int ifb_dev_init(struct net_device *dev)
{
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *txp;
	int i;

	txp = kcalloc(dev->num_tx_queues, sizeof(*txp), GFP_KERNEL);
	if (!txp)
		return -ENOMEM;
	dp->tx_private = txp;
	for (i = 0; i < dev->num_tx_queues; i++, txp++) {
		txp->txqnum = i;
		txp->dev = dev;
		__skb_queue_head_init(&txp->rq);
		__skb_queue_head_init(&txp->tq);
		u64_stats_init(&txp->rx_stats.sync);
		u64_stats_init(&txp->tx_stats.sync);
		tasklet_setup(&txp->ifb_tasklet, ifb_ri_tasklet);
		netif_tx_start_queue(netdev_get_tx_queue(dev, i));
	}
	return 0;
}

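/* ethtool stats layout: IFB_Q_STATS_LEN entries per rx queue, then per
 * tx queue. ifb_get_strings(), ifb_get_sset_count() and
 * ifb_get_ethtool_stats() must all walk the queues in this same order.
 */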
static void ifb_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
{
	u8 *p = buf;
	int i, j;

	switch (stringset) {
	case ETH_SS_STATS:
		for (i = 0; i < dev->real_num_rx_queues; i++)
			for (j = 0; j < IFB_Q_STATS_LEN; j++)
				ethtool_sprintf(&p, "rx_queue_%u_%.18s",
						i, ifb_q_stats_desc[j].desc);

		for (i = 0; i < dev->real_num_tx_queues; i++)
			for (j = 0; j < IFB_Q_STATS_LEN; j++)
				ethtool_sprintf(&p, "tx_queue_%u_%.18s",
						i, ifb_q_stats_desc[j].desc);

		break;
	}
}

static int ifb_get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_STATS:
		return IFB_Q_STATS_LEN * (dev->real_num_rx_queues +
					  dev->real_num_tx_queues);
	default:
		return -EOPNOTSUPP;
	}
}

static void ifb_fill_stats_data(u64 **data,
				struct ifb_q_stats *q_stats)
{
	void *stats_base = (void *)q_stats;
	unsigned int start;
	size_t offset;
	int j;

	do {
		start = u64_stats_fetch_begin_irq(&q_stats->sync);
		for (j = 0; j < IFB_Q_STATS_LEN; j++) {
			offset = ifb_q_stats_desc[j].offset;
			(*data)[j] = *(u64 *)(stats_base + offset);
		}
	} while (u64_stats_fetch_retry_irq(&q_stats->sync, start));

	*data += IFB_Q_STATS_LEN;
}

static void ifb_get_ethtool_stats(struct net_device *dev,
				  struct ethtool_stats *stats, u64 *data)
{
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *txp;
	int i;

	for (i = 0; i < dev->real_num_rx_queues; i++) {
		txp = dp->tx_private + i;
		ifb_fill_stats_data(&data, &txp->rx_stats);
	}

	for (i = 0; i < dev->real_num_tx_queues; i++) {
		txp = dp->tx_private + i;
		ifb_fill_stats_data(&data, &txp->tx_stats);
	}
}

static const struct net_device_ops ifb_netdev_ops = {
	.ndo_open		= ifb_open,
	.ndo_stop		= ifb_close,
	.ndo_get_stats64	= ifb_stats64,
	.ndo_start_xmit		= ifb_xmit,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_init		= ifb_dev_init,
};

static const struct ethtool_ops ifb_ethtool_ops = {
	.get_strings		= ifb_get_strings,
	.get_sset_count		= ifb_get_sset_count,
	.get_ethtool_stats	= ifb_get_ethtool_stats,
};

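/* Feature flags ifb advertises. The device never touches payload, it
 * only requeues skbs, so claiming checksum, SG and software GSO lets
 * offloaded skbs pass through without being resolved first.
 */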
#define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST	| \
		      NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL	| \
		      NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX		| \
		      NETIF_F_HW_VLAN_STAG_TX)

static void ifb_dev_free(struct net_device *dev)
{
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *txp = dp->tx_private;
	int i;

	for (i = 0; i < dev->num_tx_queues; i++, txp++) {
		tasklet_kill(&txp->ifb_tasklet);
		__skb_queue_purge(&txp->rq);
		__skb_queue_purge(&txp->tq);
	}
	kfree(dp->tx_private);
}

static void ifb_setup(struct net_device *dev)
{
	/* Initialize the device structure. */
	dev->netdev_ops = &ifb_netdev_ops;
	dev->ethtool_ops = &ifb_ethtool_ops;

	/* Fill in device structure with ethernet-generic values. */
	ether_setup(dev);
	dev->tx_queue_len = TX_Q_LIMIT;

	dev->features |= IFB_FEATURES;
	dev->hw_features |= dev->features;
	dev->hw_enc_features |= dev->features;
	dev->vlan_features |= IFB_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX |
					       NETIF_F_HW_VLAN_STAG_TX);

	dev->flags |= IFF_NOARP;
	dev->flags &= ~IFF_MULTICAST;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	netif_keep_dst(dev);
	eth_hw_addr_random(dev);
	dev->needs_free_netdev = true;
	dev->priv_destructor = ifb_dev_free;

	/* A max_mtu of zero disables the upper MTU bound check. */
	dev->min_mtu = 0;
	dev->max_mtu = 0;
}

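/* Entry point for redirected packets. A typical setup (example only,
 * nothing in this driver depends on these exact commands) pairs ifb
 * with the mirred action:
 *
 *	ip link add ifb0 numtxqueues 4 type ifb
 *	ip link set ifb0 up
 *	tc qdisc add dev eth0 handle ffff: ingress
 *	tc filter add dev eth0 parent ffff: matchall \
 *		action mirred egress redirect dev ifb0
 *
 * Anything arriving without skb->redirected set is dropped.
 */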
static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb);

	ifb_update_q_stats(&txp->rx_stats, skb->len);

	if (!skb->redirected || !skb->skb_iif) {
		dev_kfree_skb(skb);
		dev->stats.rx_dropped++;
		return NETDEV_TX_OK;
	}

	if (skb_queue_len(&txp->rq) >= dev->tx_queue_len)
		netif_tx_stop_queue(netdev_get_tx_queue(dev, txp->txqnum));

	__skb_queue_tail(&txp->rq, skb);
	if (!txp->tasklet_pending) {
		txp->tasklet_pending = 1;
		tasklet_schedule(&txp->ifb_tasklet);
	}

	return NETDEV_TX_OK;
}

static int ifb_close(struct net_device *dev)
{
	netif_tx_stop_all_queues(dev);
	return 0;
}

static int ifb_open(struct net_device *dev)
{
	netif_tx_start_all_queues(dev);
	return 0;
}

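/* Only a well-formed unicast Ethernet address may be supplied via
 * IFLA_ADDRESS.
 */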
static int ifb_validate(struct nlattr *tb[], struct nlattr *data[],
			struct netlink_ext_ack *extack)
{
	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}
	return 0;
}

static struct rtnl_link_ops ifb_link_ops __read_mostly = {
	.kind		= "ifb",
	.priv_size	= sizeof(struct ifb_dev_private),
	.setup		= ifb_setup,
	.validate	= ifb_validate,
};

/* Number of ifb devices to be set up by this module.
 * Note that these legacy devices have one queue.
 * Prefer something like : ip link add ifb10 numtxqueues 8 type ifb
 */
static int numifbs = 2;
module_param(numifbs, int, 0);
MODULE_PARM_DESC(numifbs, "Number of ifb devices");

static int __init ifb_init_one(int index)
{
	struct net_device *dev_ifb;
	int err;

	dev_ifb = alloc_netdev(sizeof(struct ifb_dev_private), "ifb%d",
			       NET_NAME_UNKNOWN, ifb_setup);

	if (!dev_ifb)
		return -ENOMEM;

	dev_ifb->rtnl_link_ops = &ifb_link_ops;
	err = register_netdevice(dev_ifb);
	if (err < 0)
		goto err;

	return 0;

err:
	free_netdev(dev_ifb);
	return err;
}

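/* Register the link ops and create the boot-time devices in one
 * rtnl-held section. pernet_ops_rwsem is taken first to match the
 * locking order rtnl_link_register() itself uses around
 * __rtnl_link_register().
 */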
static int __init ifb_init_module(void)
{
	int i, err;

	down_write(&pernet_ops_rwsem);
	rtnl_lock();
	err = __rtnl_link_register(&ifb_link_ops);
	if (err < 0)
		goto out;

	for (i = 0; i < numifbs && !err; i++) {
		err = ifb_init_one(i);
		cond_resched();
	}
	if (err)
		__rtnl_link_unregister(&ifb_link_ops);

out:
	rtnl_unlock();
	up_write(&pernet_ops_rwsem);

	return err;
}

static void __exit ifb_cleanup_module(void)
{
	rtnl_link_unregister(&ifb_link_ops);
}

module_init(ifb_init_module);
module_exit(ifb_cleanup_module);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jamal Hadi Salim");
MODULE_ALIAS_RTNL_LINK("ifb");