// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	NET3	Protocol independent device support routines.
 */
0071 #include <linux/uaccess.h>
0072 #include <linux/bitops.h>
0073 #include <linux/capability.h>
0074 #include <linux/cpu.h>
0075 #include <linux/types.h>
0076 #include <linux/kernel.h>
0077 #include <linux/hash.h>
0078 #include <linux/slab.h>
0079 #include <linux/sched.h>
0080 #include <linux/sched/mm.h>
0081 #include <linux/mutex.h>
0082 #include <linux/rwsem.h>
0083 #include <linux/string.h>
0084 #include <linux/mm.h>
0085 #include <linux/socket.h>
0086 #include <linux/sockios.h>
0087 #include <linux/errno.h>
0088 #include <linux/interrupt.h>
0089 #include <linux/if_ether.h>
0090 #include <linux/netdevice.h>
0091 #include <linux/etherdevice.h>
0092 #include <linux/ethtool.h>
0093 #include <linux/skbuff.h>
0094 #include <linux/kthread.h>
0095 #include <linux/bpf.h>
0096 #include <linux/bpf_trace.h>
0097 #include <net/net_namespace.h>
0098 #include <net/sock.h>
0099 #include <net/busy_poll.h>
0100 #include <linux/rtnetlink.h>
0101 #include <linux/stat.h>
0102 #include <net/dsa.h>
0103 #include <net/dst.h>
0104 #include <net/dst_metadata.h>
0105 #include <net/gro.h>
0106 #include <net/pkt_sched.h>
0107 #include <net/pkt_cls.h>
0108 #include <net/checksum.h>
0109 #include <net/xfrm.h>
0110 #include <linux/highmem.h>
0111 #include <linux/init.h>
0112 #include <linux/module.h>
0113 #include <linux/netpoll.h>
0114 #include <linux/rcupdate.h>
0115 #include <linux/delay.h>
0116 #include <net/iw_handler.h>
0117 #include <asm/current.h>
0118 #include <linux/audit.h>
0119 #include <linux/dmaengine.h>
0120 #include <linux/err.h>
0121 #include <linux/ctype.h>
0122 #include <linux/if_arp.h>
0123 #include <linux/if_vlan.h>
0124 #include <linux/ip.h>
0125 #include <net/ip.h>
0126 #include <net/mpls.h>
0127 #include <linux/ipv6.h>
0128 #include <linux/in.h>
0129 #include <linux/jhash.h>
0130 #include <linux/random.h>
0131 #include <trace/events/napi.h>
0132 #include <trace/events/net.h>
0133 #include <trace/events/skb.h>
0134 #include <trace/events/qdisc.h>
0135 #include <linux/inetdevice.h>
0136 #include <linux/cpu_rmap.h>
0137 #include <linux/static_key.h>
0138 #include <linux/hashtable.h>
0139 #include <linux/vmalloc.h>
0140 #include <linux/if_macvlan.h>
0141 #include <linux/errqueue.h>
0142 #include <linux/hrtimer.h>
0143 #include <linux/netfilter_netdev.h>
0144 #include <linux/crash_dump.h>
0145 #include <linux/sctp.h>
0146 #include <net/udp_tunnel.h>
0147 #include <linux/net_namespace.h>
0148 #include <linux/indirect_call_wrapper.h>
0149 #include <net/devlink.h>
0150 #include <linux/pm_runtime.h>
0151 #include <linux/prandom.h>
0152 #include <linux/once_lite.h>
0153
0154 #include "dev.h"
0155 #include "net-sysfs.h"
0156
0157
0158 static DEFINE_SPINLOCK(ptype_lock);
0159 struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
0160 struct list_head ptype_all __read_mostly;
0161
0162 static int netif_rx_internal(struct sk_buff *skb);
0163 static int call_netdevice_notifiers_info(unsigned long val,
0164 struct netdev_notifier_info *info);
0165 static int call_netdevice_notifiers_extack(unsigned long val,
0166 struct net_device *dev,
0167 struct netlink_ext_ack *extack);
0168 static struct napi_struct *napi_by_id(unsigned int napi_id);
/*
 * The device list (@dev_base_head in each &struct net) and the name/index
 * hash tables are protected by @dev_base_lock and the RTNL mutex.  Pure
 * readers take dev_base_lock for reading (or rcu_read_lock() for the
 * RCU-based lookup helpers); writers hold RTNL and additionally take
 * dev_base_lock for writing around the actual list and hash updates.
 */
0189 DEFINE_RWLOCK(dev_base_lock);
0190 EXPORT_SYMBOL(dev_base_lock);
0191
0192 static DEFINE_MUTEX(ifalias_mutex);
0193
0194
0195 static DEFINE_SPINLOCK(napi_hash_lock);
0196
0197 static unsigned int napi_gen_id = NR_CPUS;
0198 static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
0199
0200 static DECLARE_RWSEM(devnet_rename_sem);
0201
0202 static inline void dev_base_seq_inc(struct net *net)
0203 {
0204 while (++net->dev_base_seq == 0)
0205 ;
0206 }
0207
0208 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
0209 {
0210 unsigned int hash = full_name_hash(net, name, strnlen(name, IFNAMSIZ));
0211
0212 return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
0213 }
0214
0215 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
0216 {
0217 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
0218 }
0219
0220 static inline void rps_lock_irqsave(struct softnet_data *sd,
0221 unsigned long *flags)
0222 {
0223 if (IS_ENABLED(CONFIG_RPS))
0224 spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags);
0225 else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
0226 local_irq_save(*flags);
0227 }
0228
0229 static inline void rps_lock_irq_disable(struct softnet_data *sd)
0230 {
0231 if (IS_ENABLED(CONFIG_RPS))
0232 spin_lock_irq(&sd->input_pkt_queue.lock);
0233 else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
0234 local_irq_disable();
0235 }
0236
0237 static inline void rps_unlock_irq_restore(struct softnet_data *sd,
0238 unsigned long *flags)
0239 {
0240 if (IS_ENABLED(CONFIG_RPS))
0241 spin_unlock_irqrestore(&sd->input_pkt_queue.lock, *flags);
0242 else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
0243 local_irq_restore(*flags);
0244 }
0245
0246 static inline void rps_unlock_irq_enable(struct softnet_data *sd)
0247 {
0248 if (IS_ENABLED(CONFIG_RPS))
0249 spin_unlock_irq(&sd->input_pkt_queue.lock);
0250 else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
0251 local_irq_enable();
0252 }
0253
0254 static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
0255 const char *name)
0256 {
0257 struct netdev_name_node *name_node;
0258
0259 name_node = kmalloc(sizeof(*name_node), GFP_KERNEL);
0260 if (!name_node)
0261 return NULL;
0262 INIT_HLIST_NODE(&name_node->hlist);
0263 name_node->dev = dev;
0264 name_node->name = name;
0265 return name_node;
0266 }
0267
0268 static struct netdev_name_node *
0269 netdev_name_node_head_alloc(struct net_device *dev)
0270 {
0271 struct netdev_name_node *name_node;
0272
0273 name_node = netdev_name_node_alloc(dev, dev->name);
0274 if (!name_node)
0275 return NULL;
0276 INIT_LIST_HEAD(&name_node->list);
0277 return name_node;
0278 }
0279
0280 static void netdev_name_node_free(struct netdev_name_node *name_node)
0281 {
0282 kfree(name_node);
0283 }
0284
0285 static void netdev_name_node_add(struct net *net,
0286 struct netdev_name_node *name_node)
0287 {
0288 hlist_add_head_rcu(&name_node->hlist,
0289 dev_name_hash(net, name_node->name));
0290 }
0291
0292 static void netdev_name_node_del(struct netdev_name_node *name_node)
0293 {
0294 hlist_del_rcu(&name_node->hlist);
0295 }
0296
0297 static struct netdev_name_node *netdev_name_node_lookup(struct net *net,
0298 const char *name)
0299 {
0300 struct hlist_head *head = dev_name_hash(net, name);
0301 struct netdev_name_node *name_node;
0302
0303 hlist_for_each_entry(name_node, head, hlist)
0304 if (!strcmp(name_node->name, name))
0305 return name_node;
0306 return NULL;
0307 }
0308
0309 static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net,
0310 const char *name)
0311 {
0312 struct hlist_head *head = dev_name_hash(net, name);
0313 struct netdev_name_node *name_node;
0314
0315 hlist_for_each_entry_rcu(name_node, head, hlist)
0316 if (!strcmp(name_node->name, name))
0317 return name_node;
0318 return NULL;
0319 }
0320
0321 bool netdev_name_in_use(struct net *net, const char *name)
0322 {
0323 return netdev_name_node_lookup(net, name);
0324 }
0325 EXPORT_SYMBOL(netdev_name_in_use);
0326
0327 int netdev_name_node_alt_create(struct net_device *dev, const char *name)
0328 {
0329 struct netdev_name_node *name_node;
0330 struct net *net = dev_net(dev);
0331
0332 name_node = netdev_name_node_lookup(net, name);
0333 if (name_node)
0334 return -EEXIST;
0335 name_node = netdev_name_node_alloc(dev, name);
0336 if (!name_node)
0337 return -ENOMEM;
0338 netdev_name_node_add(net, name_node);
0339
0340 list_add_tail(&name_node->list, &dev->name_node->list);
0341
0342 return 0;
0343 }
0344
0345 static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
0346 {
0347 list_del(&name_node->list);
0348 netdev_name_node_del(name_node);
0349 kfree(name_node->name);
0350 netdev_name_node_free(name_node);
0351 }
0352
0353 int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
0354 {
0355 struct netdev_name_node *name_node;
0356 struct net *net = dev_net(dev);
0357
0358 name_node = netdev_name_node_lookup(net, name);
0359 if (!name_node)
0360 return -ENOENT;
0361
	/* The lookup can return the primary name node, or an alternate name
	 * owned by another device; only this device's alternate names may
	 * be destroyed here.
	 */
0364 if (name_node == dev->name_node || name_node->dev != dev)
0365 return -EINVAL;
0366
0367 __netdev_name_node_alt_destroy(name_node);
0368
0369 return 0;
0370 }
0371
0372 static void netdev_name_node_alt_flush(struct net_device *dev)
0373 {
0374 struct netdev_name_node *name_node, *tmp;
0375
0376 list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list)
0377 __netdev_name_node_alt_destroy(name_node);
0378 }
0379
0380
0381 static void list_netdevice(struct net_device *dev)
0382 {
0383 struct net *net = dev_net(dev);
0384
0385 ASSERT_RTNL();
0386
0387 write_lock(&dev_base_lock);
0388 list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
0389 netdev_name_node_add(net, dev->name_node);
0390 hlist_add_head_rcu(&dev->index_hlist,
0391 dev_index_hash(net, dev->ifindex));
0392 write_unlock(&dev_base_lock);
0393
0394 dev_base_seq_inc(net);
0395 }
0396
0397
0398
0399
0400 static void unlist_netdevice(struct net_device *dev, bool lock)
0401 {
0402 ASSERT_RTNL();
0403
0404
0405 if (lock)
0406 write_lock(&dev_base_lock);
0407 list_del_rcu(&dev->dev_list);
0408 netdev_name_node_del(dev->name_node);
0409 hlist_del_rcu(&dev->index_hlist);
0410 if (lock)
0411 write_unlock(&dev_base_lock);
0412
0413 dev_base_seq_inc(dev_net(dev));
0414 }
0415
/* Global chain of network device event notifiers.  Writers hold RTNL. */
0420 static RAW_NOTIFIER_HEAD(netdev_chain);
/*
 * Per-CPU packet backlog and NAPI bookkeeping.  Drivers and netif_rx()
 * queue incoming packets here; the NET_RX softirq drains them.
 */
0427 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
0428 EXPORT_PER_CPU_SYMBOL(softnet_data);
0429
0430 #ifdef CONFIG_LOCKDEP
/*
 * Under lockdep, give every device type its own xmit and addr_list lock
 * class (see netdev_lock_type[] below) so stacked transmits, e.g. VLAN
 * over Ethernet, do not produce false "recursive lock" reports.
 */
0435 static const unsigned short netdev_lock_type[] = {
0436 ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
0437 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
0438 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
0439 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
0440 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
0441 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
0442 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
0443 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
0444 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
0445 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
0446 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
0447 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
0448 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
0449 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
0450 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
0451
0452 static const char *const netdev_lock_name[] = {
0453 "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
0454 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
0455 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
0456 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
0457 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
0458 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
0459 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
0460 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
0461 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
0462 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
0463 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
0464 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
0465 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
0466 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
0467 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
0468
0469 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
0470 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
0471
0472 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
0473 {
0474 int i;
0475
0476 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
0477 if (netdev_lock_type[i] == dev_type)
0478 return i;
0479
0480 return ARRAY_SIZE(netdev_lock_type) - 1;
0481 }
0482
0483 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
0484 unsigned short dev_type)
0485 {
0486 int i;
0487
0488 i = netdev_lock_pos(dev_type);
0489 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
0490 netdev_lock_name[i]);
0491 }
0492
0493 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
0494 {
0495 int i;
0496
0497 i = netdev_lock_pos(dev->type);
0498 lockdep_set_class_and_name(&dev->addr_list_lock,
0499 &netdev_addr_lock_key[i],
0500 netdev_lock_name[i]);
0501 }
0502 #else
0503 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
0504 unsigned short dev_type)
0505 {
0506 }
0507
0508 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
0509 {
0510 }
0511 #endif
0512
/*******************************************************************************
 *
 *		Protocol management and registration routines
 *
 *******************************************************************************/

/*
 * ETH_P_ALL taps go on the ptype_all lists (global or per device); all
 * other protocols are hashed by type into ptype_base, or put on the
 * device's ptype_specific list when bound to a single device.
 */
0536 static inline struct list_head *ptype_head(const struct packet_type *pt)
0537 {
0538 if (pt->type == htons(ETH_P_ALL))
0539 return pt->dev ? &pt->dev->ptype_all : &ptype_all;
0540 else
0541 return pt->dev ? &pt->dev->ptype_specific :
0542 &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
0543 }
0544
/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack.  The passed
 *	&packet_type is linked into kernel lists and may not be freed until
 *	it has been removed from the kernel lists again.
 */
0558 void dev_add_pack(struct packet_type *pt)
0559 {
0560 struct list_head *head = ptype_head(pt);
0561
0562 spin_lock(&ptype_lock);
0563 list_add_rcu(&pt->list, head);
0564 spin_unlock(&ptype_lock);
0565 }
0566 EXPORT_SYMBOL(dev_add_pack);
0567
/**
 *	__dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added with
 *	dev_add_pack().  The handler is unlinked, but packets already in
 *	flight may still be delivered: the caller must not free @pt until
 *	after an RCU grace period (see dev_remove_pack()).
 */
0581 void __dev_remove_pack(struct packet_type *pt)
0582 {
0583 struct list_head *head = ptype_head(pt);
0584 struct packet_type *pt1;
0585
0586 spin_lock(&ptype_lock);
0587
0588 list_for_each_entry(pt1, head, list) {
0589 if (pt == pt1) {
0590 list_del_rcu(&pt->list);
0591 goto out;
0592 }
0593 }
0594
0595 pr_warn("dev_remove_pack: %p not found\n", pt);
0596 out:
0597 spin_unlock(&ptype_lock);
0598 }
0599 EXPORT_SYMBOL(__dev_remove_pack);
0600
/**
 *	dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Like __dev_remove_pack(), but also calls synchronize_net() so that
 *	it is safe to free @pt once this function returns.  May sleep.
 */
0613 void dev_remove_pack(struct packet_type *pt)
0614 {
0615 __dev_remove_pack(pt);
0616
0617 synchronize_net();
0618 }
0619 EXPORT_SYMBOL(dev_remove_pack);
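
/*
 * Illustrative sketch (not part of the kernel): how a module might use
 * dev_add_pack()/dev_remove_pack() to tap a custom EtherType.  The
 * ETH_P_EXAMPLE value, the xmpl_* names and the handler body are made up.
 */
#define ETH_P_EXAMPLE	0x88b5	/* IEEE 802 local experimental EtherType */

static int xmpl_rcv(struct sk_buff *skb, struct net_device *dev,
		    struct packet_type *pt, struct net_device *orig_dev)
{
	/* The handler owns the skb and must consume or free it. */
	net_info_ratelimited("%s: got %u byte frame\n", dev->name, skb->len);
	consume_skb(skb);
	return NET_RX_SUCCESS;
}

static struct packet_type xmpl_ptype __maybe_unused = {
	.type	= cpu_to_be16(ETH_P_EXAMPLE),
	.func	= xmpl_rcv,
	/* .dev left NULL: receive from every device */
};

/* Typically paired in module init/exit:
 *	dev_add_pack(&xmpl_ptype);
 *	...
 *	dev_remove_pack(&xmpl_ptype);
 */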
0620
/**
 *	dev_get_iflink - get 'iflink' value of an interface
 *	@dev: targeted interface
 *
 *	Return the ifindex of the lower/underlying interface a device is
 *	stacked on, or the device's own ifindex if the driver does not
 *	implement ndo_get_iflink.
 */
0636 int dev_get_iflink(const struct net_device *dev)
0637 {
0638 if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
0639 return dev->netdev_ops->ndo_get_iflink(dev);
0640
0641 return dev->ifindex;
0642 }
0643 EXPORT_SYMBOL(dev_get_iflink);
0644
/**
 *	dev_fill_metadata_dst - fill tunnel metadata for a transmitted skb
 *	@dev: targeted interface
 *	@skb: the packet
 *
 *	Ask the driver (ndo_fill_metadata_dst) to populate the skb's tunnel
 *	metadata dst.  Returns 0 on success or a negative errno.
 */
0654 int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
0655 {
0656 struct ip_tunnel_info *info;
0657
0658 if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
0659 return -EINVAL;
0660
0661 info = skb_tunnel_info_unclone(skb);
0662 if (!info)
0663 return -ENOMEM;
0664 if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
0665 return -EINVAL;
0666
0667 return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
0668 }
0669 EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
0670
0671 static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
0672 {
0673 int k = stack->num_paths++;
0674
0675 if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
0676 return NULL;
0677
0678 return &stack->path[k];
0679 }
0680
0681 int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
0682 struct net_device_path_stack *stack)
0683 {
0684 const struct net_device *last_dev;
0685 struct net_device_path_ctx ctx = {
0686 .dev = dev,
0687 };
0688 struct net_device_path *path;
0689 int ret = 0;
0690
0691 memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
0692 stack->num_paths = 0;
0693 while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
0694 last_dev = ctx.dev;
0695 path = dev_fwd_path(stack);
0696 if (!path)
0697 return -1;
0698
0699 memset(path, 0, sizeof(struct net_device_path));
0700 ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
0701 if (ret < 0)
0702 return -1;
0703
0704 if (WARN_ON_ONCE(last_dev == ctx.dev))
0705 return -1;
0706 }
0707
0708 if (!ctx.dev)
0709 return ret;
0710
0711 path = dev_fwd_path(stack);
0712 if (!path)
0713 return -1;
0714 path->type = DEV_PATH_ETHERNET;
0715 path->dev = ctx.dev;
0716
0717 return ret;
0718 }
0719 EXPORT_SYMBOL_GPL(dev_fill_forward_path);
0720
/**
 *	__dev_get_by_name - find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name.  Returns NULL if the name is not found.
 *	No reference is taken; the caller must hold RTNL (or dev_base_lock)
 *	and must not use the pointer after dropping it.
 */
0733 struct net_device *__dev_get_by_name(struct net *net, const char *name)
0734 {
0735 struct netdev_name_node *node_name;
0736
0737 node_name = netdev_name_node_lookup(net, name);
0738 return node_name ? node_name->dev : NULL;
0739 }
0740 EXPORT_SYMBOL(__dev_get_by_name);
0741
/**
 *	dev_get_by_name_rcu - find a device by its name, under RCU
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Like __dev_get_by_name(), but for rcu_read_lock() callers.  No
 *	reference is taken, so the device is only guaranteed to stay around
 *	for the duration of the RCU read-side critical section.
 */
0754 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
0755 {
0756 struct netdev_name_node *node_name;
0757
0758 node_name = netdev_name_node_lookup_rcu(net, name);
0759 return node_name ? node_name->dev : NULL;
0760 }
0761 EXPORT_SYMBOL(dev_get_by_name_rcu);
0762
/**
 *	dev_get_by_name - find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name and take a reference on it.  Returns NULL
 *	if the name is not found; otherwise the caller must release the
 *	reference with dev_put() when done with the device.
 */
0775 struct net_device *dev_get_by_name(struct net *net, const char *name)
0776 {
0777 struct net_device *dev;
0778
0779 rcu_read_lock();
0780 dev = dev_get_by_name_rcu(net, name);
0781 dev_hold(dev);
0782 rcu_read_unlock();
0783 return dev;
0784 }
0785 EXPORT_SYMBOL(dev_get_by_name);
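
/*
 * Illustrative sketch (not part of the kernel): the name-lookup flavours
 * and their locking/refcount rules.  xmpl_lookup_mtu() is a made-up helper.
 */
static int __maybe_unused xmpl_lookup_mtu(struct net *net, const char *ifname)
{
	struct net_device *dev;
	int mtu = -ENODEV;

	/* a) No reference taken: only valid under RCU (or RTNL for
	 *    __dev_get_by_name()); @dev must not be used afterwards.
	 */
	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, ifname);
	if (dev)
		mtu = READ_ONCE(dev->mtu);
	rcu_read_unlock();

	/* b) Reference taken: usable outside the critical section, but the
	 *    caller must drop it with dev_put() when done.
	 */
	dev = dev_get_by_name(net, ifname);
	if (dev) {
		mtu = READ_ONCE(dev->mtu);
		dev_put(dev);
	}

	return mtu;
}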
0786
/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Returns NULL if the device is not found.  No reference is taken;
 *	the caller must hold RTNL (or dev_base_lock).
 */
0799 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
0800 {
0801 struct net_device *dev;
0802 struct hlist_head *head = dev_index_hash(net, ifindex);
0803
0804 hlist_for_each_entry(dev, head, index_hlist)
0805 if (dev->ifindex == ifindex)
0806 return dev;
0807
0808 return NULL;
0809 }
0810 EXPORT_SYMBOL(__dev_get_by_index);
0811
/**
 *	dev_get_by_index_rcu - find a device by its ifindex, under RCU
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Like __dev_get_by_index(), but for rcu_read_lock() callers; no
 *	reference is taken.
 */
0823 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
0824 {
0825 struct net_device *dev;
0826 struct hlist_head *head = dev_index_hash(net, ifindex);
0827
0828 hlist_for_each_entry_rcu(dev, head, index_hlist)
0829 if (dev->ifindex == ifindex)
0830 return dev;
0831
0832 return NULL;
0833 }
0834 EXPORT_SYMBOL(dev_get_by_index_rcu);
0835
/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Returns NULL if the device is not found, otherwise a device with a
 *	reference held that the caller must drop with dev_put().
 */
0848 struct net_device *dev_get_by_index(struct net *net, int ifindex)
0849 {
0850 struct net_device *dev;
0851
0852 rcu_read_lock();
0853 dev = dev_get_by_index_rcu(net, ifindex);
0854 dev_hold(dev);
0855 rcu_read_unlock();
0856 return dev;
0857 }
0858 EXPORT_SYMBOL(dev_get_by_index);
0859
/**
 *	dev_get_by_napi_id - find a device by its NAPI id
 *	@napi_id: id of the NAPI struct
 *
 *	Must be called under rcu_read_lock().  Returns NULL for invalid ids
 *	(below MIN_NAPI_ID) or unknown ids; no reference is taken.
 */
0870 struct net_device *dev_get_by_napi_id(unsigned int napi_id)
0871 {
0872 struct napi_struct *napi;
0873
0874 WARN_ON_ONCE(!rcu_read_lock_held());
0875
0876 if (napi_id < MIN_NAPI_ID)
0877 return NULL;
0878
0879 napi = napi_by_id(napi_id);
0880
0881 return napi ? napi->dev : NULL;
0882 }
0883 EXPORT_SYMBOL(dev_get_by_napi_id);
0884
/*
 * netdev_get_name - copy the name of the device with the given ifindex
 * into @name.  Returns 0 on success or -ENODEV if no such device exists.
 */
0891 int netdev_get_name(struct net *net, char *name, int ifindex)
0892 {
0893 struct net_device *dev;
0894 int ret;
0895
0896 down_read(&devnet_rename_sem);
0897 rcu_read_lock();
0898
0899 dev = dev_get_by_index_rcu(net, ifindex);
0900 if (!dev) {
0901 ret = -ENODEV;
0902 goto out;
0903 }
0904
0905 strcpy(name, dev->name);
0906
0907 ret = 0;
0908 out:
0909 rcu_read_unlock();
0910 up_read(&devnet_rename_sem);
0911 return ret;
0912 }
0913
/**
 *	dev_getbyhwaddr_rcu - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device (ARPHRD_*)
 *	@ha: hardware address
 *
 *	Must be called under RCU (or RTNL).  Returns the first matching
 *	device or NULL; no reference is taken.
 */
0928 struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
0929 const char *ha)
0930 {
0931 struct net_device *dev;
0932
0933 for_each_netdev_rcu(net, dev)
0934 if (dev->type == type &&
0935 !memcmp(dev->dev_addr, ha, dev->addr_len))
0936 return dev;
0937
0938 return NULL;
0939 }
0940 EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
0941
0942 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
0943 {
0944 struct net_device *dev, *ret = NULL;
0945
0946 rcu_read_lock();
0947 for_each_netdev_rcu(net, dev)
0948 if (dev->type == type) {
0949 dev_hold(dev);
0950 ret = dev;
0951 break;
0952 }
0953 rcu_read_unlock();
0954 return ret;
0955 }
0956 EXPORT_SYMBOL(dev_getfirstbyhwtype);
0957
/**
 *	__dev_get_by_flags - find any device with given flags
 *	@net: the applicable net namespace
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Returns the first device whose flags match @if_flags on the bits
 *	selected by @mask, or NULL.  Caller must hold RTNL; no reference is
 *	taken.
 */
0969 struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
0970 unsigned short mask)
0971 {
0972 struct net_device *dev, *ret;
0973
0974 ASSERT_RTNL();
0975
0976 ret = NULL;
0977 for_each_netdev(net, dev) {
0978 if (((dev->flags ^ if_flags) & mask) == 0) {
0979 ret = dev;
0980 break;
0981 }
0982 }
0983 return ret;
0984 }
0985 EXPORT_SYMBOL(__dev_get_by_flags);
0986
/**
 *	dev_valid_name - check if a name is acceptable for a network device
 *	@name: name string
 *
 *	Names must be non-empty, shorter than IFNAMSIZ, must not be "." or
 *	"..", and may not contain '/', ':' or whitespace (they are used as
 *	sysfs directory names, among other things).
 */
0995 bool dev_valid_name(const char *name)
0996 {
0997 if (*name == '\0')
0998 return false;
0999 if (strnlen(name, IFNAMSIZ) == IFNAMSIZ)
1000 return false;
1001 if (!strcmp(name, ".") || !strcmp(name, ".."))
1002 return false;
1003
1004 while (*name) {
1005 if (*name == '/' || *name == ':' || isspace(*name))
1006 return false;
1007 name++;
1008 }
1009 return true;
1010 }
1011 EXPORT_SYMBOL(dev_valid_name);
1012
/*
 * __dev_alloc_name - allocate a name for a device
 *
 * @name is a format string containing at most one "%d".  The first free
 * unit number is chosen, the resulting name is written to @buf (IFNAMSIZ
 * bytes) and the unit number is returned; otherwise a negative errno
 * (-EINVAL for a bad format, -ENFILE if all units are taken).
 */
1028 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1029 {
1030 int i = 0;
1031 const char *p;
1032 const int max_netdevices = 8*PAGE_SIZE;
1033 unsigned long *inuse;
1034 struct net_device *d;
1035
1036 if (!dev_valid_name(name))
1037 return -EINVAL;
1038
1039 p = strchr(name, '%');
1040 if (p) {
		/*
		 * Verify the format string, which may come from userspace:
		 * it must contain exactly one "%d" and no other '%'
		 * characters.
		 */
1046 if (p[1] != 'd' || strchr(p + 2, '%'))
1047 return -EINVAL;
1048
1049
1050 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
1051 if (!inuse)
1052 return -ENOMEM;
1053
1054 for_each_netdev(net, d) {
1055 struct netdev_name_node *name_node;
1056 list_for_each_entry(name_node, &d->name_node->list, list) {
1057 if (!sscanf(name_node->name, name, &i))
1058 continue;
1059 if (i < 0 || i >= max_netdevices)
1060 continue;
1061
1062
1063 snprintf(buf, IFNAMSIZ, name, i);
1064 if (!strncmp(buf, name_node->name, IFNAMSIZ))
1065 __set_bit(i, inuse);
1066 }
1067 if (!sscanf(d->name, name, &i))
1068 continue;
1069 if (i < 0 || i >= max_netdevices)
1070 continue;
1071
1072
1073 snprintf(buf, IFNAMSIZ, name, i);
1074 if (!strncmp(buf, d->name, IFNAMSIZ))
1075 __set_bit(i, inuse);
1076 }
1077
1078 i = find_first_zero_bit(inuse, max_netdevices);
1079 free_page((unsigned long) inuse);
1080 }
1081
1082 snprintf(buf, IFNAMSIZ, name, i);
1083 if (!netdev_name_in_use(net, buf))
1084 return i;
1085
1086
1087
1088
1089
1090 return -ENFILE;
1091 }
1092
1093 static int dev_alloc_name_ns(struct net *net,
1094 struct net_device *dev,
1095 const char *name)
1096 {
1097 char buf[IFNAMSIZ];
1098 int ret;
1099
1100 BUG_ON(!net);
1101 ret = __dev_alloc_name(net, name, buf);
1102 if (ret >= 0)
1103 strlcpy(dev->name, buf, IFNAMSIZ);
1104 return ret;
1105 }
1106
/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string, see __dev_alloc_name()
 *
 *	Picks a free unit number in the device's net namespace, stores the
 *	resulting name in dev->name and returns the unit number, or a
 *	negative errno on failure.
 */
1121 int dev_alloc_name(struct net_device *dev, const char *name)
1122 {
1123 return dev_alloc_name_ns(dev_net(dev), dev, name);
1124 }
1125 EXPORT_SYMBOL(dev_alloc_name);
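
/*
 * Illustrative sketch (not part of the kernel): a driver asking for a
 * "%d"-style name before registration.  The "xmpl%d" pattern and the
 * xmpl_* helper are made up; most drivers simply pass the format to
 * alloc_netdev() and let registration pick the unit number.
 */
static int __maybe_unused xmpl_name_device(struct net_device *dev)
{
	int unit;

	ASSERT_RTNL();

	unit = dev_alloc_name(dev, "xmpl%d");	/* e.g. "xmpl0", "xmpl1", ... */
	if (unit < 0)
		return unit;

	netdev_info(dev, "assigned unit %d\n", unit);
	return 0;
}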
1126
1127 static int dev_get_valid_name(struct net *net, struct net_device *dev,
1128 const char *name)
1129 {
1130 BUG_ON(!net);
1131
1132 if (!dev_valid_name(name))
1133 return -EINVAL;
1134
1135 if (strchr(name, '%'))
1136 return dev_alloc_name_ns(net, dev, name);
1137 else if (netdev_name_in_use(net, name))
1138 return -EEXIST;
1139 else if (dev->name != name)
1140 strlcpy(dev->name, name, IFNAMSIZ);
1141
1142 return 0;
1143 }
1144
/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string with "%d") to use
 *
 *	Change the name of a device.  Caller must hold RTNL; returns 0 on
 *	success or a negative errno.
 */
1153 int dev_change_name(struct net_device *dev, const char *newname)
1154 {
1155 unsigned char old_assign_type;
1156 char oldname[IFNAMSIZ];
1157 int err = 0;
1158 int ret;
1159 struct net *net;
1160
1161 ASSERT_RTNL();
1162 BUG_ON(!dev_net(dev));
1163
1164 net = dev_net(dev);
1165
	/* Renaming a device that is up is only allowed if the device opted
	 * in with IFF_LIVE_RENAME_OK; otherwise userspace that tracks
	 * interfaces by name could be confused by a live rename.
	 */
1178 if (dev->flags & IFF_UP &&
1179 likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
1180 return -EBUSY;
1181
1182 down_write(&devnet_rename_sem);
1183
1184 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
1185 up_write(&devnet_rename_sem);
1186 return 0;
1187 }
1188
1189 memcpy(oldname, dev->name, IFNAMSIZ);
1190
1191 err = dev_get_valid_name(net, dev, newname);
1192 if (err < 0) {
1193 up_write(&devnet_rename_sem);
1194 return err;
1195 }
1196
1197 if (oldname[0] && !strchr(oldname, '%'))
1198 netdev_info(dev, "renamed from %s\n", oldname);
1199
1200 old_assign_type = dev->name_assign_type;
1201 dev->name_assign_type = NET_NAME_RENAMED;
1202
1203 rollback:
1204 ret = device_rename(&dev->dev, dev->name);
1205 if (ret) {
1206 memcpy(dev->name, oldname, IFNAMSIZ);
1207 dev->name_assign_type = old_assign_type;
1208 up_write(&devnet_rename_sem);
1209 return ret;
1210 }
1211
1212 up_write(&devnet_rename_sem);
1213
1214 netdev_adjacent_rename_links(dev, oldname);
1215
1216 write_lock(&dev_base_lock);
1217 netdev_name_node_del(dev->name_node);
1218 write_unlock(&dev_base_lock);
1219
1220 synchronize_rcu();
1221
1222 write_lock(&dev_base_lock);
1223 netdev_name_node_add(net, dev->name_node);
1224 write_unlock(&dev_base_lock);
1225
1226 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1227 ret = notifier_to_errno(ret);
1228
1229 if (ret) {
1230
1231 if (err >= 0) {
1232 err = ret;
1233 down_write(&devnet_rename_sem);
1234 memcpy(dev->name, oldname, IFNAMSIZ);
1235 memcpy(oldname, newname, IFNAMSIZ);
1236 dev->name_assign_type = old_assign_type;
1237 old_assign_type = NET_NAME_RENAMED;
1238 goto rollback;
1239 } else {
1240 netdev_err(dev, "name change rollback failed: %d\n",
1241 ret);
1242 }
1243 }
1244
1245 return err;
1246 }
1247
/**
 *	dev_set_alias - change ifalias of a device
 *	@dev: device
 *	@alias: name up to IFALIASZ-1 bytes
 *	@len: limit of bytes to copy from alias
 *
 *	Set or clear (len == 0) the ifalias.  Returns the stored length or a
 *	negative errno.
 */
1256 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1257 {
1258 struct dev_ifalias *new_alias = NULL;
1259
1260 if (len >= IFALIASZ)
1261 return -EINVAL;
1262
1263 if (len) {
1264 new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
1265 if (!new_alias)
1266 return -ENOMEM;
1267
1268 memcpy(new_alias->ifalias, alias, len);
1269 new_alias->ifalias[len] = 0;
1270 }
1271
1272 mutex_lock(&ifalias_mutex);
1273 new_alias = rcu_replace_pointer(dev->ifalias, new_alias,
1274 mutex_is_locked(&ifalias_mutex));
1275 mutex_unlock(&ifalias_mutex);
1276
1277 if (new_alias)
1278 kfree_rcu(new_alias, rcuhead);
1279
1280 return len;
1281 }
1282 EXPORT_SYMBOL(dev_set_alias);
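
/*
 * Illustrative sketch (not part of the kernel): setting and reading back an
 * ifalias from kernel code.  Normally this is driven from userspace via
 * rtnetlink/sysfs; the xmpl_* helper is made up.
 */
static void __maybe_unused xmpl_label_device(struct net_device *dev,
					     const char *label)
{
	char buf[IFALIASZ];
	int ret;

	ret = dev_set_alias(dev, label, strlen(label));
	if (ret < 0)
		return;

	if (dev_get_alias(dev, buf, sizeof(buf)) > 0)
		netdev_dbg(dev, "alias is now \"%s\"\n", buf);
}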
1283
/**
 *	dev_get_alias - copy the ifalias of a device into a buffer
 *	@dev: device
 *	@name: destination buffer
 *	@len: size of the buffer
 *
 *	Returns the snprintf()-style length of the alias, or 0 if none is
 *	set.
 */
1293 int dev_get_alias(const struct net_device *dev, char *name, size_t len)
1294 {
1295 const struct dev_ifalias *alias;
1296 int ret = 0;
1297
1298 rcu_read_lock();
1299 alias = rcu_dereference(dev->ifalias);
1300 if (alias)
1301 ret = snprintf(name, len, "%s", alias->ifalias);
1302 rcu_read_unlock();
1303
1304 return ret;
1305 }
1306
1307
1308
1309
1310
1311
1312
1313 void netdev_features_change(struct net_device *dev)
1314 {
1315 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1316 }
1317 EXPORT_SYMBOL(netdev_features_change);
1318
/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	If the device is up, fire the NETDEV_CHANGE notifier chain and send
 *	an RTM_NEWLINK message so userspace learns about the change.
 */
1327 void netdev_state_change(struct net_device *dev)
1328 {
1329 if (dev->flags & IFF_UP) {
1330 struct netdev_notifier_change_info change_info = {
1331 .info.dev = dev,
1332 };
1333
1334 call_netdevice_notifiers_info(NETDEV_CHANGE,
1335 &change_info.info);
1336 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
1337 }
1338 }
1339 EXPORT_SYMBOL(netdev_state_change);
1340
/**
 *	__netdev_notify_peers - notify network peers about existence of @dev,
 *	to be called when RTNL is already held
 *	@dev: network device
 *
 *	Generate traffic such as gratuitous ARP/unsolicited NA and IGMP
 *	reports so that peers and switches update their caches, e.g. after a
 *	virtual machine migration.
 */
1352 void __netdev_notify_peers(struct net_device *dev)
1353 {
1354 ASSERT_RTNL();
1355 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
1356 call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev);
1357 }
1358 EXPORT_SYMBOL(__netdev_notify_peers);
1359
/**
 *	netdev_notify_peers - like __netdev_notify_peers(), but takes RTNL
 *	@dev: network device
 */
1370 void netdev_notify_peers(struct net_device *dev)
1371 {
1372 rtnl_lock();
1373 __netdev_notify_peers(dev);
1374 rtnl_unlock();
1375 }
1376 EXPORT_SYMBOL(netdev_notify_peers);
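
/*
 * Illustrative sketch (not part of the kernel): a virtual driver telling
 * the stack to re-announce the device after its backend has moved (e.g.
 * after live migration), so peers refresh their ARP/ND caches.
 * xmpl_backend_moved() is a made-up callback.
 */
static void __maybe_unused xmpl_backend_moved(struct net_device *dev)
{
	/* Takes and releases RTNL internally; use __netdev_notify_peers()
	 * instead if RTNL is already held.
	 */
	netdev_notify_peers(dev);
}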
1377
1378 static int napi_threaded_poll(void *data);
1379
1380 static int napi_kthread_create(struct napi_struct *n)
1381 {
1382 int err = 0;
1383
1384
1385
1386
1387
1388 n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
1389 n->dev->name, n->napi_id);
1390 if (IS_ERR(n->thread)) {
1391 err = PTR_ERR(n->thread);
1392 pr_err("kthread_run failed with err %d\n", err);
1393 n->thread = NULL;
1394 }
1395
1396 return err;
1397 }
1398
1399 static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
1400 {
1401 const struct net_device_ops *ops = dev->netdev_ops;
1402 int ret;
1403
1404 ASSERT_RTNL();
1405 dev_addr_check(dev);
1406
1407 if (!netif_device_present(dev)) {
1408
1409 if (dev->dev.parent)
1410 pm_runtime_resume(dev->dev.parent);
1411 if (!netif_device_present(dev))
1412 return -ENODEV;
1413 }
1414
1415
1416
1417
1418
1419 netpoll_poll_disable(dev);
1420
1421 ret = call_netdevice_notifiers_extack(NETDEV_PRE_UP, dev, extack);
1422 ret = notifier_to_errno(ret);
1423 if (ret)
1424 return ret;
1425
1426 set_bit(__LINK_STATE_START, &dev->state);
1427
1428 if (ops->ndo_validate_addr)
1429 ret = ops->ndo_validate_addr(dev);
1430
1431 if (!ret && ops->ndo_open)
1432 ret = ops->ndo_open(dev);
1433
1434 netpoll_poll_enable(dev);
1435
1436 if (ret)
1437 clear_bit(__LINK_STATE_START, &dev->state);
1438 else {
1439 dev->flags |= IFF_UP;
1440 dev_set_rx_mode(dev);
1441 dev_activate(dev);
1442 add_device_randomness(dev->dev_addr, dev->addr_len);
1443 }
1444
1445 return ret;
1446 }
1447
/**
 *	dev_open - prepare an interface for use
 *	@dev: device to open
 *	@extack: netlink extended ack, may be NULL
 *
 *	Takes a device from down to up state, running the NETDEV_PRE_UP and
 *	NETDEV_UP notifiers and the driver's ndo_open().  Returns 0 on
 *	success (including when the device is already up) or a negative
 *	errno.  The caller must hold RTNL.
 */
1461 int dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
1462 {
1463 int ret;
1464
1465 if (dev->flags & IFF_UP)
1466 return 0;
1467
1468 ret = __dev_open(dev, extack);
1469 if (ret < 0)
1470 return ret;
1471
1472 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1473 call_netdevice_notifiers(NETDEV_UP, dev);
1474
1475 return ret;
1476 }
1477 EXPORT_SYMBOL(dev_open);
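
/*
 * Illustrative sketch (not part of the kernel): bringing a lower/slave
 * device up from kernel code while passing the caller's extack through.
 * xmpl_enslave() and the surrounding logic are made up.
 */
static int __maybe_unused xmpl_enslave(struct net_device *slave,
				       struct netlink_ext_ack *extack)
{
	int err;

	ASSERT_RTNL();

	err = dev_open(slave, extack);
	if (err)
		netdev_err(slave, "failed to bring device up: %d\n", err);

	return err;
}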
1478
1479 static void __dev_close_many(struct list_head *head)
1480 {
1481 struct net_device *dev;
1482
1483 ASSERT_RTNL();
1484 might_sleep();
1485
1486 list_for_each_entry(dev, head, close_list) {
1487
1488 netpoll_poll_disable(dev);
1489
1490 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1491
1492 clear_bit(__LINK_STATE_START, &dev->state);
1493
1494
1495
1496
1497
1498
1499
1500 smp_mb__after_atomic();
1501 }
1502
1503 dev_deactivate_many(head);
1504
1505 list_for_each_entry(dev, head, close_list) {
1506 const struct net_device_ops *ops = dev->netdev_ops;
1507
1508
1509
1510
1511
1512
1513
1514
1515 if (ops->ndo_stop)
1516 ops->ndo_stop(dev);
1517
1518 dev->flags &= ~IFF_UP;
1519 netpoll_poll_enable(dev);
1520 }
1521 }
1522
1523 static void __dev_close(struct net_device *dev)
1524 {
1525 LIST_HEAD(single);
1526
1527 list_add(&dev->close_list, &single);
1528 __dev_close_many(&single);
1529 list_del(&single);
1530 }
1531
1532 void dev_close_many(struct list_head *head, bool unlink)
1533 {
1534 struct net_device *dev, *tmp;
1535
1536
1537 list_for_each_entry_safe(dev, tmp, head, close_list)
1538 if (!(dev->flags & IFF_UP))
1539 list_del_init(&dev->close_list);
1540
1541 __dev_close_many(head);
1542
1543 list_for_each_entry_safe(dev, tmp, head, close_list) {
1544 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1545 call_netdevice_notifiers(NETDEV_DOWN, dev);
1546 if (unlink)
1547 list_del_init(&dev->close_list);
1548 }
1549 }
1550 EXPORT_SYMBOL(dev_close_many);
1551
/**
 *	dev_close - shutdown an interface
 *	@dev: device to shutdown
 *
 *	Takes a device from up to down state, firing NETDEV_GOING_DOWN and
 *	NETDEV_DOWN along the way.  A nop if the device is not up.  The
 *	caller must hold RTNL.
 */
1561 void dev_close(struct net_device *dev)
1562 {
1563 if (dev->flags & IFF_UP) {
1564 LIST_HEAD(single);
1565
1566 list_add(&dev->close_list, &single);
1567 dev_close_many(&single, true);
1568 list_del(&single);
1569 }
1570 }
1571 EXPORT_SYMBOL(dev_close);
1572
/**
 *	dev_disable_lro - disable Large Receive Offload on a device
 *	@dev: device
 *
 *	LRO must not be used on devices that forward packets (bridging,
 *	routing), so disable it on @dev and, recursively, on all its lower
 *	devices.  Needs RTNL.
 */
1582 void dev_disable_lro(struct net_device *dev)
1583 {
1584 struct net_device *lower_dev;
1585 struct list_head *iter;
1586
1587 dev->wanted_features &= ~NETIF_F_LRO;
1588 netdev_update_features(dev);
1589
1590 if (unlikely(dev->features & NETIF_F_LRO))
1591 netdev_WARN(dev, "failed to disable LRO!\n");
1592
1593 netdev_for_each_lower_dev(dev, lower_dev, iter)
1594 dev_disable_lro(lower_dev);
1595 }
1596 EXPORT_SYMBOL(dev_disable_lro);
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606 static void dev_disable_gro_hw(struct net_device *dev)
1607 {
1608 dev->wanted_features &= ~NETIF_F_GRO_HW;
1609 netdev_update_features(dev);
1610
1611 if (unlikely(dev->features & NETIF_F_GRO_HW))
1612 netdev_WARN(dev, "failed to disable GRO_HW!\n");
1613 }
1614
1615 const char *netdev_cmd_to_name(enum netdev_cmd cmd)
1616 {
1617 #define N(val) \
1618 case NETDEV_##val: \
1619 return "NETDEV_" __stringify(val);
1620 switch (cmd) {
1621 N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER)
1622 N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE)
1623 N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE)
1624 N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER)
1625 N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO)
1626 N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO)
1627 N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
1628 N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
1629 N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
1630 N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE)
1631 N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA)
1632 }
1633 #undef N
1634 return "UNKNOWN_NETDEV_EVENT";
1635 }
1636 EXPORT_SYMBOL_GPL(netdev_cmd_to_name);
1637
1638 static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
1639 struct net_device *dev)
1640 {
1641 struct netdev_notifier_info info = {
1642 .dev = dev,
1643 };
1644
1645 return nb->notifier_call(nb, val, &info);
1646 }
1647
1648 static int call_netdevice_register_notifiers(struct notifier_block *nb,
1649 struct net_device *dev)
1650 {
1651 int err;
1652
1653 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
1654 err = notifier_to_errno(err);
1655 if (err)
1656 return err;
1657
1658 if (!(dev->flags & IFF_UP))
1659 return 0;
1660
1661 call_netdevice_notifier(nb, NETDEV_UP, dev);
1662 return 0;
1663 }
1664
1665 static void call_netdevice_unregister_notifiers(struct notifier_block *nb,
1666 struct net_device *dev)
1667 {
1668 if (dev->flags & IFF_UP) {
1669 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1670 dev);
1671 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1672 }
1673 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1674 }
1675
1676 static int call_netdevice_register_net_notifiers(struct notifier_block *nb,
1677 struct net *net)
1678 {
1679 struct net_device *dev;
1680 int err;
1681
1682 for_each_netdev(net, dev) {
1683 err = call_netdevice_register_notifiers(nb, dev);
1684 if (err)
1685 goto rollback;
1686 }
1687 return 0;
1688
1689 rollback:
1690 for_each_netdev_continue_reverse(net, dev)
1691 call_netdevice_unregister_notifiers(nb, dev);
1692 return err;
1693 }
1694
1695 static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb,
1696 struct net *net)
1697 {
1698 struct net_device *dev;
1699
1700 for_each_netdev(net, dev)
1701 call_netdevice_unregister_notifiers(nb, dev);
1702 }
1703
1704 static int dev_boot_phase = 1;
1705
/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register @nb on the global netdevice notifier chain.  For devices
 *	that already exist, the notifier is replayed NETDEV_REGISTER (and
 *	NETDEV_UP for running devices).  Returns 0 or a negative errno; on
 *	error the registration is rolled back.
 */
1720 int register_netdevice_notifier(struct notifier_block *nb)
1721 {
1722 struct net *net;
1723 int err;
1724
1725
1726 down_write(&pernet_ops_rwsem);
1727 rtnl_lock();
1728 err = raw_notifier_chain_register(&netdev_chain, nb);
1729 if (err)
1730 goto unlock;
1731 if (dev_boot_phase)
1732 goto unlock;
1733 for_each_net(net) {
1734 err = call_netdevice_register_net_notifiers(nb, net);
1735 if (err)
1736 goto rollback;
1737 }
1738
1739 unlock:
1740 rtnl_unlock();
1741 up_write(&pernet_ops_rwsem);
1742 return err;
1743
1744 rollback:
1745 for_each_net_continue_reverse(net)
1746 call_netdevice_unregister_net_notifiers(nb, net);
1747
1748 raw_notifier_chain_unregister(&netdev_chain, nb);
1749 goto unlock;
1750 }
1751 EXPORT_SYMBOL(register_netdevice_notifier);
1752
/**
 *	unregister_netdevice_notifier - unregister a network notifier block
 *	@nb: notifier
 *
 *	Unregister @nb from the global chain.  Existing devices are replayed
 *	NETDEV_GOING_DOWN/NETDEV_DOWN (if up) and NETDEV_UNREGISTER so the
 *	notifier can release any per-device state.
 */
1767 int unregister_netdevice_notifier(struct notifier_block *nb)
1768 {
1769 struct net *net;
1770 int err;
1771
1772
1773 down_write(&pernet_ops_rwsem);
1774 rtnl_lock();
1775 err = raw_notifier_chain_unregister(&netdev_chain, nb);
1776 if (err)
1777 goto unlock;
1778
1779 for_each_net(net)
1780 call_netdevice_unregister_net_notifiers(nb, net);
1781
1782 unlock:
1783 rtnl_unlock();
1784 up_write(&pernet_ops_rwsem);
1785 return err;
1786 }
1787 EXPORT_SYMBOL(unregister_netdevice_notifier);
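
/*
 * Illustrative sketch (not part of the kernel): a module watching device
 * events through the global notifier chain.  xmpl_netdev_event() and the
 * notifier_block name are made up; per-netns users would call
 * register_netdevice_notifier_net() instead.
 */
static int xmpl_netdev_event(struct notifier_block *nb, unsigned long event,
			     void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	switch (event) {
	case NETDEV_UP:
		netdev_dbg(dev, "is now up\n");
		break;
	case NETDEV_GOING_DOWN:
		netdev_dbg(dev, "about to go down\n");
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block xmpl_netdev_nb __maybe_unused = {
	.notifier_call = xmpl_netdev_event,
};

/* Typically paired in module init/exit:
 *	register_netdevice_notifier(&xmpl_netdev_nb);
 *	...
 *	unregister_netdevice_notifier(&xmpl_netdev_nb);
 */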
1788
1789 static int __register_netdevice_notifier_net(struct net *net,
1790 struct notifier_block *nb,
1791 bool ignore_call_fail)
1792 {
1793 int err;
1794
1795 err = raw_notifier_chain_register(&net->netdev_chain, nb);
1796 if (err)
1797 return err;
1798 if (dev_boot_phase)
1799 return 0;
1800
1801 err = call_netdevice_register_net_notifiers(nb, net);
1802 if (err && !ignore_call_fail)
1803 goto chain_unregister;
1804
1805 return 0;
1806
1807 chain_unregister:
1808 raw_notifier_chain_unregister(&net->netdev_chain, nb);
1809 return err;
1810 }
1811
1812 static int __unregister_netdevice_notifier_net(struct net *net,
1813 struct notifier_block *nb)
1814 {
1815 int err;
1816
1817 err = raw_notifier_chain_unregister(&net->netdev_chain, nb);
1818 if (err)
1819 return err;
1820
1821 call_netdevice_unregister_net_notifiers(nb, net);
1822 return 0;
1823 }
1824
/**
 *	register_netdevice_notifier_net - register a per-netns notifier block
 *	@net: network namespace to watch
 *	@nb: notifier
 *
 *	Like register_netdevice_notifier(), but only for events on devices
 *	in @net.
 */
1840 int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb)
1841 {
1842 int err;
1843
1844 rtnl_lock();
1845 err = __register_netdevice_notifier_net(net, nb, false);
1846 rtnl_unlock();
1847 return err;
1848 }
1849 EXPORT_SYMBOL(register_netdevice_notifier_net);
1850
/**
 *	unregister_netdevice_notifier_net - unregister a per-netns notifier
 *	@net: network namespace the notifier was registered against
 *	@nb: notifier
 *
 *	Counterpart of register_netdevice_notifier_net(); replays DOWN and
 *	UNREGISTER events for devices in @net before returning.
 */
1867 int unregister_netdevice_notifier_net(struct net *net,
1868 struct notifier_block *nb)
1869 {
1870 int err;
1871
1872 rtnl_lock();
1873 err = __unregister_netdevice_notifier_net(net, nb);
1874 rtnl_unlock();
1875 return err;
1876 }
1877 EXPORT_SYMBOL(unregister_netdevice_notifier_net);
1878
1879 int register_netdevice_notifier_dev_net(struct net_device *dev,
1880 struct notifier_block *nb,
1881 struct netdev_net_notifier *nn)
1882 {
1883 int err;
1884
1885 rtnl_lock();
1886 err = __register_netdevice_notifier_net(dev_net(dev), nb, false);
1887 if (!err) {
1888 nn->nb = nb;
1889 list_add(&nn->list, &dev->net_notifier_list);
1890 }
1891 rtnl_unlock();
1892 return err;
1893 }
1894 EXPORT_SYMBOL(register_netdevice_notifier_dev_net);
1895
1896 int unregister_netdevice_notifier_dev_net(struct net_device *dev,
1897 struct notifier_block *nb,
1898 struct netdev_net_notifier *nn)
1899 {
1900 int err;
1901
1902 rtnl_lock();
1903 list_del(&nn->list);
1904 err = __unregister_netdevice_notifier_net(dev_net(dev), nb);
1905 rtnl_unlock();
1906 return err;
1907 }
1908 EXPORT_SYMBOL(unregister_netdevice_notifier_dev_net);
1909
1910 static void move_netdevice_notifiers_dev_net(struct net_device *dev,
1911 struct net *net)
1912 {
1913 struct netdev_net_notifier *nn;
1914
1915 list_for_each_entry(nn, &dev->net_notifier_list, list) {
1916 __unregister_netdevice_notifier_net(dev_net(dev), nn->nb);
1917 __register_netdevice_notifier_net(net, nn->nb, true);
1918 }
1919 }
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930 static int call_netdevice_notifiers_info(unsigned long val,
1931 struct netdev_notifier_info *info)
1932 {
1933 struct net *net = dev_net(info->dev);
1934 int ret;
1935
1936 ASSERT_RTNL();
1937
1938
1939
1940
1941
1942 ret = raw_notifier_call_chain(&net->netdev_chain, val, info);
1943 if (ret & NOTIFY_STOP_MASK)
1944 return ret;
1945 return raw_notifier_call_chain(&netdev_chain, val, info);
1946 }
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961 static int
1962 call_netdevice_notifiers_info_robust(unsigned long val_up,
1963 unsigned long val_down,
1964 struct netdev_notifier_info *info)
1965 {
1966 struct net *net = dev_net(info->dev);
1967
1968 ASSERT_RTNL();
1969
1970 return raw_notifier_call_chain_robust(&net->netdev_chain,
1971 val_up, val_down, info);
1972 }
1973
1974 static int call_netdevice_notifiers_extack(unsigned long val,
1975 struct net_device *dev,
1976 struct netlink_ext_ack *extack)
1977 {
1978 struct netdev_notifier_info info = {
1979 .dev = dev,
1980 .extack = extack,
1981 };
1982
1983 return call_netdevice_notifiers_info(val, &info);
1984 }
1985
/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *	@val: NETDEV_* event value
 *	@dev: net_device the event concerns
 *
 *	Calls the per-netns chain first, then the global chain.  Must be
 *	called under RTNL.
 */
1995 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1996 {
1997 return call_netdevice_notifiers_extack(val, dev, NULL);
1998 }
1999 EXPORT_SYMBOL(call_netdevice_notifiers);
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010 static int call_netdevice_notifiers_mtu(unsigned long val,
2011 struct net_device *dev, u32 arg)
2012 {
2013 struct netdev_notifier_info_ext info = {
2014 .info.dev = dev,
2015 .ext.mtu = arg,
2016 };
2017
2018 BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0);
2019
2020 return call_netdevice_notifiers_info(val, &info.info);
2021 }
2022
2023 #ifdef CONFIG_NET_INGRESS
2024 static DEFINE_STATIC_KEY_FALSE(ingress_needed_key);
2025
2026 void net_inc_ingress_queue(void)
2027 {
2028 static_branch_inc(&ingress_needed_key);
2029 }
2030 EXPORT_SYMBOL_GPL(net_inc_ingress_queue);
2031
2032 void net_dec_ingress_queue(void)
2033 {
2034 static_branch_dec(&ingress_needed_key);
2035 }
2036 EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
2037 #endif
2038
2039 #ifdef CONFIG_NET_EGRESS
2040 static DEFINE_STATIC_KEY_FALSE(egress_needed_key);
2041
2042 void net_inc_egress_queue(void)
2043 {
2044 static_branch_inc(&egress_needed_key);
2045 }
2046 EXPORT_SYMBOL_GPL(net_inc_egress_queue);
2047
2048 void net_dec_egress_queue(void)
2049 {
2050 static_branch_dec(&egress_needed_key);
2051 }
2052 EXPORT_SYMBOL_GPL(net_dec_egress_queue);
2053 #endif
2054
2055 DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
2056 EXPORT_SYMBOL(netstamp_needed_key);
2057 #ifdef CONFIG_JUMP_LABEL
2058 static atomic_t netstamp_needed_deferred;
2059 static atomic_t netstamp_wanted;
2060 static void netstamp_clear(struct work_struct *work)
2061 {
2062 int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
2063 int wanted;
2064
2065 wanted = atomic_add_return(deferred, &netstamp_wanted);
2066 if (wanted > 0)
2067 static_branch_enable(&netstamp_needed_key);
2068 else
2069 static_branch_disable(&netstamp_needed_key);
2070 }
2071 static DECLARE_WORK(netstamp_work, netstamp_clear);
2072 #endif
2073
2074 void net_enable_timestamp(void)
2075 {
2076 #ifdef CONFIG_JUMP_LABEL
2077 int wanted;
2078
2079 while (1) {
2080 wanted = atomic_read(&netstamp_wanted);
2081 if (wanted <= 0)
2082 break;
2083 if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted)
2084 return;
2085 }
2086 atomic_inc(&netstamp_needed_deferred);
2087 schedule_work(&netstamp_work);
2088 #else
2089 static_branch_inc(&netstamp_needed_key);
2090 #endif
2091 }
2092 EXPORT_SYMBOL(net_enable_timestamp);
2093
2094 void net_disable_timestamp(void)
2095 {
2096 #ifdef CONFIG_JUMP_LABEL
2097 int wanted;
2098
2099 while (1) {
2100 wanted = atomic_read(&netstamp_wanted);
2101 if (wanted <= 1)
2102 break;
2103 if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted)
2104 return;
2105 }
2106 atomic_dec(&netstamp_needed_deferred);
2107 schedule_work(&netstamp_work);
2108 #else
2109 static_branch_dec(&netstamp_needed_key);
2110 #endif
2111 }
2112 EXPORT_SYMBOL(net_disable_timestamp);
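
/*
 * Illustrative sketch (not part of the kernel): a subsystem that needs RX
 * software timestamps bumps the static key for as long as it is active.
 * The xmpl_* helpers are made up; the usual callers are the socket
 * timestamping options via sock_enable_timestamp().
 */
static void __maybe_unused xmpl_start_capture(void)
{
	net_enable_timestamp();		/* skbs now get skb->tstamp set on RX */
}

static void __maybe_unused xmpl_stop_capture(void)
{
	net_disable_timestamp();	/* balanced with the enable above */
}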
2113
2114 static inline void net_timestamp_set(struct sk_buff *skb)
2115 {
2116 skb->tstamp = 0;
2117 skb->mono_delivery_time = 0;
2118 if (static_branch_unlikely(&netstamp_needed_key))
2119 skb->tstamp = ktime_get_real();
2120 }
2121
2122 #define net_timestamp_check(COND, SKB) \
2123 if (static_branch_unlikely(&netstamp_needed_key)) { \
2124 if ((COND) && !(SKB)->tstamp) \
2125 (SKB)->tstamp = ktime_get_real(); \
2126 } \
2127
2128 bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
2129 {
2130 return __is_skb_forwardable(dev, skb, true);
2131 }
2132 EXPORT_SYMBOL_GPL(is_skb_forwardable);
2133
2134 static int __dev_forward_skb2(struct net_device *dev, struct sk_buff *skb,
2135 bool check_mtu)
2136 {
2137 int ret = ____dev_forward_skb(dev, skb, check_mtu);
2138
2139 if (likely(!ret)) {
2140 skb->protocol = eth_type_trans(skb, dev);
2141 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
2142 }
2143
2144 return ret;
2145 }
2146
2147 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
2148 {
2149 return __dev_forward_skb2(dev, skb, true);
2150 }
2151 EXPORT_SYMBOL_GPL(__dev_forward_skb);
2152
/**
 *	dev_forward_skb - loopback an skb to another netif
 *	@dev: destination network device
 *	@skb: buffer to forward
 *
 *	Inject @skb into the receive path of @dev as if it had arrived on
 *	it, after scrubbing namespace-local state and checking MTU and
 *	protocol.  Used by device pairs such as veth.  The skb is always
 *	consumed; returns NET_RX_SUCCESS or NET_RX_DROP.
 */
2171 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
2172 {
2173 return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
2174 }
2175 EXPORT_SYMBOL_GPL(dev_forward_skb);
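
/*
 * Illustrative sketch (not part of the kernel): the transmit path of a
 * veth-like pair, which hands every skb sent on one side to its peer with
 * dev_forward_skb().  xmpl_pair_priv and xmpl_pair_xmit() are made up.
 */
struct xmpl_pair_priv {
	struct net_device __rcu *peer;
};

static netdev_tx_t __maybe_unused xmpl_pair_xmit(struct sk_buff *skb,
						 struct net_device *dev)
{
	struct xmpl_pair_priv *priv = netdev_priv(dev);
	struct net_device *peer;

	rcu_read_lock();
	peer = rcu_dereference(priv->peer);
	/* dev_forward_skb() always consumes the skb, on success and on drop. */
	if (likely(peer))
		dev_forward_skb(peer, skb);
	else
		kfree_skb(skb);
	rcu_read_unlock();

	return NETDEV_TX_OK;
}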
2176
2177 int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb)
2178 {
2179 return __dev_forward_skb2(dev, skb, false) ?: netif_rx_internal(skb);
2180 }
2181
2182 static inline int deliver_skb(struct sk_buff *skb,
2183 struct packet_type *pt_prev,
2184 struct net_device *orig_dev)
2185 {
2186 if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
2187 return -ENOMEM;
2188 refcount_inc(&skb->users);
2189 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2190 }
2191
2192 static inline void deliver_ptype_list_skb(struct sk_buff *skb,
2193 struct packet_type **pt,
2194 struct net_device *orig_dev,
2195 __be16 type,
2196 struct list_head *ptype_list)
2197 {
2198 struct packet_type *ptype, *pt_prev = *pt;
2199
2200 list_for_each_entry_rcu(ptype, ptype_list, list) {
2201 if (ptype->type != type)
2202 continue;
2203 if (pt_prev)
2204 deliver_skb(skb, pt_prev, orig_dev);
2205 pt_prev = ptype;
2206 }
2207 *pt = pt_prev;
2208 }
2209
2210 static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
2211 {
2212 if (!ptype->af_packet_priv || !skb->sk)
2213 return false;
2214
2215 if (ptype->id_match)
2216 return ptype->id_match(ptype, skb->sk);
2217 else if ((struct sock *)ptype->af_packet_priv == skb->sk)
2218 return true;
2219
2220 return false;
2221 }
2222
/**
 *	dev_nit_active - return true if any network interface taps are in use
 *	@dev: device to check for taps on
 */
2228 bool dev_nit_active(struct net_device *dev)
2229 {
2230 return !list_empty(&ptype_all) || !list_empty(&dev->ptype_all);
2231 }
2232 EXPORT_SYMBOL_GPL(dev_nit_active);
2233
/*
 * Deliver a copy of every outgoing packet to the taps (e.g. AF_PACKET
 * sockets bound to ETH_P_ALL), globally and per device.
 */
2239 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
2240 {
2241 struct packet_type *ptype;
2242 struct sk_buff *skb2 = NULL;
2243 struct packet_type *pt_prev = NULL;
2244 struct list_head *ptype_list = &ptype_all;
2245
2246 rcu_read_lock();
2247 again:
2248 list_for_each_entry_rcu(ptype, ptype_list, list) {
2249 if (ptype->ignore_outgoing)
2250 continue;
2251
2252
2253
2254
2255 if (skb_loop_sk(ptype, skb))
2256 continue;
2257
2258 if (pt_prev) {
2259 deliver_skb(skb2, pt_prev, skb->dev);
2260 pt_prev = ptype;
2261 continue;
2262 }
2263
2264
2265 skb2 = skb_clone(skb, GFP_ATOMIC);
2266 if (!skb2)
2267 goto out_unlock;
2268
2269 net_timestamp_set(skb2);
2270
2271
2272
2273
2274
2275 skb_reset_mac_header(skb2);
2276
2277 if (skb_network_header(skb2) < skb2->data ||
2278 skb_network_header(skb2) > skb_tail_pointer(skb2)) {
2279 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
2280 ntohs(skb2->protocol),
2281 dev->name);
2282 skb_reset_network_header(skb2);
2283 }
2284
2285 skb2->transport_header = skb2->network_header;
2286 skb2->pkt_type = PACKET_OUTGOING;
2287 pt_prev = ptype;
2288 }
2289
2290 if (ptype_list == &ptype_all) {
2291 ptype_list = &dev->ptype_all;
2292 goto again;
2293 }
2294 out_unlock:
2295 if (pt_prev) {
2296 if (!skb_orphan_frags_rx(skb2, GFP_ATOMIC))
2297 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
2298 else
2299 kfree_skb(skb2);
2300 }
2301 rcu_read_unlock();
2302 }
2303 EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318 static void netif_setup_tc(struct net_device *dev, unsigned int txq)
2319 {
2320 int i;
2321 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
2322
2323
2324 if (tc->offset + tc->count > txq) {
2325 netdev_warn(dev, "Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
2326 dev->num_tc = 0;
2327 return;
2328 }
2329
2330
2331 for (i = 1; i < TC_BITMASK + 1; i++) {
2332 int q = netdev_get_prio_tc_map(dev, i);
2333
2334 tc = &dev->tc_to_txq[q];
2335 if (tc->offset + tc->count > txq) {
2336 netdev_warn(dev, "Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
2337 i, q);
2338 netdev_set_prio_tc_map(dev, i, 0);
2339 }
2340 }
2341 }
2342
2343 int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
2344 {
2345 if (dev->num_tc) {
2346 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
2347 int i;
2348
2349
2350 for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
2351 if ((txq - tc->offset) < tc->count)
2352 return i;
2353 }
2354
2355
2356 return -1;
2357 }
2358
2359 return 0;
2360 }
2361 EXPORT_SYMBOL(netdev_txq_to_tc);
2362
2363 #ifdef CONFIG_XPS
2364 static struct static_key xps_needed __read_mostly;
2365 static struct static_key xps_rxqs_needed __read_mostly;
2366 static DEFINE_MUTEX(xps_map_mutex);
2367 #define xmap_dereference(P) \
2368 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
2369
2370 static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
2371 struct xps_dev_maps *old_maps, int tci, u16 index)
2372 {
2373 struct xps_map *map = NULL;
2374 int pos;
2375
2376 if (dev_maps)
2377 map = xmap_dereference(dev_maps->attr_map[tci]);
2378 if (!map)
2379 return false;
2380
2381 for (pos = map->len; pos--;) {
2382 if (map->queues[pos] != index)
2383 continue;
2384
2385 if (map->len > 1) {
2386 map->queues[pos] = map->queues[--map->len];
2387 break;
2388 }
2389
2390 if (old_maps)
2391 RCU_INIT_POINTER(old_maps->attr_map[tci], NULL);
2392 RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
2393 kfree_rcu(map, rcu);
2394 return false;
2395 }
2396
2397 return true;
2398 }
2399
2400 static bool remove_xps_queue_cpu(struct net_device *dev,
2401 struct xps_dev_maps *dev_maps,
2402 int cpu, u16 offset, u16 count)
2403 {
2404 int num_tc = dev_maps->num_tc;
2405 bool active = false;
2406 int tci;
2407
2408 for (tci = cpu * num_tc; num_tc--; tci++) {
2409 int i, j;
2410
2411 for (i = count, j = offset; i--; j++) {
2412 if (!remove_xps_queue(dev_maps, NULL, tci, j))
2413 break;
2414 }
2415
2416 active |= i < 0;
2417 }
2418
2419 return active;
2420 }
2421
2422 static void reset_xps_maps(struct net_device *dev,
2423 struct xps_dev_maps *dev_maps,
2424 enum xps_map_type type)
2425 {
2426 static_key_slow_dec_cpuslocked(&xps_needed);
2427 if (type == XPS_RXQS)
2428 static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
2429
2430 RCU_INIT_POINTER(dev->xps_maps[type], NULL);
2431
2432 kfree_rcu(dev_maps, rcu);
2433 }
2434
2435 static void clean_xps_maps(struct net_device *dev, enum xps_map_type type,
2436 u16 offset, u16 count)
2437 {
2438 struct xps_dev_maps *dev_maps;
2439 bool active = false;
2440 int i, j;
2441
2442 dev_maps = xmap_dereference(dev->xps_maps[type]);
2443 if (!dev_maps)
2444 return;
2445
2446 for (j = 0; j < dev_maps->nr_ids; j++)
2447 active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, count);
2448 if (!active)
2449 reset_xps_maps(dev, dev_maps, type);
2450
2451 if (type == XPS_CPUS) {
2452 for (i = offset + (count - 1); count--; i--)
2453 netdev_queue_numa_node_write(
2454 netdev_get_tx_queue(dev, i), NUMA_NO_NODE);
2455 }
2456 }
2457
2458 static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
2459 u16 count)
2460 {
2461 if (!static_key_false(&xps_needed))
2462 return;
2463
2464 cpus_read_lock();
2465 mutex_lock(&xps_map_mutex);
2466
2467 if (static_key_false(&xps_rxqs_needed))
2468 clean_xps_maps(dev, XPS_RXQS, offset, count);
2469
2470 clean_xps_maps(dev, XPS_CPUS, offset, count);
2471
2472 mutex_unlock(&xps_map_mutex);
2473 cpus_read_unlock();
2474 }
2475
2476 static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
2477 {
2478 netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
2479 }
2480
2481 static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
2482 u16 index, bool is_rxqs_map)
2483 {
2484 struct xps_map *new_map;
2485 int alloc_len = XPS_MIN_MAP_ALLOC;
2486 int i, pos;
2487
2488 for (pos = 0; map && pos < map->len; pos++) {
2489 if (map->queues[pos] != index)
2490 continue;
2491 return map;
2492 }
2493
2494
2495 if (map) {
2496 if (pos < map->alloc_len)
2497 return map;
2498
2499 alloc_len = map->alloc_len * 2;
2500 }
2501
2502
2503
2504
2505 if (is_rxqs_map)
2506 new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
2507 else
2508 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
2509 cpu_to_node(attr_index));
2510 if (!new_map)
2511 return NULL;
2512
2513 for (i = 0; i < pos; i++)
2514 new_map->queues[i] = map->queues[i];
2515 new_map->alloc_len = alloc_len;
2516 new_map->len = pos;
2517
2518 return new_map;
2519 }
2520
2521
2522 static void xps_copy_dev_maps(struct xps_dev_maps *dev_maps,
2523 struct xps_dev_maps *new_dev_maps, int index,
2524 int tc, bool skip_tc)
2525 {
2526 int i, tci = index * dev_maps->num_tc;
2527 struct xps_map *map;
2528
2529
2530 for (i = 0; i < dev_maps->num_tc; i++, tci++) {
2531 if (i == tc && skip_tc)
2532 continue;
2533
2534
2535 map = xmap_dereference(dev_maps->attr_map[tci]);
2536 RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
2537 }
2538 }
2539
2540
2541 int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
2542 u16 index, enum xps_map_type type)
2543 {
2544 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL, *old_dev_maps = NULL;
2545 const unsigned long *online_mask = NULL;
2546 bool active = false, copy = false;
2547 int i, j, tci, numa_node_id = -2;
2548 int maps_sz, num_tc = 1, tc = 0;
2549 struct xps_map *map, *new_map;
2550 unsigned int nr_ids;
2551
2552 if (dev->num_tc) {
2553
2554 num_tc = dev->num_tc;
2555 if (num_tc < 0)
2556 return -EINVAL;
2557
2558
2559 dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
2560
2561 tc = netdev_txq_to_tc(dev, index);
2562 if (tc < 0)
2563 return -EINVAL;
2564 }
2565
2566 mutex_lock(&xps_map_mutex);
2567
2568 dev_maps = xmap_dereference(dev->xps_maps[type]);
2569 if (type == XPS_RXQS) {
2570 maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
2571 nr_ids = dev->num_rx_queues;
2572 } else {
2573 maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
2574 if (num_possible_cpus() > 1)
2575 online_mask = cpumask_bits(cpu_online_mask);
2576 nr_ids = nr_cpu_ids;
2577 }
2578
2579 if (maps_sz < L1_CACHE_BYTES)
2580 maps_sz = L1_CACHE_BYTES;
2581
2582
2583
2584
2585
2586
2587 if (dev_maps &&
2588 dev_maps->num_tc == num_tc && dev_maps->nr_ids == nr_ids)
2589 copy = true;
2590
2591
2592 for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids),
2593 j < nr_ids;) {
2594 if (!new_dev_maps) {
2595 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
2596 if (!new_dev_maps) {
2597 mutex_unlock(&xps_map_mutex);
2598 return -ENOMEM;
2599 }
2600
2601 new_dev_maps->nr_ids = nr_ids;
2602 new_dev_maps->num_tc = num_tc;
2603 }
2604
2605 tci = j * num_tc + tc;
2606 map = copy ? xmap_dereference(dev_maps->attr_map[tci]) : NULL;
2607
2608 map = expand_xps_map(map, j, index, type == XPS_RXQS);
2609 if (!map)
2610 goto error;
2611
2612 RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
2613 }
2614
2615 if (!new_dev_maps)
2616 goto out_no_new_maps;
2617
2618 if (!dev_maps) {
2619
2620 static_key_slow_inc_cpuslocked(&xps_needed);
2621 if (type == XPS_RXQS)
2622 static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
2623 }
2624
2625 for (j = 0; j < nr_ids; j++) {
2626 bool skip_tc = false;
2627
2628 tci = j * num_tc + tc;
2629 if (netif_attr_test_mask(j, mask, nr_ids) &&
2630 netif_attr_test_online(j, online_mask, nr_ids)) {
2631
2632 int pos = 0;
2633
2634 skip_tc = true;
2635
2636 map = xmap_dereference(new_dev_maps->attr_map[tci]);
2637 while ((pos < map->len) && (map->queues[pos] != index))
2638 pos++;
2639
2640 if (pos == map->len)
2641 map->queues[map->len++] = index;
2642 #ifdef CONFIG_NUMA
2643 if (type == XPS_CPUS) {
2644 if (numa_node_id == -2)
2645 numa_node_id = cpu_to_node(j);
2646 else if (numa_node_id != cpu_to_node(j))
2647 numa_node_id = -1;
2648 }
2649 #endif
2650 }
2651
2652 if (copy)
2653 xps_copy_dev_maps(dev_maps, new_dev_maps, j, tc,
2654 skip_tc);
2655 }
2656
2657 rcu_assign_pointer(dev->xps_maps[type], new_dev_maps);
2658
2659
2660 if (!dev_maps)
2661 goto out_no_old_maps;
2662
2663 for (j = 0; j < dev_maps->nr_ids; j++) {
2664 for (i = num_tc, tci = j * dev_maps->num_tc; i--; tci++) {
2665 map = xmap_dereference(dev_maps->attr_map[tci]);
2666 if (!map)
2667 continue;
2668
2669 if (copy) {
2670 new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
2671 if (map == new_map)
2672 continue;
2673 }
2674
2675 RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
2676 kfree_rcu(map, rcu);
2677 }
2678 }
2679
2680 old_dev_maps = dev_maps;
2681
2682 out_no_old_maps:
2683 dev_maps = new_dev_maps;
2684 active = true;
2685
2686 out_no_new_maps:
2687 if (type == XPS_CPUS)
2688 /* update the Tx queue's preferred NUMA node to match the CPUs in the new map */
2689 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
2690 (numa_node_id >= 0) ?
2691 numa_node_id : NUMA_NO_NODE);
2692
2693 if (!dev_maps)
2694 goto out_no_maps;
2695
2696
2697 for (j = 0; j < dev_maps->nr_ids; j++) {
2698 tci = j * dev_maps->num_tc;
2699
2700 for (i = 0; i < dev_maps->num_tc; i++, tci++) {
2701 if (i == tc &&
2702 netif_attr_test_mask(j, mask, dev_maps->nr_ids) &&
2703 netif_attr_test_online(j, online_mask, dev_maps->nr_ids))
2704 continue;
2705
2706 active |= remove_xps_queue(dev_maps,
2707 copy ? old_dev_maps : NULL,
2708 tci, index);
2709 }
2710 }
2711
2712 if (old_dev_maps)
2713 kfree_rcu(old_dev_maps, rcu);
2714
2715
2716 if (!active)
2717 reset_xps_maps(dev, dev_maps, type);
2718
2719 out_no_maps:
2720 mutex_unlock(&xps_map_mutex);
2721
2722 return 0;
2723 error:
2724
2725 for (j = 0; j < nr_ids; j++) {
2726 for (i = num_tc, tci = j * num_tc; i--; tci++) {
2727 new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
2728 map = copy ?
2729 xmap_dereference(dev_maps->attr_map[tci]) :
2730 NULL;
2731 if (new_map && new_map != map)
2732 kfree(new_map);
2733 }
2734 }
2735
2736 mutex_unlock(&xps_map_mutex);
2737
2738 kfree(new_dev_maps);
2739 return -ENOMEM;
2740 }
2741 EXPORT_SYMBOL_GPL(__netif_set_xps_queue);
2742
2743 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
2744 u16 index)
2745 {
2746 int ret;
2747
2748 cpus_read_lock();
2749 ret = __netif_set_xps_queue(dev, cpumask_bits(mask), index, XPS_CPUS);
2750 cpus_read_unlock();
2751
2752 return ret;
2753 }
2754 EXPORT_SYMBOL(netif_set_xps_queue);
2755
2756 #endif
2757 static void netdev_unbind_all_sb_channels(struct net_device *dev)
2758 {
2759 struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
2760
2761
2762 while (txq-- != &dev->_tx[0]) {
2763 if (txq->sb_dev)
2764 netdev_unbind_sb_channel(dev, txq->sb_dev);
2765 }
2766 }
2767
2768 void netdev_reset_tc(struct net_device *dev)
2769 {
2770 #ifdef CONFIG_XPS
2771 netif_reset_xps_queues_gt(dev, 0);
2772 #endif
2773 netdev_unbind_all_sb_channels(dev);
2774
2775
2776 dev->num_tc = 0;
2777 memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
2778 memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
2779 }
2780 EXPORT_SYMBOL(netdev_reset_tc);
2781
2782 int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
2783 {
2784 if (tc >= dev->num_tc)
2785 return -EINVAL;
2786
2787 #ifdef CONFIG_XPS
2788 netif_reset_xps_queues(dev, offset, count);
2789 #endif
2790 dev->tc_to_txq[tc].count = count;
2791 dev->tc_to_txq[tc].offset = offset;
2792 return 0;
2793 }
2794 EXPORT_SYMBOL(netdev_set_tc_queue);
2795
2796 int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
2797 {
2798 if (num_tc > TC_MAX_QUEUE)
2799 return -EINVAL;
2800
2801 #ifdef CONFIG_XPS
2802 netif_reset_xps_queues_gt(dev, 0);
2803 #endif
2804 netdev_unbind_all_sb_channels(dev);
2805
2806 dev->num_tc = num_tc;
2807 return 0;
2808 }
2809 EXPORT_SYMBOL(netdev_set_num_tc);
2810
2811 void netdev_unbind_sb_channel(struct net_device *dev,
2812 struct net_device *sb_dev)
2813 {
2814 struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
2815
2816 #ifdef CONFIG_XPS
2817 netif_reset_xps_queues_gt(sb_dev, 0);
2818 #endif
2819 memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq));
2820 memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map));
2821
2822 while (txq-- != &dev->_tx[0]) {
2823 if (txq->sb_dev == sb_dev)
2824 txq->sb_dev = NULL;
2825 }
2826 }
2827 EXPORT_SYMBOL(netdev_unbind_sb_channel);
2828
2829 int netdev_bind_sb_channel_queue(struct net_device *dev,
2830 struct net_device *sb_dev,
2831 u8 tc, u16 count, u16 offset)
2832 {
2833
2834 if (sb_dev->num_tc >= 0 || tc >= dev->num_tc)
2835 return -EINVAL;
2836
2837
2838 if ((offset + count) > dev->real_num_tx_queues)
2839 return -EINVAL;
2840
2841
2842 sb_dev->tc_to_txq[tc].count = count;
2843 sb_dev->tc_to_txq[tc].offset = offset;
2844
2845
2846
2847
2848 while (count--)
2849 netdev_get_tx_queue(dev, count + offset)->sb_dev = sb_dev;
2850
2851 return 0;
2852 }
2853 EXPORT_SYMBOL(netdev_bind_sb_channel_queue);
2854
2855 int netdev_set_sb_channel(struct net_device *dev, u16 channel)
2856 {
2857
2858 if (netif_is_multiqueue(dev))
2859 return -ENODEV;
2860
2861
2862
2863
2864
2865
2866 if (channel > S16_MAX)
2867 return -EINVAL;
2868
2869 dev->num_tc = -channel;
2870
2871 return 0;
2872 }
2873 EXPORT_SYMBOL(netdev_set_sb_channel);
2874
2875
2876
2877
2878
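/*
 * netif_set_real_num_tx_queues - set the actual number of TX queues used
 * @dev: network device
 * @txq: actual number of TX queues, 1 <= txq <= dev->num_tx_queues
 *
 * Must be called with the rtnl lock held once the device is registered.
 * When shrinking the count, stale qdisc and XPS state for the removed
 * queues is reset after an RCU grace period.  Returns 0 or a negative
 * error code.
 */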
2879 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
2880 {
2881 bool disabling;
2882 int rc;
2883
2884 disabling = txq < dev->real_num_tx_queues;
2885
2886 if (txq < 1 || txq > dev->num_tx_queues)
2887 return -EINVAL;
2888
2889 if (dev->reg_state == NETREG_REGISTERED ||
2890 dev->reg_state == NETREG_UNREGISTERING) {
2891 ASSERT_RTNL();
2892
2893 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
2894 txq);
2895 if (rc)
2896 return rc;
2897
2898 if (dev->num_tc)
2899 netif_setup_tc(dev, txq);
2900
2901 dev_qdisc_change_real_num_tx(dev, txq);
2902
2903 dev->real_num_tx_queues = txq;
2904
2905 if (disabling) {
2906 synchronize_net();
2907 qdisc_reset_all_tx_gt(dev, txq);
2908 #ifdef CONFIG_XPS
2909 netif_reset_xps_queues_gt(dev, txq);
2910 #endif
2911 }
2912 } else {
2913 dev->real_num_tx_queues = txq;
2914 }
2915
2916 return 0;
2917 }
2918 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
2919
2920 #ifdef CONFIG_SYSFS
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
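/*
 * netif_set_real_num_rx_queues - set the actual number of RX queues used
 * @dev: network device
 * @rxq: actual number of RX queues, 1 <= rxq <= dev->num_rx_queues
 *
 * Must be called with the rtnl lock held once the device is registered,
 * so the RX queue kobjects can be updated.  Returns 0 or a negative
 * error code.
 */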
2931 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
2932 {
2933 int rc;
2934
2935 if (rxq < 1 || rxq > dev->num_rx_queues)
2936 return -EINVAL;
2937
2938 if (dev->reg_state == NETREG_REGISTERED) {
2939 ASSERT_RTNL();
2940
2941 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
2942 rxq);
2943 if (rc)
2944 return rc;
2945 }
2946
2947 dev->real_num_rx_queues = rxq;
2948 return 0;
2949 }
2950 EXPORT_SYMBOL(netif_set_real_num_rx_queues);
2951 #endif
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
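/*
 * netif_set_real_num_queues - set the actual number of RX and TX queues
 * @dev: network device
 * @txq: actual number of TX queues
 * @rxq: actual number of RX queues
 *
 * Grows the counts first and shrinks afterwards, so the device never
 * appears with fewer queues than either the old or the new configuration;
 * if growing the TX side fails, the RX count is rolled back.
 */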
2962 int netif_set_real_num_queues(struct net_device *dev,
2963 unsigned int txq, unsigned int rxq)
2964 {
2965 unsigned int old_rxq = dev->real_num_rx_queues;
2966 int err;
2967
2968 if (txq < 1 || txq > dev->num_tx_queues ||
2969 rxq < 1 || rxq > dev->num_rx_queues)
2970 return -EINVAL;
2971
2972
2973
2974
2975 if (rxq > dev->real_num_rx_queues) {
2976 err = netif_set_real_num_rx_queues(dev, rxq);
2977 if (err)
2978 return err;
2979 }
2980 if (txq > dev->real_num_tx_queues) {
2981 err = netif_set_real_num_tx_queues(dev, txq);
2982 if (err)
2983 goto undo_rx;
2984 }
2985 if (rxq < dev->real_num_rx_queues)
2986 WARN_ON(netif_set_real_num_rx_queues(dev, rxq));
2987 if (txq < dev->real_num_tx_queues)
2988 WARN_ON(netif_set_real_num_tx_queues(dev, txq));
2989
2990 return 0;
2991 undo_rx:
2992 WARN_ON(netif_set_real_num_rx_queues(dev, old_rxq));
2993 return err;
2994 }
2995 EXPORT_SYMBOL(netif_set_real_num_queues);
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
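/* Set the device limit on the size of a single TSO super-frame (clamped to
 * GSO_MAX_SIZE) and lower the user-visible gso_max_size if it now exceeds
 * the hardware limit.
 */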
3006 void netif_set_tso_max_size(struct net_device *dev, unsigned int size)
3007 {
3008 dev->tso_max_size = min(GSO_MAX_SIZE, size);
3009 if (size < READ_ONCE(dev->gso_max_size))
3010 netif_set_gso_max_size(dev, size);
3011 }
3012 EXPORT_SYMBOL(netif_set_tso_max_size);
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
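/* Set the device limit on the number of segments in a single TSO frame and
 * lower the user-visible gso_max_segs if it now exceeds the hardware limit.
 */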
3023 void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs)
3024 {
3025 dev->tso_max_segs = segs;
3026 if (segs < READ_ONCE(dev->gso_max_segs))
3027 netif_set_gso_max_segs(dev, segs);
3028 }
3029 EXPORT_SYMBOL(netif_set_tso_max_segs);
3030
3031
3032
3033
3034
3035
3036 void netif_inherit_tso_max(struct net_device *to, const struct net_device *from)
3037 {
3038 netif_set_tso_max_size(to, from->tso_max_size);
3039 netif_set_tso_max_segs(to, from->tso_max_segs);
3040 }
3041 EXPORT_SYMBOL(netif_inherit_tso_max);
3042
3043
3044
3045
3046
3047
3048
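/* Default number of RSS queues: one per physical core among the online
 * CPUs (SMT siblings counted once), halved when there are more than two
 * cores.  Falls back to 1 in a kdump kernel or on allocation failure.
 */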
3049 int netif_get_num_default_rss_queues(void)
3050 {
3051 cpumask_var_t cpus;
3052 int cpu, count = 0;
3053
3054 if (unlikely(is_kdump_kernel() || !zalloc_cpumask_var(&cpus, GFP_KERNEL)))
3055 return 1;
3056
3057 cpumask_copy(cpus, cpu_online_mask);
3058 for_each_cpu(cpu, cpus) {
3059 ++count;
3060 cpumask_andnot(cpus, cpus, topology_sibling_cpumask(cpu));
3061 }
3062 free_cpumask_var(cpus);
3063
3064 return count > 2 ? DIV_ROUND_UP(count, 2) : count;
3065 }
3066 EXPORT_SYMBOL(netif_get_num_default_rss_queues);
3067
3068 static void __netif_reschedule(struct Qdisc *q)
3069 {
3070 struct softnet_data *sd;
3071 unsigned long flags;
3072
3073 local_irq_save(flags);
3074 sd = this_cpu_ptr(&softnet_data);
3075 q->next_sched = NULL;
3076 *sd->output_queue_tailp = q;
3077 sd->output_queue_tailp = &q->next_sched;
3078 raise_softirq_irqoff(NET_TX_SOFTIRQ);
3079 local_irq_restore(flags);
3080 }
3081
3082 void __netif_schedule(struct Qdisc *q)
3083 {
3084 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
3085 __netif_reschedule(q);
3086 }
3087 EXPORT_SYMBOL(__netif_schedule);
3088
3089 struct dev_kfree_skb_cb {
3090 enum skb_free_reason reason;
3091 };
3092
3093 static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
3094 {
3095 return (struct dev_kfree_skb_cb *)skb->cb;
3096 }
3097
3098 void netif_schedule_queue(struct netdev_queue *txq)
3099 {
3100 rcu_read_lock();
3101 if (!netif_xmit_stopped(txq)) {
3102 struct Qdisc *q = rcu_dereference(txq->qdisc);
3103
3104 __netif_schedule(q);
3105 }
3106 rcu_read_unlock();
3107 }
3108 EXPORT_SYMBOL(netif_schedule_queue);
3109
3110 void netif_tx_wake_queue(struct netdev_queue *dev_queue)
3111 {
3112 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) {
3113 struct Qdisc *q;
3114
3115 rcu_read_lock();
3116 q = rcu_dereference(dev_queue->qdisc);
3117 __netif_schedule(q);
3118 rcu_read_unlock();
3119 }
3120 }
3121 EXPORT_SYMBOL(netif_tx_wake_queue);
3122
3123 void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
3124 {
3125 unsigned long flags;
3126
3127 if (unlikely(!skb))
3128 return;
3129
3130 if (likely(refcount_read(&skb->users) == 1)) {
3131 smp_rmb();
3132 refcount_set(&skb->users, 0);
3133 } else if (likely(!refcount_dec_and_test(&skb->users))) {
3134 return;
3135 }
3136 get_kfree_skb_cb(skb)->reason = reason;
3137 local_irq_save(flags);
3138 skb->next = __this_cpu_read(softnet_data.completion_queue);
3139 __this_cpu_write(softnet_data.completion_queue, skb);
3140 raise_softirq_irqoff(NET_TX_SOFTIRQ);
3141 local_irq_restore(flags);
3142 }
3143 EXPORT_SYMBOL(__dev_kfree_skb_irq);
3144
3145 void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
3146 {
3147 if (in_hardirq() || irqs_disabled())
3148 __dev_kfree_skb_irq(skb, reason);
3149 else
3150 dev_kfree_skb(skb);
3151 }
3152 EXPORT_SYMBOL(__dev_kfree_skb_any);
3153
3154
3155
3156
3157
3158
3159
3160
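/*
 * netif_device_detach - mark the device as removed
 * @dev: network device
 *
 * Clears the "present" bit and, if the interface was running, stops all
 * of its transmit queues.
 */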
3161 void netif_device_detach(struct net_device *dev)
3162 {
3163 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
3164 netif_running(dev)) {
3165 netif_tx_stop_all_queues(dev);
3166 }
3167 }
3168 EXPORT_SYMBOL(netif_device_detach);
3169
3170
3171
3172
3173
3174
3175
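/*
 * netif_device_attach - mark the device as present again
 * @dev: network device
 *
 * Sets the "present" bit and, if the interface is running, wakes its
 * transmit queues and re-arms the transmit watchdog.
 */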
3176 void netif_device_attach(struct net_device *dev)
3177 {
3178 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
3179 netif_running(dev)) {
3180 netif_tx_wake_all_queues(dev);
3181 __netdev_watchdog_up(dev);
3182 }
3183 }
3184 EXPORT_SYMBOL(netif_device_attach);
3185
3186
3187
3188
3189
3190 static u16 skb_tx_hash(const struct net_device *dev,
3191 const struct net_device *sb_dev,
3192 struct sk_buff *skb)
3193 {
3194 u32 hash;
3195 u16 qoffset = 0;
3196 u16 qcount = dev->real_num_tx_queues;
3197
3198 if (dev->num_tc) {
3199 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
3200
3201 qoffset = sb_dev->tc_to_txq[tc].offset;
3202 qcount = sb_dev->tc_to_txq[tc].count;
3203 if (unlikely(!qcount)) {
3204 net_warn_ratelimited("%s: invalid qcount, qoffset %u for tc %u\n",
3205 sb_dev->name, qoffset, tc);
3206 qoffset = 0;
3207 qcount = dev->real_num_tx_queues;
3208 }
3209 }
3210
3211 if (skb_rx_queue_recorded(skb)) {
3212 hash = skb_get_rx_queue(skb);
3213 if (hash >= qoffset)
3214 hash -= qoffset;
3215 while (unlikely(hash >= qcount))
3216 hash -= qcount;
3217 return hash + qoffset;
3218 }
3219
3220 return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
3221 }
3222
3223 static void skb_warn_bad_offload(const struct sk_buff *skb)
3224 {
3225 static const netdev_features_t null_features;
3226 struct net_device *dev = skb->dev;
3227 const char *name = "";
3228
3229 if (!net_ratelimit())
3230 return;
3231
3232 if (dev) {
3233 if (dev->dev.parent)
3234 name = dev_driver_string(dev->dev.parent);
3235 else
3236 name = netdev_name(dev);
3237 }
3238 skb_dump(KERN_WARNING, skb, false);
3239 WARN(1, "%s: caps=(%pNF, %pNF)\n",
3240 name, dev ? &dev->features : &null_features,
3241 skb->sk ? &skb->sk->sk_route_caps : &null_features);
3242 }
3243
3244
3245
3246
3247
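/* Compute the transport checksum in software for an skb the device cannot
 * checksum itself, store it at csum_start + csum_offset, and mark the skb
 * CHECKSUM_NONE.  GSO skbs are rejected with -EINVAL.
 */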
3248 int skb_checksum_help(struct sk_buff *skb)
3249 {
3250 __wsum csum;
3251 int ret = 0, offset;
3252
3253 if (skb->ip_summed == CHECKSUM_COMPLETE)
3254 goto out_set_summed;
3255
3256 if (unlikely(skb_is_gso(skb))) {
3257 skb_warn_bad_offload(skb);
3258 return -EINVAL;
3259 }
3260
3261
3262
3263
3264 if (skb_has_shared_frag(skb)) {
3265 ret = __skb_linearize(skb);
3266 if (ret)
3267 goto out;
3268 }
3269
3270 offset = skb_checksum_start_offset(skb);
3271 ret = -EINVAL;
3272 if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
3273 DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
3274 goto out;
3275 }
3276 csum = skb_checksum(skb, offset, skb->len - offset, 0);
3277
3278 offset += skb->csum_offset;
3279 if (WARN_ON_ONCE(offset + sizeof(__sum16) > skb_headlen(skb))) {
3280 DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
3281 goto out;
3282 }
3283 ret = skb_ensure_writable(skb, offset + sizeof(__sum16));
3284 if (ret)
3285 goto out;
3286
3287 *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
3288 out_set_summed:
3289 skb->ip_summed = CHECKSUM_NONE;
3290 out:
3291 return ret;
3292 }
3293 EXPORT_SYMBOL(skb_checksum_help);
3294
3295 int skb_crc32c_csum_help(struct sk_buff *skb)
3296 {
3297 __le32 crc32c_csum;
3298 int ret = 0, offset, start;
3299
3300 if (skb->ip_summed != CHECKSUM_PARTIAL)
3301 goto out;
3302
3303 if (unlikely(skb_is_gso(skb)))
3304 goto out;
3305
3306
3307
3308
3309 if (unlikely(skb_has_shared_frag(skb))) {
3310 ret = __skb_linearize(skb);
3311 if (ret)
3312 goto out;
3313 }
3314 start = skb_checksum_start_offset(skb);
3315 offset = start + offsetof(struct sctphdr, checksum);
3316 if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
3317 ret = -EINVAL;
3318 goto out;
3319 }
3320
3321 ret = skb_ensure_writable(skb, offset + sizeof(__le32));
3322 if (ret)
3323 goto out;
3324
3325 crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
3326 skb->len - start, ~(__u32)0,
3327 crc32c_csum_stub));
3328 *(__le32 *)(skb->data + offset) = crc32c_csum;
3329 skb->ip_summed = CHECKSUM_NONE;
3330 skb->csum_not_inet = 0;
3331 out:
3332 return ret;
3333 }
3334
3335 __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
3336 {
3337 __be16 type = skb->protocol;
3338
3339
3340 if (type == htons(ETH_P_TEB)) {
3341 struct ethhdr *eth;
3342
3343 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
3344 return 0;
3345
3346 eth = (struct ethhdr *)skb->data;
3347 type = eth->h_proto;
3348 }
3349
3350 return __vlan_get_protocol(skb, type, depth);
3351 }
3352
3353
3354
3355 static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
3356 {
3357 if (tx_path)
3358 return skb->ip_summed != CHECKSUM_PARTIAL &&
3359 skb->ip_summed != CHECKSUM_UNNECESSARY;
3360
3361 return skb->ip_summed == CHECKSUM_NONE;
3362 }
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
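/* Segment the given GSO skb and return the resulting list of segments, or
 * an ERR_PTR on failure.  @tx_path selects the transmit-side checksum
 * sanity checks; SKB_GSO_CB_OFFSET bytes of the original cb are preserved.
 */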
3377 struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
3378 netdev_features_t features, bool tx_path)
3379 {
3380 struct sk_buff *segs;
3381
3382 if (unlikely(skb_needs_check(skb, tx_path))) {
3383 int err;
3384
3385
3386 err = skb_cow_head(skb, 0);
3387 if (err < 0)
3388 return ERR_PTR(err);
3389 }
3390
3391
3392
3393
3394
3395 if (features & NETIF_F_GSO_PARTIAL) {
3396 netdev_features_t partial_features = NETIF_F_GSO_ROBUST;
3397 struct net_device *dev = skb->dev;
3398
3399 partial_features |= dev->features & dev->gso_partial_features;
3400 if (!skb_gso_ok(skb, features | partial_features))
3401 features &= ~NETIF_F_GSO_PARTIAL;
3402 }
3403
3404 BUILD_BUG_ON(SKB_GSO_CB_OFFSET +
3405 sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
3406
3407 SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
3408 SKB_GSO_CB(skb)->encap_level = 0;
3409
3410 skb_reset_mac_header(skb);
3411 skb_reset_mac_len(skb);
3412
3413 segs = skb_mac_gso_segment(skb, features);
3414
3415 if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
3416 skb_warn_bad_offload(skb);
3417
3418 return segs;
3419 }
3420 EXPORT_SYMBOL(__skb_gso_segment);
3421
3422
3423 #ifdef CONFIG_BUG
3424 static void do_netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
3425 {
3426 netdev_err(dev, "hw csum failure\n");
3427 skb_dump(KERN_ERR, skb, true);
3428 dump_stack();
3429 }
3430
3431 void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
3432 {
3433 DO_ONCE_LITE(do_netdev_rx_csum_fault, dev, skb);
3434 }
3435 EXPORT_SYMBOL(netdev_rx_csum_fault);
3436 #endif
3437
3438
3439 static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
3440 {
3441 #ifdef CONFIG_HIGHMEM
3442 int i;
3443
3444 if (!(dev->features & NETIF_F_HIGHDMA)) {
3445 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
3446 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
3447
3448 if (PageHighMem(skb_frag_page(frag)))
3449 return 1;
3450 }
3451 }
3452 #endif
3453 return 0;
3454 }
3455
3456
3457
3458
3459 #if IS_ENABLED(CONFIG_NET_MPLS_GSO)
3460 static netdev_features_t net_mpls_features(struct sk_buff *skb,
3461 netdev_features_t features,
3462 __be16 type)
3463 {
3464 if (eth_p_mpls(type))
3465 features &= skb->dev->mpls_features;
3466
3467 return features;
3468 }
3469 #else
3470 static netdev_features_t net_mpls_features(struct sk_buff *skb,
3471 netdev_features_t features,
3472 __be16 type)
3473 {
3474 return features;
3475 }
3476 #endif
3477
3478 static netdev_features_t harmonize_features(struct sk_buff *skb,
3479 netdev_features_t features)
3480 {
3481 __be16 type;
3482
3483 type = skb_network_protocol(skb, NULL);
3484 features = net_mpls_features(skb, features, type);
3485
3486 if (skb->ip_summed != CHECKSUM_NONE &&
3487 !can_checksum_protocol(features, type)) {
3488 features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
3489 }
3490 if (illegal_highdma(skb->dev, skb))
3491 features &= ~NETIF_F_SG;
3492
3493 return features;
3494 }
3495
3496 netdev_features_t passthru_features_check(struct sk_buff *skb,
3497 struct net_device *dev,
3498 netdev_features_t features)
3499 {
3500 return features;
3501 }
3502 EXPORT_SYMBOL(passthru_features_check);
3503
3504 static netdev_features_t dflt_features_check(struct sk_buff *skb,
3505 struct net_device *dev,
3506 netdev_features_t features)
3507 {
3508 return vlan_features_check(skb, features);
3509 }
3510
3511 static netdev_features_t gso_features_check(const struct sk_buff *skb,
3512 struct net_device *dev,
3513 netdev_features_t features)
3514 {
3515 u16 gso_segs = skb_shinfo(skb)->gso_segs;
3516
3517 if (gso_segs > READ_ONCE(dev->gso_max_segs))
3518 return features & ~NETIF_F_GSO_MASK;
3519
3520 if (!skb_shinfo(skb)->gso_type) {
3521 skb_warn_bad_offload(skb);
3522 return features & ~NETIF_F_GSO_MASK;
3523 }
3524
3525
3526
3527
3528
3529
3530
3531 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL))
3532 features &= ~dev->gso_partial_features;
3533
3534
3535
3536
3537 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
3538 struct iphdr *iph = skb->encapsulation ?
3539 inner_ip_hdr(skb) : ip_hdr(skb);
3540
3541 if (!(iph->frag_off & htons(IP_DF)))
3542 features &= ~NETIF_F_TSO_MANGLEID;
3543 }
3544
3545 return features;
3546 }
3547
3548 netdev_features_t netif_skb_features(struct sk_buff *skb)
3549 {
3550 struct net_device *dev = skb->dev;
3551 netdev_features_t features = dev->features;
3552
3553 if (skb_is_gso(skb))
3554 features = gso_features_check(skb, dev, features);
3555
3556
3557
3558
3559
3560 if (skb->encapsulation)
3561 features &= dev->hw_enc_features;
3562
3563 if (skb_vlan_tagged(skb))
3564 features = netdev_intersect_features(features,
3565 dev->vlan_features |
3566 NETIF_F_HW_VLAN_CTAG_TX |
3567 NETIF_F_HW_VLAN_STAG_TX);
3568
3569 if (dev->netdev_ops->ndo_features_check)
3570 features &= dev->netdev_ops->ndo_features_check(skb, dev,
3571 features);
3572 else
3573 features &= dflt_features_check(skb, dev, features);
3574
3575 return harmonize_features(skb, features);
3576 }
3577 EXPORT_SYMBOL(netif_skb_features);
3578
3579 static int xmit_one(struct sk_buff *skb, struct net_device *dev,
3580 struct netdev_queue *txq, bool more)
3581 {
3582 unsigned int len;
3583 int rc;
3584
3585 if (dev_nit_active(dev))
3586 dev_queue_xmit_nit(skb, dev);
3587
3588 len = skb->len;
3589 trace_net_dev_start_xmit(skb, dev);
3590 rc = netdev_start_xmit(skb, dev, txq, more);
3591 trace_net_dev_xmit(skb, rc, dev, len);
3592
3593 return rc;
3594 }
3595
3596 struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev,
3597 struct netdev_queue *txq, int *ret)
3598 {
3599 struct sk_buff *skb = first;
3600 int rc = NETDEV_TX_OK;
3601
3602 while (skb) {
3603 struct sk_buff *next = skb->next;
3604
3605 skb_mark_not_on_list(skb);
3606 rc = xmit_one(skb, dev, txq, next != NULL);
3607 if (unlikely(!dev_xmit_complete(rc))) {
3608 skb->next = next;
3609 goto out;
3610 }
3611
3612 skb = next;
3613 if (netif_tx_queue_stopped(txq) && skb) {
3614 rc = NETDEV_TX_BUSY;
3615 break;
3616 }
3617 }
3618
3619 out:
3620 *ret = rc;
3621 return skb;
3622 }
3623
3624 static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
3625 netdev_features_t features)
3626 {
3627 if (skb_vlan_tag_present(skb) &&
3628 !vlan_hw_offload_capable(features, skb->vlan_proto))
3629 skb = __vlan_hwaccel_push_inside(skb);
3630 return skb;
3631 }
3632
3633 int skb_csum_hwoffload_help(struct sk_buff *skb,
3634 const netdev_features_t features)
3635 {
3636 if (unlikely(skb_csum_is_sctp(skb)))
3637 return !!(features & NETIF_F_SCTP_CRC) ? 0 :
3638 skb_crc32c_csum_help(skb);
3639
3640 if (features & NETIF_F_HW_CSUM)
3641 return 0;
3642
3643 if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
3644 switch (skb->csum_offset) {
3645 case offsetof(struct tcphdr, check):
3646 case offsetof(struct udphdr, check):
3647 return 0;
3648 }
3649 }
3650
3651 return skb_checksum_help(skb);
3652 }
3653 EXPORT_SYMBOL(skb_csum_hwoffload_help);
3654
3655 static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool *again)
3656 {
3657 netdev_features_t features;
3658
3659 features = netif_skb_features(skb);
3660 skb = validate_xmit_vlan(skb, features);
3661 if (unlikely(!skb))
3662 goto out_null;
3663
3664 skb = sk_validate_xmit_skb(skb, dev);
3665 if (unlikely(!skb))
3666 goto out_null;
3667
3668 if (netif_needs_gso(skb, features)) {
3669 struct sk_buff *segs;
3670
3671 segs = skb_gso_segment(skb, features);
3672 if (IS_ERR(segs)) {
3673 goto out_kfree_skb;
3674 } else if (segs) {
3675 consume_skb(skb);
3676 skb = segs;
3677 }
3678 } else {
3679 if (skb_needs_linearize(skb, features) &&
3680 __skb_linearize(skb))
3681 goto out_kfree_skb;
3682
3683
3684
3685
3686
3687 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3688 if (skb->encapsulation)
3689 skb_set_inner_transport_header(skb,
3690 skb_checksum_start_offset(skb));
3691 else
3692 skb_set_transport_header(skb,
3693 skb_checksum_start_offset(skb));
3694 if (skb_csum_hwoffload_help(skb, features))
3695 goto out_kfree_skb;
3696 }
3697 }
3698
3699 skb = validate_xmit_xfrm(skb, features, again);
3700
3701 return skb;
3702
3703 out_kfree_skb:
3704 kfree_skb(skb);
3705 out_null:
3706 dev_core_stats_tx_dropped_inc(dev);
3707 return NULL;
3708 }
3709
3710 struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again)
3711 {
3712 struct sk_buff *next, *head = NULL, *tail;
3713
3714 for (; skb != NULL; skb = next) {
3715 next = skb->next;
3716 skb_mark_not_on_list(skb);
3717
3718
3719 skb->prev = skb;
3720
3721 skb = validate_xmit_skb(skb, dev, again);
3722 if (!skb)
3723 continue;
3724
3725 if (!head)
3726 head = skb;
3727 else
3728 tail->next = skb;
3729
3730
3731
3732 tail = skb->prev;
3733 }
3734 return head;
3735 }
3736 EXPORT_SYMBOL_GPL(validate_xmit_skb_list);
3737
3738 static void qdisc_pkt_len_init(struct sk_buff *skb)
3739 {
3740 const struct skb_shared_info *shinfo = skb_shinfo(skb);
3741
3742 qdisc_skb_cb(skb)->pkt_len = skb->len;
3743
3744
3745
3746
3747 if (shinfo->gso_size && skb_transport_header_was_set(skb)) {
3748 unsigned int hdr_len;
3749 u16 gso_segs = shinfo->gso_segs;
3750
3751
3752 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
3753
3754
3755 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
3756 const struct tcphdr *th;
3757 struct tcphdr _tcphdr;
3758
3759 th = skb_header_pointer(skb, skb_transport_offset(skb),
3760 sizeof(_tcphdr), &_tcphdr);
3761 if (likely(th))
3762 hdr_len += __tcp_hdrlen(th);
3763 } else {
3764 struct udphdr _udphdr;
3765
3766 if (skb_header_pointer(skb, skb_transport_offset(skb),
3767 sizeof(_udphdr), &_udphdr))
3768 hdr_len += sizeof(struct udphdr);
3769 }
3770
3771 if (shinfo->gso_type & SKB_GSO_DODGY)
3772 gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
3773 shinfo->gso_size);
3774
3775 qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
3776 }
3777 }
3778
3779 static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q,
3780 struct sk_buff **to_free,
3781 struct netdev_queue *txq)
3782 {
3783 int rc;
3784
3785 rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK;
3786 if (rc == NET_XMIT_SUCCESS)
3787 trace_qdisc_enqueue(q, txq, skb);
3788 return rc;
3789 }
3790
3791 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
3792 struct net_device *dev,
3793 struct netdev_queue *txq)
3794 {
3795 spinlock_t *root_lock = qdisc_lock(q);
3796 struct sk_buff *to_free = NULL;
3797 bool contended;
3798 int rc;
3799
3800 qdisc_calculate_pkt_len(skb, q);
3801
3802 if (q->flags & TCQ_F_NOLOCK) {
3803 if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) &&
3804 qdisc_run_begin(q)) {
3805 /* Re-check emptiness now that qdisc_run_begin() succeeded:
3806  * a concurrent requeue may have added packets after the
3807  * first nolock_qdisc_is_empty() test above. */
3808 if (unlikely(!nolock_qdisc_is_empty(q))) {
3809 rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
3810 __qdisc_run(q);
3811 qdisc_run_end(q);
3812
3813 goto no_lock_out;
3814 }
3815
3816 qdisc_bstats_cpu_update(q, skb);
3817 if (sch_direct_xmit(skb, q, dev, txq, NULL, true) &&
3818 !nolock_qdisc_is_empty(q))
3819 __qdisc_run(q);
3820
3821 qdisc_run_end(q);
3822 return NET_XMIT_SUCCESS;
3823 }
3824
3825 rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
3826 qdisc_run(q);
3827
3828 no_lock_out:
3829 if (unlikely(to_free))
3830 kfree_skb_list_reason(to_free,
3831 SKB_DROP_REASON_QDISC_DROP);
3832 return rc;
3833 }
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845 contended = qdisc_is_running(q) || IS_ENABLED(CONFIG_PREEMPT_RT);
3846 if (unlikely(contended))
3847 spin_lock(&q->busylock);
3848
3849 spin_lock(root_lock);
3850 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
3851 __qdisc_drop(skb, &to_free);
3852 rc = NET_XMIT_DROP;
3853 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
3854 qdisc_run_begin(q)) {
3855
3856
3857
3858
3859
3860
3861 qdisc_bstats_update(q, skb);
3862
3863 if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
3864 if (unlikely(contended)) {
3865 spin_unlock(&q->busylock);
3866 contended = false;
3867 }
3868 __qdisc_run(q);
3869 }
3870
3871 qdisc_run_end(q);
3872 rc = NET_XMIT_SUCCESS;
3873 } else {
3874 rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
3875 if (qdisc_run_begin(q)) {
3876 if (unlikely(contended)) {
3877 spin_unlock(&q->busylock);
3878 contended = false;
3879 }
3880 __qdisc_run(q);
3881 qdisc_run_end(q);
3882 }
3883 }
3884 spin_unlock(root_lock);
3885 if (unlikely(to_free))
3886 kfree_skb_list_reason(to_free, SKB_DROP_REASON_QDISC_DROP);
3887 if (unlikely(contended))
3888 spin_unlock(&q->busylock);
3889 return rc;
3890 }
3891
3892 #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
3893 static void skb_update_prio(struct sk_buff *skb)
3894 {
3895 const struct netprio_map *map;
3896 const struct sock *sk;
3897 unsigned int prioidx;
3898
3899 if (skb->priority)
3900 return;
3901 map = rcu_dereference_bh(skb->dev->priomap);
3902 if (!map)
3903 return;
3904 sk = skb_to_full_sk(skb);
3905 if (!sk)
3906 return;
3907
3908 prioidx = sock_cgroup_prioidx(&sk->sk_cgrp_data);
3909
3910 if (prioidx < map->priomap_len)
3911 skb->priority = map->priomap[prioidx];
3912 }
3913 #else
3914 #define skb_update_prio(skb)
3915 #endif
3916
3917
3918
3919
3920
3921
3922
3923 int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
3924 {
3925 skb_reset_mac_header(skb);
3926 __skb_pull(skb, skb_network_offset(skb));
3927 skb->pkt_type = PACKET_LOOPBACK;
3928 if (skb->ip_summed == CHECKSUM_NONE)
3929 skb->ip_summed = CHECKSUM_UNNECESSARY;
3930 DEBUG_NET_WARN_ON_ONCE(!skb_dst(skb));
3931 skb_dst_force(skb);
3932 netif_rx(skb);
3933 return 0;
3934 }
3935 EXPORT_SYMBOL(dev_loopback_xmit);
3936
3937 #ifdef CONFIG_NET_EGRESS
3938 static struct sk_buff *
3939 sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
3940 {
3941 #ifdef CONFIG_NET_CLS_ACT
3942 struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
3943 struct tcf_result cl_res;
3944
3945 if (!miniq)
3946 return skb;
3947
3948 /* qdisc_skb_cb(skb)->pkt_len has already been set by the caller */
3949 tc_skb_cb(skb)->mru = 0;
3950 tc_skb_cb(skb)->post_ct = false;
3951 mini_qdisc_bstats_cpu_update(miniq, skb);
3952
3953 switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
3954 case TC_ACT_OK:
3955 case TC_ACT_RECLASSIFY:
3956 skb->tc_index = TC_H_MIN(cl_res.classid);
3957 break;
3958 case TC_ACT_SHOT:
3959 mini_qdisc_qstats_cpu_drop(miniq);
3960 *ret = NET_XMIT_DROP;
3961 kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
3962 return NULL;
3963 case TC_ACT_STOLEN:
3964 case TC_ACT_QUEUED:
3965 case TC_ACT_TRAP:
3966 *ret = NET_XMIT_SUCCESS;
3967 consume_skb(skb);
3968 return NULL;
3969 case TC_ACT_REDIRECT:
3970
3971 skb_do_redirect(skb);
3972 *ret = NET_XMIT_SUCCESS;
3973 return NULL;
3974 default:
3975 break;
3976 }
3977 #endif
3978
3979 return skb;
3980 }
3981
3982 static struct netdev_queue *
3983 netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
3984 {
3985 int qm = skb_get_queue_mapping(skb);
3986
3987 return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
3988 }
3989
3990 static bool netdev_xmit_txqueue_skipped(void)
3991 {
3992 return __this_cpu_read(softnet_data.xmit.skip_txqueue);
3993 }
3994
3995 void netdev_xmit_skip_txqueue(bool skip)
3996 {
3997 __this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
3998 }
3999 EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
4000 #endif
4001
4002 #ifdef CONFIG_XPS
4003 static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
4004 struct xps_dev_maps *dev_maps, unsigned int tci)
4005 {
4006 int tc = netdev_get_prio_tc_map(dev, skb->priority);
4007 struct xps_map *map;
4008 int queue_index = -1;
4009
4010 if (tc >= dev_maps->num_tc || tci >= dev_maps->nr_ids)
4011 return queue_index;
4012
4013 tci *= dev_maps->num_tc;
4014 tci += tc;
4015
4016 map = rcu_dereference(dev_maps->attr_map[tci]);
4017 if (map) {
4018 if (map->len == 1)
4019 queue_index = map->queues[0];
4020 else
4021 queue_index = map->queues[reciprocal_scale(
4022 skb_get_hash(skb), map->len)];
4023 if (unlikely(queue_index >= dev->real_num_tx_queues))
4024 queue_index = -1;
4025 }
4026 return queue_index;
4027 }
4028 #endif
4029
4030 static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
4031 struct sk_buff *skb)
4032 {
4033 #ifdef CONFIG_XPS
4034 struct xps_dev_maps *dev_maps;
4035 struct sock *sk = skb->sk;
4036 int queue_index = -1;
4037
4038 if (!static_key_false(&xps_needed))
4039 return -1;
4040
4041 rcu_read_lock();
4042 if (!static_key_false(&xps_rxqs_needed))
4043 goto get_cpus_map;
4044
4045 dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_RXQS]);
4046 if (dev_maps) {
4047 int tci = sk_rx_queue_get(sk);
4048
4049 if (tci >= 0)
4050 queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
4051 tci);
4052 }
4053
4054 get_cpus_map:
4055 if (queue_index < 0) {
4056 dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_CPUS]);
4057 if (dev_maps) {
4058 unsigned int tci = skb->sender_cpu - 1;
4059
4060 queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
4061 tci);
4062 }
4063 }
4064 rcu_read_unlock();
4065
4066 return queue_index;
4067 #else
4068 return -1;
4069 #endif
4070 }
4071
4072 u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
4073 struct net_device *sb_dev)
4074 {
4075 return 0;
4076 }
4077 EXPORT_SYMBOL(dev_pick_tx_zero);
4078
4079 u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
4080 struct net_device *sb_dev)
4081 {
4082 return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
4083 }
4084 EXPORT_SYMBOL(dev_pick_tx_cpu_id);
4085
4086 u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
4087 struct net_device *sb_dev)
4088 {
4089 struct sock *sk = skb->sk;
4090 int queue_index = sk_tx_queue_get(sk);
4091
4092 sb_dev = sb_dev ? : dev;
4093
4094 if (queue_index < 0 || skb->ooo_okay ||
4095 queue_index >= dev->real_num_tx_queues) {
4096 int new_index = get_xps_queue(dev, sb_dev, skb);
4097
4098 if (new_index < 0)
4099 new_index = skb_tx_hash(dev, sb_dev, skb);
4100
4101 if (queue_index != new_index && sk &&
4102 sk_fullsock(sk) &&
4103 rcu_access_pointer(sk->sk_dst_cache))
4104 sk_tx_queue_set(sk, new_index);
4105
4106 queue_index = new_index;
4107 }
4108
4109 return queue_index;
4110 }
4111 EXPORT_SYMBOL(netdev_pick_tx);
4112
4113 struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
4114 struct sk_buff *skb,
4115 struct net_device *sb_dev)
4116 {
4117 int queue_index = 0;
4118
4119 #ifdef CONFIG_XPS
4120 u32 sender_cpu = skb->sender_cpu - 1;
4121
4122 if (sender_cpu >= (u32)NR_CPUS)
4123 skb->sender_cpu = raw_smp_processor_id() + 1;
4124 #endif
4125
4126 if (dev->real_num_tx_queues != 1) {
4127 const struct net_device_ops *ops = dev->netdev_ops;
4128
4129 if (ops->ndo_select_queue)
4130 queue_index = ops->ndo_select_queue(dev, skb, sb_dev);
4131 else
4132 queue_index = netdev_pick_tx(dev, skb, sb_dev);
4133
4134 queue_index = netdev_cap_txqueue(dev, queue_index);
4135 }
4136
4137 skb_set_queue_mapping(skb, queue_index);
4138 return netdev_get_tx_queue(dev, queue_index);
4139 }
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
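/* Core transmit path: run the optional egress netfilter hook and TC
 * classifier, pick a Tx queue, then either enqueue the skb on that queue's
 * qdisc or, for queueless (virtual) devices, hand it to the driver under
 * HARD_TX_LOCK.  Returns a negative errno or a positive NET_XMIT / NETDEV_TX
 * code; success does not guarantee the frame actually left the machine,
 * since it may still be dropped by traffic shaping or congestion.
 */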
4162 int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
4163 {
4164 struct net_device *dev = skb->dev;
4165 struct netdev_queue *txq = NULL;
4166 struct Qdisc *q;
4167 int rc = -ENOMEM;
4168 bool again = false;
4169
4170 skb_reset_mac_header(skb);
4171 skb_assert_len(skb);
4172
4173 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
4174 __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED);
4175
4176
4177
4178
4179 rcu_read_lock_bh();
4180
4181 skb_update_prio(skb);
4182
4183 qdisc_pkt_len_init(skb);
4184 #ifdef CONFIG_NET_CLS_ACT
4185 skb->tc_at_ingress = 0;
4186 #endif
4187 #ifdef CONFIG_NET_EGRESS
4188 if (static_branch_unlikely(&egress_needed_key)) {
4189 if (nf_hook_egress_active()) {
4190 skb = nf_hook_egress(skb, &rc, dev);
4191 if (!skb)
4192 goto out;
4193 }
4194
4195 netdev_xmit_skip_txqueue(false);
4196
4197 nf_skip_egress(skb, true);
4198 skb = sch_handle_egress(skb, &rc, dev);
4199 if (!skb)
4200 goto out;
4201 nf_skip_egress(skb, false);
4202
4203 if (netdev_xmit_txqueue_skipped())
4204 txq = netdev_tx_queue_mapping(dev, skb);
4205 }
4206 #endif
4207
4208
4209
4210 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
4211 skb_dst_drop(skb);
4212 else
4213 skb_dst_force(skb);
4214
4215 if (!txq)
4216 txq = netdev_core_pick_tx(dev, skb, sb_dev);
4217
4218 q = rcu_dereference_bh(txq->qdisc);
4219
4220 trace_net_dev_queue(skb);
4221 if (q->enqueue) {
4222 rc = __dev_xmit_skb(skb, q, dev, txq);
4223 goto out;
4224 }
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238 if (dev->flags & IFF_UP) {
4239 int cpu = smp_processor_id();
4240
4241
4242
4243
4244 if (READ_ONCE(txq->xmit_lock_owner) != cpu) {
4245 if (dev_xmit_recursion())
4246 goto recursion_alert;
4247
4248 skb = validate_xmit_skb(skb, dev, &again);
4249 if (!skb)
4250 goto out;
4251
4252 HARD_TX_LOCK(dev, txq, cpu);
4253
4254 if (!netif_xmit_stopped(txq)) {
4255 dev_xmit_recursion_inc();
4256 skb = dev_hard_start_xmit(skb, dev, txq, &rc);
4257 dev_xmit_recursion_dec();
4258 if (dev_xmit_complete(rc)) {
4259 HARD_TX_UNLOCK(dev, txq);
4260 goto out;
4261 }
4262 }
4263 HARD_TX_UNLOCK(dev, txq);
4264 net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
4265 dev->name);
4266 } else {
4267
4268
4269
4270 recursion_alert:
4271 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
4272 dev->name);
4273 }
4274 }
4275
4276 rc = -ENETDOWN;
4277 rcu_read_unlock_bh();
4278
4279 dev_core_stats_tx_dropped_inc(dev);
4280 kfree_skb_list(skb);
4281 return rc;
4282 out:
4283 rcu_read_unlock_bh();
4284 return rc;
4285 }
4286 EXPORT_SYMBOL(__dev_queue_xmit);
4287
4288 int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
4289 {
4290 struct net_device *dev = skb->dev;
4291 struct sk_buff *orig_skb = skb;
4292 struct netdev_queue *txq;
4293 int ret = NETDEV_TX_BUSY;
4294 bool again = false;
4295
4296 if (unlikely(!netif_running(dev) ||
4297 !netif_carrier_ok(dev)))
4298 goto drop;
4299
4300 skb = validate_xmit_skb_list(skb, dev, &again);
4301 if (skb != orig_skb)
4302 goto drop;
4303
4304 skb_set_queue_mapping(skb, queue_id);
4305 txq = skb_get_tx_queue(dev, skb);
4306
4307 local_bh_disable();
4308
4309 dev_xmit_recursion_inc();
4310 HARD_TX_LOCK(dev, txq, smp_processor_id());
4311 if (!netif_xmit_frozen_or_drv_stopped(txq))
4312 ret = netdev_start_xmit(skb, dev, txq, false);
4313 HARD_TX_UNLOCK(dev, txq);
4314 dev_xmit_recursion_dec();
4315
4316 local_bh_enable();
4317 return ret;
4318 drop:
4319 dev_core_stats_tx_dropped_inc(dev);
4320 kfree_skb_list(skb);
4321 return NET_XMIT_DROP;
4322 }
4323 EXPORT_SYMBOL(__dev_direct_xmit);
4324
4325
4326
4327
4328
4329 int netdev_max_backlog __read_mostly = 1000;
4330 EXPORT_SYMBOL(netdev_max_backlog);
4331
4332 int netdev_tstamp_prequeue __read_mostly = 1;
4333 unsigned int sysctl_skb_defer_max __read_mostly = 64;
4334 int netdev_budget __read_mostly = 300;
4335
4336 unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ;
4337 int weight_p __read_mostly = 64;
4338 int dev_weight_rx_bias __read_mostly = 1;
4339 int dev_weight_tx_bias __read_mostly = 1;
4340 int dev_rx_weight __read_mostly = 64;
4341 int dev_tx_weight __read_mostly = 64;
4342
4343
4344 static inline void ____napi_schedule(struct softnet_data *sd,
4345 struct napi_struct *napi)
4346 {
4347 struct task_struct *thread;
4348
4349 lockdep_assert_irqs_disabled();
4350
4351 if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
4352
4353
4354
4355
4356
4357
4358 thread = READ_ONCE(napi->thread);
4359 if (thread) {
4360 /* Avoid the set_bit() when the kthread is sleeping in
4361  * TASK_INTERRUPTIBLE state: napi_thread_wait() proceeds with
4362  * polling whenever it is explicitly woken from here, so the
4363  * SCHED_THREADED flag is only needed when the thread is not
4364  * in that wait. */
4365 if (READ_ONCE(thread->__state) != TASK_INTERRUPTIBLE)
4366 set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
4367 wake_up_process(thread);
4368 return;
4369 }
4370 }
4371
4372 list_add_tail(&napi->poll_list, &sd->poll_list);
4373 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
4374 }
4375
4376 #ifdef CONFIG_RPS
4377
4378
4379 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
4380 EXPORT_SYMBOL(rps_sock_flow_table);
4381 u32 rps_cpu_mask __read_mostly;
4382 EXPORT_SYMBOL(rps_cpu_mask);
4383
4384 struct static_key_false rps_needed __read_mostly;
4385 EXPORT_SYMBOL(rps_needed);
4386 struct static_key_false rfs_needed __read_mostly;
4387 EXPORT_SYMBOL(rfs_needed);
4388
4389 static struct rps_dev_flow *
4390 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
4391 struct rps_dev_flow *rflow, u16 next_cpu)
4392 {
4393 if (next_cpu < nr_cpu_ids) {
4394 #ifdef CONFIG_RFS_ACCEL
4395 struct netdev_rx_queue *rxqueue;
4396 struct rps_dev_flow_table *flow_table;
4397 struct rps_dev_flow *old_rflow;
4398 u32 flow_id;
4399 u16 rxq_index;
4400 int rc;
4401
4402 /* Should this flow be steered to a different hardware RX queue? */
4403 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
4404 !(dev->features & NETIF_F_NTUPLE))
4405 goto out;
4406 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
4407 if (rxq_index == skb_get_rx_queue(skb))
4408 goto out;
4409
4410 rxqueue = dev->_rx + rxq_index;
4411 flow_table = rcu_dereference(rxqueue->rps_flow_table);
4412 if (!flow_table)
4413 goto out;
4414 flow_id = skb_get_hash(skb) & flow_table->mask;
4415 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
4416 rxq_index, flow_id);
4417 if (rc < 0)
4418 goto out;
4419 old_rflow = rflow;
4420 rflow = &flow_table->flows[flow_id];
4421 rflow->filter = rc;
4422 if (old_rflow->filter == rflow->filter)
4423 old_rflow->filter = RPS_NO_FILTER;
4424 out:
4425 #endif
4426 rflow->last_qtail =
4427 per_cpu(softnet_data, next_cpu).input_queue_head;
4428 }
4429
4430 rflow->cpu = next_cpu;
4431 return rflow;
4432 }
4433
4434
4435
4436
4437
4438
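/* Pick the target CPU for a received packet: prefer the RFS flow tables
 * (steering towards the CPU where the consuming thread last ran), falling
 * back to a hash over the RPS CPU map.  Returns the CPU id, or -1 when RPS
 * should not be used; *rflowp is updated so the caller can track the flow.
 */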
4439 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
4440 struct rps_dev_flow **rflowp)
4441 {
4442 const struct rps_sock_flow_table *sock_flow_table;
4443 struct netdev_rx_queue *rxqueue = dev->_rx;
4444 struct rps_dev_flow_table *flow_table;
4445 struct rps_map *map;
4446 int cpu = -1;
4447 u32 tcpu;
4448 u32 hash;
4449
4450 if (skb_rx_queue_recorded(skb)) {
4451 u16 index = skb_get_rx_queue(skb);
4452
4453 if (unlikely(index >= dev->real_num_rx_queues)) {
4454 WARN_ONCE(dev->real_num_rx_queues > 1,
4455 "%s received packet on queue %u, but number "
4456 "of RX queues is %u\n",
4457 dev->name, index, dev->real_num_rx_queues);
4458 goto done;
4459 }
4460 rxqueue += index;
4461 }
4462
4463
4464
4465 flow_table = rcu_dereference(rxqueue->rps_flow_table);
4466 map = rcu_dereference(rxqueue->rps_map);
4467 if (!flow_table && !map)
4468 goto done;
4469
4470 skb_reset_network_header(skb);
4471 hash = skb_get_hash(skb);
4472 if (!hash)
4473 goto done;
4474
4475 sock_flow_table = rcu_dereference(rps_sock_flow_table);
4476 if (flow_table && sock_flow_table) {
4477 struct rps_dev_flow *rflow;
4478 u32 next_cpu;
4479 u32 ident;
4480
4481
4482 ident = sock_flow_table->ents[hash & sock_flow_table->mask];
4483 if ((ident ^ hash) & ~rps_cpu_mask)
4484 goto try_rps;
4485
4486 next_cpu = ident & rps_cpu_mask;
4487
4488
4489
4490
4491 rflow = &flow_table->flows[hash & flow_table->mask];
4492 tcpu = rflow->cpu;
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505 if (unlikely(tcpu != next_cpu) &&
4506 (tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
4507 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
4508 rflow->last_qtail)) >= 0)) {
4509 tcpu = next_cpu;
4510 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
4511 }
4512
4513 if (tcpu < nr_cpu_ids && cpu_online(tcpu)) {
4514 *rflowp = rflow;
4515 cpu = tcpu;
4516 goto done;
4517 }
4518 }
4519
4520 try_rps:
4521
4522 if (map) {
4523 tcpu = map->cpus[reciprocal_scale(hash, map->len)];
4524 if (cpu_online(tcpu)) {
4525 cpu = tcpu;
4526 goto done;
4527 }
4528 }
4529
4530 done:
4531 return cpu;
4532 }
4533
4534 #ifdef CONFIG_RFS_ACCEL
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547 bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
4548 u32 flow_id, u16 filter_id)
4549 {
4550 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
4551 struct rps_dev_flow_table *flow_table;
4552 struct rps_dev_flow *rflow;
4553 bool expire = true;
4554 unsigned int cpu;
4555
4556 rcu_read_lock();
4557 flow_table = rcu_dereference(rxqueue->rps_flow_table);
4558 if (flow_table && flow_id <= flow_table->mask) {
4559 rflow = &flow_table->flows[flow_id];
4560 cpu = READ_ONCE(rflow->cpu);
4561 if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
4562 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
4563 rflow->last_qtail) <
4564 (int)(10 * flow_table->mask)))
4565 expire = false;
4566 }
4567 rcu_read_unlock();
4568 return expire;
4569 }
4570 EXPORT_SYMBOL(rps_may_expire_flow);
4571
4572 #endif
4573
4574
4575 static void rps_trigger_softirq(void *data)
4576 {
4577 struct softnet_data *sd = data;
4578
4579 ____napi_schedule(sd, &sd->backlog);
4580 sd->received_rps++;
4581 }
4582
4583 #endif
4584
4585
4586 static void trigger_rx_softirq(void *data)
4587 {
4588 struct softnet_data *sd = data;
4589
4590 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
4591 smp_store_release(&sd->defer_ipi_scheduled, 0);
4592 }
4593
4594
4595
4596
4597
4598
4599 static int napi_schedule_rps(struct softnet_data *sd)
4600 {
4601 struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
4602
4603 #ifdef CONFIG_RPS
4604 if (sd != mysd) {
4605 sd->rps_ipi_next = mysd->rps_ipi_list;
4606 mysd->rps_ipi_list = sd;
4607
4608 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
4609 return 1;
4610 }
4611 #endif
4612 __napi_schedule_irqoff(&mysd->backlog);
4613 return 0;
4614 }
4615
4616 #ifdef CONFIG_NET_FLOW_LIMIT
4617 int netdev_flow_limit_table_len __read_mostly = (1 << 12);
4618 #endif
4619
4620 static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
4621 {
4622 #ifdef CONFIG_NET_FLOW_LIMIT
4623 struct sd_flow_limit *fl;
4624 struct softnet_data *sd;
4625 unsigned int old_flow, new_flow;
4626
4627 if (qlen < (READ_ONCE(netdev_max_backlog) >> 1))
4628 return false;
4629
4630 sd = this_cpu_ptr(&softnet_data);
4631
4632 rcu_read_lock();
4633 fl = rcu_dereference(sd->flow_limit);
4634 if (fl) {
4635 new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
4636 old_flow = fl->history[fl->history_head];
4637 fl->history[fl->history_head] = new_flow;
4638
4639 fl->history_head++;
4640 fl->history_head &= FLOW_LIMIT_HISTORY - 1;
4641
4642 if (likely(fl->buckets[old_flow]))
4643 fl->buckets[old_flow]--;
4644
4645 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
4646 fl->count++;
4647 rcu_read_unlock();
4648 return true;
4649 }
4650 }
4651 rcu_read_unlock();
4652 #endif
4653 return false;
4654 }
4655
4656
4657
4658
4659
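/* Queue an skb on the per-CPU backlog of @cpu, scheduling the backlog NAPI
 * if the queue was empty.  The packet is dropped (and accounted against the
 * device) when the backlog is full or the flow limit is exceeded.
 */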
4660 static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
4661 unsigned int *qtail)
4662 {
4663 enum skb_drop_reason reason;
4664 struct softnet_data *sd;
4665 unsigned long flags;
4666 unsigned int qlen;
4667
4668 reason = SKB_DROP_REASON_NOT_SPECIFIED;
4669 sd = &per_cpu(softnet_data, cpu);
4670
4671 rps_lock_irqsave(sd, &flags);
4672 if (!netif_running(skb->dev))
4673 goto drop;
4674 qlen = skb_queue_len(&sd->input_pkt_queue);
4675 if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) {
4676 if (qlen) {
4677 enqueue:
4678 __skb_queue_tail(&sd->input_pkt_queue, skb);
4679 input_queue_tail_incr_save(sd, qtail);
4680 rps_unlock_irq_restore(sd, &flags);
4681 return NET_RX_SUCCESS;
4682 }
4683
4684
4685
4686
4687 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
4688 napi_schedule_rps(sd);
4689 goto enqueue;
4690 }
4691 reason = SKB_DROP_REASON_CPU_BACKLOG;
4692
4693 drop:
4694 sd->dropped++;
4695 rps_unlock_irq_restore(sd, &flags);
4696
4697 dev_core_stats_rx_dropped_inc(skb->dev);
4698 kfree_skb_reason(skb, reason);
4699 return NET_RX_DROP;
4700 }
4701
4702 static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
4703 {
4704 struct net_device *dev = skb->dev;
4705 struct netdev_rx_queue *rxqueue;
4706
4707 rxqueue = dev->_rx;
4708
4709 if (skb_rx_queue_recorded(skb)) {
4710 u16 index = skb_get_rx_queue(skb);
4711
4712 if (unlikely(index >= dev->real_num_rx_queues)) {
4713 WARN_ONCE(dev->real_num_rx_queues > 1,
4714 "%s received packet on queue %u, but number "
4715 "of RX queues is %u\n",
4716 dev->name, index, dev->real_num_rx_queues);
4717
4718 return rxqueue;
4719 }
4720 rxqueue += index;
4721 }
4722 return rxqueue;
4723 }
4724
4725 u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
4726 struct bpf_prog *xdp_prog)
4727 {
4728 void *orig_data, *orig_data_end, *hard_start;
4729 struct netdev_rx_queue *rxqueue;
4730 bool orig_bcast, orig_host;
4731 u32 mac_len, frame_sz;
4732 __be16 orig_eth_type;
4733 struct ethhdr *eth;
4734 u32 metalen, act;
4735 int off;
4736
4737
4738
4739
4740 mac_len = skb->data - skb_mac_header(skb);
4741 hard_start = skb->data - skb_headroom(skb);
4742
4743
4744 frame_sz = (void *)skb_end_pointer(skb) - hard_start;
4745 frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
4746
4747 rxqueue = netif_get_rxqueue(skb);
4748 xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq);
4749 xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len,
4750 skb_headlen(skb) + mac_len, true);
4751
4752 orig_data_end = xdp->data_end;
4753 orig_data = xdp->data;
4754 eth = (struct ethhdr *)xdp->data;
4755 orig_host = ether_addr_equal_64bits(eth->h_dest, skb->dev->dev_addr);
4756 orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest);
4757 orig_eth_type = eth->h_proto;
4758
4759 act = bpf_prog_run_xdp(xdp_prog, xdp);
4760
4761
4762 off = xdp->data - orig_data;
4763 if (off) {
4764 if (off > 0)
4765 __skb_pull(skb, off);
4766 else if (off < 0)
4767 __skb_push(skb, -off);
4768
4769 skb->mac_header += off;
4770 skb_reset_network_header(skb);
4771 }
4772
4773
4774 off = xdp->data_end - orig_data_end;
4775 if (off != 0) {
4776 skb_set_tail_pointer(skb, xdp->data_end - xdp->data);
4777 skb->len += off;
4778 }
4779
4780
4781 eth = (struct ethhdr *)xdp->data;
4782 if ((orig_eth_type != eth->h_proto) ||
4783 (orig_host != ether_addr_equal_64bits(eth->h_dest,
4784 skb->dev->dev_addr)) ||
4785 (orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) {
4786 __skb_push(skb, ETH_HLEN);
4787 skb->pkt_type = PACKET_HOST;
4788 skb->protocol = eth_type_trans(skb, skb->dev);
4789 }
4790
4791
4792
4793
4794
4795
4796
4797
4798 switch (act) {
4799 case XDP_REDIRECT:
4800 case XDP_TX:
4801 __skb_push(skb, mac_len);
4802 break;
4803 case XDP_PASS:
4804 metalen = xdp->data - xdp->data_meta;
4805 if (metalen)
4806 skb_metadata_set(skb, metalen);
4807 break;
4808 }
4809
4810 return act;
4811 }
4812
4813 static u32 netif_receive_generic_xdp(struct sk_buff *skb,
4814 struct xdp_buff *xdp,
4815 struct bpf_prog *xdp_prog)
4816 {
4817 u32 act = XDP_DROP;
4818
4819
4820
4821
4822 if (skb_is_redirected(skb))
4823 return XDP_PASS;
4824
4825
4826
4827
4828
4829 if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
4830 skb_headroom(skb) < XDP_PACKET_HEADROOM) {
4831 int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
4832 int troom = skb->tail + skb->data_len - skb->end;
4833
4834
4835
4836
4837 if (pskb_expand_head(skb,
4838 hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
4839 troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
4840 goto do_drop;
4841 if (skb_linearize(skb))
4842 goto do_drop;
4843 }
4844
4845 act = bpf_prog_run_generic_xdp(skb, xdp, xdp_prog);
4846 switch (act) {
4847 case XDP_REDIRECT:
4848 case XDP_TX:
4849 case XDP_PASS:
4850 break;
4851 default:
4852 bpf_warn_invalid_xdp_action(skb->dev, xdp_prog, act);
4853 fallthrough;
4854 case XDP_ABORTED:
4855 trace_xdp_exception(skb->dev, xdp_prog, act);
4856 fallthrough;
4857 case XDP_DROP:
4858 do_drop:
4859 kfree_skb(skb);
4860 break;
4861 }
4862
4863 return act;
4864 }
4865
4866
4867
4868
4869
4870
4871
4872 void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
4873 {
4874 struct net_device *dev = skb->dev;
4875 struct netdev_queue *txq;
4876 bool free_skb = true;
4877 int cpu, rc;
4878
4879 txq = netdev_core_pick_tx(dev, skb, NULL);
4880 cpu = smp_processor_id();
4881 HARD_TX_LOCK(dev, txq, cpu);
4882 if (!netif_xmit_frozen_or_drv_stopped(txq)) {
4883 rc = netdev_start_xmit(skb, dev, txq, 0);
4884 if (dev_xmit_complete(rc))
4885 free_skb = false;
4886 }
4887 HARD_TX_UNLOCK(dev, txq);
4888 if (free_skb) {
4889 trace_xdp_exception(dev, xdp_prog, XDP_TX);
4890 dev_core_stats_tx_dropped_inc(dev);
4891 kfree_skb(skb);
4892 }
4893 }
4894
4895 static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
4896
4897 int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
4898 {
4899 if (xdp_prog) {
4900 struct xdp_buff xdp;
4901 u32 act;
4902 int err;
4903
4904 act = netif_receive_generic_xdp(skb, &xdp, xdp_prog);
4905 if (act != XDP_PASS) {
4906 switch (act) {
4907 case XDP_REDIRECT:
4908 err = xdp_do_generic_redirect(skb->dev, skb,
4909 &xdp, xdp_prog);
4910 if (err)
4911 goto out_redir;
4912 break;
4913 case XDP_TX:
4914 generic_xdp_tx(skb, xdp_prog);
4915 break;
4916 }
4917 return XDP_DROP;
4918 }
4919 }
4920 return XDP_PASS;
4921 out_redir:
4922 kfree_skb_reason(skb, SKB_DROP_REASON_XDP);
4923 return XDP_DROP;
4924 }
4925 EXPORT_SYMBOL_GPL(do_xdp_generic);
4926
4927 static int netif_rx_internal(struct sk_buff *skb)
4928 {
4929 int ret;
4930
4931 net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
4932
4933 trace_netif_rx(skb);
4934
4935 #ifdef CONFIG_RPS
4936 if (static_branch_unlikely(&rps_needed)) {
4937 struct rps_dev_flow voidflow, *rflow = &voidflow;
4938 int cpu;
4939
4940 rcu_read_lock();
4941
4942 cpu = get_rps_cpu(skb->dev, skb, &rflow);
4943 if (cpu < 0)
4944 cpu = smp_processor_id();
4945
4946 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
4947
4948 rcu_read_unlock();
4949 } else
4950 #endif
4951 {
4952 unsigned int qtail;
4953
4954 ret = enqueue_to_backlog(skb, smp_processor_id(), &qtail);
4955 }
4956 return ret;
4957 }
4958
4959
4960
4961
4962
4963
4964
4965
4966
4967 int __netif_rx(struct sk_buff *skb)
4968 {
4969 int ret;
4970
4971 lockdep_assert_once(hardirq_count() | softirq_count());
4972
4973 trace_netif_rx_entry(skb);
4974 ret = netif_rx_internal(skb);
4975 trace_netif_rx_exit(ret);
4976 return ret;
4977 }
4978 EXPORT_SYMBOL(__netif_rx);
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998
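/*
 * netif_rx - post a buffer to the network code
 * @skb: buffer to post
 *
 * Queue the buffer on the per-CPU backlog for the protocol layers to
 * process.  May be called from any context; bottom halves are disabled
 * here if the caller has not already done so.  Returns NET_RX_SUCCESS or
 * NET_RX_DROP; the buffer may still be dropped later for congestion
 * control or by the protocol layers.
 */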
4999 int netif_rx(struct sk_buff *skb)
5000 {
5001 bool need_bh_off = !(hardirq_count() | softirq_count());
5002 int ret;
5003
5004 if (need_bh_off)
5005 local_bh_disable();
5006 trace_netif_rx_entry(skb);
5007 ret = netif_rx_internal(skb);
5008 trace_netif_rx_exit(ret);
5009 if (need_bh_off)
5010 local_bh_enable();
5011 return ret;
5012 }
5013 EXPORT_SYMBOL(netif_rx);
5014
5015 static __latent_entropy void net_tx_action(struct softirq_action *h)
5016 {
5017 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
5018
5019 if (sd->completion_queue) {
5020 struct sk_buff *clist;
5021
5022 local_irq_disable();
5023 clist = sd->completion_queue;
5024 sd->completion_queue = NULL;
5025 local_irq_enable();
5026
5027 while (clist) {
5028 struct sk_buff *skb = clist;
5029
5030 clist = clist->next;
5031
5032 WARN_ON(refcount_read(&skb->users));
5033 if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
5034 trace_consume_skb(skb);
5035 else
5036 trace_kfree_skb(skb, net_tx_action,
5037 SKB_DROP_REASON_NOT_SPECIFIED);
5038
5039 if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
5040 __kfree_skb(skb);
5041 else
5042 __kfree_skb_defer(skb);
5043 }
5044 }
5045
5046 if (sd->output_queue) {
5047 struct Qdisc *head;
5048
5049 local_irq_disable();
5050 head = sd->output_queue;
5051 sd->output_queue = NULL;
5052 sd->output_queue_tailp = &sd->output_queue;
5053 local_irq_enable();
5054
5055 rcu_read_lock();
5056
5057 while (head) {
5058 struct Qdisc *q = head;
5059 spinlock_t *root_lock = NULL;
5060
5061 head = head->next_sched;
5062
5063
5064
5065
5066 smp_mb__before_atomic();
5067
5068 if (!(q->flags & TCQ_F_NOLOCK)) {
5069 root_lock = qdisc_lock(q);
5070 spin_lock(root_lock);
5071 } else if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
5072 &q->state))) {
				/* The qdisc has been deactivated.
				 * dev_deactivate() issues a synchronize_net()
				 * after setting __QDISC_STATE_DEACTIVATED, so
				 * bailing out here for a lockless qdisc cannot
				 * race with qdisc_reset() or
				 * some_qdisc_is_busy().
				 */
5081 clear_bit(__QDISC_STATE_SCHED, &q->state);
5082 continue;
5083 }
5084
5085 clear_bit(__QDISC_STATE_SCHED, &q->state);
5086 qdisc_run(q);
5087 if (root_lock)
5088 spin_unlock(root_lock);
5089 }
5090
5091 rcu_read_unlock();
5092 }
5093
5094 xfrm_dev_backlog(sd);
5095 }
5096
5097 #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
5098
5099 int (*br_fdb_test_addr_hook)(struct net_device *dev,
5100 unsigned char *addr) __read_mostly;
5101 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
5102 #endif
5103
5104 static inline struct sk_buff *
5105 sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
5106 struct net_device *orig_dev, bool *another)
5107 {
5108 #ifdef CONFIG_NET_CLS_ACT
5109 struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
5110 struct tcf_result cl_res;
5111
	/* If at least one ingress qdisc exists somewhere (so we got here via
	 * the enabled static key), devices that are not configured with an
	 * ingress qdisc bail out right away.
	 */
5117 if (!miniq)
5118 return skb;
5119
5120 if (*pt_prev) {
5121 *ret = deliver_skb(skb, *pt_prev, orig_dev);
5122 *pt_prev = NULL;
5123 }
5124
5125 qdisc_skb_cb(skb)->pkt_len = skb->len;
5126 tc_skb_cb(skb)->mru = 0;
5127 tc_skb_cb(skb)->post_ct = false;
5128 skb->tc_at_ingress = 1;
5129 mini_qdisc_bstats_cpu_update(miniq, skb);
5130
5131 switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
5132 case TC_ACT_OK:
5133 case TC_ACT_RECLASSIFY:
5134 skb->tc_index = TC_H_MIN(cl_res.classid);
5135 break;
5136 case TC_ACT_SHOT:
5137 mini_qdisc_qstats_cpu_drop(miniq);
5138 kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
5139 return NULL;
5140 case TC_ACT_STOLEN:
5141 case TC_ACT_QUEUED:
5142 case TC_ACT_TRAP:
5143 consume_skb(skb);
5144 return NULL;
5145 case TC_ACT_REDIRECT:
		/* Restore the L2 header before handing the skb to
		 * skb_do_redirect(); if that returns -EAGAIN the header is
		 * pulled back and the caller restarts RX processing
		 * (another_round) on the new device.
		 */
5150 __skb_push(skb, skb->mac_len);
5151 if (skb_do_redirect(skb) == -EAGAIN) {
5152 __skb_pull(skb, skb->mac_len);
5153 *another = true;
5154 break;
5155 }
5156 return NULL;
5157 case TC_ACT_CONSUMED:
5158 return NULL;
5159 default:
5160 break;
5161 }
5162 #endif
5163 return skb;
5164 }
5165
/**
 * netdev_is_rx_handler_busy - check if receive handler is registered
 * @dev: device to check
 *
 * Check if a receive handler is already registered for a given device.
 * Return true if there's a registered receive handler attached to dev.
 *
 * The caller must hold the rtnl_mutex.
 */
5175 bool netdev_is_rx_handler_busy(struct net_device *dev)
5176 {
5177 ASSERT_RTNL();
5178 return dev && rtnl_dereference(dev->rx_handler);
5179 }
5180 EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
5181
/**
 * netdev_rx_handler_register - register receive handler
 * @dev: device to register a handler for
 * @rx_handler: receive handler to register
 * @rx_handler_data: data pointer that is used by rx handler
 *
 * Register a receive handler for a device. This handler will then be
 * called from __netif_receive_skb. A negative errno code is returned
 * on a failure.
 *
 * The caller must hold the rtnl_mutex.
 *
 * For a general description of rx_handler, see enum rx_handler_result.
 */
5196 int netdev_rx_handler_register(struct net_device *dev,
5197 rx_handler_func_t *rx_handler,
5198 void *rx_handler_data)
5199 {
5200 if (netdev_is_rx_handler_busy(dev))
5201 return -EBUSY;
5202
5203 if (dev->priv_flags & IFF_NO_RX_HANDLER)
5204 return -EINVAL;
5205
	/* Note: rx_handler_data must be set before rx_handler */
5207 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
5208 rcu_assign_pointer(dev->rx_handler, rx_handler);
5209
5210 return 0;
5211 }
5212 EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
5213
/**
 * netdev_rx_handler_unregister - unregister receive handler
 * @dev: device to unregister a handler from
 *
 * Unregister a receive handler from a device.
 *
 * The caller must hold the rtnl_mutex.
 */
5222 void netdev_rx_handler_unregister(struct net_device *dev)
5223 {
5224
5225 ASSERT_RTNL();
5226 RCU_INIT_POINTER(dev->rx_handler, NULL);
5227
	/* A reader seeing a non-NULL rx_handler in an rcu_dereference()
	 * section must also see a non-NULL rx_handler_data, hence the
	 * synchronize_net() before clearing the data pointer.
	 */
5231 synchronize_net();
5232 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
5233 }
5234 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
5235
/*
 * Limit the use of PFMEMALLOC reserves to those protocols that implement
 * the special handling of PFMEMALLOC skbs.
 */
5240 static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
5241 {
5242 switch (skb->protocol) {
5243 case htons(ETH_P_ARP):
5244 case htons(ETH_P_IP):
5245 case htons(ETH_P_IPV6):
5246 case htons(ETH_P_8021Q):
5247 case htons(ETH_P_8021AD):
5248 return true;
5249 default:
5250 return false;
5251 }
5252 }
5253
5254 static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
5255 int *ret, struct net_device *orig_dev)
5256 {
5257 if (nf_hook_ingress_active(skb)) {
5258 int ingress_retval;
5259
5260 if (*pt_prev) {
5261 *ret = deliver_skb(skb, *pt_prev, orig_dev);
5262 *pt_prev = NULL;
5263 }
5264
5265 rcu_read_lock();
5266 ingress_retval = nf_hook_ingress(skb);
5267 rcu_read_unlock();
5268 return ingress_retval;
5269 }
5270 return 0;
5271 }
5272
5273 static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
5274 struct packet_type **ppt_prev)
5275 {
5276 struct packet_type *ptype, *pt_prev;
5277 rx_handler_func_t *rx_handler;
5278 struct sk_buff *skb = *pskb;
5279 struct net_device *orig_dev;
5280 bool deliver_exact = false;
5281 int ret = NET_RX_DROP;
5282 __be16 type;
5283
5284 net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb);
5285
5286 trace_netif_receive_skb(skb);
5287
5288 orig_dev = skb->dev;
5289
5290 skb_reset_network_header(skb);
5291 if (!skb_transport_header_was_set(skb))
5292 skb_reset_transport_header(skb);
5293 skb_reset_mac_len(skb);
5294
5295 pt_prev = NULL;
5296
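	/* Processing restarts here whenever the skb is handed back to us on a
	 * (possibly) different device: VLAN untagging, an rx_handler returning
	 * RX_HANDLER_ANOTHER, or a TC ingress redirect.
	 */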
5297 another_round:
5298 skb->skb_iif = skb->dev->ifindex;
5299
5300 __this_cpu_inc(softnet_data.processed);
5301
5302 if (static_branch_unlikely(&generic_xdp_needed_key)) {
5303 int ret2;
5304
5305 migrate_disable();
5306 ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
5307 migrate_enable();
5308
5309 if (ret2 != XDP_PASS) {
5310 ret = NET_RX_DROP;
5311 goto out;
5312 }
5313 }
5314
5315 if (eth_type_vlan(skb->protocol)) {
5316 skb = skb_vlan_untag(skb);
5317 if (unlikely(!skb))
5318 goto out;
5319 }
5320
5321 if (skb_skip_tc_classify(skb))
5322 goto skip_classify;
5323
5324 if (pfmemalloc)
5325 goto skip_taps;
5326
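	/* Deliver a copy to global taps first, then to taps bound to the
	 * receiving device (e.g. packet sockets).
	 */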
5327 list_for_each_entry_rcu(ptype, &ptype_all, list) {
5328 if (pt_prev)
5329 ret = deliver_skb(skb, pt_prev, orig_dev);
5330 pt_prev = ptype;
5331 }
5332
5333 list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
5334 if (pt_prev)
5335 ret = deliver_skb(skb, pt_prev, orig_dev);
5336 pt_prev = ptype;
5337 }
5338
5339 skip_taps:
5340 #ifdef CONFIG_NET_INGRESS
5341 if (static_branch_unlikely(&ingress_needed_key)) {
5342 bool another = false;
5343
5344 nf_skip_egress(skb, true);
5345 skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
5346 &another);
5347 if (another)
5348 goto another_round;
5349 if (!skb)
5350 goto out;
5351
5352 nf_skip_egress(skb, false);
5353 if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
5354 goto out;
5355 }
5356 #endif
5357 skb_reset_redirect(skb);
5358 skip_classify:
5359 if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
5360 goto drop;
5361
5362 if (skb_vlan_tag_present(skb)) {
5363 if (pt_prev) {
5364 ret = deliver_skb(skb, pt_prev, orig_dev);
5365 pt_prev = NULL;
5366 }
5367 if (vlan_do_receive(&skb))
5368 goto another_round;
5369 else if (unlikely(!skb))
5370 goto out;
5371 }
5372
5373 rx_handler = rcu_dereference(skb->dev->rx_handler);
5374 if (rx_handler) {
5375 if (pt_prev) {
5376 ret = deliver_skb(skb, pt_prev, orig_dev);
5377 pt_prev = NULL;
5378 }
5379 switch (rx_handler(&skb)) {
5380 case RX_HANDLER_CONSUMED:
5381 ret = NET_RX_SUCCESS;
5382 goto out;
5383 case RX_HANDLER_ANOTHER:
5384 goto another_round;
5385 case RX_HANDLER_EXACT:
5386 deliver_exact = true;
5387 break;
5388 case RX_HANDLER_PASS:
5389 break;
5390 default:
5391 BUG();
5392 }
5393 }
5394
5395 if (unlikely(skb_vlan_tag_present(skb)) && !netdev_uses_dsa(skb->dev)) {
5396 check_vlan_id:
5397 if (skb_vlan_tag_get_id(skb)) {
			/* VLAN id is non-zero and vlan_do_receive() above
			 * could not find a vlan device for it.
			 */
			skb->pkt_type = PACKET_OTHERHOST;
		} else if (eth_type_vlan(skb->protocol)) {
			/* Outer header is 802.1P with vlan 0, inner header is
			 * 802.1Q or 802.1AD and vlan_do_receive() above could
			 * not find a vlan device for vlan id 0.
			 */
5407 __vlan_hwaccel_clear_tag(skb);
5408 skb = skb_vlan_untag(skb);
5409 if (unlikely(!skb))
5410 goto out;
5411 if (vlan_do_receive(&skb))
5412
5413
5414
5415 goto another_round;
5416 else if (unlikely(!skb))
5417 goto out;
5418 else
5419
5420
5421
5422
5423 goto check_vlan_id;
5424 }
5425
		/* No vlan device claimed the tag carried in the skb; clear
		 * the hw-accelerated tag before normal protocol delivery.
		 */
5429 __vlan_hwaccel_clear_tag(skb);
5430 }
5431
5432 type = skb->protocol;

	/* deliver only exact match when indicated */
5435 if (likely(!deliver_exact)) {
5436 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
5437 &ptype_base[ntohs(type) &
5438 PTYPE_HASH_MASK]);
5439 }
5440
5441 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
5442 &orig_dev->ptype_specific);
5443
5444 if (unlikely(skb->dev != orig_dev)) {
5445 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
5446 &skb->dev->ptype_specific);
5447 }
5448
5449 if (pt_prev) {
5450 if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
5451 goto drop;
5452 *ppt_prev = pt_prev;
5453 } else {
5454 drop:
5455 if (!deliver_exact)
5456 dev_core_stats_rx_dropped_inc(skb->dev);
5457 else
5458 dev_core_stats_rx_nohandler_inc(skb->dev);
5459 kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO);
5460
5461
5462
5463 ret = NET_RX_DROP;
5464 }
5465
5466 out:
	/* The invariant here is that if *ppt_prev is not NULL
	 * then skb should also be non-NULL.
	 *
	 * Callers rely on *pskb being updated, since skb may have been
	 * replaced (e.g. by skb_vlan_untag()) during processing.
	 */
5473 *pskb = skb;
5474 return ret;
5475 }
5476
5477 static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
5478 {
5479 struct net_device *orig_dev = skb->dev;
5480 struct packet_type *pt_prev = NULL;
5481 int ret;
5482
5483 ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
5484 if (pt_prev)
5485 ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
5486 skb->dev, pt_prev, orig_dev);
5487 return ret;
5488 }
5489
/**
 *	netif_receive_skb_core - special purpose version of netif_receive_skb
 *	@skb: buffer to process
 *
 *	More direct receive version of netif_receive_skb().  It should
 *	only be used by callers that have a need to skip RPS and Generic XDP.
 *	Caller must also take care of handling if ``(page_is_)pfmemalloc``.
 *
 *	This function may only be called from softirq context and interrupts
 *	should be enabled.
 *
 *	Return values (usually ignored):
 *	NET_RX_SUCCESS: no congestion
 *	NET_RX_DROP: packet was dropped
 */
5505 int netif_receive_skb_core(struct sk_buff *skb)
5506 {
5507 int ret;
5508
5509 rcu_read_lock();
5510 ret = __netif_receive_skb_one_core(skb, false);
5511 rcu_read_unlock();
5512
5513 return ret;
5514 }
5515 EXPORT_SYMBOL(netif_receive_skb_core);
5516
5517 static inline void __netif_receive_skb_list_ptype(struct list_head *head,
5518 struct packet_type *pt_prev,
5519 struct net_device *orig_dev)
5520 {
5521 struct sk_buff *skb, *next;
5522
5523 if (!pt_prev)
5524 return;
5525 if (list_empty(head))
5526 return;
5527 if (pt_prev->list_func != NULL)
5528 INDIRECT_CALL_INET(pt_prev->list_func, ipv6_list_rcv,
5529 ip_list_rcv, head, pt_prev, orig_dev);
5530 else
5531 list_for_each_entry_safe(skb, next, head, list) {
5532 skb_list_del_init(skb);
5533 pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
5534 }
5535 }
5536
5537 static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
5538 {
	/* Fast-path assumptions:
	 * - There is no RX handler.
	 * - Only one packet_type matches.
	 * If either of these fails, we end up doing some per-packet
	 * processing in-line, then handling the 'last ptype' for the whole
	 * sublist.  This can't cause out-of-order delivery to any single
	 * ptype, because the 'last ptype' must be constant across the
	 * sublist, and all other ptypes are handled per-packet.
	 */
	/* Current (common) ptype of sublist */
	struct packet_type *pt_curr = NULL;
	/* Current (common) orig_dev of sublist */
	struct net_device *od_curr = NULL;
5552 struct list_head sublist;
5553 struct sk_buff *skb, *next;
5554
5555 INIT_LIST_HEAD(&sublist);
5556 list_for_each_entry_safe(skb, next, head, list) {
5557 struct net_device *orig_dev = skb->dev;
5558 struct packet_type *pt_prev = NULL;
5559
5560 skb_list_del_init(skb);
5561 __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
5562 if (!pt_prev)
5563 continue;
5564 if (pt_curr != pt_prev || od_curr != orig_dev) {
5565
5566 __netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
5567
5568 INIT_LIST_HEAD(&sublist);
5569 pt_curr = pt_prev;
5570 od_curr = orig_dev;
5571 }
5572 list_add_tail(&skb->list, &sublist);
5573 }
5574
5575
5576 __netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
5577 }
5578
5579 static int __netif_receive_skb(struct sk_buff *skb)
5580 {
5581 int ret;
5582
5583 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
5584 unsigned int noreclaim_flag;
5585
		/*
		 * PFMEMALLOC skbs are special, they should
		 * - be delivered to SOCK_MEMALLOC sockets only
		 * - stay away from userspace
		 * - have bounded memory usage
		 *
		 * Use PF_MEMALLOC as this saves us from propagating the
		 * allocation context down to all allocation sites.
		 */
5595 noreclaim_flag = memalloc_noreclaim_save();
5596 ret = __netif_receive_skb_one_core(skb, true);
5597 memalloc_noreclaim_restore(noreclaim_flag);
5598 } else
5599 ret = __netif_receive_skb_one_core(skb, false);
5600
5601 return ret;
5602 }
5603
5604 static void __netif_receive_skb_list(struct list_head *head)
5605 {
5606 unsigned long noreclaim_flag = 0;
5607 struct sk_buff *skb, *next;
5608 bool pfmemalloc = false;
5609
5610 list_for_each_entry_safe(skb, next, head, list) {
5611 if ((sk_memalloc_socks() && skb_pfmemalloc(skb)) != pfmemalloc) {
5612 struct list_head sublist;
5613
5614
5615 list_cut_before(&sublist, head, &skb->list);
5616 if (!list_empty(&sublist))
5617 __netif_receive_skb_list_core(&sublist, pfmemalloc);
5618 pfmemalloc = !pfmemalloc;
5619
5620 if (pfmemalloc)
5621 noreclaim_flag = memalloc_noreclaim_save();
5622 else
5623 memalloc_noreclaim_restore(noreclaim_flag);
5624 }
5625 }
5626
5627 if (!list_empty(head))
5628 __netif_receive_skb_list_core(head, pfmemalloc);
5629
5630 if (pfmemalloc)
5631 memalloc_noreclaim_restore(noreclaim_flag);
5632 }
5633
5634 static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
5635 {
5636 struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
5637 struct bpf_prog *new = xdp->prog;
5638 int ret = 0;
5639
5640 switch (xdp->command) {
5641 case XDP_SETUP_PROG:
5642 rcu_assign_pointer(dev->xdp_prog, new);
5643 if (old)
5644 bpf_prog_put(old);
5645
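		/* Keep the generic_xdp static key in sync with the number of
		 * attached programs; the first attach also disables LRO and
		 * hardware GRO on the device.
		 */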
5646 if (old && !new) {
5647 static_branch_dec(&generic_xdp_needed_key);
5648 } else if (new && !old) {
5649 static_branch_inc(&generic_xdp_needed_key);
5650 dev_disable_lro(dev);
5651 dev_disable_gro_hw(dev);
5652 }
5653 break;
5654
5655 default:
5656 ret = -EINVAL;
5657 break;
5658 }
5659
5660 return ret;
5661 }
5662
5663 static int netif_receive_skb_internal(struct sk_buff *skb)
5664 {
5665 int ret;
5666
5667 net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
5668
5669 if (skb_defer_rx_timestamp(skb))
5670 return NET_RX_SUCCESS;
5671
5672 rcu_read_lock();
5673 #ifdef CONFIG_RPS
5674 if (static_branch_unlikely(&rps_needed)) {
5675 struct rps_dev_flow voidflow, *rflow = &voidflow;
5676 int cpu = get_rps_cpu(skb->dev, skb, &rflow);
5677
5678 if (cpu >= 0) {
5679 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
5680 rcu_read_unlock();
5681 return ret;
5682 }
5683 }
5684 #endif
5685 ret = __netif_receive_skb(skb);
5686 rcu_read_unlock();
5687 return ret;
5688 }
5689
5690 void netif_receive_skb_list_internal(struct list_head *head)
5691 {
5692 struct sk_buff *skb, *next;
5693 struct list_head sublist;
5694
5695 INIT_LIST_HEAD(&sublist);
5696 list_for_each_entry_safe(skb, next, head, list) {
5697 net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
5698 skb_list_del_init(skb);
5699 if (!skb_defer_rx_timestamp(skb))
5700 list_add_tail(&skb->list, &sublist);
5701 }
5702 list_splice_init(&sublist, head);
5703
5704 rcu_read_lock();
5705 #ifdef CONFIG_RPS
5706 if (static_branch_unlikely(&rps_needed)) {
5707 list_for_each_entry_safe(skb, next, head, list) {
5708 struct rps_dev_flow voidflow, *rflow = &voidflow;
5709 int cpu = get_rps_cpu(skb->dev, skb, &rflow);
5710
5711 if (cpu >= 0) {
5712
5713 skb_list_del_init(skb);
5714 enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
5715 }
5716 }
5717 }
5718 #endif
5719 __netif_receive_skb_list(head);
5720 rcu_read_unlock();
5721 }
5722
/**
 *	netif_receive_skb - process receive buffer from network
 *	@skb: buffer to process
 *
 *	netif_receive_skb() is the main receive data processing function.
 *	It always succeeds. The buffer may be dropped during processing
 *	for congestion control or by the protocol layers.
 *
 *	This function may only be called from softirq context and interrupts
 *	should be enabled.
 *
 *	Return values (usually ignored):
 *	NET_RX_SUCCESS: no congestion
 *	NET_RX_DROP: packet was dropped
 */
5738 int netif_receive_skb(struct sk_buff *skb)
5739 {
5740 int ret;
5741
5742 trace_netif_receive_skb_entry(skb);
5743
5744 ret = netif_receive_skb_internal(skb);
5745 trace_netif_receive_skb_exit(ret);
5746
5747 return ret;
5748 }
5749 EXPORT_SYMBOL(netif_receive_skb);
5750
/**
 *	netif_receive_skb_list - process many receive buffers from network
 *	@head: list of skbs to process.
 *
 *	Since the return value of netif_receive_skb() is normally ignored, and
 *	wouldn't be meaningful for a list, this function returns void.
 *
 *	This function may only be called from softirq context and interrupts
 *	should be enabled.
 */
5761 void netif_receive_skb_list(struct list_head *head)
5762 {
5763 struct sk_buff *skb;
5764
5765 if (list_empty(head))
5766 return;
5767 if (trace_netif_receive_skb_list_entry_enabled()) {
5768 list_for_each_entry(skb, head, list)
5769 trace_netif_receive_skb_list_entry(skb);
5770 }
5771 netif_receive_skb_list_internal(head);
5772 trace_netif_receive_skb_list_exit(0);
5773 }
5774 EXPORT_SYMBOL(netif_receive_skb_list);
5775
5776 static DEFINE_PER_CPU(struct work_struct, flush_works);
5777
5778
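/* Drop backlogged packets whose device is being unregistered; runs on each
 * CPU via the flush_works items scheduled by flush_all_backlogs().
 */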
5779 static void flush_backlog(struct work_struct *work)
5780 {
5781 struct sk_buff *skb, *tmp;
5782 struct softnet_data *sd;
5783
5784 local_bh_disable();
5785 sd = this_cpu_ptr(&softnet_data);
5786
5787 rps_lock_irq_disable(sd);
5788 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
5789 if (skb->dev->reg_state == NETREG_UNREGISTERING) {
5790 __skb_unlink(skb, &sd->input_pkt_queue);
5791 dev_kfree_skb_irq(skb);
5792 input_queue_head_incr(sd);
5793 }
5794 }
5795 rps_unlock_irq_enable(sd);
5796
5797 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
5798 if (skb->dev->reg_state == NETREG_UNREGISTERING) {
5799 __skb_unlink(skb, &sd->process_queue);
5800 kfree_skb(skb);
5801 input_queue_head_incr(sd);
5802 }
5803 }
5804 local_bh_enable();
5805 }
5806
5807 static bool flush_required(int cpu)
5808 {
5809 #if IS_ENABLED(CONFIG_RPS)
5810 struct softnet_data *sd = &per_cpu(softnet_data, cpu);
5811 bool do_flush;
5812
5813 rps_lock_irq_disable(sd);
	/* As insertion into process_queue happens with the rps lock held,
	 * process_queue access may race only with dequeue.
	 */
5818 do_flush = !skb_queue_empty(&sd->input_pkt_queue) ||
5819 !skb_queue_empty_lockless(&sd->process_queue);
5820 rps_unlock_irq_enable(sd);
5821
5822 return do_flush;
5823 #endif
	/* Without RPS we can't safely check input_pkt_queue: during a
	 * concurrent remote skb_queue_splice() we can detect as empty both
	 * input_pkt_queue and process_queue even if the latter could end up
	 * containing a lot of packets.
	 */
5829 return true;
5830 }
5831
5832 static void flush_all_backlogs(void)
5833 {
5834 static cpumask_t flush_cpus;
5835 unsigned int cpu;
5836
5837
5838
5839
5840
5841 ASSERT_RTNL();
5842
5843 cpus_read_lock();
5844
5845 cpumask_clear(&flush_cpus);
5846 for_each_online_cpu(cpu) {
5847 if (flush_required(cpu)) {
5848 queue_work_on(cpu, system_highpri_wq,
5849 per_cpu_ptr(&flush_works, cpu));
5850 cpumask_set_cpu(cpu, &flush_cpus);
5851 }
5852 }
5853
5854
5855
5856
5857
5858 for_each_cpu(cpu, &flush_cpus)
5859 flush_work(per_cpu_ptr(&flush_works, cpu));
5860
5861 cpus_read_unlock();
5862 }
5863
5864 static void net_rps_send_ipi(struct softnet_data *remsd)
5865 {
5866 #ifdef CONFIG_RPS
5867 while (remsd) {
5868 struct softnet_data *next = remsd->rps_ipi_next;
5869
5870 if (cpu_online(remsd->cpu))
5871 smp_call_function_single_async(remsd->cpu, &remsd->csd);
5872 remsd = next;
5873 }
5874 #endif
5875 }
5876
5877
5878
5879
5880
5881 static void net_rps_action_and_irq_enable(struct softnet_data *sd)
5882 {
5883 #ifdef CONFIG_RPS
5884 struct softnet_data *remsd = sd->rps_ipi_list;
5885
5886 if (remsd) {
5887 sd->rps_ipi_list = NULL;
5888
5889 local_irq_enable();
5890
5891
5892 net_rps_send_ipi(remsd);
5893 } else
5894 #endif
5895 local_irq_enable();
5896 }
5897
5898 static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
5899 {
5900 #ifdef CONFIG_RPS
5901 return sd->rps_ipi_list != NULL;
5902 #else
5903 return false;
5904 #endif
5905 }
5906
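/* NAPI poll callback for the per-cpu backlog: feed queued skbs to
 * __netif_receive_skb() until the quota is hit or the queues are empty.
 */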
5907 static int process_backlog(struct napi_struct *napi, int quota)
5908 {
5909 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
5910 bool again = true;
5911 int work = 0;
5912
	/* Check if we have pending IPIs; better to send them now rather
	 * than waiting for the end of net_rx_action().
	 */
5916 if (sd_has_rps_ipi_waiting(sd)) {
5917 local_irq_disable();
5918 net_rps_action_and_irq_enable(sd);
5919 }
5920
5921 napi->weight = READ_ONCE(dev_rx_weight);
5922 while (again) {
5923 struct sk_buff *skb;
5924
5925 while ((skb = __skb_dequeue(&sd->process_queue))) {
5926 rcu_read_lock();
5927 __netif_receive_skb(skb);
5928 rcu_read_unlock();
5929 input_queue_head_incr(sd);
5930 if (++work >= quota)
5931 return work;
5932
5933 }
5934
5935 rps_lock_irq_disable(sd);
5936 if (skb_queue_empty(&sd->input_pkt_queue)) {
			/*
			 * Inline a custom version of __napi_complete().
			 * Only the current cpu owns and manipulates this napi,
			 * and NAPI_STATE_SCHED is the only possible flag set
			 * on the backlog, so we can use a plain write instead
			 * of clear_bit() and don't need an smp_mb() barrier.
			 */
5945 napi->state = 0;
5946 again = false;
5947 } else {
5948 skb_queue_splice_tail_init(&sd->input_pkt_queue,
5949 &sd->process_queue);
5950 }
5951 rps_unlock_irq_enable(sd);
5952 }
5953
5954 return work;
5955 }
5956
/**
 * __napi_schedule - schedule for receive
 * @n: entry to schedule
 *
 * The entry's receive function will be scheduled to run.
 * Consider using __napi_schedule_irqoff() if hard irqs are masked.
 */
5964 void __napi_schedule(struct napi_struct *n)
5965 {
5966 unsigned long flags;
5967
5968 local_irq_save(flags);
5969 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
5970 local_irq_restore(flags);
5971 }
5972 EXPORT_SYMBOL(__napi_schedule);
5973
/**
 *	napi_schedule_prep - check if napi can be scheduled
 *	@n: napi context
 *
 * Test if the NAPI routine is already running, and if not mark
 * it as running.  This is used as a condition variable to
 * ensure only one NAPI poll instance runs.  We also make
 * sure there is no pending NAPI disable.
 */
5983 bool napi_schedule_prep(struct napi_struct *n)
5984 {
5985 unsigned long val, new;
5986
5987 do {
5988 val = READ_ONCE(n->state);
5989 if (unlikely(val & NAPIF_STATE_DISABLE))
5990 return false;
5991 new = val | NAPIF_STATE_SCHED;
		/* Set STATE_MISSED if STATE_SCHED was already set; written
		 * without a branch, equivalent to:
		 *	if (val & NAPIF_STATE_SCHED)
		 *		new |= NAPIF_STATE_MISSED;
		 */
5999 new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED *
6000 NAPIF_STATE_MISSED;
6001 } while (cmpxchg(&n->state, val, new) != val);
6002
6003 return !(val & NAPIF_STATE_SCHED);
6004 }
6005 EXPORT_SYMBOL(napi_schedule_prep);
6006
/**
 * __napi_schedule_irqoff - schedule for receive
 * @n: entry to schedule
 *
 * Variant of __napi_schedule() assuming hard irqs are masked.
 *
 * On PREEMPT_RT enabled kernels this maps to __napi_schedule()
 * because the interrupt-disabled assumption might not be true
 * due to force-threaded interrupts and spinlock substitution.
 */
6017 void __napi_schedule_irqoff(struct napi_struct *n)
6018 {
6019 if (!IS_ENABLED(CONFIG_PREEMPT_RT))
6020 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
6021 else
6022 __napi_schedule(n);
6023 }
6024 EXPORT_SYMBOL(__napi_schedule_irqoff);
6025
6026 bool napi_complete_done(struct napi_struct *n, int work_done)
6027 {
6028 unsigned long flags, val, new, timeout = 0;
6029 bool ret = true;
6030
	/* 1) Don't let napi dequeue from the cpu poll list
	 *    just in case it is running on a different cpu.
	 * 2) If we are busy polling, do nothing here; we have
	 *    the guarantee we will be called later.
	 */
6037 if (unlikely(n->state & (NAPIF_STATE_NPSVC |
6038 NAPIF_STATE_IN_BUSY_POLL)))
6039 return false;
6040
6041 if (work_done) {
6042 if (n->gro_bitmask)
6043 timeout = READ_ONCE(n->dev->gro_flush_timeout);
6044 n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs);
6045 }
6046 if (n->defer_hard_irqs_count > 0) {
6047 n->defer_hard_irqs_count--;
6048 timeout = READ_ONCE(n->dev->gro_flush_timeout);
6049 if (timeout)
6050 ret = false;
6051 }
6052 if (n->gro_bitmask) {
6053
6054
6055
6056
6057 napi_gro_flush(n, !!timeout);
6058 }
6059
6060 gro_normal_list(n);
6061
6062 if (unlikely(!list_empty(&n->poll_list))) {
6063
6064 local_irq_save(flags);
6065 list_del_init(&n->poll_list);
6066 local_irq_restore(flags);
6067 }
6068
6069 do {
6070 val = READ_ONCE(n->state);
6071
6072 WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
6073
6074 new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
6075 NAPIF_STATE_SCHED_THREADED |
6076 NAPIF_STATE_PREFER_BUSY_POLL);
6077
		/* If STATE_MISSED was set, leave STATE_SCHED set,
		 * because we will call napi->poll() one more time below.
		 */
6082 new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED *
6083 NAPIF_STATE_SCHED;
6084 } while (cmpxchg(&n->state, val, new) != val);
6085
6086 if (unlikely(val & NAPIF_STATE_MISSED)) {
6087 __napi_schedule(n);
6088 return false;
6089 }
6090
6091 if (timeout)
6092 hrtimer_start(&n->timer, ns_to_ktime(timeout),
6093 HRTIMER_MODE_REL_PINNED);
6094 return ret;
6095 }
6096 EXPORT_SYMBOL(napi_complete_done);
6097
6098
6099 static struct napi_struct *napi_by_id(unsigned int napi_id)
6100 {
6101 unsigned int hash = napi_id % HASH_SIZE(napi_hash);
6102 struct napi_struct *napi;
6103
6104 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
6105 if (napi->napi_id == napi_id)
6106 return napi;
6107
6108 return NULL;
6109 }
6110
6111 #if defined(CONFIG_NET_RX_BUSY_POLL)
6112
6113 static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
6114 {
6115 if (!skip_schedule) {
6116 gro_normal_list(napi);
6117 __napi_schedule(napi);
6118 return;
6119 }
6120
6121 if (napi->gro_bitmask) {
6122
6123
6124
6125 napi_gro_flush(napi, HZ >= 1000);
6126 }
6127
6128 gro_normal_list(napi);
6129 clear_bit(NAPI_STATE_SCHED, &napi->state);
6130 }
6131
6132 static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll,
6133 u16 budget)
6134 {
6135 bool skip_schedule = false;
6136 unsigned long timeout;
6137 int rc;
6138
	/* Busy polling means there is a high chance the device driver hard
	 * irq could not grab NAPI_STATE_SCHED, and that NAPI_STATE_MISSED was
	 * set in napi_schedule_prep().
	 * Since we are about to call napi->poll() once more, we can safely
	 * clear NAPI_STATE_MISSED.
	 */
6148 clear_bit(NAPI_STATE_MISSED, &napi->state);
6149 clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
6150
6151 local_bh_disable();
6152
6153 if (prefer_busy_poll) {
6154 napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
6155 timeout = READ_ONCE(napi->dev->gro_flush_timeout);
6156 if (napi->defer_hard_irqs_count && timeout) {
6157 hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
6158 skip_schedule = true;
6159 }
6160 }
6161
6162
6163
6164
6165 rc = napi->poll(napi, budget);
6166
6167
6168
6169
6170 trace_napi_poll(napi, rc, budget);
6171 netpoll_poll_unlock(have_poll_lock);
6172 if (rc == budget)
6173 __busy_poll_stop(napi, skip_schedule);
6174 local_bh_enable();
6175 }
6176
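/* Busy-poll the NAPI instance identified by @napi_id: repeatedly try to take
 * ownership of the context and run its ->poll() with @budget until @loop_end
 * reports that the caller is done, rescheduling or restarting as needed.
 */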
6177 void napi_busy_loop(unsigned int napi_id,
6178 bool (*loop_end)(void *, unsigned long),
6179 void *loop_end_arg, bool prefer_busy_poll, u16 budget)
6180 {
6181 unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
6182 int (*napi_poll)(struct napi_struct *napi, int budget);
6183 void *have_poll_lock = NULL;
6184 struct napi_struct *napi;
6185
6186 restart:
6187 napi_poll = NULL;
6188
6189 rcu_read_lock();
6190
6191 napi = napi_by_id(napi_id);
6192 if (!napi)
6193 goto out;
6194
6195 preempt_disable();
6196 for (;;) {
6197 int work = 0;
6198
6199 local_bh_disable();
6200 if (!napi_poll) {
6201 unsigned long val = READ_ONCE(napi->state);
6202
6203
6204
6205
6206 if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
6207 NAPIF_STATE_IN_BUSY_POLL)) {
6208 if (prefer_busy_poll)
6209 set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
6210 goto count;
6211 }
6212 if (cmpxchg(&napi->state, val,
6213 val | NAPIF_STATE_IN_BUSY_POLL |
6214 NAPIF_STATE_SCHED) != val) {
6215 if (prefer_busy_poll)
6216 set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
6217 goto count;
6218 }
6219 have_poll_lock = netpoll_poll_lock(napi);
6220 napi_poll = napi->poll;
6221 }
6222 work = napi_poll(napi, budget);
6223 trace_napi_poll(napi, work, budget);
6224 gro_normal_list(napi);
6225 count:
6226 if (work > 0)
6227 __NET_ADD_STATS(dev_net(napi->dev),
6228 LINUX_MIB_BUSYPOLLRXPACKETS, work);
6229 local_bh_enable();
6230
6231 if (!loop_end || loop_end(loop_end_arg, start_time))
6232 break;
6233
6234 if (unlikely(need_resched())) {
6235 if (napi_poll)
6236 busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
6237 preempt_enable();
6238 rcu_read_unlock();
6239 cond_resched();
6240 if (loop_end(loop_end_arg, start_time))
6241 return;
6242 goto restart;
6243 }
6244 cpu_relax();
6245 }
6246 if (napi_poll)
6247 busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
6248 preempt_enable();
6249 out:
6250 rcu_read_unlock();
6251 }
6252 EXPORT_SYMBOL(napi_busy_loop);
6253
6254 #endif
6255
6256 static void napi_hash_add(struct napi_struct *napi)
6257 {
6258 if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state))
6259 return;
6260
6261 spin_lock(&napi_hash_lock);
6262
6263
6264 do {
6265 if (unlikely(++napi_gen_id < MIN_NAPI_ID))
6266 napi_gen_id = MIN_NAPI_ID;
6267 } while (napi_by_id(napi_gen_id));
6268 napi->napi_id = napi_gen_id;
6269
6270 hlist_add_head_rcu(&napi->napi_hash_node,
6271 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
6272
6273 spin_unlock(&napi_hash_lock);
6274 }
6275
6276
6277
6278
6279 static void napi_hash_del(struct napi_struct *napi)
6280 {
6281 spin_lock(&napi_hash_lock);
6282
6283 hlist_del_init_rcu(&napi->napi_hash_node);
6284
6285 spin_unlock(&napi_hash_lock);
6286 }
6287
6288 static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
6289 {
6290 struct napi_struct *napi;
6291
6292 napi = container_of(timer, struct napi_struct, timer);
	/* Note: we use a relaxed variant of napi_schedule_prep(), not setting
	 * NAPI_STATE_MISSED, since we do not react to a device IRQ here.
	 */
6297 if (!napi_disable_pending(napi) &&
6298 !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) {
6299 clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
6300 __napi_schedule_irqoff(napi);
6301 }
6302
6303 return HRTIMER_NORESTART;
6304 }
6305
6306 static void init_gro_hash(struct napi_struct *napi)
6307 {
6308 int i;
6309
6310 for (i = 0; i < GRO_HASH_BUCKETS; i++) {
6311 INIT_LIST_HEAD(&napi->gro_hash[i].list);
6312 napi->gro_hash[i].count = 0;
6313 }
6314 napi->gro_bitmask = 0;
6315 }
6316
6317 int dev_set_threaded(struct net_device *dev, bool threaded)
6318 {
6319 struct napi_struct *napi;
6320 int err = 0;
6321
6322 if (dev->threaded == threaded)
6323 return 0;
6324
6325 if (threaded) {
6326 list_for_each_entry(napi, &dev->napi_list, dev_list) {
6327 if (!napi->thread) {
6328 err = napi_kthread_create(napi);
6329 if (err) {
6330 threaded = false;
6331 break;
6332 }
6333 }
6334 }
6335 }
6336
6337 dev->threaded = threaded;
6338
	/* Make sure the kthreads are created before the THREADED bit
	 * is set below.
	 */
6342 smp_mb__before_atomic();
6343
	/* Setting/unsetting threaded mode on a napi might not immediately
	 * take effect, if the current napi instance is actively being
	 * polled. In this case, the switch between threaded mode and
	 * softirq mode will happen in the next round of napi_schedule().
	 * This should not cause hiccups/stalls to the live traffic.
	 */
6350 list_for_each_entry(napi, &dev->napi_list, dev_list) {
6351 if (threaded)
6352 set_bit(NAPI_STATE_THREADED, &napi->state);
6353 else
6354 clear_bit(NAPI_STATE_THREADED, &napi->state);
6355 }
6356
6357 return err;
6358 }
6359 EXPORT_SYMBOL(dev_set_threaded);
6360
6361
6362
6363
6364
6365
6366
6367 static void napi_get_frags_check(struct napi_struct *napi)
6368 {
6369 struct sk_buff *skb;
6370
6371 local_bh_disable();
6372 skb = napi_get_frags(napi);
6373 WARN_ON_ONCE(skb && skb->head_frag);
6374 napi_free_frags(napi);
6375 local_bh_enable();
6376 }
6377
6378 void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
6379 int (*poll)(struct napi_struct *, int), int weight)
6380 {
6381 if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state)))
6382 return;
6383
6384 INIT_LIST_HEAD(&napi->poll_list);
6385 INIT_HLIST_NODE(&napi->napi_hash_node);
6386 hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
6387 napi->timer.function = napi_watchdog;
6388 init_gro_hash(napi);
6389 napi->skb = NULL;
6390 INIT_LIST_HEAD(&napi->rx_list);
6391 napi->rx_count = 0;
6392 napi->poll = poll;
6393 if (weight > NAPI_POLL_WEIGHT)
6394 netdev_err_once(dev, "%s() called with weight %d\n", __func__,
6395 weight);
6396 napi->weight = weight;
6397 napi->dev = dev;
6398 #ifdef CONFIG_NETPOLL
6399 napi->poll_owner = -1;
6400 #endif
6401 set_bit(NAPI_STATE_SCHED, &napi->state);
6402 set_bit(NAPI_STATE_NPSVC, &napi->state);
6403 list_add_rcu(&napi->dev_list, &dev->napi_list);
6404 napi_hash_add(napi);
6405 napi_get_frags_check(napi);
	/* Create the kthread for this napi if dev->threaded is set.
	 * Clear dev->threaded if kthread creation failed so that
	 * threaded mode will not be enabled in napi_enable().
	 */
6410 if (dev->threaded && napi_kthread_create(napi))
6411 dev->threaded = 0;
6412 }
6413 EXPORT_SYMBOL(netif_napi_add_weight);
6414
6415 void napi_disable(struct napi_struct *n)
6416 {
6417 unsigned long val, new;
6418
6419 might_sleep();
6420 set_bit(NAPI_STATE_DISABLE, &n->state);
6421
6422 for ( ; ; ) {
6423 val = READ_ONCE(n->state);
6424 if (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) {
6425 usleep_range(20, 200);
6426 continue;
6427 }
6428
6429 new = val | NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC;
6430 new &= ~(NAPIF_STATE_THREADED | NAPIF_STATE_PREFER_BUSY_POLL);
6431
6432 if (cmpxchg(&n->state, val, new) == val)
6433 break;
6434 }
6435
6436 hrtimer_cancel(&n->timer);
6437
6438 clear_bit(NAPI_STATE_DISABLE, &n->state);
6439 }
6440 EXPORT_SYMBOL(napi_disable);
6441
/**
 *	napi_enable - enable NAPI scheduling
 *	@n: NAPI context
 *
 * Resume NAPI from being scheduled on this context.
 * Must be paired with napi_disable().
 */
6449 void napi_enable(struct napi_struct *n)
6450 {
6451 unsigned long val, new;
6452
6453 do {
6454 val = READ_ONCE(n->state);
6455 BUG_ON(!test_bit(NAPI_STATE_SCHED, &val));
6456
6457 new = val & ~(NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC);
6458 if (n->dev->threaded && n->thread)
6459 new |= NAPIF_STATE_THREADED;
6460 } while (cmpxchg(&n->state, val, new) != val);
6461 }
6462 EXPORT_SYMBOL(napi_enable);
6463
6464 static void flush_gro_hash(struct napi_struct *napi)
6465 {
6466 int i;
6467
6468 for (i = 0; i < GRO_HASH_BUCKETS; i++) {
6469 struct sk_buff *skb, *n;
6470
6471 list_for_each_entry_safe(skb, n, &napi->gro_hash[i].list, list)
6472 kfree_skb(skb);
6473 napi->gro_hash[i].count = 0;
6474 }
6475 }
6476
6477
6478 void __netif_napi_del(struct napi_struct *napi)
6479 {
6480 if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
6481 return;
6482
6483 napi_hash_del(napi);
6484 list_del_rcu(&napi->dev_list);
6485 napi_free_frags(napi);
6486
6487 flush_gro_hash(napi);
6488 napi->gro_bitmask = 0;
6489
6490 if (napi->thread) {
6491 kthread_stop(napi->thread);
6492 napi->thread = NULL;
6493 }
6494 }
6495 EXPORT_SYMBOL(__netif_napi_del);
6496
6497 static int __napi_poll(struct napi_struct *n, bool *repoll)
6498 {
6499 int work, weight;
6500
6501 weight = n->weight;
6502
	/* This NAPI_STATE_SCHED test is for avoiding a race
	 * with netpoll's poll_napi().  Only the entity which
	 * obtains the lock and sees NAPI_STATE_SCHED set will
	 * actually make the ->poll() call.  Therefore we avoid
	 * accepting a state change while a poll is pending.
	 */
6509 work = 0;
6510 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
6511 work = n->poll(n, weight);
6512 trace_napi_poll(n, work, weight);
6513 }
6514
6515 if (unlikely(work > weight))
6516 netdev_err_once(n->dev, "NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
6517 n->poll, work, weight);
6518
6519 if (likely(work < weight))
6520 return work;
6521
6522
6523
6524
6525
6526
6527 if (unlikely(napi_disable_pending(n))) {
6528 napi_complete(n);
6529 return work;
6530 }
6531
6532
6533
6534
6535 if (napi_prefer_busy_poll(n)) {
6536 if (napi_complete_done(n, work)) {
6537
6538
6539
6540 napi_schedule(n);
6541 }
6542 return work;
6543 }
6544
6545 if (n->gro_bitmask) {
6546
6547
6548
6549 napi_gro_flush(n, HZ >= 1000);
6550 }
6551
6552 gro_normal_list(n);
6553
6554
6555
6556
6557 if (unlikely(!list_empty(&n->poll_list))) {
6558 pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
6559 n->dev ? n->dev->name : "backlog");
6560 return work;
6561 }
6562
6563 *repoll = true;
6564
6565 return work;
6566 }
6567
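/* Wrapper around __napi_poll() used by net_rx_action(): takes the netpoll
 * lock for the instance and re-queues it on @repoll when more work remains.
 */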
6568 static int napi_poll(struct napi_struct *n, struct list_head *repoll)
6569 {
6570 bool do_repoll = false;
6571 void *have;
6572 int work;
6573
6574 list_del_init(&n->poll_list);
6575
6576 have = netpoll_poll_lock(n);
6577
6578 work = __napi_poll(n, &do_repoll);
6579
6580 if (do_repoll)
6581 list_add_tail(&n->poll_list, repoll);
6582
6583 netpoll_poll_unlock(have);
6584
6585 return work;
6586 }
6587
6588 static int napi_thread_wait(struct napi_struct *napi)
6589 {
6590 bool woken = false;
6591
6592 set_current_state(TASK_INTERRUPTIBLE);
6593
6594 while (!kthread_should_stop()) {
		/* Test the SCHED_THREADED bit to make sure the current
		 * kthread owns this napi and may poll it.  Testing the SCHED
		 * bit alone is not enough because SCHED might be set by some
		 * other busy-poll thread or by napi_disable().
		 */
6600 if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) {
6601 WARN_ON(!list_empty(&napi->poll_list));
6602 __set_current_state(TASK_RUNNING);
6603 return 0;
6604 }
6605
6606 schedule();
6607
6608 woken = true;
6609 set_current_state(TASK_INTERRUPTIBLE);
6610 }
6611 __set_current_state(TASK_RUNNING);
6612
6613 return -1;
6614 }
6615
6616 static int napi_threaded_poll(void *data)
6617 {
6618 struct napi_struct *napi = data;
6619 void *have;
6620
6621 while (!napi_thread_wait(napi)) {
6622 for (;;) {
6623 bool repoll = false;
6624
6625 local_bh_disable();
6626
6627 have = netpoll_poll_lock(napi);
6628 __napi_poll(napi, &repoll);
6629 netpoll_poll_unlock(have);
6630
6631 local_bh_enable();
6632
6633 if (!repoll)
6634 break;
6635
6636 cond_resched();
6637 }
6638 }
6639 return 0;
6640 }
6641
6642 static void skb_defer_free_flush(struct softnet_data *sd)
6643 {
6644 struct sk_buff *skb, *next;
6645 unsigned long flags;
6646
6647
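	/* Lockless peek: an empty defer_list means there is nothing to free,
	 * so avoid taking defer_lock in the common case.
	 */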
6648 if (!READ_ONCE(sd->defer_list))
6649 return;
6650
6651 spin_lock_irqsave(&sd->defer_lock, flags);
6652 skb = sd->defer_list;
6653 sd->defer_list = NULL;
6654 sd->defer_count = 0;
6655 spin_unlock_irqrestore(&sd->defer_lock, flags);
6656
6657 while (skb != NULL) {
6658 next = skb->next;
6659 napi_consume_skb(skb, 1);
6660 skb = next;
6661 }
6662 }
6663
6664 static __latent_entropy void net_rx_action(struct softirq_action *h)
6665 {
6666 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
6667 unsigned long time_limit = jiffies +
6668 usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
6669 int budget = READ_ONCE(netdev_budget);
6670 LIST_HEAD(list);
6671 LIST_HEAD(repoll);
6672
6673 local_irq_disable();
6674 list_splice_init(&sd->poll_list, &list);
6675 local_irq_enable();
6676
6677 for (;;) {
6678 struct napi_struct *n;
6679
6680 skb_defer_free_flush(sd);
6681
6682 if (list_empty(&list)) {
6683 if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
6684 goto end;
6685 break;
6686 }
6687
6688 n = list_first_entry(&list, struct napi_struct, poll_list);
6689 budget -= napi_poll(n, &repoll);
		/* If the softirq window is exhausted (budget consumed or
		 * time limit reached) then punt; the remaining work will be
		 * handled in a later net_rx_action() run.
		 */
6695 if (unlikely(budget <= 0 ||
6696 time_after_eq(jiffies, time_limit))) {
6697 sd->time_squeeze++;
6698 break;
6699 }
6700 }
6701
6702 local_irq_disable();
6703
6704 list_splice_tail_init(&sd->poll_list, &list);
6705 list_splice_tail(&repoll, &list);
6706 list_splice(&list, &sd->poll_list);
6707 if (!list_empty(&sd->poll_list))
6708 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
6709
6710 net_rps_action_and_irq_enable(sd);
6711 end:;
6712 }
6713
6714 struct netdev_adjacent {
6715 struct net_device *dev;
6716 netdevice_tracker dev_tracker;

	/* upper master flag, there can only be one master device per list */
	bool master;

	/* lookup ignore flag */
	bool ignore;

	/* counter for the number of times this device was added to us */
	u16 ref_nr;

	/* private field for the users */
	void *private;
6729
6730 struct list_head list;
6731 struct rcu_head rcu;
6732 };
6733
6734 static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
6735 struct list_head *adj_list)
6736 {
6737 struct netdev_adjacent *adj;
6738
6739 list_for_each_entry(adj, adj_list, list) {
6740 if (adj->dev == adj_dev)
6741 return adj;
6742 }
6743 return NULL;
6744 }
6745
6746 static int ____netdev_has_upper_dev(struct net_device *upper_dev,
6747 struct netdev_nested_priv *priv)
6748 {
6749 struct net_device *dev = (struct net_device *)priv->data;
6750
6751 return upper_dev == dev;
6752 }
6753
/**
 * netdev_has_upper_dev - Check if device is linked to an upper device
 * @dev: device
 * @upper_dev: upper device to check
 *
 * Find out if a device is linked to the specified upper device and return
 * true in case it is. The check walks the entire chain of upper devices.
 * The caller must hold the RTNL lock.
 */
6763 bool netdev_has_upper_dev(struct net_device *dev,
6764 struct net_device *upper_dev)
6765 {
6766 struct netdev_nested_priv priv = {
6767 .data = (void *)upper_dev,
6768 };
6769
6770 ASSERT_RTNL();
6771
6772 return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
6773 &priv);
6774 }
6775 EXPORT_SYMBOL(netdev_has_upper_dev);
6776
6777
6778
6779
6780
6781
6782
6783
6784
6785
6786
6787 bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
6788 struct net_device *upper_dev)
6789 {
6790 struct netdev_nested_priv priv = {
6791 .data = (void *)upper_dev,
6792 };
6793
6794 return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
6795 &priv);
6796 }
6797 EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
6798
6799
6800
6801
6802
6803
6804
6805
6806 bool netdev_has_any_upper_dev(struct net_device *dev)
6807 {
6808 ASSERT_RTNL();
6809
6810 return !list_empty(&dev->adj_list.upper);
6811 }
6812 EXPORT_SYMBOL(netdev_has_any_upper_dev);
6813
/**
 * netdev_master_upper_dev_get - Get master upper device
 * @dev: device
 *
 * Find a master upper device and return a pointer to it, or NULL in case
 * there is none. The caller must hold the RTNL lock.
 */
6821 struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
6822 {
6823 struct netdev_adjacent *upper;
6824
6825 ASSERT_RTNL();
6826
6827 if (list_empty(&dev->adj_list.upper))
6828 return NULL;
6829
6830 upper = list_first_entry(&dev->adj_list.upper,
6831 struct netdev_adjacent, list);
6832 if (likely(upper->master))
6833 return upper->dev;
6834 return NULL;
6835 }
6836 EXPORT_SYMBOL(netdev_master_upper_dev_get);
6837
6838 static struct net_device *__netdev_master_upper_dev_get(struct net_device *dev)
6839 {
6840 struct netdev_adjacent *upper;
6841
6842 ASSERT_RTNL();
6843
6844 if (list_empty(&dev->adj_list.upper))
6845 return NULL;
6846
6847 upper = list_first_entry(&dev->adj_list.upper,
6848 struct netdev_adjacent, list);
6849 if (likely(upper->master) && !upper->ignore)
6850 return upper->dev;
6851 return NULL;
6852 }
6853
6854
6855
6856
6857
6858
6859
6860
6861 static bool netdev_has_any_lower_dev(struct net_device *dev)
6862 {
6863 ASSERT_RTNL();
6864
6865 return !list_empty(&dev->adj_list.lower);
6866 }
6867
6868 void *netdev_adjacent_get_private(struct list_head *adj_list)
6869 {
6870 struct netdev_adjacent *adj;
6871
6872 adj = list_entry(adj_list, struct netdev_adjacent, list);
6873
6874 return adj->private;
6875 }
6876 EXPORT_SYMBOL(netdev_adjacent_get_private);
6877
6878
6879
6880
6881
6882
6883
6884
6885
6886 struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
6887 struct list_head **iter)
6888 {
6889 struct netdev_adjacent *upper;
6890
6891 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
6892
6893 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
6894
6895 if (&upper->list == &dev->adj_list.upper)
6896 return NULL;
6897
6898 *iter = &upper->list;
6899
6900 return upper->dev;
6901 }
6902 EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
6903
6904 static struct net_device *__netdev_next_upper_dev(struct net_device *dev,
6905 struct list_head **iter,
6906 bool *ignore)
6907 {
6908 struct netdev_adjacent *upper;
6909
6910 upper = list_entry((*iter)->next, struct netdev_adjacent, list);
6911
6912 if (&upper->list == &dev->adj_list.upper)
6913 return NULL;
6914
6915 *iter = &upper->list;
6916 *ignore = upper->ignore;
6917
6918 return upper->dev;
6919 }
6920
6921 static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
6922 struct list_head **iter)
6923 {
6924 struct netdev_adjacent *upper;
6925
6926 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
6927
6928 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
6929
6930 if (&upper->list == &dev->adj_list.upper)
6931 return NULL;
6932
6933 *iter = &upper->list;
6934
6935 return upper->dev;
6936 }
6937
6938 static int __netdev_walk_all_upper_dev(struct net_device *dev,
6939 int (*fn)(struct net_device *dev,
6940 struct netdev_nested_priv *priv),
6941 struct netdev_nested_priv *priv)
6942 {
6943 struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
6944 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
6945 int ret, cur = 0;
6946 bool ignore;
6947
6948 now = dev;
6949 iter = &dev->adj_list.upper;
6950
6951 while (1) {
6952 if (now != dev) {
6953 ret = fn(now, priv);
6954 if (ret)
6955 return ret;
6956 }
6957
6958 next = NULL;
6959 while (1) {
6960 udev = __netdev_next_upper_dev(now, &iter, &ignore);
6961 if (!udev)
6962 break;
6963 if (ignore)
6964 continue;
6965
6966 next = udev;
6967 niter = &udev->adj_list.upper;
6968 dev_stack[cur] = now;
6969 iter_stack[cur++] = iter;
6970 break;
6971 }
6972
6973 if (!next) {
6974 if (!cur)
6975 return 0;
6976 next = dev_stack[--cur];
6977 niter = iter_stack[cur];
6978 }
6979
6980 now = next;
6981 iter = niter;
6982 }
6983
6984 return 0;
6985 }
6986
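/* Walk every device stacked above @dev (depth-first, using an explicit stack
 * bounded by MAX_NEST_DEV) and invoke @fn on each; a non-zero return from
 * @fn stops the walk and is propagated to the caller.
 */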
6987 int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
6988 int (*fn)(struct net_device *dev,
6989 struct netdev_nested_priv *priv),
6990 struct netdev_nested_priv *priv)
6991 {
6992 struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
6993 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
6994 int ret, cur = 0;
6995
6996 now = dev;
6997 iter = &dev->adj_list.upper;
6998
6999 while (1) {
7000 if (now != dev) {
7001 ret = fn(now, priv);
7002 if (ret)
7003 return ret;
7004 }
7005
7006 next = NULL;
7007 while (1) {
7008 udev = netdev_next_upper_dev_rcu(now, &iter);
7009 if (!udev)
7010 break;
7011
7012 next = udev;
7013 niter = &udev->adj_list.upper;
7014 dev_stack[cur] = now;
7015 iter_stack[cur++] = iter;
7016 break;
7017 }
7018
7019 if (!next) {
7020 if (!cur)
7021 return 0;
7022 next = dev_stack[--cur];
7023 niter = iter_stack[cur];
7024 }
7025
7026 now = next;
7027 iter = niter;
7028 }
7029
7030 return 0;
7031 }
7032 EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
7033
7034 static bool __netdev_has_upper_dev(struct net_device *dev,
7035 struct net_device *upper_dev)
7036 {
7037 struct netdev_nested_priv priv = {
7038 .flags = 0,
7039 .data = (void *)upper_dev,
7040 };
7041
7042 ASSERT_RTNL();
7043
7044 return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev,
7045 &priv);
7046 }
7047
7048
7049
7050
7051
7052
7053
7054
7055
7056
7057
7058
7059 void *netdev_lower_get_next_private(struct net_device *dev,
7060 struct list_head **iter)
7061 {
7062 struct netdev_adjacent *lower;
7063
7064 lower = list_entry(*iter, struct netdev_adjacent, list);
7065
7066 if (&lower->list == &dev->adj_list.lower)
7067 return NULL;
7068
7069 *iter = lower->list.next;
7070
7071 return lower->private;
7072 }
7073 EXPORT_SYMBOL(netdev_lower_get_next_private);
7074
7075
7076
7077
7078
7079
7080
7081
7082
7083
7084
7085 void *netdev_lower_get_next_private_rcu(struct net_device *dev,
7086 struct list_head **iter)
7087 {
7088 struct netdev_adjacent *lower;
7089
7090 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
7091
7092 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
7093
7094 if (&lower->list == &dev->adj_list.lower)
7095 return NULL;
7096
7097 *iter = &lower->list;
7098
7099 return lower->private;
7100 }
7101 EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
7102
7103
7104
7105
7106
7107
7108
7109
7110
7111
7112
7113
7114 void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
7115 {
7116 struct netdev_adjacent *lower;
7117
7118 lower = list_entry(*iter, struct netdev_adjacent, list);
7119
7120 if (&lower->list == &dev->adj_list.lower)
7121 return NULL;
7122
7123 *iter = lower->list.next;
7124
7125 return lower->dev;
7126 }
7127 EXPORT_SYMBOL(netdev_lower_get_next);
7128
7129 static struct net_device *netdev_next_lower_dev(struct net_device *dev,
7130 struct list_head **iter)
7131 {
7132 struct netdev_adjacent *lower;
7133
7134 lower = list_entry((*iter)->next, struct netdev_adjacent, list);
7135
7136 if (&lower->list == &dev->adj_list.lower)
7137 return NULL;
7138
7139 *iter = &lower->list;
7140
7141 return lower->dev;
7142 }
7143
7144 static struct net_device *__netdev_next_lower_dev(struct net_device *dev,
7145 struct list_head **iter,
7146 bool *ignore)
7147 {
7148 struct netdev_adjacent *lower;
7149
7150 lower = list_entry((*iter)->next, struct netdev_adjacent, list);
7151
7152 if (&lower->list == &dev->adj_list.lower)
7153 return NULL;
7154
7155 *iter = &lower->list;
7156 *ignore = lower->ignore;
7157
7158 return lower->dev;
7159 }
7160
7161 int netdev_walk_all_lower_dev(struct net_device *dev,
7162 int (*fn)(struct net_device *dev,
7163 struct netdev_nested_priv *priv),
7164 struct netdev_nested_priv *priv)
7165 {
7166 struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
7167 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
7168 int ret, cur = 0;
7169
7170 now = dev;
7171 iter = &dev->adj_list.lower;
7172
7173 while (1) {
7174 if (now != dev) {
7175 ret = fn(now, priv);
7176 if (ret)
7177 return ret;
7178 }
7179
7180 next = NULL;
7181 while (1) {
7182 ldev = netdev_next_lower_dev(now, &iter);
7183 if (!ldev)
7184 break;
7185
7186 next = ldev;
7187 niter = &ldev->adj_list.lower;
7188 dev_stack[cur] = now;
7189 iter_stack[cur++] = iter;
7190 break;
7191 }
7192
7193 if (!next) {
7194 if (!cur)
7195 return 0;
7196 next = dev_stack[--cur];
7197 niter = iter_stack[cur];
7198 }
7199
7200 now = next;
7201 iter = niter;
7202 }
7203
7204 return 0;
7205 }
7206 EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
7207
7208 static int __netdev_walk_all_lower_dev(struct net_device *dev,
7209 int (*fn)(struct net_device *dev,
7210 struct netdev_nested_priv *priv),
7211 struct netdev_nested_priv *priv)
7212 {
7213 struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
7214 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
7215 int ret, cur = 0;
7216 bool ignore;
7217
7218 now = dev;
7219 iter = &dev->adj_list.lower;
7220
7221 while (1) {
7222 if (now != dev) {
7223 ret = fn(now, priv);
7224 if (ret)
7225 return ret;
7226 }
7227
7228 next = NULL;
7229 while (1) {
7230 ldev = __netdev_next_lower_dev(now, &iter, &ignore);
7231 if (!ldev)
7232 break;
7233 if (ignore)
7234 continue;
7235
7236 next = ldev;
7237 niter = &ldev->adj_list.lower;
7238 dev_stack[cur] = now;
7239 iter_stack[cur++] = iter;
7240 break;
7241 }
7242
7243 if (!next) {
7244 if (!cur)
7245 return 0;
7246 next = dev_stack[--cur];
7247 niter = iter_stack[cur];
7248 }
7249
7250 now = next;
7251 iter = niter;
7252 }
7253
7254 return 0;
7255 }
7256
7257 struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
7258 struct list_head **iter)
7259 {
7260 struct netdev_adjacent *lower;
7261
7262 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
7263 if (&lower->list == &dev->adj_list.lower)
7264 return NULL;
7265
7266 *iter = &lower->list;
7267
7268 return lower->dev;
7269 }
7270 EXPORT_SYMBOL(netdev_next_lower_dev_rcu);
7271
7272 static u8 __netdev_upper_depth(struct net_device *dev)
7273 {
7274 struct net_device *udev;
7275 struct list_head *iter;
7276 u8 max_depth = 0;
7277 bool ignore;
7278
7279 for (iter = &dev->adj_list.upper,
7280 udev = __netdev_next_upper_dev(dev, &iter, &ignore);
7281 udev;
7282 udev = __netdev_next_upper_dev(dev, &iter, &ignore)) {
7283 if (ignore)
7284 continue;
7285 if (max_depth < udev->upper_level)
7286 max_depth = udev->upper_level;
7287 }
7288
7289 return max_depth;
7290 }
7291
7292 static u8 __netdev_lower_depth(struct net_device *dev)
7293 {
7294 struct net_device *ldev;
7295 struct list_head *iter;
7296 u8 max_depth = 0;
7297 bool ignore;
7298
7299 for (iter = &dev->adj_list.lower,
7300 ldev = __netdev_next_lower_dev(dev, &iter, &ignore);
7301 ldev;
7302 ldev = __netdev_next_lower_dev(dev, &iter, &ignore)) {
7303 if (ignore)
7304 continue;
7305 if (max_depth < ldev->lower_level)
7306 max_depth = ldev->lower_level;
7307 }
7308
7309 return max_depth;
7310 }
7311
7312 static int __netdev_update_upper_level(struct net_device *dev,
7313 struct netdev_nested_priv *__unused)
7314 {
7315 dev->upper_level = __netdev_upper_depth(dev) + 1;
7316 return 0;
7317 }
7318
7319 #ifdef CONFIG_LOCKDEP
7320 static LIST_HEAD(net_unlink_list);
7321
7322 static void net_unlink_todo(struct net_device *dev)
7323 {
7324 if (list_empty(&dev->unlink_list))
7325 list_add_tail(&dev->unlink_list, &net_unlink_list);
7326 }
7327 #endif
7328
7329 static int __netdev_update_lower_level(struct net_device *dev,
7330 struct netdev_nested_priv *priv)
7331 {
7332 dev->lower_level = __netdev_lower_depth(dev) + 1;
7333
7334 #ifdef CONFIG_LOCKDEP
7335 if (!priv)
7336 return 0;
7337
7338 if (priv->flags & NESTED_SYNC_IMM)
7339 dev->nested_level = dev->lower_level - 1;
7340 if (priv->flags & NESTED_SYNC_TODO)
7341 net_unlink_todo(dev);
7342 #endif
7343 return 0;
7344 }
7345
7346 int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
7347 int (*fn)(struct net_device *dev,
7348 struct netdev_nested_priv *priv),
7349 struct netdev_nested_priv *priv)
7350 {
7351 struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
7352 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
7353 int ret, cur = 0;
7354
7355 now = dev;
7356 iter = &dev->adj_list.lower;
7357
7358 while (1) {
7359 if (now != dev) {
7360 ret = fn(now, priv);
7361 if (ret)
7362 return ret;
7363 }
7364
7365 next = NULL;
7366 while (1) {
7367 ldev = netdev_next_lower_dev_rcu(now, &iter);
7368 if (!ldev)
7369 break;
7370
7371 next = ldev;
7372 niter = &ldev->adj_list.lower;
7373 dev_stack[cur] = now;
7374 iter_stack[cur++] = iter;
7375 break;
7376 }
7377
7378 if (!next) {
7379 if (!cur)
7380 return 0;
7381 next = dev_stack[--cur];
7382 niter = iter_stack[cur];
7383 }
7384
7385 now = next;
7386 iter = niter;
7387 }
7388
7389 return 0;
7390 }
7391 EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev_rcu);
7392
7393
7394
7395
7396
7397
7398
7399
7400
7401
7402 void *netdev_lower_get_first_private_rcu(struct net_device *dev)
7403 {
7404 struct netdev_adjacent *lower;
7405
7406 lower = list_first_or_null_rcu(&dev->adj_list.lower,
7407 struct netdev_adjacent, list);
7408 if (lower)
7409 return lower->private;
7410 return NULL;
7411 }
7412 EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
7413
7414
7415
7416
7417
7418
7419
7420
7421 struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
7422 {
7423 struct netdev_adjacent *upper;
7424
7425 upper = list_first_or_null_rcu(&dev->adj_list.upper,
7426 struct netdev_adjacent, list);
7427 if (upper && likely(upper->master))
7428 return upper->dev;
7429 return NULL;
7430 }
7431 EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
7432
7433 static int netdev_adjacent_sysfs_add(struct net_device *dev,
7434 struct net_device *adj_dev,
7435 struct list_head *dev_list)
7436 {
7437 char linkname[IFNAMSIZ+7];
7438
7439 sprintf(linkname, dev_list == &dev->adj_list.upper ?
7440 "upper_%s" : "lower_%s", adj_dev->name);
7441 return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
7442 linkname);
7443 }
7444 static void netdev_adjacent_sysfs_del(struct net_device *dev,
7445 char *name,
7446 struct list_head *dev_list)
7447 {
7448 char linkname[IFNAMSIZ+7];
7449
7450 sprintf(linkname, dev_list == &dev->adj_list.upper ?
7451 "upper_%s" : "lower_%s", name);
7452 sysfs_remove_link(&(dev->dev.kobj), linkname);
7453 }
7454
7455 static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
7456 struct net_device *adj_dev,
7457 struct list_head *dev_list)
7458 {
7459 return (dev_list == &dev->adj_list.upper ||
7460 dev_list == &dev->adj_list.lower) &&
7461 net_eq(dev_net(dev), dev_net(adj_dev));
7462 }
7463
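/* Record that @adj_dev is adjacent to @dev on @dev_list. Existing entries are
 * reference counted; a new entry takes a device reference and, for neighbour
 * lists in the same netns, gets an "upper_<name>"/"lower_<name>" sysfs link
 * (plus a "master" link for master uppers).
 */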
7464 static int __netdev_adjacent_dev_insert(struct net_device *dev,
7465 struct net_device *adj_dev,
7466 struct list_head *dev_list,
7467 void *private, bool master)
7468 {
7469 struct netdev_adjacent *adj;
7470 int ret;
7471
7472 adj = __netdev_find_adj(adj_dev, dev_list);
7473
7474 if (adj) {
7475 adj->ref_nr += 1;
7476 pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d\n",
7477 dev->name, adj_dev->name, adj->ref_nr);
7478
7479 return 0;
7480 }
7481
7482 adj = kmalloc(sizeof(*adj), GFP_KERNEL);
7483 if (!adj)
7484 return -ENOMEM;
7485
7486 adj->dev = adj_dev;
7487 adj->master = master;
7488 adj->ref_nr = 1;
7489 adj->private = private;
7490 adj->ignore = false;
7491 netdev_hold(adj_dev, &adj->dev_tracker, GFP_KERNEL);
7492
7493 pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
7494 dev->name, adj_dev->name, adj->ref_nr, adj_dev->name);
7495
7496 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
7497 ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
7498 if (ret)
7499 goto free_adj;
7500 }
7501
7502
7503 if (master) {
7504 ret = sysfs_create_link(&(dev->dev.kobj),
7505 &(adj_dev->dev.kobj), "master");
7506 if (ret)
7507 goto remove_symlinks;
7508
7509 list_add_rcu(&adj->list, dev_list);
7510 } else {
7511 list_add_tail_rcu(&adj->list, dev_list);
7512 }
7513
7514 return 0;
7515
7516 remove_symlinks:
7517 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
7518 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
7519 free_adj:
7520 netdev_put(adj_dev, &adj->dev_tracker);
7521 kfree(adj);
7522
7523 return ret;
7524 }
7525
7526 static void __netdev_adjacent_dev_remove(struct net_device *dev,
7527 struct net_device *adj_dev,
7528 u16 ref_nr,
7529 struct list_head *dev_list)
7530 {
7531 struct netdev_adjacent *adj;
7532
7533 pr_debug("Remove adjacency: dev %s adj_dev %s ref_nr %d\n",
7534 dev->name, adj_dev->name, ref_nr);
7535
7536 adj = __netdev_find_adj(adj_dev, dev_list);
7537
7538 if (!adj) {
7539 pr_err("Adjacency does not exist for device %s from %s\n",
7540 dev->name, adj_dev->name);
7541 WARN_ON(1);
7542 return;
7543 }
7544
7545 if (adj->ref_nr > ref_nr) {
7546 pr_debug("adjacency: %s to %s ref_nr - %d = %d\n",
7547 dev->name, adj_dev->name, ref_nr,
7548 adj->ref_nr - ref_nr);
7549 adj->ref_nr -= ref_nr;
7550 return;
7551 }
7552
7553 if (adj->master)
7554 sysfs_remove_link(&(dev->dev.kobj), "master");
7555
7556 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
7557 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
7558
7559 list_del_rcu(&adj->list);
7560 pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n",
7561 adj_dev->name, dev->name, adj_dev->name);
7562 netdev_put(adj_dev, &adj->dev_tracker);
7563 kfree_rcu(adj, rcu);
7564 }
7565
7566 static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
7567 struct net_device *upper_dev,
7568 struct list_head *up_list,
7569 struct list_head *down_list,
7570 void *private, bool master)
7571 {
7572 int ret;
7573
7574 ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list,
7575 private, master);
7576 if (ret)
7577 return ret;
7578
7579 ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list,
7580 private, false);
7581 if (ret) {
7582 __netdev_adjacent_dev_remove(dev, upper_dev, 1, up_list);
7583 return ret;
7584 }
7585
7586 return 0;
7587 }
7588
7589 static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
7590 struct net_device *upper_dev,
7591 u16 ref_nr,
7592 struct list_head *up_list,
7593 struct list_head *down_list)
7594 {
7595 __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
7596 __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list);
7597 }
7598
7599 static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
7600 struct net_device *upper_dev,
7601 void *private, bool master)
7602 {
7603 return __netdev_adjacent_dev_link_lists(dev, upper_dev,
7604 &dev->adj_list.upper,
7605 &upper_dev->adj_list.lower,
7606 private, master);
7607 }
7608
7609 static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
7610 struct net_device *upper_dev)
7611 {
7612 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1,
7613 &dev->adj_list.upper,
7614 &upper_dev->adj_list.lower);
7615 }
7616
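/* Core of upper/lower linking: validate that the link creates no loop and
 * does not exceed MAX_NEST_DEV, fire NETDEV_PRECHANGEUPPER, insert the
 * adjacency entries, fire NETDEV_CHANGEUPPER and recompute the nesting depth
 * of both devices, rolling the adjacency back if a notifier objects.
 */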
7617 static int __netdev_upper_dev_link(struct net_device *dev,
7618 struct net_device *upper_dev, bool master,
7619 void *upper_priv, void *upper_info,
7620 struct netdev_nested_priv *priv,
7621 struct netlink_ext_ack *extack)
7622 {
7623 struct netdev_notifier_changeupper_info changeupper_info = {
7624 .info = {
7625 .dev = dev,
7626 .extack = extack,
7627 },
7628 .upper_dev = upper_dev,
7629 .master = master,
7630 .linking = true,
7631 .upper_info = upper_info,
7632 };
7633 struct net_device *master_dev;
7634 int ret = 0;
7635
7636 ASSERT_RTNL();
7637
7638 if (dev == upper_dev)
7639 return -EBUSY;
7640
7641
7642 if (__netdev_has_upper_dev(upper_dev, dev))
7643 return -EBUSY;
7644
7645 if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV)
7646 return -EMLINK;
7647
7648 if (!master) {
7649 if (__netdev_has_upper_dev(dev, upper_dev))
7650 return -EEXIST;
7651 } else {
7652 master_dev = __netdev_master_upper_dev_get(dev);
7653 if (master_dev)
7654 return master_dev == upper_dev ? -EEXIST : -EBUSY;
7655 }
7656
7657 ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
7658 &changeupper_info.info);
7659 ret = notifier_to_errno(ret);
7660 if (ret)
7661 return ret;
7662
7663 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv,
7664 master);
7665 if (ret)
7666 return ret;
7667
7668 ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
7669 &changeupper_info.info);
7670 ret = notifier_to_errno(ret);
7671 if (ret)
7672 goto rollback;
7673
7674 __netdev_update_upper_level(dev, NULL);
7675 __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
7676
7677 __netdev_update_lower_level(upper_dev, priv);
7678 __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
7679 priv);
7680
7681 return 0;
7682
7683 rollback:
7684 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
7685
7686 return ret;
7687 }
7688
7689
7690
7691
7692
7693
7694
7695
7696
7697
7698
7699
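/**
 * netdev_upper_dev_link - Add a link to the upper device
 * @dev: device
 * @upper_dev: new upper device
 * @extack: netlink extended ack
 *
 * Adds a link to a device which is upper to this one. The caller must hold
 * the RTNL lock. Returns zero on success or a negative errno code on
 * failure (e.g. -EBUSY for loops, -EMLINK when nesting too deep).
 */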
7700 int netdev_upper_dev_link(struct net_device *dev,
7701 struct net_device *upper_dev,
7702 struct netlink_ext_ack *extack)
7703 {
7704 struct netdev_nested_priv priv = {
7705 .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
7706 .data = NULL,
7707 };
7708
7709 return __netdev_upper_dev_link(dev, upper_dev, false,
7710 NULL, NULL, &priv, extack);
7711 }
7712 EXPORT_SYMBOL(netdev_upper_dev_link);
7713
7714
7715
7716
7717
7718
7719
7720
7721
7722
7723
7724
7725
7726
7727
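/**
 * netdev_master_upper_dev_link - Add a master link to the upper device
 * @dev: device
 * @upper_dev: new upper device
 * @upper_priv: private data to stash in the adjacency entry
 * @upper_info: upper info passed down in the CHANGEUPPER notification
 * @extack: netlink extended ack
 *
 * Like netdev_upper_dev_link(), but marks @upper_dev as the master of
 * @dev; a device can have at most one master. The caller must hold the
 * RTNL lock. Returns zero on success or a negative errno code.
 */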
7728 int netdev_master_upper_dev_link(struct net_device *dev,
7729 struct net_device *upper_dev,
7730 void *upper_priv, void *upper_info,
7731 struct netlink_ext_ack *extack)
7732 {
7733 struct netdev_nested_priv priv = {
7734 .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
7735 .data = NULL,
7736 };
7737
7738 return __netdev_upper_dev_link(dev, upper_dev, true,
7739 upper_priv, upper_info, &priv, extack);
7740 }
7741 EXPORT_SYMBOL(netdev_master_upper_dev_link);
7742
7743 static void __netdev_upper_dev_unlink(struct net_device *dev,
7744 struct net_device *upper_dev,
7745 struct netdev_nested_priv *priv)
7746 {
7747 struct netdev_notifier_changeupper_info changeupper_info = {
7748 .info = {
7749 .dev = dev,
7750 },
7751 .upper_dev = upper_dev,
7752 .linking = false,
7753 };
7754
7755 ASSERT_RTNL();
7756
7757 changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
7758
7759 call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
7760 &changeupper_info.info);
7761
7762 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
7763
7764 call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
7765 &changeupper_info.info);
7766
7767 __netdev_update_upper_level(dev, NULL);
7768 __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
7769
7770 __netdev_update_lower_level(upper_dev, priv);
7771 __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
7772 priv);
7773 }
7774
7775
7776
7777
7778
7779
7780
7781
7782
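/**
 * netdev_upper_dev_unlink - Remove a link to an upper device
 * @dev: device
 * @upper_dev: upper device to unlink
 *
 * Removes the link between @dev and @upper_dev and notifies the
 * CHANGEUPPER chain. The caller must hold the RTNL lock.
 */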
7783 void netdev_upper_dev_unlink(struct net_device *dev,
7784 struct net_device *upper_dev)
7785 {
7786 struct netdev_nested_priv priv = {
7787 .flags = NESTED_SYNC_TODO,
7788 .data = NULL,
7789 };
7790
7791 __netdev_upper_dev_unlink(dev, upper_dev, &priv);
7792 }
7793 EXPORT_SYMBOL(netdev_upper_dev_unlink);
7794
7795 static void __netdev_adjacent_dev_set(struct net_device *upper_dev,
7796 struct net_device *lower_dev,
7797 bool val)
7798 {
7799 struct netdev_adjacent *adj;
7800
7801 adj = __netdev_find_adj(lower_dev, &upper_dev->adj_list.lower);
7802 if (adj)
7803 adj->ignore = val;
7804
7805 adj = __netdev_find_adj(upper_dev, &lower_dev->adj_list.upper);
7806 if (adj)
7807 adj->ignore = val;
7808 }
7809
7810 static void netdev_adjacent_dev_disable(struct net_device *upper_dev,
7811 struct net_device *lower_dev)
7812 {
7813 __netdev_adjacent_dev_set(upper_dev, lower_dev, true);
7814 }
7815
7816 static void netdev_adjacent_dev_enable(struct net_device *upper_dev,
7817 struct net_device *lower_dev)
7818 {
7819 __netdev_adjacent_dev_set(upper_dev, lower_dev, false);
7820 }
7821
7822 int netdev_adjacent_change_prepare(struct net_device *old_dev,
7823 struct net_device *new_dev,
7824 struct net_device *dev,
7825 struct netlink_ext_ack *extack)
7826 {
7827 struct netdev_nested_priv priv = {
7828 .flags = 0,
7829 .data = NULL,
7830 };
7831 int err;
7832
7833 if (!new_dev)
7834 return 0;
7835
7836 if (old_dev && new_dev != old_dev)
7837 netdev_adjacent_dev_disable(dev, old_dev);
7838 err = __netdev_upper_dev_link(new_dev, dev, false, NULL, NULL, &priv,
7839 extack);
7840 if (err) {
7841 if (old_dev && new_dev != old_dev)
7842 netdev_adjacent_dev_enable(dev, old_dev);
7843 return err;
7844 }
7845
7846 return 0;
7847 }
7848 EXPORT_SYMBOL(netdev_adjacent_change_prepare);
7849
7850 void netdev_adjacent_change_commit(struct net_device *old_dev,
7851 struct net_device *new_dev,
7852 struct net_device *dev)
7853 {
7854 struct netdev_nested_priv priv = {
7855 .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
7856 .data = NULL,
7857 };
7858
7859 if (!new_dev || !old_dev)
7860 return;
7861
7862 if (new_dev == old_dev)
7863 return;
7864
7865 netdev_adjacent_dev_enable(dev, old_dev);
7866 __netdev_upper_dev_unlink(old_dev, dev, &priv);
7867 }
7868 EXPORT_SYMBOL(netdev_adjacent_change_commit);
7869
7870 void netdev_adjacent_change_abort(struct net_device *old_dev,
7871 struct net_device *new_dev,
7872 struct net_device *dev)
7873 {
7874 struct netdev_nested_priv priv = {
7875 .flags = 0,
7876 .data = NULL,
7877 };
7878
7879 if (!new_dev)
7880 return;
7881
7882 if (old_dev && new_dev != old_dev)
7883 netdev_adjacent_dev_enable(dev, old_dev);
7884
7885 __netdev_upper_dev_unlink(new_dev, dev, &priv);
7886 }
7887 EXPORT_SYMBOL(netdev_adjacent_change_abort);
7888
7889
7890
7891
7892
7893
7894
7895
7896
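/**
 * netdev_bonding_info_change - Dispatch an event about a bonding slave change
 * @dev: device
 * @bonding_info: info to dispatch
 *
 * Sends NETDEV_BONDING_INFO to the netdev notifier chain with a copy of
 * @bonding_info.
 */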
7897 void netdev_bonding_info_change(struct net_device *dev,
7898 struct netdev_bonding_info *bonding_info)
7899 {
7900 struct netdev_notifier_bonding_info info = {
7901 .info.dev = dev,
7902 };
7903
7904 memcpy(&info.bonding_info, bonding_info,
7905 sizeof(struct netdev_bonding_info));
7906 call_netdevice_notifiers_info(NETDEV_BONDING_INFO,
7907 &info.info);
7908 }
7909 EXPORT_SYMBOL(netdev_bonding_info_change);
7910
7911 static int netdev_offload_xstats_enable_l3(struct net_device *dev,
7912 struct netlink_ext_ack *extack)
7913 {
7914 struct netdev_notifier_offload_xstats_info info = {
7915 .info.dev = dev,
7916 .info.extack = extack,
7917 .type = NETDEV_OFFLOAD_XSTATS_TYPE_L3,
7918 };
7919 int err;
7920 int rc;
7921
7922 dev->offload_xstats_l3 = kzalloc(sizeof(*dev->offload_xstats_l3),
7923 GFP_KERNEL);
7924 if (!dev->offload_xstats_l3)
7925 return -ENOMEM;
7926
7927 rc = call_netdevice_notifiers_info_robust(NETDEV_OFFLOAD_XSTATS_ENABLE,
7928 NETDEV_OFFLOAD_XSTATS_DISABLE,
7929 &info.info);
7930 err = notifier_to_errno(rc);
7931 if (err)
7932 goto free_stats;
7933
7934 return 0;
7935
7936 free_stats:
7937 kfree(dev->offload_xstats_l3);
7938 dev->offload_xstats_l3 = NULL;
7939 return err;
7940 }
7941
7942 int netdev_offload_xstats_enable(struct net_device *dev,
7943 enum netdev_offload_xstats_type type,
7944 struct netlink_ext_ack *extack)
7945 {
7946 ASSERT_RTNL();
7947
7948 if (netdev_offload_xstats_enabled(dev, type))
7949 return -EALREADY;
7950
7951 switch (type) {
7952 case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
7953 return netdev_offload_xstats_enable_l3(dev, extack);
7954 }
7955
7956 WARN_ON(1);
7957 return -EINVAL;
7958 }
7959 EXPORT_SYMBOL(netdev_offload_xstats_enable);
7960
7961 static void netdev_offload_xstats_disable_l3(struct net_device *dev)
7962 {
7963 struct netdev_notifier_offload_xstats_info info = {
7964 .info.dev = dev,
7965 .type = NETDEV_OFFLOAD_XSTATS_TYPE_L3,
7966 };
7967
7968 call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_DISABLE,
7969 &info.info);
7970 kfree(dev->offload_xstats_l3);
7971 dev->offload_xstats_l3 = NULL;
7972 }
7973
7974 int netdev_offload_xstats_disable(struct net_device *dev,
7975 enum netdev_offload_xstats_type type)
7976 {
7977 ASSERT_RTNL();
7978
7979 if (!netdev_offload_xstats_enabled(dev, type))
7980 return -EALREADY;
7981
7982 switch (type) {
7983 case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
7984 netdev_offload_xstats_disable_l3(dev);
7985 return 0;
7986 }
7987
7988 WARN_ON(1);
7989 return -EINVAL;
7990 }
7991 EXPORT_SYMBOL(netdev_offload_xstats_disable);
7992
7993 static void netdev_offload_xstats_disable_all(struct net_device *dev)
7994 {
7995 netdev_offload_xstats_disable(dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3);
7996 }
7997
7998 static struct rtnl_hw_stats64 *
7999 netdev_offload_xstats_get_ptr(const struct net_device *dev,
8000 enum netdev_offload_xstats_type type)
8001 {
8002 switch (type) {
8003 case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
8004 return dev->offload_xstats_l3;
8005 }
8006
8007 WARN_ON(1);
8008 return NULL;
8009 }
8010
8011 bool netdev_offload_xstats_enabled(const struct net_device *dev,
8012 enum netdev_offload_xstats_type type)
8013 {
8014 ASSERT_RTNL();
8015
8016 return netdev_offload_xstats_get_ptr(dev, type);
8017 }
8018 EXPORT_SYMBOL(netdev_offload_xstats_enabled);
8019
8020 struct netdev_notifier_offload_xstats_ru {
8021 bool used;
8022 };
8023
8024 struct netdev_notifier_offload_xstats_rd {
8025 struct rtnl_hw_stats64 stats;
8026 bool used;
8027 };
8028
8029 static void netdev_hw_stats64_add(struct rtnl_hw_stats64 *dest,
8030 const struct rtnl_hw_stats64 *src)
8031 {
8032 dest->rx_packets += src->rx_packets;
8033 dest->tx_packets += src->tx_packets;
8034 dest->rx_bytes += src->rx_bytes;
8035 dest->tx_bytes += src->tx_bytes;
8036 dest->rx_errors += src->rx_errors;
8037 dest->tx_errors += src->tx_errors;
8038 dest->rx_dropped += src->rx_dropped;
8039 dest->tx_dropped += src->tx_dropped;
8040 dest->multicast += src->multicast;
8041 }
8042
8043 static int netdev_offload_xstats_get_used(struct net_device *dev,
8044 enum netdev_offload_xstats_type type,
8045 bool *p_used,
8046 struct netlink_ext_ack *extack)
8047 {
8048 struct netdev_notifier_offload_xstats_ru report_used = {};
8049 struct netdev_notifier_offload_xstats_info info = {
8050 .info.dev = dev,
8051 .info.extack = extack,
8052 .type = type,
8053 .report_used = &report_used,
8054 };
8055 int rc;
8056
8057 WARN_ON(!netdev_offload_xstats_enabled(dev, type));
8058 rc = call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_REPORT_USED,
8059 &info.info);
8060 *p_used = report_used.used;
8061 return notifier_to_errno(rc);
8062 }
8063
8064 static int netdev_offload_xstats_get_stats(struct net_device *dev,
8065 enum netdev_offload_xstats_type type,
8066 struct rtnl_hw_stats64 *p_stats,
8067 bool *p_used,
8068 struct netlink_ext_ack *extack)
8069 {
8070 struct netdev_notifier_offload_xstats_rd report_delta = {};
8071 struct netdev_notifier_offload_xstats_info info = {
8072 .info.dev = dev,
8073 .info.extack = extack,
8074 .type = type,
8075 .report_delta = &report_delta,
8076 };
8077 struct rtnl_hw_stats64 *stats;
8078 int rc;
8079
8080 stats = netdev_offload_xstats_get_ptr(dev, type);
8081 if (WARN_ON(!stats))
8082 return -EINVAL;
8083
8084 rc = call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_REPORT_DELTA,
8085 &info.info);
8086
8087 /* Cache whatever the drivers reported even if a notifier returned
8088  * an error, so that deltas already collected are not lost.
8089  */
8090 netdev_hw_stats64_add(stats, &report_delta.stats);
8091
8092 if (p_stats)
8093 *p_stats = *stats;
8094 *p_used = report_delta.used;
8095
8096 return notifier_to_errno(rc);
8097 }
8098
8099 int netdev_offload_xstats_get(struct net_device *dev,
8100 enum netdev_offload_xstats_type type,
8101 struct rtnl_hw_stats64 *p_stats, bool *p_used,
8102 struct netlink_ext_ack *extack)
8103 {
8104 ASSERT_RTNL();
8105
8106 if (p_stats)
8107 return netdev_offload_xstats_get_stats(dev, type, p_stats,
8108 p_used, extack);
8109 else
8110 return netdev_offload_xstats_get_used(dev, type, p_used,
8111 extack);
8112 }
8113 EXPORT_SYMBOL(netdev_offload_xstats_get);
8114
8115 void
8116 netdev_offload_xstats_report_delta(struct netdev_notifier_offload_xstats_rd *report_delta,
8117 const struct rtnl_hw_stats64 *stats)
8118 {
8119 report_delta->used = true;
8120 netdev_hw_stats64_add(&report_delta->stats, stats);
8121 }
8122 EXPORT_SYMBOL(netdev_offload_xstats_report_delta);
8123
8124 void
8125 netdev_offload_xstats_report_used(struct netdev_notifier_offload_xstats_ru *report_used)
8126 {
8127 report_used->used = true;
8128 }
8129 EXPORT_SYMBOL(netdev_offload_xstats_report_used);
8130
8131 void netdev_offload_xstats_push_delta(struct net_device *dev,
8132 enum netdev_offload_xstats_type type,
8133 const struct rtnl_hw_stats64 *p_stats)
8134 {
8135 struct rtnl_hw_stats64 *stats;
8136
8137 ASSERT_RTNL();
8138
8139 stats = netdev_offload_xstats_get_ptr(dev, type);
8140 if (WARN_ON(!stats))
8141 return;
8142
8143 netdev_hw_stats64_add(stats, p_stats);
8144 }
8145 EXPORT_SYMBOL(netdev_offload_xstats_push_delta);
8146
8147
8148
8149
8150
8151
8152
8153
8154
8155
8156
8157
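/**
 * netdev_get_xmit_slave - Get the xmit slave of a master device
 * @dev: master device
 * @skb: the packet about to be transmitted
 * @all_slaves: whether all slaves should be considered, not only usable ones
 *
 * Returns %NULL if no slave is found or if the driver does not implement
 * ndo_get_xmit_slave.
 */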
8158 struct net_device *netdev_get_xmit_slave(struct net_device *dev,
8159 struct sk_buff *skb,
8160 bool all_slaves)
8161 {
8162 const struct net_device_ops *ops = dev->netdev_ops;
8163
8164 if (!ops->ndo_get_xmit_slave)
8165 return NULL;
8166 return ops->ndo_get_xmit_slave(dev, skb, all_slaves);
8167 }
8168 EXPORT_SYMBOL(netdev_get_xmit_slave);
8169
8170 static struct net_device *netdev_sk_get_lower_dev(struct net_device *dev,
8171 struct sock *sk)
8172 {
8173 const struct net_device_ops *ops = dev->netdev_ops;
8174
8175 if (!ops->ndo_sk_get_lower_dev)
8176 return NULL;
8177 return ops->ndo_sk_get_lower_dev(dev, sk);
8178 }
8179
8180
8181
8182
8183
8184
8185
8186
8187
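/**
 * netdev_sk_get_lowest_dev - Get the lowest device in a chain for a socket
 * @dev: device
 * @sk: the socket
 *
 * Walks ndo_sk_get_lower_dev() repeatedly and returns the lowest device
 * found; if there is no lower device for @sk, @dev itself is returned,
 * so the result is never %NULL.
 */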
8188 struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev,
8189 struct sock *sk)
8190 {
8191 struct net_device *lower;
8192
8193 lower = netdev_sk_get_lower_dev(dev, sk);
8194 while (lower) {
8195 dev = lower;
8196 lower = netdev_sk_get_lower_dev(dev, sk);
8197 }
8198
8199 return dev;
8200 }
8201 EXPORT_SYMBOL(netdev_sk_get_lowest_dev);
8202
8203 static void netdev_adjacent_add_links(struct net_device *dev)
8204 {
8205 struct netdev_adjacent *iter;
8206
8207 struct net *net = dev_net(dev);
8208
8209 list_for_each_entry(iter, &dev->adj_list.upper, list) {
8210 if (!net_eq(net, dev_net(iter->dev)))
8211 continue;
8212 netdev_adjacent_sysfs_add(iter->dev, dev,
8213 &iter->dev->adj_list.lower);
8214 netdev_adjacent_sysfs_add(dev, iter->dev,
8215 &dev->adj_list.upper);
8216 }
8217
8218 list_for_each_entry(iter, &dev->adj_list.lower, list) {
8219 if (!net_eq(net, dev_net(iter->dev)))
8220 continue;
8221 netdev_adjacent_sysfs_add(iter->dev, dev,
8222 &iter->dev->adj_list.upper);
8223 netdev_adjacent_sysfs_add(dev, iter->dev,
8224 &dev->adj_list.lower);
8225 }
8226 }
8227
8228 static void netdev_adjacent_del_links(struct net_device *dev)
8229 {
8230 struct netdev_adjacent *iter;
8231
8232 struct net *net = dev_net(dev);
8233
8234 list_for_each_entry(iter, &dev->adj_list.upper, list) {
8235 if (!net_eq(net, dev_net(iter->dev)))
8236 continue;
8237 netdev_adjacent_sysfs_del(iter->dev, dev->name,
8238 &iter->dev->adj_list.lower);
8239 netdev_adjacent_sysfs_del(dev, iter->dev->name,
8240 &dev->adj_list.upper);
8241 }
8242
8243 list_for_each_entry(iter, &dev->adj_list.lower, list) {
8244 if (!net_eq(net, dev_net(iter->dev)))
8245 continue;
8246 netdev_adjacent_sysfs_del(iter->dev, dev->name,
8247 &iter->dev->adj_list.upper);
8248 netdev_adjacent_sysfs_del(dev, iter->dev->name,
8249 &dev->adj_list.lower);
8250 }
8251 }
8252
8253 void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
8254 {
8255 struct netdev_adjacent *iter;
8256
8257 struct net *net = dev_net(dev);
8258
8259 list_for_each_entry(iter, &dev->adj_list.upper, list) {
8260 if (!net_eq(net, dev_net(iter->dev)))
8261 continue;
8262 netdev_adjacent_sysfs_del(iter->dev, oldname,
8263 &iter->dev->adj_list.lower);
8264 netdev_adjacent_sysfs_add(iter->dev, dev,
8265 &iter->dev->adj_list.lower);
8266 }
8267
8268 list_for_each_entry(iter, &dev->adj_list.lower, list) {
8269 if (!net_eq(net, dev_net(iter->dev)))
8270 continue;
8271 netdev_adjacent_sysfs_del(iter->dev, oldname,
8272 &iter->dev->adj_list.upper);
8273 netdev_adjacent_sysfs_add(iter->dev, dev,
8274 &iter->dev->adj_list.upper);
8275 }
8276 }
8277
8278 void *netdev_lower_dev_get_private(struct net_device *dev,
8279 struct net_device *lower_dev)
8280 {
8281 struct netdev_adjacent *lower;
8282
8283 if (!lower_dev)
8284 return NULL;
8285 lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower);
8286 if (!lower)
8287 return NULL;
8288
8289 return lower->private;
8290 }
8291 EXPORT_SYMBOL(netdev_lower_dev_get_private);
8292
8293
8294
8295
8296
8297
8298
8299
8300
8301
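/**
 * netdev_lower_state_changed - Dispatch an event about a lower device state change
 * @lower_dev: device
 * @lower_state_info: state to dispatch
 *
 * Sends NETDEV_CHANGELOWERSTATE to the netdev notifier chain. The caller
 * must hold the RTNL lock.
 */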
8302 void netdev_lower_state_changed(struct net_device *lower_dev,
8303 void *lower_state_info)
8304 {
8305 struct netdev_notifier_changelowerstate_info changelowerstate_info = {
8306 .info.dev = lower_dev,
8307 };
8308
8309 ASSERT_RTNL();
8310 changelowerstate_info.lower_state_info = lower_state_info;
8311 call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE,
8312 &changelowerstate_info.info);
8313 }
8314 EXPORT_SYMBOL(netdev_lower_state_changed);
8315
8316 static void dev_change_rx_flags(struct net_device *dev, int flags)
8317 {
8318 const struct net_device_ops *ops = dev->netdev_ops;
8319
8320 if (ops->ndo_change_rx_flags)
8321 ops->ndo_change_rx_flags(dev, flags);
8322 }
8323
8324 static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
8325 {
8326 unsigned int old_flags = dev->flags;
8327 kuid_t uid;
8328 kgid_t gid;
8329
8330 ASSERT_RTNL();
8331
8332 dev->flags |= IFF_PROMISC;
8333 dev->promiscuity += inc;
8334 if (dev->promiscuity == 0) {
8335 /* The counter just hit zero: either this was a legitimate
8336  * decrement (clear IFF_PROMISC below), or a positive increment
8337  * wrapped the unsigned counter, in which case unwind it and
8338  * report -EOVERFLOW. */
8339 if (inc < 0)
8340 dev->flags &= ~IFF_PROMISC;
8341 else {
8342 dev->promiscuity -= inc;
8343 netdev_warn(dev, "promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n");
8344 return -EOVERFLOW;
8345 }
8346 }
8347 if (dev->flags != old_flags) {
8348 pr_info("device %s %s promiscuous mode\n",
8349 dev->name,
8350 dev->flags & IFF_PROMISC ? "entered" : "left");
8351 if (audit_enabled) {
8352 current_uid_gid(&uid, &gid);
8353 audit_log(audit_context(), GFP_ATOMIC,
8354 AUDIT_ANOM_PROMISCUOUS,
8355 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
8356 dev->name, (dev->flags & IFF_PROMISC),
8357 (old_flags & IFF_PROMISC),
8358 from_kuid(&init_user_ns, audit_get_loginuid(current)),
8359 from_kuid(&init_user_ns, uid),
8360 from_kgid(&init_user_ns, gid),
8361 audit_get_sessionid(current));
8362 }
8363
8364 dev_change_rx_flags(dev, IFF_PROMISC);
8365 }
8366 if (notify)
8367 __dev_notify_flags(dev, old_flags, IFF_PROMISC);
8368 return 0;
8369 }
8370
8371
8372
8373
8374
8375
8376
8377
8378
8379
8380
8381
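/**
 * dev_set_promiscuity - update the promiscuity count on a device
 * @dev: device
 * @inc: modifier
 *
 * Add or remove promiscuity from a device. While the count remains above
 * zero the interface stays promiscuous; once it hits zero the device
 * reverts to normal filtering. A negative @inc drops promiscuity.
 * Returns 0 on success or -EOVERFLOW if the counter would overflow.
 */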
8382 int dev_set_promiscuity(struct net_device *dev, int inc)
8383 {
8384 unsigned int old_flags = dev->flags;
8385 int err;
8386
8387 err = __dev_set_promiscuity(dev, inc, true);
8388 if (err < 0)
8389 return err;
8390 if (dev->flags != old_flags)
8391 dev_set_rx_mode(dev);
8392 return err;
8393 }
8394 EXPORT_SYMBOL(dev_set_promiscuity);
8395
8396 static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
8397 {
8398 unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
8399
8400 ASSERT_RTNL();
8401
8402 dev->flags |= IFF_ALLMULTI;
8403 dev->allmulti += inc;
8404 if (dev->allmulti == 0) {
8405 /* As for promiscuity above: allmulti hit zero either because of
8406  * a legitimate decrement (clear IFF_ALLMULTI below) or because a
8407  * positive increment wrapped the unsigned counter, in which case
8408  * unwind it and report -EOVERFLOW. */
8409 if (inc < 0)
8410 dev->flags &= ~IFF_ALLMULTI;
8411 else {
8412 dev->allmulti -= inc;
8413 netdev_warn(dev, "allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n");
8414 return -EOVERFLOW;
8415 }
8416 }
8417 if (dev->flags ^ old_flags) {
8418 dev_change_rx_flags(dev, IFF_ALLMULTI);
8419 dev_set_rx_mode(dev);
8420 if (notify)
8421 __dev_notify_flags(dev, old_flags,
8422 dev->gflags ^ old_gflags);
8423 }
8424 return 0;
8425 }
8426
8427
8428
8429
8430
8431
8432
8433
8434
8435
8436
8437
8438
8439
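/**
 * dev_set_allmulti - update the allmulti count on a device
 * @dev: device
 * @inc: modifier
 *
 * Add or remove reception of all multicast frames on a device. While the
 * count remains above zero the interface keeps listening to all multicast
 * addresses; once it hits zero it reverts to normal filtering. A negative
 * @inc drops the counter.
 * Returns 0 on success or -EOVERFLOW if the counter would overflow.
 */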
8440 int dev_set_allmulti(struct net_device *dev, int inc)
8441 {
8442 return __dev_set_allmulti(dev, inc, true);
8443 }
8444 EXPORT_SYMBOL(dev_set_allmulti);
8445
8446
8447
8448
8449
8450
8451
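/*
 * Upload the unicast and multicast address lists to the device and
 * configure RX filtering. When the device does not support unicast
 * filtering it is put into promiscuous mode while unicast addresses
 * are present.
 */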
8452 void __dev_set_rx_mode(struct net_device *dev)
8453 {
8454 const struct net_device_ops *ops = dev->netdev_ops;
8455
8456
8457 if (!(dev->flags&IFF_UP))
8458 return;
8459
8460 if (!netif_device_present(dev))
8461 return;
8462
8463 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
8464 /* Unicast address changes may only happen under the rtnl lock,
8465  * therefore calling __dev_set_promiscuity() here without extra
8466  * notification is safe. */
8467 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
8468 __dev_set_promiscuity(dev, 1, false);
8469 dev->uc_promisc = true;
8470 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
8471 __dev_set_promiscuity(dev, -1, false);
8472 dev->uc_promisc = false;
8473 }
8474 }
8475
8476 if (ops->ndo_set_rx_mode)
8477 ops->ndo_set_rx_mode(dev);
8478 }
8479
8480 void dev_set_rx_mode(struct net_device *dev)
8481 {
8482 netif_addr_lock_bh(dev);
8483 __dev_set_rx_mode(dev);
8484 netif_addr_unlock_bh(dev);
8485 }
8486
8487
8488
8489
8490
8491
8492
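/**
 * dev_get_flags - get the flags reported to userspace
 * @dev: device
 *
 * Returns the combination of device flags and gflags exported to
 * userspace, with the runtime bits (IFF_RUNNING, IFF_LOWER_UP,
 * IFF_DORMANT) derived from the current operational state.
 */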
8493 unsigned int dev_get_flags(const struct net_device *dev)
8494 {
8495 unsigned int flags;
8496
8497 flags = (dev->flags & ~(IFF_PROMISC |
8498 IFF_ALLMULTI |
8499 IFF_RUNNING |
8500 IFF_LOWER_UP |
8501 IFF_DORMANT)) |
8502 (dev->gflags & (IFF_PROMISC |
8503 IFF_ALLMULTI));
8504
8505 if (netif_running(dev)) {
8506 if (netif_oper_up(dev))
8507 flags |= IFF_RUNNING;
8508 if (netif_carrier_ok(dev))
8509 flags |= IFF_LOWER_UP;
8510 if (netif_dormant(dev))
8511 flags |= IFF_DORMANT;
8512 }
8513
8514 return flags;
8515 }
8516 EXPORT_SYMBOL(dev_get_flags);
8517
8518 int __dev_change_flags(struct net_device *dev, unsigned int flags,
8519 struct netlink_ext_ack *extack)
8520 {
8521 unsigned int old_flags = dev->flags;
8522 int ret;
8523
8524 ASSERT_RTNL();
8525
8526 /* Set the per-device flags that userspace may control directly,
8527  * while preserving the flags that are managed by the kernel
8528  * (IFF_UP, IFF_VOLATILE, IFF_PROMISC, IFF_ALLMULTI).
8529  */
8530 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
8531 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
8532 IFF_AUTOMEDIA)) |
8533 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
8534 IFF_ALLMULTI));
8535
8536
8537
8538
8539
8540 if ((old_flags ^ flags) & IFF_MULTICAST)
8541 dev_change_rx_flags(dev, IFF_MULTICAST);
8542
8543 dev_set_rx_mode(dev);
8544
8545
8546
8547
8548
8549
8550
8551 ret = 0;
8552 if ((old_flags ^ flags) & IFF_UP) {
8553 if (old_flags & IFF_UP)
8554 __dev_close(dev);
8555 else
8556 ret = __dev_open(dev, extack);
8557 }
8558
8559 if ((flags ^ dev->gflags) & IFF_PROMISC) {
8560 int inc = (flags & IFF_PROMISC) ? 1 : -1;
8561 unsigned int old_flags = dev->flags;
8562
8563 dev->gflags ^= IFF_PROMISC;
8564
8565 if (__dev_set_promiscuity(dev, inc, false) >= 0)
8566 if (dev->flags != old_flags)
8567 dev_set_rx_mode(dev);
8568 }
8569
8570
8571
8572
8573
8574 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
8575 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
8576
8577 dev->gflags ^= IFF_ALLMULTI;
8578 __dev_set_allmulti(dev, inc, false);
8579 }
8580
8581 return ret;
8582 }
8583
8584 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
8585 unsigned int gchanges)
8586 {
8587 unsigned int changes = dev->flags ^ old_flags;
8588
8589 if (gchanges)
8590 rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
8591
8592 if (changes & IFF_UP) {
8593 if (dev->flags & IFF_UP)
8594 call_netdevice_notifiers(NETDEV_UP, dev);
8595 else
8596 call_netdevice_notifiers(NETDEV_DOWN, dev);
8597 }
8598
8599 if (dev->flags & IFF_UP &&
8600 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
8601 struct netdev_notifier_change_info change_info = {
8602 .info = {
8603 .dev = dev,
8604 },
8605 .flags_changed = changes,
8606 };
8607
8608 call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info);
8609 }
8610 }
8611
8612
8613
8614
8615
8616
8617
8618
8619
8620
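/**
 * dev_change_flags - change device settings
 * @dev: device
 * @flags: device state flags
 * @extack: netlink extended ack
 *
 * Changes settings on @dev based on the state flags, which are given in
 * the userspace-exported format, and sends the appropriate notifications.
 */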
8621 int dev_change_flags(struct net_device *dev, unsigned int flags,
8622 struct netlink_ext_ack *extack)
8623 {
8624 int ret;
8625 unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
8626
8627 ret = __dev_change_flags(dev, flags, extack);
8628 if (ret < 0)
8629 return ret;
8630
8631 changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
8632 __dev_notify_flags(dev, old_flags, changes);
8633 return ret;
8634 }
8635 EXPORT_SYMBOL(dev_change_flags);
8636
8637 int __dev_set_mtu(struct net_device *dev, int new_mtu)
8638 {
8639 const struct net_device_ops *ops = dev->netdev_ops;
8640
8641 if (ops->ndo_change_mtu)
8642 return ops->ndo_change_mtu(dev, new_mtu);
8643
8644 /* Pairs with the lockless READ_ONCE() readers of dev->mtu elsewhere. */
8645 WRITE_ONCE(dev->mtu, new_mtu);
8646 return 0;
8647 }
8648 EXPORT_SYMBOL(__dev_set_mtu);
8649
8650 int dev_validate_mtu(struct net_device *dev, int new_mtu,
8651 struct netlink_ext_ack *extack)
8652 {
8653
8654 if (new_mtu < 0 || new_mtu < dev->min_mtu) {
8655 NL_SET_ERR_MSG(extack, "mtu less than device minimum");
8656 return -EINVAL;
8657 }
8658
8659 if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
8660 NL_SET_ERR_MSG(extack, "mtu greater than device maximum");
8661 return -EINVAL;
8662 }
8663 return 0;
8664 }
8665
8666
8667
8668
8669
8670
8671
8672
8673
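/**
 * dev_set_mtu_ext - change the maximum transfer unit
 * @dev: device
 * @new_mtu: new transfer unit
 * @extack: netlink extended ack
 *
 * Validates and changes the MTU of the network device, rolling back if a
 * notifier rejects the change.
 */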
8674 int dev_set_mtu_ext(struct net_device *dev, int new_mtu,
8675 struct netlink_ext_ack *extack)
8676 {
8677 int err, orig_mtu;
8678
8679 if (new_mtu == dev->mtu)
8680 return 0;
8681
8682 err = dev_validate_mtu(dev, new_mtu, extack);
8683 if (err)
8684 return err;
8685
8686 if (!netif_device_present(dev))
8687 return -ENODEV;
8688
8689 err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev);
8690 err = notifier_to_errno(err);
8691 if (err)
8692 return err;
8693
8694 orig_mtu = dev->mtu;
8695 err = __dev_set_mtu(dev, new_mtu);
8696
8697 if (!err) {
8698 err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
8699 orig_mtu);
8700 err = notifier_to_errno(err);
8701 if (err) {
8702 /* A notifier rejected the new MTU: roll the device back to the
8703  * original value and re-notify with the old MTU in effect.
8704  */
8705 __dev_set_mtu(dev, orig_mtu);
8706 call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
8707 new_mtu);
8708 }
8709 }
8710 return err;
8711 }
8712
8713 int dev_set_mtu(struct net_device *dev, int new_mtu)
8714 {
8715 struct netlink_ext_ack extack;
8716 int err;
8717
8718 memset(&extack, 0, sizeof(extack));
8719 err = dev_set_mtu_ext(dev, new_mtu, &extack);
8720 if (err && extack._msg)
8721 net_err_ratelimited("%s: %s\n", dev->name, extack._msg);
8722 return err;
8723 }
8724 EXPORT_SYMBOL(dev_set_mtu);
8725
8726
8727
8728
8729
8730
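/**
 * dev_change_tx_queue_len - change the TX queue length of a netdevice
 * @dev: device
 * @new_len: new tx queue length
 *
 * Returns 0 on success; on notifier or qdisc failure the original length
 * is restored and an error is returned.
 */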
8731 int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
8732 {
8733 unsigned int orig_len = dev->tx_queue_len;
8734 int res;
8735
8736 if (new_len != (unsigned int)new_len)
8737 return -ERANGE;
8738
8739 if (new_len != orig_len) {
8740 dev->tx_queue_len = new_len;
8741 res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
8742 res = notifier_to_errno(res);
8743 if (res)
8744 goto err_rollback;
8745 res = dev_qdisc_change_tx_queue_len(dev);
8746 if (res)
8747 goto err_rollback;
8748 }
8749
8750 return 0;
8751
8752 err_rollback:
8753 netdev_err(dev, "refused to change device tx_queue_len\n");
8754 dev->tx_queue_len = orig_len;
8755 return res;
8756 }
8757
8758
8759
8760
8761
8762
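/**
 * dev_set_group - change the group this device belongs to
 * @dev: device
 * @new_group: group this device should belong to
 */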
8763 void dev_set_group(struct net_device *dev, int new_group)
8764 {
8765 dev->group = new_group;
8766 }
8767
8768
8769
8770
8771
8772
8773
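/**
 * dev_pre_changeaddr_notify - call the NETDEV_PRE_CHANGEADDR chain
 * @dev: device
 * @addr: prospective new hardware address
 * @extack: netlink extended ack
 *
 * Gives notifier listeners a chance to veto the address change before it
 * is applied.
 */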
8774 int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
8775 struct netlink_ext_ack *extack)
8776 {
8777 struct netdev_notifier_pre_changeaddr_info info = {
8778 .info.dev = dev,
8779 .info.extack = extack,
8780 .dev_addr = addr,
8781 };
8782 int rc;
8783
8784 rc = call_netdevice_notifiers_info(NETDEV_PRE_CHANGEADDR, &info.info);
8785 return notifier_to_errno(rc);
8786 }
8787 EXPORT_SYMBOL(dev_pre_changeaddr_notify);
8788
8789
8790
8791
8792
8793
8794
8795
8796
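/**
 * dev_set_mac_address - change the Media Access Control address
 * @dev: device
 * @sa: new address; its family must match the device type
 * @extack: netlink extended ack
 *
 * Changes the hardware (MAC) address of the device and notifies the
 * NETDEV_CHANGEADDR chain on success.
 */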
8797 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
8798 struct netlink_ext_ack *extack)
8799 {
8800 const struct net_device_ops *ops = dev->netdev_ops;
8801 int err;
8802
8803 if (!ops->ndo_set_mac_address)
8804 return -EOPNOTSUPP;
8805 if (sa->sa_family != dev->type)
8806 return -EINVAL;
8807 if (!netif_device_present(dev))
8808 return -ENODEV;
8809 err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
8810 if (err)
8811 return err;
8812 err = ops->ndo_set_mac_address(dev, sa);
8813 if (err)
8814 return err;
8815 dev->addr_assign_type = NET_ADDR_SET;
8816 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
8817 add_device_randomness(dev->dev_addr, dev->addr_len);
8818 return 0;
8819 }
8820 EXPORT_SYMBOL(dev_set_mac_address);
8821
8822 static DECLARE_RWSEM(dev_addr_sem);
8823
8824 int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa,
8825 struct netlink_ext_ack *extack)
8826 {
8827 int ret;
8828
8829 down_write(&dev_addr_sem);
8830 ret = dev_set_mac_address(dev, sa, extack);
8831 up_write(&dev_addr_sem);
8832 return ret;
8833 }
8834 EXPORT_SYMBOL(dev_set_mac_address_user);
8835
8836 int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
8837 {
8838 size_t size = sizeof(sa->sa_data);
8839 struct net_device *dev;
8840 int ret = 0;
8841
8842 down_read(&dev_addr_sem);
8843 rcu_read_lock();
8844
8845 dev = dev_get_by_name_rcu(net, dev_name);
8846 if (!dev) {
8847 ret = -ENODEV;
8848 goto unlock;
8849 }
8850 if (!dev->addr_len)
8851 memset(sa->sa_data, 0, size);
8852 else
8853 memcpy(sa->sa_data, dev->dev_addr,
8854 min_t(size_t, size, dev->addr_len));
8855 sa->sa_family = dev->type;
8856
8857 unlock:
8858 rcu_read_unlock();
8859 up_read(&dev_addr_sem);
8860 return ret;
8861 }
8862 EXPORT_SYMBOL(dev_get_mac_address);
8863
8864
8865
8866
8867
8868
8869
8870
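/**
 * dev_change_carrier - change the device carrier
 * @dev: device
 * @new_carrier: new carrier value
 *
 * Returns -EOPNOTSUPP if the driver has no ndo_change_carrier and
 * -ENODEV if the device is not present.
 */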
8871 int dev_change_carrier(struct net_device *dev, bool new_carrier)
8872 {
8873 const struct net_device_ops *ops = dev->netdev_ops;
8874
8875 if (!ops->ndo_change_carrier)
8876 return -EOPNOTSUPP;
8877 if (!netif_device_present(dev))
8878 return -ENODEV;
8879 return ops->ndo_change_carrier(dev, new_carrier);
8880 }
8881
8882
8883
8884
8885
8886
8887
8888
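/**
 * dev_get_phys_port_id - get the device's physical port ID
 * @dev: device
 * @ppid: storage for the port ID
 *
 * Returns -EOPNOTSUPP if the driver does not implement
 * ndo_get_phys_port_id.
 */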
8889 int dev_get_phys_port_id(struct net_device *dev,
8890 struct netdev_phys_item_id *ppid)
8891 {
8892 const struct net_device_ops *ops = dev->netdev_ops;
8893
8894 if (!ops->ndo_get_phys_port_id)
8895 return -EOPNOTSUPP;
8896 return ops->ndo_get_phys_port_id(dev, ppid);
8897 }
8898
8899
8900
8901
8902
8903
8904
8905
8906
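/**
 * dev_get_phys_port_name - get the device's physical port name
 * @dev: device
 * @name: buffer for the port name
 * @len: size of the buffer
 *
 * Asks the driver first and falls back to the devlink compat helper if
 * the driver does not implement ndo_get_phys_port_name.
 */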
8907 int dev_get_phys_port_name(struct net_device *dev,
8908 char *name, size_t len)
8909 {
8910 const struct net_device_ops *ops = dev->netdev_ops;
8911 int err;
8912
8913 if (ops->ndo_get_phys_port_name) {
8914 err = ops->ndo_get_phys_port_name(dev, name, len);
8915 if (err != -EOPNOTSUPP)
8916 return err;
8917 }
8918 return devlink_compat_phys_port_name_get(dev, name, len);
8919 }
8920
8921
8922
8923
8924
8925
8926
8927
8928
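/**
 * dev_get_port_parent_id - get the device's port parent identifier
 * @dev: network device
 * @ppid: storage for the port's parent identifier
 * @recurse: whether to descend into lower devices
 *
 * When recursing, all lower devices must agree on the same parent ID,
 * otherwise -EOPNOTSUPP is returned.
 */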
8929 int dev_get_port_parent_id(struct net_device *dev,
8930 struct netdev_phys_item_id *ppid,
8931 bool recurse)
8932 {
8933 const struct net_device_ops *ops = dev->netdev_ops;
8934 struct netdev_phys_item_id first = { };
8935 struct net_device *lower_dev;
8936 struct list_head *iter;
8937 int err;
8938
8939 if (ops->ndo_get_port_parent_id) {
8940 err = ops->ndo_get_port_parent_id(dev, ppid);
8941 if (err != -EOPNOTSUPP)
8942 return err;
8943 }
8944
8945 err = devlink_compat_switch_id_get(dev, ppid);
8946 if (!recurse || err != -EOPNOTSUPP)
8947 return err;
8948
8949 netdev_for_each_lower_dev(dev, lower_dev, iter) {
8950 err = dev_get_port_parent_id(lower_dev, ppid, true);
8951 if (err)
8952 break;
8953 if (!first.id_len)
8954 first = *ppid;
8955 else if (memcmp(&first, ppid, sizeof(*ppid)))
8956 return -EOPNOTSUPP;
8957 }
8958
8959 return err;
8960 }
8961 EXPORT_SYMBOL(dev_get_port_parent_id);
8962
8963
8964
8965
8966
8967
8968
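/**
 * netdev_port_same_parent_id - check whether two devices share a port parent
 * @a: first network device
 * @b: second network device
 *
 * Returns true only if both parent IDs can be obtained and are equal.
 */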
8969 bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
8970 {
8971 struct netdev_phys_item_id a_id = { };
8972 struct netdev_phys_item_id b_id = { };
8973
8974 if (dev_get_port_parent_id(a, &a_id, true) ||
8975 dev_get_port_parent_id(b, &b_id, true))
8976 return false;
8977
8978 return netdev_phys_item_id_same(&a_id, &b_id);
8979 }
8980 EXPORT_SYMBOL(netdev_port_same_parent_id);
8981
8982
8983
8984
8985
8986
8987
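/**
 * dev_change_proto_down - set carrier according to proto_down
 * @dev: device
 * @proto_down: new value
 *
 * Only permitted on devices that advertise IFF_CHANGE_PROTO_DOWN.
 */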
8988 int dev_change_proto_down(struct net_device *dev, bool proto_down)
8989 {
8990 if (!(dev->priv_flags & IFF_CHANGE_PROTO_DOWN))
8991 return -EOPNOTSUPP;
8992 if (!netif_device_present(dev))
8993 return -ENODEV;
8994 if (proto_down)
8995 netif_carrier_off(dev);
8996 else
8997 netif_carrier_on(dev);
8998 dev->proto_down = proto_down;
8999 return 0;
9000 }
9001
9002
9003
9004
9005
9006
9007
9008
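/**
 * dev_change_proto_down_reason - update the proto_down reason bits
 * @dev: device
 * @mask: bit mask of reasons to update; zero means set the whole value
 * @value: new reason bits
 */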
9009 void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
9010 u32 value)
9011 {
9012 int b;
9013
9014 if (!mask) {
9015 dev->proto_down_reason = value;
9016 } else {
9017 for_each_set_bit(b, &mask, 32) {
9018 if (value & (1 << b))
9019 dev->proto_down_reason |= BIT(b);
9020 else
9021 dev->proto_down_reason &= ~BIT(b);
9022 }
9023 }
9024 }
9025
9026 struct bpf_xdp_link {
9027 struct bpf_link link;
9028 struct net_device *dev;
9029 int flags;
9030 };
9031
9032 static enum bpf_xdp_mode dev_xdp_mode(struct net_device *dev, u32 flags)
9033 {
9034 if (flags & XDP_FLAGS_HW_MODE)
9035 return XDP_MODE_HW;
9036 if (flags & XDP_FLAGS_DRV_MODE)
9037 return XDP_MODE_DRV;
9038 if (flags & XDP_FLAGS_SKB_MODE)
9039 return XDP_MODE_SKB;
9040 return dev->netdev_ops->ndo_bpf ? XDP_MODE_DRV : XDP_MODE_SKB;
9041 }
9042
9043 static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode)
9044 {
9045 switch (mode) {
9046 case XDP_MODE_SKB:
9047 return generic_xdp_install;
9048 case XDP_MODE_DRV:
9049 case XDP_MODE_HW:
9050 return dev->netdev_ops->ndo_bpf;
9051 default:
9052 return NULL;
9053 }
9054 }
9055
9056 static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev,
9057 enum bpf_xdp_mode mode)
9058 {
9059 return dev->xdp_state[mode].link;
9060 }
9061
9062 static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
9063 enum bpf_xdp_mode mode)
9064 {
9065 struct bpf_xdp_link *link = dev_xdp_link(dev, mode);
9066
9067 if (link)
9068 return link->link.prog;
9069 return dev->xdp_state[mode].prog;
9070 }
9071
9072 u8 dev_xdp_prog_count(struct net_device *dev)
9073 {
9074 u8 count = 0;
9075 int i;
9076
9077 for (i = 0; i < __MAX_XDP_MODE; i++)
9078 if (dev->xdp_state[i].prog || dev->xdp_state[i].link)
9079 count++;
9080 return count;
9081 }
9082 EXPORT_SYMBOL_GPL(dev_xdp_prog_count);
9083
9084 u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
9085 {
9086 struct bpf_prog *prog = dev_xdp_prog(dev, mode);
9087
9088 return prog ? prog->aux->id : 0;
9089 }
9090
9091 static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode,
9092 struct bpf_xdp_link *link)
9093 {
9094 dev->xdp_state[mode].link = link;
9095 dev->xdp_state[mode].prog = NULL;
9096 }
9097
9098 static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode,
9099 struct bpf_prog *prog)
9100 {
9101 dev->xdp_state[mode].link = NULL;
9102 dev->xdp_state[mode].prog = prog;
9103 }
9104
9105 static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
9106 bpf_op_t bpf_op, struct netlink_ext_ack *extack,
9107 u32 flags, struct bpf_prog *prog)
9108 {
9109 struct netdev_bpf xdp;
9110 int err;
9111
9112 memset(&xdp, 0, sizeof(xdp));
9113 xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG;
9114 xdp.extack = extack;
9115 xdp.flags = flags;
9116 xdp.prog = prog;
9117
9118 /* Drivers are expected to keep their own reference to the
9119  * installed program, so take an extra reference before handing it
9120  * to the driver (or to generic_xdp_install) and drop it again if
9121  * the installation fails.
9122  */
9123 
9124 if (prog)
9125 bpf_prog_inc(prog);
9126 err = bpf_op(dev, &xdp);
9127 if (err) {
9128 if (prog)
9129 bpf_prog_put(prog);
9130 return err;
9131 }
9132
9133 if (mode != XDP_MODE_HW)
9134 bpf_prog_change_xdp(dev_xdp_prog(dev, mode), prog);
9135
9136 return 0;
9137 }
9138
9139 static void dev_xdp_uninstall(struct net_device *dev)
9140 {
9141 struct bpf_xdp_link *link;
9142 struct bpf_prog *prog;
9143 enum bpf_xdp_mode mode;
9144 bpf_op_t bpf_op;
9145
9146 ASSERT_RTNL();
9147
9148 for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) {
9149 prog = dev_xdp_prog(dev, mode);
9150 if (!prog)
9151 continue;
9152
9153 bpf_op = dev_xdp_bpf_op(dev, mode);
9154 if (!bpf_op)
9155 continue;
9156
9157 WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
9158
9159 /* Auto-detach an attached link, or drop our reference to a directly attached prog. */
9160 link = dev_xdp_link(dev, mode);
9161 if (link)
9162 link->dev = NULL;
9163 else
9164 bpf_prog_put(prog);
9165
9166 dev_xdp_set_link(dev, mode, NULL);
9167 }
9168 }
9169
9170 static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack,
9171 struct bpf_xdp_link *link, struct bpf_prog *new_prog,
9172 struct bpf_prog *old_prog, u32 flags)
9173 {
9174 unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES);
9175 struct bpf_prog *cur_prog;
9176 struct net_device *upper;
9177 struct list_head *iter;
9178 enum bpf_xdp_mode mode;
9179 bpf_op_t bpf_op;
9180 int err;
9181
9182 ASSERT_RTNL();
9183
9184
9185 if (link && (new_prog || old_prog))
9186 return -EINVAL;
9187
9188 if (link && (flags & ~XDP_FLAGS_MODES)) {
9189 NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment");
9190 return -EINVAL;
9191 }
9192
9193 if (num_modes > 1) {
9194 NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set");
9195 return -EINVAL;
9196 }
9197
9198 if (!num_modes && dev_xdp_prog_count(dev) > 1) {
9199 NL_SET_ERR_MSG(extack,
9200 "More than one program loaded, unset mode is ambiguous");
9201 return -EINVAL;
9202 }
9203
9204 if (old_prog && !(flags & XDP_FLAGS_REPLACE)) {
9205 NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified");
9206 return -EINVAL;
9207 }
9208
9209 mode = dev_xdp_mode(dev, flags);
9210
9211 if (dev_xdp_link(dev, mode)) {
9212 NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link");
9213 return -EBUSY;
9214 }
9215
9216 /* Refuse to attach if any upper device already has an XDP program. */
9217 netdev_for_each_upper_dev_rcu(dev, upper, iter) {
9218 if (dev_xdp_prog_count(upper) > 0) {
9219 NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program");
9220 return -EEXIST;
9221 }
9222 }
9223
9224 cur_prog = dev_xdp_prog(dev, mode);
9225
9226 if (link && cur_prog) {
9227 NL_SET_ERR_MSG(extack, "Can't replace active XDP program with BPF link");
9228 return -EBUSY;
9229 }
9230 if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) {
9231 NL_SET_ERR_MSG(extack, "Active program does not match expected");
9232 return -EEXIST;
9233 }
9234
9235 /* For link attachments, the effective new program comes from the link. */
9236 if (link)
9237 new_prog = link->link.prog;
9238
9239 if (new_prog) {
9240 bool offload = mode == XDP_MODE_HW;
9241 enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB
9242 ? XDP_MODE_DRV : XDP_MODE_SKB;
9243
9244 if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) {
9245 NL_SET_ERR_MSG(extack, "XDP program already attached");
9246 return -EBUSY;
9247 }
9248 if (!offload && dev_xdp_prog(dev, other_mode)) {
9249 NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time");
9250 return -EEXIST;
9251 }
9252 if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) {
9253 NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported");
9254 return -EINVAL;
9255 }
9256 if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
9257 NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
9258 return -EINVAL;
9259 }
9260 if (new_prog->expected_attach_type == BPF_XDP_CPUMAP) {
9261 NL_SET_ERR_MSG(extack, "BPF_XDP_CPUMAP programs can not be attached to a device");
9262 return -EINVAL;
9263 }
9264 }
9265
9266
9267 if (new_prog != cur_prog) {
9268 bpf_op = dev_xdp_bpf_op(dev, mode);
9269 if (!bpf_op) {
9270 NL_SET_ERR_MSG(extack, "Underlying driver does not support XDP in native mode");
9271 return -EOPNOTSUPP;
9272 }
9273
9274 err = dev_xdp_install(dev, mode, bpf_op, extack, flags, new_prog);
9275 if (err)
9276 return err;
9277 }
9278
9279 if (link)
9280 dev_xdp_set_link(dev, mode, link);
9281 else
9282 dev_xdp_set_prog(dev, mode, new_prog);
9283 if (cur_prog)
9284 bpf_prog_put(cur_prog);
9285
9286 return 0;
9287 }
9288
9289 static int dev_xdp_attach_link(struct net_device *dev,
9290 struct netlink_ext_ack *extack,
9291 struct bpf_xdp_link *link)
9292 {
9293 return dev_xdp_attach(dev, extack, link, NULL, NULL, link->flags);
9294 }
9295
9296 static int dev_xdp_detach_link(struct net_device *dev,
9297 struct netlink_ext_ack *extack,
9298 struct bpf_xdp_link *link)
9299 {
9300 enum bpf_xdp_mode mode;
9301 bpf_op_t bpf_op;
9302
9303 ASSERT_RTNL();
9304
9305 mode = dev_xdp_mode(dev, link->flags);
9306 if (dev_xdp_link(dev, mode) != link)
9307 return -EINVAL;
9308
9309 bpf_op = dev_xdp_bpf_op(dev, mode);
9310 WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
9311 dev_xdp_set_link(dev, mode, NULL);
9312 return 0;
9313 }
9314
9315 static void bpf_xdp_link_release(struct bpf_link *link)
9316 {
9317 struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
9318
9319 rtnl_lock();
9320 /* If the link raced with net_device teardown, xdp_link->dev has
9321  * already been cleared by dev_xdp_uninstall() and there is
9322  * nothing left to detach.
9323  */
9324 if (xdp_link->dev) {
9325 WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link));
9326 xdp_link->dev = NULL;
9327 }
9328
9329 rtnl_unlock();
9330 }
9331
9332 static int bpf_xdp_link_detach(struct bpf_link *link)
9333 {
9334 bpf_xdp_link_release(link);
9335 return 0;
9336 }
9337
9338 static void bpf_xdp_link_dealloc(struct bpf_link *link)
9339 {
9340 struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
9341
9342 kfree(xdp_link);
9343 }
9344
9345 static void bpf_xdp_link_show_fdinfo(const struct bpf_link *link,
9346 struct seq_file *seq)
9347 {
9348 struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
9349 u32 ifindex = 0;
9350
9351 rtnl_lock();
9352 if (xdp_link->dev)
9353 ifindex = xdp_link->dev->ifindex;
9354 rtnl_unlock();
9355
9356 seq_printf(seq, "ifindex:\t%u\n", ifindex);
9357 }
9358
9359 static int bpf_xdp_link_fill_link_info(const struct bpf_link *link,
9360 struct bpf_link_info *info)
9361 {
9362 struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
9363 u32 ifindex = 0;
9364
9365 rtnl_lock();
9366 if (xdp_link->dev)
9367 ifindex = xdp_link->dev->ifindex;
9368 rtnl_unlock();
9369
9370 info->xdp.ifindex = ifindex;
9371 return 0;
9372 }
9373
9374 static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
9375 struct bpf_prog *old_prog)
9376 {
9377 struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
9378 enum bpf_xdp_mode mode;
9379 bpf_op_t bpf_op;
9380 int err = 0;
9381
9382 rtnl_lock();
9383
9384 /* The link may already have been auto-detached from its device. */
9385 if (!xdp_link->dev) {
9386 err = -ENOLINK;
9387 goto out_unlock;
9388 }
9389
9390 if (old_prog && link->prog != old_prog) {
9391 err = -EPERM;
9392 goto out_unlock;
9393 }
9394 old_prog = link->prog;
9395 if (old_prog->type != new_prog->type ||
9396 old_prog->expected_attach_type != new_prog->expected_attach_type) {
9397 err = -EINVAL;
9398 goto out_unlock;
9399 }
9400
9401 if (old_prog == new_prog) {
9402
9403 bpf_prog_put(new_prog);
9404 goto out_unlock;
9405 }
9406
9407 mode = dev_xdp_mode(xdp_link->dev, xdp_link->flags);
9408 bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode);
9409 err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL,
9410 xdp_link->flags, new_prog);
9411 if (err)
9412 goto out_unlock;
9413
9414 old_prog = xchg(&link->prog, new_prog);
9415 bpf_prog_put(old_prog);
9416
9417 out_unlock:
9418 rtnl_unlock();
9419 return err;
9420 }
9421
9422 static const struct bpf_link_ops bpf_xdp_link_lops = {
9423 .release = bpf_xdp_link_release,
9424 .dealloc = bpf_xdp_link_dealloc,
9425 .detach = bpf_xdp_link_detach,
9426 .show_fdinfo = bpf_xdp_link_show_fdinfo,
9427 .fill_link_info = bpf_xdp_link_fill_link_info,
9428 .update_prog = bpf_xdp_link_update,
9429 };
9430
9431 int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
9432 {
9433 struct net *net = current->nsproxy->net_ns;
9434 struct bpf_link_primer link_primer;
9435 struct bpf_xdp_link *link;
9436 struct net_device *dev;
9437 int err, fd;
9438
9439 rtnl_lock();
9440 dev = dev_get_by_index(net, attr->link_create.target_ifindex);
9441 if (!dev) {
9442 rtnl_unlock();
9443 return -EINVAL;
9444 }
9445
9446 link = kzalloc(sizeof(*link), GFP_USER);
9447 if (!link) {
9448 err = -ENOMEM;
9449 goto unlock;
9450 }
9451
9452 bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog);
9453 link->dev = dev;
9454 link->flags = attr->link_create.flags;
9455
9456 err = bpf_link_prime(&link->link, &link_primer);
9457 if (err) {
9458 kfree(link);
9459 goto unlock;
9460 }
9461
9462 err = dev_xdp_attach_link(dev, NULL, link);
9463 rtnl_unlock();
9464
9465 if (err) {
9466 link->dev = NULL;
9467 bpf_link_cleanup(&link_primer);
9468 goto out_put_dev;
9469 }
9470
9471 fd = bpf_link_settle(&link_primer);
9472
9473 dev_put(dev);
9474 return fd;
9475
9476 unlock:
9477 rtnl_unlock();
9478
9479 out_put_dev:
9480 dev_put(dev);
9481 return err;
9482 }
9483
9484
9485
9486
9487
9488
9489
9490
9491
9492
9493
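/**
 * dev_change_xdp_fd - set or clear a bpf program for a device rx path
 * @dev: device
 * @extack: netlink extended ack
 * @fd: new program fd, or negative value to clear
 * @expected_fd: old program fd that userspace expects to replace or clear
 * @flags: xdp-related flags
 *
 * Set or clear an XDP program for a device. The caller must hold the
 * RTNL lock.
 */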
9494 int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
9495 int fd, int expected_fd, u32 flags)
9496 {
9497 enum bpf_xdp_mode mode = dev_xdp_mode(dev, flags);
9498 struct bpf_prog *new_prog = NULL, *old_prog = NULL;
9499 int err;
9500
9501 ASSERT_RTNL();
9502
9503 if (fd >= 0) {
9504 new_prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
9505 mode != XDP_MODE_SKB);
9506 if (IS_ERR(new_prog))
9507 return PTR_ERR(new_prog);
9508 }
9509
9510 if (expected_fd >= 0) {
9511 old_prog = bpf_prog_get_type_dev(expected_fd, BPF_PROG_TYPE_XDP,
9512 mode != XDP_MODE_SKB);
9513 if (IS_ERR(old_prog)) {
9514 err = PTR_ERR(old_prog);
9515 old_prog = NULL;
9516 goto err_out;
9517 }
9518 }
9519
9520 err = dev_xdp_attach(dev, extack, NULL, new_prog, old_prog, flags);
9521
9522 err_out:
9523 if (err && new_prog)
9524 bpf_prog_put(new_prog);
9525 if (old_prog)
9526 bpf_prog_put(old_prog);
9527 return err;
9528 }
9529
9530
9531
9532
9533
9534
9535
9536
9537
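/*
 * Allocate a new ifindex for the namespace: skips zero and negative
 * values and any index that is already in use.
 */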
9538 static int dev_new_index(struct net *net)
9539 {
9540 int ifindex = net->ifindex;
9541
9542 for (;;) {
9543 if (++ifindex <= 0)
9544 ifindex = 1;
9545 if (!__dev_get_by_index(net, ifindex))
9546 return net->ifindex = ifindex;
9547 }
9548 }
9549
9550
9551 LIST_HEAD(net_todo_list);
9552 DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
9553
9554 static void net_set_todo(struct net_device *dev)
9555 {
9556 list_add_tail(&dev->todo_list, &net_todo_list);
9557 atomic_inc(&dev_net(dev)->dev_unreg_count);
9558 }
9559
9560 static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
9561 struct net_device *upper, netdev_features_t features)
9562 {
9563 netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
9564 netdev_features_t feature;
9565 int feature_bit;
9566
9567 for_each_netdev_feature(upper_disables, feature_bit) {
9568 feature = __NETIF_F_BIT(feature_bit);
9569 if (!(upper->wanted_features & feature)
9570 && (features & feature)) {
9571 netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n",
9572 &feature, upper->name);
9573 features &= ~feature;
9574 }
9575 }
9576
9577 return features;
9578 }
9579
9580 static void netdev_sync_lower_features(struct net_device *upper,
9581 struct net_device *lower, netdev_features_t features)
9582 {
9583 netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
9584 netdev_features_t feature;
9585 int feature_bit;
9586
9587 for_each_netdev_feature(upper_disables, feature_bit) {
9588 feature = __NETIF_F_BIT(feature_bit);
9589 if (!(features & feature) && (lower->features & feature)) {
9590 netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
9591 &feature, lower->name);
9592 lower->wanted_features &= ~feature;
9593 __netdev_update_features(lower);
9594
9595 if (unlikely(lower->features & feature))
9596 netdev_WARN(upper, "failed to disable %pNF on %s!\n",
9597 &feature, lower->name);
9598 else
9599 netdev_features_change(lower);
9600 }
9601 }
9602 }
9603
9604 static netdev_features_t netdev_fix_features(struct net_device *dev,
9605 netdev_features_t features)
9606 {
9607
9608 if ((features & NETIF_F_HW_CSUM) &&
9609 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
9610 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
9611 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
9612 }
9613
9614
9615 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
9616 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
9617 features &= ~NETIF_F_ALL_TSO;
9618 }
9619
9620 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
9621 !(features & NETIF_F_IP_CSUM)) {
9622 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
9623 features &= ~NETIF_F_TSO;
9624 features &= ~NETIF_F_TSO_ECN;
9625 }
9626
9627 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
9628 !(features & NETIF_F_IPV6_CSUM)) {
9629 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
9630 features &= ~NETIF_F_TSO6;
9631 }
9632
9633
9634 if ((features & NETIF_F_TSO_MANGLEID) && !(features & NETIF_F_TSO))
9635 features &= ~NETIF_F_TSO_MANGLEID;
9636
9637
9638 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
9639 features &= ~NETIF_F_TSO_ECN;
9640
9641
9642 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
9643 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
9644 features &= ~NETIF_F_GSO;
9645 }
9646
9647
9648 if ((features & dev->gso_partial_features) &&
9649 !(features & NETIF_F_GSO_PARTIAL)) {
9650 netdev_dbg(dev,
9651 "Dropping partially supported GSO features since no GSO partial.\n");
9652 features &= ~dev->gso_partial_features;
9653 }
9654
9655 if (!(features & NETIF_F_RXCSUM)) {
9656
9657
9658
9659
9660
9661 if (features & NETIF_F_GRO_HW) {
9662 netdev_dbg(dev, "Dropping NETIF_F_GRO_HW since no RXCSUM feature.\n");
9663 features &= ~NETIF_F_GRO_HW;
9664 }
9665 }
9666
9667
9668 if (features & NETIF_F_RXFCS) {
9669 if (features & NETIF_F_LRO) {
9670 netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
9671 features &= ~NETIF_F_LRO;
9672 }
9673
9674 if (features & NETIF_F_GRO_HW) {
9675 netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n");
9676 features &= ~NETIF_F_GRO_HW;
9677 }
9678 }
9679
9680 if ((features & NETIF_F_GRO_HW) && (features & NETIF_F_LRO)) {
9681 netdev_dbg(dev, "Dropping LRO feature since HW-GRO is requested.\n");
9682 features &= ~NETIF_F_LRO;
9683 }
9684
9685 if (features & NETIF_F_HW_TLS_TX) {
9686 bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) ==
9687 (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
9688 bool hw_csum = features & NETIF_F_HW_CSUM;
9689
9690 if (!ip_csum && !hw_csum) {
9691 netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n");
9692 features &= ~NETIF_F_HW_TLS_TX;
9693 }
9694 }
9695
9696 if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) {
9697 netdev_dbg(dev, "Dropping TLS RX HW offload feature since no RXCSUM feature.\n");
9698 features &= ~NETIF_F_HW_TLS_RX;
9699 }
9700
9701 return features;
9702 }
9703
9704 int __netdev_update_features(struct net_device *dev)
9705 {
9706 struct net_device *upper, *lower;
9707 netdev_features_t features;
9708 struct list_head *iter;
9709 int err = -1;
9710
9711 ASSERT_RTNL();
9712
9713 features = netdev_get_wanted_features(dev);
9714
9715 if (dev->netdev_ops->ndo_fix_features)
9716 features = dev->netdev_ops->ndo_fix_features(dev, features);
9717
9718
9719 features = netdev_fix_features(dev, features);
9720
9721
9722 netdev_for_each_upper_dev_rcu(dev, upper, iter)
9723 features = netdev_sync_upper_features(dev, upper, features);
9724
9725 if (dev->features == features)
9726 goto sync_lower;
9727
9728 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
9729 &dev->features, &features);
9730
9731 if (dev->netdev_ops->ndo_set_features)
9732 err = dev->netdev_ops->ndo_set_features(dev, features);
9733 else
9734 err = 0;
9735
9736 if (unlikely(err < 0)) {
9737 netdev_err(dev,
9738 "set_features() failed (%d); wanted %pNF, left %pNF\n",
9739 err, &features, &dev->features);
9740
9741
9742
9743 return -1;
9744 }
9745
9746 sync_lower:
9747
9748
9749
9750 netdev_for_each_lower_dev(dev, lower, iter)
9751 netdev_sync_lower_features(dev, lower, features);
9752
9753 if (!err) {
9754 netdev_features_t diff = features ^ dev->features;
9755
9756 if (diff & NETIF_F_RX_UDP_TUNNEL_PORT) {
9757
9758
9759
9760
9761
9762
9763
9764 if (features & NETIF_F_RX_UDP_TUNNEL_PORT) {
9765 dev->features = features;
9766 udp_tunnel_get_rx_info(dev);
9767 } else {
9768 udp_tunnel_drop_rx_info(dev);
9769 }
9770 }
9771
9772 if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) {
9773 if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
9774 dev->features = features;
9775 err |= vlan_get_rx_ctag_filter_info(dev);
9776 } else {
9777 vlan_drop_rx_ctag_filter_info(dev);
9778 }
9779 }
9780
9781 if (diff & NETIF_F_HW_VLAN_STAG_FILTER) {
9782 if (features & NETIF_F_HW_VLAN_STAG_FILTER) {
9783 dev->features = features;
9784 err |= vlan_get_rx_stag_filter_info(dev);
9785 } else {
9786 vlan_drop_rx_stag_filter_info(dev);
9787 }
9788 }
9789
9790 dev->features = features;
9791 }
9792
9793 return err < 0 ? 0 : 1;
9794 }
9795
9796
9797
9798
9799
9800
9801
9802
9803
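/**
 * netdev_update_features - recalculate device features
 * @dev: the device to check
 *
 * Recalculates the dev->features set and sends a features-change
 * notification only if the features actually changed. Should be called
 * after driver or hardware dependent conditions that influence the
 * features might have changed.
 */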
9804 void netdev_update_features(struct net_device *dev)
9805 {
9806 if (__netdev_update_features(dev))
9807 netdev_features_change(dev);
9808 }
9809 EXPORT_SYMBOL(netdev_update_features);
9810
9811
9812
9813
9814
9815
9816
9817
9818
9819
9820
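/**
 * netdev_change_features - recalculate device features
 * @dev: the device to check
 *
 * Recalculates the dev->features set and sends a features-change
 * notification unconditionally, even if nothing changed.
 */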
9821 void netdev_change_features(struct net_device *dev)
9822 {
9823 __netdev_update_features(dev);
9824 netdev_features_change(dev);
9825 }
9826 EXPORT_SYMBOL(netdev_change_features);
9827
9828
9829
9830
9831
9832
9833
9834
9835
9836
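/**
 * netif_stacked_transfer_operstate - transfer operstate
 * @rootdev: the root or lower level device
 * @dev: the stacked device whose state should follow it
 *
 * Mirrors the dormant, testing and carrier state of @rootdev onto @dev,
 * e.g. so a VLAN device tracks its real device.
 */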
9837 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
9838 struct net_device *dev)
9839 {
9840 if (rootdev->operstate == IF_OPER_DORMANT)
9841 netif_dormant_on(dev);
9842 else
9843 netif_dormant_off(dev);
9844
9845 if (rootdev->operstate == IF_OPER_TESTING)
9846 netif_testing_on(dev);
9847 else
9848 netif_testing_off(dev);
9849
9850 if (netif_carrier_ok(rootdev))
9851 netif_carrier_on(dev);
9852 else
9853 netif_carrier_off(dev);
9854 }
9855 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
9856
9857 static int netif_alloc_rx_queues(struct net_device *dev)
9858 {
9859 unsigned int i, count = dev->num_rx_queues;
9860 struct netdev_rx_queue *rx;
9861 size_t sz = count * sizeof(*rx);
9862 int err = 0;
9863
9864 BUG_ON(count < 1);
9865
9866 rx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
9867 if (!rx)
9868 return -ENOMEM;
9869
9870 dev->_rx = rx;
9871
9872 for (i = 0; i < count; i++) {
9873 rx[i].dev = dev;
9874
9875
9876 err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i, 0);
9877 if (err < 0)
9878 goto err_rxq_info;
9879 }
9880 return 0;
9881
9882 err_rxq_info:
9883
9884 while (i--)
9885 xdp_rxq_info_unreg(&rx[i].xdp_rxq);
9886 kvfree(dev->_rx);
9887 dev->_rx = NULL;
9888 return err;
9889 }
9890
9891 static void netif_free_rx_queues(struct net_device *dev)
9892 {
9893 unsigned int i, count = dev->num_rx_queues;
9894
9895
9896 if (!dev->_rx)
9897 return;
9898
9899 for (i = 0; i < count; i++)
9900 xdp_rxq_info_unreg(&dev->_rx[i].xdp_rxq);
9901
9902 kvfree(dev->_rx);
9903 }
9904
9905 static void netdev_init_one_queue(struct net_device *dev,
9906 struct netdev_queue *queue, void *_unused)
9907 {
9908
9909 spin_lock_init(&queue->_xmit_lock);
9910 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
9911 queue->xmit_lock_owner = -1;
9912 netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
9913 queue->dev = dev;
9914 #ifdef CONFIG_BQL
9915 dql_init(&queue->dql, HZ);
9916 #endif
9917 }
9918
9919 static void netif_free_tx_queues(struct net_device *dev)
9920 {
9921 kvfree(dev->_tx);
9922 }
9923
9924 static int netif_alloc_netdev_queues(struct net_device *dev)
9925 {
9926 unsigned int count = dev->num_tx_queues;
9927 struct netdev_queue *tx;
9928 size_t sz = count * sizeof(*tx);
9929
9930 if (count < 1 || count > 0xffff)
9931 return -EINVAL;
9932
9933 tx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
9934 if (!tx)
9935 return -ENOMEM;
9936
9937 dev->_tx = tx;
9938
9939 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
9940 spin_lock_init(&dev->tx_global_lock);
9941
9942 return 0;
9943 }
9944
9945 void netif_tx_stop_all_queues(struct net_device *dev)
9946 {
9947 unsigned int i;
9948
9949 for (i = 0; i < dev->num_tx_queues; i++) {
9950 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
9951
9952 netif_tx_stop_queue(txq);
9953 }
9954 }
9955 EXPORT_SYMBOL(netif_tx_stop_all_queues);
9956
9957
9958
9959
9960
9961
9962
9963
9964
9965
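/**
 * register_netdevice - register a network device
 * @dev: device to register
 *
 * Take a prepared network device structure and make it externally
 * accessible: allocate an ifindex if needed, fix up the feature set,
 * create the sysfs objects and send NETDEV_REGISTER to the notifier
 * chain. The caller must hold the RTNL lock.
 */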
9966 int register_netdevice(struct net_device *dev)
9967 {
9968 int ret;
9969 struct net *net = dev_net(dev);
9970
9971 BUILD_BUG_ON(sizeof(netdev_features_t) * BITS_PER_BYTE <
9972 NETDEV_FEATURE_COUNT);
9973 BUG_ON(dev_boot_phase);
9974 ASSERT_RTNL();
9975
9976 might_sleep();
9977
9978
9979 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
9980 BUG_ON(!net);
9981
9982 ret = ethtool_check_ops(dev->ethtool_ops);
9983 if (ret)
9984 return ret;
9985
9986 spin_lock_init(&dev->addr_list_lock);
9987 netdev_set_addr_lockdep_class(dev);
9988
9989 ret = dev_get_valid_name(net, dev, dev->name);
9990 if (ret < 0)
9991 goto out;
9992
9993 ret = -ENOMEM;
9994 dev->name_node = netdev_name_node_head_alloc(dev);
9995 if (!dev->name_node)
9996 goto out;
9997
9998
9999 if (dev->netdev_ops->ndo_init) {
10000 ret = dev->netdev_ops->ndo_init(dev);
10001 if (ret) {
10002 if (ret > 0)
10003 ret = -EIO;
10004 goto err_free_name;
10005 }
10006 }
10007
10008 if (((dev->hw_features | dev->features) &
10009 NETIF_F_HW_VLAN_CTAG_FILTER) &&
10010 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
10011 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
10012 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
10013 ret = -EINVAL;
10014 goto err_uninit;
10015 }
10016
10017 ret = -EBUSY;
10018 if (!dev->ifindex)
10019 dev->ifindex = dev_new_index(net);
10020 else if (__dev_get_by_index(net, dev->ifindex))
10021 goto err_uninit;
10022
	/* Transfer changeable features to wanted_features and enable
	 * software offloads (GSO and GRO).
	 */
10026 dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF);
10027 dev->features |= NETIF_F_SOFT_FEATURES;
10028
10029 if (dev->udp_tunnel_nic_info) {
10030 dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT;
10031 dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT;
10032 }
10033
10034 dev->wanted_features = dev->features & dev->hw_features;
10035
10036 if (!(dev->flags & IFF_LOOPBACK))
10037 dev->hw_features |= NETIF_F_NOCACHE_COPY;
10038
	/* If TSO is supported by the hardware, also allow the device to
	 * advertise segmenting frames while ignoring a static IP ID value
	 * (NETIF_F_TSO_MANGLEID). This does not enable the feature itself,
	 * it only lets it be enabled later.
	 */
10044 if (dev->hw_features & NETIF_F_TSO)
10045 dev->hw_features |= NETIF_F_TSO_MANGLEID;
10046 if (dev->vlan_features & NETIF_F_TSO)
10047 dev->vlan_features |= NETIF_F_TSO_MANGLEID;
10048 if (dev->mpls_features & NETIF_F_TSO)
10049 dev->mpls_features |= NETIF_F_TSO_MANGLEID;
10050 if (dev->hw_enc_features & NETIF_F_TSO)
10051 dev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
10052
10053
10054
10055 dev->vlan_features |= NETIF_F_HIGHDMA;
10056
10057
10058
10059 dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL;
10060
10061
10062
10063 dev->mpls_features |= NETIF_F_SG;
10064
10065 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
10066 ret = notifier_to_errno(ret);
10067 if (ret)
10068 goto err_uninit;
10069
10070 ret = netdev_register_kobject(dev);
10071 write_lock(&dev_base_lock);
10072 dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
10073 write_unlock(&dev_base_lock);
10074 if (ret)
10075 goto err_uninit;
10076
10077 __netdev_update_features(dev);
10078
	/* Default initial state at registry is that the
	 * device is present.
	 */
10084 set_bit(__LINK_STATE_PRESENT, &dev->state);
10085
10086 linkwatch_init_dev(dev);
10087
10088 dev_init_scheduler(dev);
10089
10090 netdev_hold(dev, &dev->dev_registered_tracker, GFP_KERNEL);
10091 list_netdevice(dev);
10092
10093 add_device_randomness(dev->dev_addr, dev->addr_len);
10094
	/* If the device has a permanent device address, the driver should
	 * set dev_addr and also addr_assign_type should be set to
	 * NET_ADDR_PERM (default value).
	 */
10099 if (dev->addr_assign_type == NET_ADDR_PERM)
10100 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
10101
10102
10103 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
10104 ret = notifier_to_errno(ret);
10105 if (ret) {
		/* Expect explicit free_netdev() on failure */
10107 dev->needs_free_netdev = false;
10108 unregister_netdevice_queue(dev, NULL);
10109 goto out;
10110 }
10111
	/* Prevent userspace races by waiting until the network
	 * device is fully setup before sending notifications.
	 */
10115 if (!dev->rtnl_link_ops ||
10116 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
10117 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
10118
10119 out:
10120 return ret;
10121
10122 err_uninit:
10123 if (dev->netdev_ops->ndo_uninit)
10124 dev->netdev_ops->ndo_uninit(dev);
10125 if (dev->priv_destructor)
10126 dev->priv_destructor(dev);
10127 err_free_name:
10128 netdev_name_node_free(dev->name_node);
10129 goto out;
10130 }
10131 EXPORT_SYMBOL(register_netdevice);
10132
/**
 *	init_dummy_netdev	- init a dummy network device for NAPI
 *	@dev: device to init
 *
 *	This takes a network device structure and initializes the minimum
 *	amount of fields so it can be used to schedule NAPI polls without
 *	registering a full blown interface. This is to be used by drivers
 *	that need to tie several hardware interfaces to a single NAPI
 *	poll scheduler due to HW limitations.
 */
10143 int init_dummy_netdev(struct net_device *dev)
10144 {
	/* Clear everything. Note we don't initialize spinlocks
	 * as they aren't supposed to be taken by any of the
	 * NAPI code and this dummy netdev is supposed to be
	 * only ever used for NAPI polls.
	 */
10150 memset(dev, 0, sizeof(struct net_device));
10151
	/* make sure we BUG if trying to hit standard
	 * register/unregister code path
	 */
10155 dev->reg_state = NETREG_DUMMY;
10156
10157
10158 INIT_LIST_HEAD(&dev->napi_list);
10159
10160
10161 set_bit(__LINK_STATE_PRESENT, &dev->state);
10162 set_bit(__LINK_STATE_START, &dev->state);
10163
10164
10165 dev_net_set(dev, &init_net);
10166
	/* Note: we don't allocate pcpu_refcnt for dummy devices,
	 * because users of this 'device' don't need to change
	 * its refcount.
	 */
10172 return 0;
10173 }
10174 EXPORT_SYMBOL_GPL(init_dummy_netdev);
10175
10176
/**
 *	register_netdev	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	This is a wrapper around register_netdevice() that takes the rtnl
 *	semaphore and expands the device name if you passed a format string
 *	to alloc_netdev.
 */
10190 int register_netdev(struct net_device *dev)
10191 {
10192 int err;
10193
10194 if (rtnl_lock_killable())
10195 return -EINTR;
10196 err = register_netdevice(dev);
10197 rtnl_unlock();
10198 return err;
10199 }
10200 EXPORT_SYMBOL(register_netdev);
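
/* Example (illustrative sketch, not part of this file): a typical probe path
 * pairing alloc_etherdev() with register_netdev(), which takes and releases
 * the rtnl lock internally.  my_priv, my_netdev_ops and my_probe are
 * hypothetical placeholders.
 *
 *	struct my_priv { int dummy; };
 *
 *	static int my_probe(struct device *parent)
 *	{
 *		struct net_device *netdev;
 *		int err;
 *
 *		netdev = alloc_etherdev(sizeof(struct my_priv));
 *		if (!netdev)
 *			return -ENOMEM;
 *
 *		SET_NETDEV_DEV(netdev, parent);
 *		netdev->netdev_ops = &my_netdev_ops;	// hypothetical ops
 *		eth_hw_addr_random(netdev);
 *
 *		err = register_netdev(netdev);
 *		if (err)
 *			free_netdev(netdev);
 *		return err;
 *	}
 */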
10201
10202 int netdev_refcnt_read(const struct net_device *dev)
10203 {
10204 #ifdef CONFIG_PCPU_DEV_REFCNT
10205 int i, refcnt = 0;
10206
10207 for_each_possible_cpu(i)
10208 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
10209 return refcnt;
10210 #else
10211 return refcount_read(&dev->dev_refcnt);
10212 #endif
10213 }
10214 EXPORT_SYMBOL(netdev_refcnt_read);
10215
10216 int netdev_unregister_timeout_secs __read_mostly = 10;
10217
10218 #define WAIT_REFS_MIN_MSECS 1
10219 #define WAIT_REFS_MAX_MSECS 250
10220
10221
/* netdev_wait_allrefs_any - wait until all references of any device on the
 * list are gone.
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */
10232 static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
10233 {
10234 unsigned long rebroadcast_time, warning_time;
10235 struct net_device *dev;
10236 int wait = 0;
10237
10238 rebroadcast_time = warning_time = jiffies;
10239
10240 list_for_each_entry(dev, list, todo_list)
10241 if (netdev_refcnt_read(dev) == 1)
10242 return dev;
10243
10244 while (true) {
10245 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
10246 rtnl_lock();
10247
10248
10249 list_for_each_entry(dev, list, todo_list)
10250 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
10251
10252 __rtnl_unlock();
10253 rcu_barrier();
10254 rtnl_lock();
10255
10256 list_for_each_entry(dev, list, todo_list)
10257 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
10258 &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
10265 linkwatch_run_queue();
10266 break;
10267 }
10268
10269 __rtnl_unlock();
10270
10271 rebroadcast_time = jiffies;
10272 }
10273
10274 if (!wait) {
10275 rcu_barrier();
10276 wait = WAIT_REFS_MIN_MSECS;
10277 } else {
10278 msleep(wait);
10279 wait = min(wait << 1, WAIT_REFS_MAX_MSECS);
10280 }
10281
10282 list_for_each_entry(dev, list, todo_list)
10283 if (netdev_refcnt_read(dev) == 1)
10284 return dev;
10285
10286 if (time_after(jiffies, warning_time +
10287 READ_ONCE(netdev_unregister_timeout_secs) * HZ)) {
10288 list_for_each_entry(dev, list, todo_list) {
10289 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
10290 dev->name, netdev_refcnt_read(dev));
10291 ref_tracker_dir_print(&dev->refcnt_tracker, 10);
10292 }
10293
10294 warning_time = jiffies;
10295 }
10296 }
10297 }
10298
/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock().
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 */
10323 void netdev_run_todo(void)
10324 {
10325 struct net_device *dev, *tmp;
10326 struct list_head list;
10327 #ifdef CONFIG_LOCKDEP
10328 struct list_head unlink_list;
10329
10330 list_replace_init(&net_unlink_list, &unlink_list);
10331
10332 while (!list_empty(&unlink_list)) {
10333 struct net_device *dev = list_first_entry(&unlink_list,
10334 struct net_device,
10335 unlink_list);
10336 list_del_init(&dev->unlink_list);
10337 dev->nested_level = dev->lower_level - 1;
10338 }
10339 #endif
10340
10341
10342 list_replace_init(&net_todo_list, &list);
10343
10344 __rtnl_unlock();
10345
10346
10347 if (!list_empty(&list))
10348 rcu_barrier();
10349
10350 list_for_each_entry_safe(dev, tmp, &list, todo_list) {
10351 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
10352 netdev_WARN(dev, "run_todo but not unregistering\n");
10353 list_del(&dev->todo_list);
10354 continue;
10355 }
10356
10357 write_lock(&dev_base_lock);
10358 dev->reg_state = NETREG_UNREGISTERED;
10359 write_unlock(&dev_base_lock);
10360 linkwatch_forget_dev(dev);
10361 }
10362
10363 while (!list_empty(&list)) {
10364 dev = netdev_wait_allrefs_any(&list);
10365 list_del(&dev->todo_list);
10366
10367
10368 BUG_ON(netdev_refcnt_read(dev) != 1);
10369 BUG_ON(!list_empty(&dev->ptype_all));
10370 BUG_ON(!list_empty(&dev->ptype_specific));
10371 WARN_ON(rcu_access_pointer(dev->ip_ptr));
10372 WARN_ON(rcu_access_pointer(dev->ip6_ptr));
10373 #if IS_ENABLED(CONFIG_DECNET)
10374 WARN_ON(dev->dn_ptr);
10375 #endif
10376 if (dev->priv_destructor)
10377 dev->priv_destructor(dev);
10378 if (dev->needs_free_netdev)
10379 free_netdev(dev);
10380
10381 if (atomic_dec_and_test(&dev_net(dev)->dev_unreg_count))
10382 wake_up(&netdev_unregistering_wq);
10383
10384
10385 kobject_put(&dev->dev.kobj);
10386 }
10387 }
10388
/* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has
 * all the same fields in the same order as net_device_stats, with only
 * the type differing, but rtnl_link_stats64 may have additional fields
 * at the end for newer counters.
 */
10394 void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
10395 const struct net_device_stats *netdev_stats)
10396 {
10397 #if BITS_PER_LONG == 64
10398 BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
10399 memcpy(stats64, netdev_stats, sizeof(*netdev_stats));
10400
10401 memset((char *)stats64 + sizeof(*netdev_stats), 0,
10402 sizeof(*stats64) - sizeof(*netdev_stats));
10403 #else
10404 size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long);
10405 const unsigned long *src = (const unsigned long *)netdev_stats;
10406 u64 *dst = (u64 *)stats64;
10407
10408 BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
10409 for (i = 0; i < n; i++)
10410 dst[i] = src[i];
10411
10412 memset((char *)stats64 + n * sizeof(u64), 0,
10413 sizeof(*stats64) - n * sizeof(u64));
10414 #endif
10415 }
10416 EXPORT_SYMBOL(netdev_stats_to_stats64);
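
/* Example (illustrative sketch, not part of this file): a driver that only
 * maintains the legacy dev->stats counters can implement ndo_get_stats64()
 * with this helper; fields that exist only in rtnl_link_stats64 are zeroed.
 * my_get_stats64 is a hypothetical name.
 *
 *	static void my_get_stats64(struct net_device *dev,
 *				   struct rtnl_link_stats64 *storage)
 *	{
 *		netdev_stats_to_stats64(storage, &dev->stats);
 *	}
 */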
10417
10418 struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev)
10419 {
10420 struct net_device_core_stats __percpu *p;
10421
10422 p = alloc_percpu_gfp(struct net_device_core_stats,
10423 GFP_ATOMIC | __GFP_NOWARN);
10424
10425 if (p && cmpxchg(&dev->core_stats, NULL, p))
10426 free_percpu(p);
10427
	/* This READ_ONCE() pairs with the cmpxchg() above */
10429 return READ_ONCE(dev->core_stats);
10430 }
10431 EXPORT_SYMBOL(netdev_core_stats_alloc);
10432
/**
 *	dev_get_stats	- get network device statistics
 *	@dev: device to get statistics from
 *	@storage: place to store stats
 *
 *	Get network statistics from device. Return @storage.
 *	The device driver may provide its own method by setting
 *	dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
 *	otherwise the internal statistics structure is used.
 */
10443 struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
10444 struct rtnl_link_stats64 *storage)
10445 {
10446 const struct net_device_ops *ops = dev->netdev_ops;
10447 const struct net_device_core_stats __percpu *p;
10448
10449 if (ops->ndo_get_stats64) {
10450 memset(storage, 0, sizeof(*storage));
10451 ops->ndo_get_stats64(dev, storage);
10452 } else if (ops->ndo_get_stats) {
10453 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
10454 } else {
10455 netdev_stats_to_stats64(storage, &dev->stats);
10456 }
10457
	/* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
10459 p = READ_ONCE(dev->core_stats);
10460 if (p) {
10461 const struct net_device_core_stats *core_stats;
10462 int i;
10463
10464 for_each_possible_cpu(i) {
10465 core_stats = per_cpu_ptr(p, i);
10466 storage->rx_dropped += READ_ONCE(core_stats->rx_dropped);
10467 storage->tx_dropped += READ_ONCE(core_stats->tx_dropped);
10468 storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler);
10469 storage->rx_otherhost_dropped += READ_ONCE(core_stats->rx_otherhost_dropped);
10470 }
10471 }
10472 return storage;
10473 }
10474 EXPORT_SYMBOL(dev_get_stats);
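
/* Example (illustrative sketch, not part of this file): other kernel code can
 * read a device's aggregate counters by passing its own rtnl_link_stats64;
 * dev_get_stats() fills it from the driver or the core fallbacks above.
 * my_log_counters is a hypothetical name.
 *
 *	static void my_log_counters(struct net_device *dev)
 *	{
 *		struct rtnl_link_stats64 stats;
 *
 *		dev_get_stats(dev, &stats);
 *		netdev_info(dev, "rx %llu / tx %llu packets\n",
 *			    stats.rx_packets, stats.tx_packets);
 *	}
 */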
10475
/**
 *	dev_fetch_sw_netstats - get per-cpu network device statistics
 *	@s: place to store stats
 *	@netstats: per-cpu network stats to read from
 *
 *	Read per-cpu network statistics and populate the related fields in @s.
 */
10483 void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
10484 const struct pcpu_sw_netstats __percpu *netstats)
10485 {
10486 int cpu;
10487
10488 for_each_possible_cpu(cpu) {
10489 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
10490 const struct pcpu_sw_netstats *stats;
10491 unsigned int start;
10492
10493 stats = per_cpu_ptr(netstats, cpu);
10494 do {
10495 start = u64_stats_fetch_begin_irq(&stats->syncp);
10496 rx_packets = u64_stats_read(&stats->rx_packets);
10497 rx_bytes = u64_stats_read(&stats->rx_bytes);
10498 tx_packets = u64_stats_read(&stats->tx_packets);
10499 tx_bytes = u64_stats_read(&stats->tx_bytes);
10500 } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
10501
10502 s->rx_packets += rx_packets;
10503 s->rx_bytes += rx_bytes;
10504 s->tx_packets += tx_packets;
10505 s->tx_bytes += tx_bytes;
10506 }
10507 }
10508 EXPORT_SYMBOL_GPL(dev_fetch_sw_netstats);
10509
/**
 *	dev_get_tstats64 - ndo_get_stats64 implementation
 *	@dev: device to get statistics from
 *	@s: place to store stats
 *
 *	Populate @s from dev->stats and dev->tstats. Can be used as
 *	ndo_get_stats64() callback.
 */
10518 void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s)
10519 {
10520 netdev_stats_to_stats64(s, &dev->stats);
10521 dev_fetch_sw_netstats(s, dev->tstats);
10522 }
10523 EXPORT_SYMBOL_GPL(dev_get_tstats64);
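
/* Example (illustrative sketch, not part of this file): a driver that counts
 * packets in per-cpu dev->tstats can plug dev_get_tstats64() directly into
 * its ops, allocating and freeing the per-cpu stats in ndo_init/ndo_uninit.
 * my_init, my_uninit and my_netdev_ops are hypothetical names.
 *
 *	static int my_init(struct net_device *dev)
 *	{
 *		dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 *		return dev->tstats ? 0 : -ENOMEM;
 *	}
 *
 *	static void my_uninit(struct net_device *dev)
 *	{
 *		free_percpu(dev->tstats);
 *	}
 *
 *	static const struct net_device_ops my_netdev_ops = {
 *		.ndo_init	 = my_init,
 *		.ndo_uninit	 = my_uninit,
 *		.ndo_get_stats64 = dev_get_tstats64,
 *	};
 */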
10524
10525 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
10526 {
10527 struct netdev_queue *queue = dev_ingress_queue(dev);
10528
10529 #ifdef CONFIG_NET_CLS_ACT
10530 if (queue)
10531 return queue;
10532 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
10533 if (!queue)
10534 return NULL;
10535 netdev_init_one_queue(dev, queue, NULL);
10536 RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
10537 queue->qdisc_sleeping = &noop_qdisc;
10538 rcu_assign_pointer(dev->ingress_queue, queue);
10539 #endif
10540 return queue;
10541 }
10542
10543 static const struct ethtool_ops default_ethtool_ops;
10544
10545 void netdev_set_default_ethtool_ops(struct net_device *dev,
10546 const struct ethtool_ops *ops)
10547 {
10548 if (dev->ethtool_ops == &default_ethtool_ops)
10549 dev->ethtool_ops = ops;
10550 }
10551 EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
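
/* Example (illustrative sketch, not part of this file): a subsystem can offer
 * fallback ethtool handlers to drivers that did not install their own; the
 * helper above only replaces the core default, never an explicit driver
 * choice.  my_fallback_ethtool_ops and my_attach are hypothetical names.
 *
 *	static const struct ethtool_ops my_fallback_ethtool_ops = {
 *		.get_link = ethtool_op_get_link,
 *	};
 *
 *	static void my_attach(struct net_device *dev)
 *	{
 *		netdev_set_default_ethtool_ops(dev, &my_fallback_ethtool_ops);
 *	}
 */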
10552
10553 void netdev_freemem(struct net_device *dev)
10554 {
10555 char *addr = (char *)dev - dev->padded;
10556
10557 kvfree(addr);
10558 }
10559
/**
 * alloc_netdev_mqs - allocate network device
 * @sizeof_priv: size of private data to allocate space for
 * @name: device name format string
 * @name_assign_type: origin of device name
 * @setup: callback to initialize device
 * @txqs: the number of TX subqueues to allocate
 * @rxqs: the number of RX subqueues to allocate
 *
 * Allocates a struct net_device with private data area for driver use
 * and performs basic initialization. Also allocates subqueue structs
 * for each queue on the device.
 */
10573 struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
10574 unsigned char name_assign_type,
10575 void (*setup)(struct net_device *),
10576 unsigned int txqs, unsigned int rxqs)
10577 {
10578 struct net_device *dev;
10579 unsigned int alloc_size;
10580 struct net_device *p;
10581
10582 BUG_ON(strlen(name) >= sizeof(dev->name));
10583
10584 if (txqs < 1) {
10585 pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
10586 return NULL;
10587 }
10588
10589 if (rxqs < 1) {
10590 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
10591 return NULL;
10592 }
10593
10594 alloc_size = sizeof(struct net_device);
10595 if (sizeof_priv) {
10596
10597 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
10598 alloc_size += sizeof_priv;
10599 }
10600
10601 alloc_size += NETDEV_ALIGN - 1;
10602
10603 p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
10604 if (!p)
10605 return NULL;
10606
10607 dev = PTR_ALIGN(p, NETDEV_ALIGN);
10608 dev->padded = (char *)dev - (char *)p;
10609
10610 ref_tracker_dir_init(&dev->refcnt_tracker, 128);
10611 #ifdef CONFIG_PCPU_DEV_REFCNT
10612 dev->pcpu_refcnt = alloc_percpu(int);
10613 if (!dev->pcpu_refcnt)
10614 goto free_dev;
10615 __dev_hold(dev);
10616 #else
10617 refcount_set(&dev->dev_refcnt, 1);
10618 #endif
10619
10620 if (dev_addr_init(dev))
10621 goto free_pcpu;
10622
10623 dev_mc_init(dev);
10624 dev_uc_init(dev);
10625
10626 dev_net_set(dev, &init_net);
10627
10628 dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
10629 dev->gso_max_segs = GSO_MAX_SEGS;
10630 dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
10631 dev->tso_max_size = TSO_LEGACY_MAX_SIZE;
10632 dev->tso_max_segs = TSO_MAX_SEGS;
10633 dev->upper_level = 1;
10634 dev->lower_level = 1;
10635 #ifdef CONFIG_LOCKDEP
10636 dev->nested_level = 0;
10637 INIT_LIST_HEAD(&dev->unlink_list);
10638 #endif
10639
10640 INIT_LIST_HEAD(&dev->napi_list);
10641 INIT_LIST_HEAD(&dev->unreg_list);
10642 INIT_LIST_HEAD(&dev->close_list);
10643 INIT_LIST_HEAD(&dev->link_watch_list);
10644 INIT_LIST_HEAD(&dev->adj_list.upper);
10645 INIT_LIST_HEAD(&dev->adj_list.lower);
10646 INIT_LIST_HEAD(&dev->ptype_all);
10647 INIT_LIST_HEAD(&dev->ptype_specific);
10648 INIT_LIST_HEAD(&dev->net_notifier_list);
10649 #ifdef CONFIG_NET_SCHED
10650 hash_init(dev->qdisc_hash);
10651 #endif
10652 dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
10653 setup(dev);
10654
10655 if (!dev->tx_queue_len) {
10656 dev->priv_flags |= IFF_NO_QUEUE;
10657 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
10658 }
10659
10660 dev->num_tx_queues = txqs;
10661 dev->real_num_tx_queues = txqs;
10662 if (netif_alloc_netdev_queues(dev))
10663 goto free_all;
10664
10665 dev->num_rx_queues = rxqs;
10666 dev->real_num_rx_queues = rxqs;
10667 if (netif_alloc_rx_queues(dev))
10668 goto free_all;
10669
10670 strcpy(dev->name, name);
10671 dev->name_assign_type = name_assign_type;
10672 dev->group = INIT_NETDEV_GROUP;
10673 if (!dev->ethtool_ops)
10674 dev->ethtool_ops = &default_ethtool_ops;
10675
10676 nf_hook_netdev_init(dev);
10677
10678 return dev;
10679
10680 free_all:
10681 free_netdev(dev);
10682 return NULL;
10683
10684 free_pcpu:
10685 #ifdef CONFIG_PCPU_DEV_REFCNT
10686 free_percpu(dev->pcpu_refcnt);
10687 free_dev:
10688 #endif
10689 netdev_freemem(dev);
10690 return NULL;
10691 }
10692 EXPORT_SYMBOL(alloc_netdev_mqs);
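
/* Example (illustrative sketch, not part of this file): allocating a
 * multi-queue Ethernet-style device directly, instead of going through the
 * alloc_etherdev()/alloc_netdev() wrappers.  my_priv and my_alloc are
 * hypothetical; ether_setup() is the usual Ethernet setup callback.
 *
 *	struct my_priv { int dummy; };
 *
 *	static struct net_device *my_alloc(unsigned int nqueues)
 *	{
 *		return alloc_netdev_mqs(sizeof(struct my_priv), "my%d",
 *					NET_NAME_ENUM, ether_setup,
 *					nqueues, nqueues);
 *	}
 */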
10693
/**
 * free_netdev - free network device
 * @dev: device
 *
 * This function does the last stage of destroying an allocated device
 * interface. The reference to the device object is released. If this
 * is the last reference then it will be freed. Must be called in process
 * context.
 */
10703 void free_netdev(struct net_device *dev)
10704 {
10705 struct napi_struct *p, *n;
10706
10707 might_sleep();
10708
	/* When called immediately after register_netdevice() failed the unwind
	 * handling may still be dismantling the device. Handle that case by
	 * deferring the free.
	 */
10713 if (dev->reg_state == NETREG_UNREGISTERING) {
10714 ASSERT_RTNL();
10715 dev->needs_free_netdev = true;
10716 return;
10717 }
10718
10719 netif_free_tx_queues(dev);
10720 netif_free_rx_queues(dev);
10721
10722 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
10723
10724
10725 dev_addr_flush(dev);
10726
10727 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
10728 netif_napi_del(p);
10729
10730 ref_tracker_dir_exit(&dev->refcnt_tracker);
10731 #ifdef CONFIG_PCPU_DEV_REFCNT
10732 free_percpu(dev->pcpu_refcnt);
10733 dev->pcpu_refcnt = NULL;
10734 #endif
10735 free_percpu(dev->core_stats);
10736 dev->core_stats = NULL;
10737 free_percpu(dev->xdp_bulkq);
10738 dev->xdp_bulkq = NULL;
10739
10740
10741 if (dev->reg_state == NETREG_UNINITIALIZED) {
10742 netdev_freemem(dev);
10743 return;
10744 }
10745
10746 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
10747 dev->reg_state = NETREG_RELEASED;
10748
10749
10750 put_device(&dev->dev);
10751 }
10752 EXPORT_SYMBOL(free_netdev);
10753
/**
 *	synchronize_net - Synchronize with packet receive processing
 *
 *	Wait for packets currently being received to be done.
 *	Does not block later packets from starting.
 */
10760 void synchronize_net(void)
10761 {
10762 might_sleep();
10763 if (rtnl_is_locked())
10764 synchronize_rcu_expedited();
10765 else
10766 synchronize_rcu();
10767 }
10768 EXPORT_SYMBOL(synchronize_net);
10769
/**
 *	unregister_netdevice_queue - remove device from the kernel
 *	@dev: device
 *	@head: list
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *	If head not NULL, device is queued to be unregistered later.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	unregister_netdev() instead of this.
 */
10783 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
10784 {
10785 ASSERT_RTNL();
10786
10787 if (head) {
10788 list_move_tail(&dev->unreg_list, head);
10789 } else {
10790 LIST_HEAD(single);
10791
10792 list_add(&dev->unreg_list, &single);
10793 unregister_netdevice_many(&single);
10794 }
10795 }
10796 EXPORT_SYMBOL(unregister_netdevice_queue);
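
/* Example (illustrative sketch, not part of this file): tearing down several
 * devices in one rtnl section by queueing them on a list and flushing it
 * once, which batches the notifier and synchronize_net() work.  my_devs,
 * my_count and my_remove_all are hypothetical names.
 *
 *	static void my_remove_all(struct net_device **my_devs, int my_count)
 *	{
 *		LIST_HEAD(kill_list);
 *		int i;
 *
 *		rtnl_lock();
 *		for (i = 0; i < my_count; i++)
 *			unregister_netdevice_queue(my_devs[i], &kill_list);
 *		unregister_netdevice_many(&kill_list);
 *		rtnl_unlock();
 *	}
 */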
10797
/**
 *	unregister_netdevice_many - unregister many devices
 *	@head: list of devices
 *
 *	Note: as most callers use a stack allocated list_head,
 *	we force a list_del() to make sure stack won't be corrupted later.
 */
10805 void unregister_netdevice_many(struct list_head *head)
10806 {
10807 struct net_device *dev, *tmp;
10808 LIST_HEAD(close_head);
10809
10810 BUG_ON(dev_boot_phase);
10811 ASSERT_RTNL();
10812
10813 if (list_empty(head))
10814 return;
10815
10816 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
		/* Some devices call without registering
		 * for initialization unwind. Remove those
		 * devices and proceed with the remaining.
		 */
10821 if (dev->reg_state == NETREG_UNINITIALIZED) {
10822 pr_debug("unregister_netdevice: device %s/%p never was registered\n",
10823 dev->name, dev);
10824
10825 WARN_ON(1);
10826 list_del(&dev->unreg_list);
10827 continue;
10828 }
10829 dev->dismantle = true;
10830 BUG_ON(dev->reg_state != NETREG_REGISTERED);
10831 }
10832
10833
10834 list_for_each_entry(dev, head, unreg_list)
10835 list_add_tail(&dev->close_list, &close_head);
10836 dev_close_many(&close_head, true);
10837
10838 list_for_each_entry(dev, head, unreg_list) {
10839
10840 write_lock(&dev_base_lock);
10841 unlist_netdevice(dev, false);
10842 dev->reg_state = NETREG_UNREGISTERING;
10843 write_unlock(&dev_base_lock);
10844 }
10845 flush_all_backlogs();
10846
10847 synchronize_net();
10848
10849 list_for_each_entry(dev, head, unreg_list) {
10850 struct sk_buff *skb = NULL;
10851
10852
10853 dev_shutdown(dev);
10854
10855 dev_xdp_uninstall(dev);
10856
10857 netdev_offload_xstats_disable_all(dev);
10858
10859
10860
10861
10862 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
10863
10864 if (!dev->rtnl_link_ops ||
10865 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
10866 skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
10867 GFP_KERNEL, NULL, 0);
10868
10869
10870
10871
10872 dev_uc_flush(dev);
10873 dev_mc_flush(dev);
10874
10875 netdev_name_node_alt_flush(dev);
10876 netdev_name_node_free(dev->name_node);
10877
10878 if (dev->netdev_ops->ndo_uninit)
10879 dev->netdev_ops->ndo_uninit(dev);
10880
10881 if (skb)
10882 rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
10883
10884
10885 WARN_ON(netdev_has_any_upper_dev(dev));
10886 WARN_ON(netdev_has_any_lower_dev(dev));
10887
10888
10889 netdev_unregister_kobject(dev);
10890 #ifdef CONFIG_XPS
10891
10892 netif_reset_xps_queues_gt(dev, 0);
10893 #endif
10894 }
10895
10896 synchronize_net();
10897
10898 list_for_each_entry(dev, head, unreg_list) {
10899 netdev_put(dev, &dev->dev_registered_tracker);
10900 net_set_todo(dev);
10901 }
10902
10903 list_del(head);
10904 }
10905 EXPORT_SYMBOL(unregister_netdevice_many);
10906
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore. In general you want to use this and not
 *	unregister_netdevice.
 */
10918 void unregister_netdev(struct net_device *dev)
10919 {
10920 rtnl_lock();
10921 unregister_netdevice(dev);
10922 rtnl_unlock();
10923 }
10924 EXPORT_SYMBOL(unregister_netdev);
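
/* Example (illustrative sketch, not part of this file): the usual driver
 * remove path, mirroring the probe sketch near register_netdev() above.
 * my_remove is a hypothetical name.
 *
 *	static void my_remove(struct net_device *netdev)
 *	{
 *		unregister_netdev(netdev);	// takes and releases the rtnl lock
 *		free_netdev(netdev);		// safe once unregistration completed
 *	}
 */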
10925
/**
 *	__dev_change_net_namespace - move device to a different network namespace
 *	@dev: device
 *	@net: network namespace
 *	@pat: if not NULL, name pattern to try if the current device name
 *	      is already taken in the destination network namespace.
 *	@new_ifindex: if not zero, specifies the device index in the target
 *	              namespace.
 *
 *	This function shuts down a device interface and moves it
 *	to a new network namespace. On success 0 is returned, on
 *	a failure a negative errno code is returned.
 *
 *	Callers must hold the rtnl semaphore.
 */
10942 int __dev_change_net_namespace(struct net_device *dev, struct net *net,
10943 const char *pat, int new_ifindex)
10944 {
10945 struct net *net_old = dev_net(dev);
10946 int err, new_nsid;
10947
10948 ASSERT_RTNL();
10949
10950
10951 err = -EINVAL;
10952 if (dev->features & NETIF_F_NETNS_LOCAL)
10953 goto out;
10954
10955
10956 if (dev->reg_state != NETREG_REGISTERED)
10957 goto out;
10958
10959
10960 err = 0;
10961 if (net_eq(net_old, net))
10962 goto out;
10963
10964
10965
10966
10967 err = -EEXIST;
10968 if (netdev_name_in_use(net, dev->name)) {
10969
10970 if (!pat)
10971 goto out;
10972 err = dev_get_valid_name(net, dev, pat);
10973 if (err < 0)
10974 goto out;
10975 }
10976
10977
10978 err = -EBUSY;
10979 if (new_ifindex && __dev_get_by_index(net, new_ifindex))
10980 goto out;
10981
10982
10983
10984
10985
10986
10987 dev_close(dev);
10988
10989
10990 unlist_netdevice(dev, true);
10991
10992 synchronize_net();
10993
10994
10995 dev_shutdown(dev);
10996
	/* Notify protocols that we are about to destroy this device.
	 * They should clean all the things.
	 *
	 * Note that dev->reg_state stays at NETREG_REGISTERED.
	 * This is wanted because this way 8021q and macvlan know
	 * the device is just moving and can keep their slaves up.
	 */
11004 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
11005 rcu_barrier();
11006
11007 new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
11008
11009 if (!new_ifindex) {
11010 if (__dev_get_by_index(net, dev->ifindex))
11011 new_ifindex = dev_new_index(net);
11012 else
11013 new_ifindex = dev->ifindex;
11014 }
11015
11016 rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
11017 new_ifindex);
11018
11019
11020
11021
11022 dev_uc_flush(dev);
11023 dev_mc_flush(dev);
11024
11025
11026 kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
11027 netdev_adjacent_del_links(dev);
11028
11029
11030 move_netdevice_notifiers_dev_net(dev, net);
11031
11032
11033 dev_net_set(dev, net);
11034 dev->ifindex = new_ifindex;
11035
11036
11037 kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
11038 netdev_adjacent_add_links(dev);
11039
11040
11041 err = device_rename(&dev->dev, dev->name);
11042 WARN_ON(err);
11043
11044
11045
11046
11047 err = netdev_change_owner(dev, net_old, net);
11048 WARN_ON(err);
11049
11050
11051 list_netdevice(dev);
11052
11053
11054 call_netdevice_notifiers(NETDEV_REGISTER, dev);
11055
11056
11057
11058
11059
11060 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
11061
11062 synchronize_net();
11063 err = 0;
11064 out:
11065 return err;
11066 }
11067 EXPORT_SYMBOL_GPL(__dev_change_net_namespace);
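
/* Example (illustrative sketch, not part of this file): most callers use the
 * dev_change_net_namespace() wrapper, holding the rtnl lock and a reference
 * on the target namespace.  my_move_to_ns is a hypothetical name; "eth%d" is
 * only a fallback pattern used if the current name is taken in @net.
 *
 *	static int my_move_to_ns(struct net_device *dev, struct net *net)
 *	{
 *		int err;
 *
 *		rtnl_lock();
 *		err = dev_change_net_namespace(dev, net, "eth%d");
 *		rtnl_unlock();
 *		return err;
 *	}
 */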
11068
11069 static int dev_cpu_dead(unsigned int oldcpu)
11070 {
11071 struct sk_buff **list_skb;
11072 struct sk_buff *skb;
11073 unsigned int cpu;
11074 struct softnet_data *sd, *oldsd, *remsd = NULL;
11075
11076 local_irq_disable();
11077 cpu = smp_processor_id();
11078 sd = &per_cpu(softnet_data, cpu);
11079 oldsd = &per_cpu(softnet_data, oldcpu);
11080
11081
11082 list_skb = &sd->completion_queue;
11083 while (*list_skb)
11084 list_skb = &(*list_skb)->next;
11085
11086 *list_skb = oldsd->completion_queue;
11087 oldsd->completion_queue = NULL;
11088
11089
11090 if (oldsd->output_queue) {
11091 *sd->output_queue_tailp = oldsd->output_queue;
11092 sd->output_queue_tailp = oldsd->output_queue_tailp;
11093 oldsd->output_queue = NULL;
11094 oldsd->output_queue_tailp = &oldsd->output_queue;
11095 }
11096
11097
11098
11099
11100 while (!list_empty(&oldsd->poll_list)) {
11101 struct napi_struct *napi = list_first_entry(&oldsd->poll_list,
11102 struct napi_struct,
11103 poll_list);
11104
11105 list_del_init(&napi->poll_list);
11106 if (napi->poll == process_backlog)
11107 napi->state = 0;
11108 else
11109 ____napi_schedule(sd, napi);
11110 }
11111
11112 raise_softirq_irqoff(NET_TX_SOFTIRQ);
11113 local_irq_enable();
11114
11115 #ifdef CONFIG_RPS
11116 remsd = oldsd->rps_ipi_list;
11117 oldsd->rps_ipi_list = NULL;
11118 #endif
11119
11120 net_rps_send_ipi(remsd);
11121
11122
11123 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
11124 netif_rx(skb);
11125 input_queue_head_incr(oldsd);
11126 }
11127 while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
11128 netif_rx(skb);
11129 input_queue_head_incr(oldsd);
11130 }
11131
11132 return 0;
11133 }
11134
11135
/**
 *	netdev_increment_features - increment feature set by one
 *	@all: current feature set
 *	@one: new feature set
 *	@mask: mask feature set
 *
 *	Computes a new feature set after adding a device with feature set
 *	@one to the master device with current feature @all. Will not
 *	enable anything that is off in @mask. Returns the new feature set.
 */
11145 netdev_features_t netdev_increment_features(netdev_features_t all,
11146 netdev_features_t one, netdev_features_t mask)
11147 {
11148 if (mask & NETIF_F_HW_CSUM)
11149 mask |= NETIF_F_CSUM_MASK;
11150 mask |= NETIF_F_VLAN_CHALLENGED;
11151
11152 all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask;
11153 all &= one | ~NETIF_F_ALL_FOR_ALL;
11154
11155
11156 if (all & NETIF_F_HW_CSUM)
11157 all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM);
11158
11159 return all;
11160 }
11161 EXPORT_SYMBOL(netdev_increment_features);
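
/* Example (illustrative sketch, not part of this file): how a bonding/team
 * style master could recompute its feature set from its lower devices with
 * the helper above (caller holds the rtnl lock).  my_compute_features is a
 * hypothetical name; the mask would typically be the master's candidate
 * feature set.
 *
 *	static netdev_features_t
 *	my_compute_features(struct net_device *master, netdev_features_t mask)
 *	{
 *		netdev_features_t features = mask;
 *		struct net_device *lower;
 *		struct list_head *iter;
 *
 *		netdev_for_each_lower_dev(master, lower, iter)
 *			features = netdev_increment_features(features,
 *							     lower->features,
 *							     mask);
 *		return features;
 *	}
 */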
11162
11163 static struct hlist_head * __net_init netdev_create_hash(void)
11164 {
11165 int i;
11166 struct hlist_head *hash;
11167
11168 hash = kmalloc_array(NETDEV_HASHENTRIES, sizeof(*hash), GFP_KERNEL);
11169 if (hash != NULL)
11170 for (i = 0; i < NETDEV_HASHENTRIES; i++)
11171 INIT_HLIST_HEAD(&hash[i]);
11172
11173 return hash;
11174 }
11175
11176
11177 static int __net_init netdev_init(struct net *net)
11178 {
11179 BUILD_BUG_ON(GRO_HASH_BUCKETS >
11180 8 * sizeof_field(struct napi_struct, gro_bitmask));
11181
11182 INIT_LIST_HEAD(&net->dev_base_head);
11183
11184 net->dev_name_head = netdev_create_hash();
11185 if (net->dev_name_head == NULL)
11186 goto err_name;
11187
11188 net->dev_index_head = netdev_create_hash();
11189 if (net->dev_index_head == NULL)
11190 goto err_idx;
11191
11192 RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
11193
11194 return 0;
11195
11196 err_idx:
11197 kfree(net->dev_name_head);
11198 err_name:
11199 return -ENOMEM;
11200 }
11201
11202
/**
 * netdev_drivername - network driver for the device
 * @dev: network device
 *
 * Determine network driver for device.
 */
11208 const char *netdev_drivername(const struct net_device *dev)
11209 {
11210 const struct device_driver *driver;
11211 const struct device *parent;
11212 const char *empty = "";
11213
11214 parent = dev->dev.parent;
11215 if (!parent)
11216 return empty;
11217
11218 driver = parent->driver;
11219 if (driver && driver->name)
11220 return driver->name;
11221 return empty;
11222 }
11223
11224 static void __netdev_printk(const char *level, const struct net_device *dev,
11225 struct va_format *vaf)
11226 {
11227 if (dev && dev->dev.parent) {
11228 dev_printk_emit(level[1] - '0',
11229 dev->dev.parent,
11230 "%s %s %s%s: %pV",
11231 dev_driver_string(dev->dev.parent),
11232 dev_name(dev->dev.parent),
11233 netdev_name(dev), netdev_reg_state(dev),
11234 vaf);
11235 } else if (dev) {
11236 printk("%s%s%s: %pV",
11237 level, netdev_name(dev), netdev_reg_state(dev), vaf);
11238 } else {
11239 printk("%s(NULL net_device): %pV", level, vaf);
11240 }
11241 }
11242
11243 void netdev_printk(const char *level, const struct net_device *dev,
11244 const char *format, ...)
11245 {
11246 struct va_format vaf;
11247 va_list args;
11248
11249 va_start(args, format);
11250
11251 vaf.fmt = format;
11252 vaf.va = &args;
11253
11254 __netdev_printk(level, dev, &vaf);
11255
11256 va_end(args);
11257 }
11258 EXPORT_SYMBOL(netdev_printk);
11259
11260 #define define_netdev_printk_level(func, level) \
11261 void func(const struct net_device *dev, const char *fmt, ...) \
11262 { \
11263 struct va_format vaf; \
11264 va_list args; \
11265 \
11266 va_start(args, fmt); \
11267 \
11268 vaf.fmt = fmt; \
11269 vaf.va = &args; \
11270 \
11271 __netdev_printk(level, dev, &vaf); \
11272 \
11273 va_end(args); \
11274 } \
11275 EXPORT_SYMBOL(func);
11276
11277 define_netdev_printk_level(netdev_emerg, KERN_EMERG);
11278 define_netdev_printk_level(netdev_alert, KERN_ALERT);
11279 define_netdev_printk_level(netdev_crit, KERN_CRIT);
11280 define_netdev_printk_level(netdev_err, KERN_ERR);
11281 define_netdev_printk_level(netdev_warn, KERN_WARNING);
11282 define_netdev_printk_level(netdev_notice, KERN_NOTICE);
11283 define_netdev_printk_level(netdev_info, KERN_INFO);
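
/* Example (illustrative sketch, not part of this file): drivers should prefer
 * the netdev_*() helpers generated above over raw printk(), since messages
 * are prefixed with the parent bus device and interface name.  my_open is a
 * hypothetical name.
 *
 *	static int my_open(struct net_device *dev)
 *	{
 *		if (!netif_device_present(dev)) {
 *			netdev_err(dev, "hardware not present\n");
 *			return -ENODEV;
 *		}
 *		netdev_info(dev, "bringing link up\n");
 *		return 0;
 *	}
 */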
11284
11285 static void __net_exit netdev_exit(struct net *net)
11286 {
11287 kfree(net->dev_name_head);
11288 kfree(net->dev_index_head);
11289 if (net != &init_net)
11290 WARN_ON_ONCE(!list_empty(&net->dev_base_head));
11291 }
11292
11293 static struct pernet_operations __net_initdata netdev_net_ops = {
11294 .init = netdev_init,
11295 .exit = netdev_exit,
11296 };
11297
11298 static void __net_exit default_device_exit_net(struct net *net)
11299 {
11300 struct net_device *dev, *aux;
11301
	/* Push all migratable network devices back to the
	 * initial network namespace.
	 */
11305 ASSERT_RTNL();
11306 for_each_netdev_safe(net, dev, aux) {
11307 int err;
11308 char fb_name[IFNAMSIZ];
11309
11310
11311 if (dev->features & NETIF_F_NETNS_LOCAL)
11312 continue;
11313
11314
11315 if (dev->rtnl_link_ops && !dev->rtnl_link_ops->netns_refund)
11316 continue;
11317
11318
11319 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
11320 if (netdev_name_in_use(&init_net, fb_name))
11321 snprintf(fb_name, IFNAMSIZ, "dev%%d");
11322 err = dev_change_net_namespace(dev, &init_net, fb_name);
11323 if (err) {
11324 pr_emerg("%s: failed to move %s to init_net: %d\n",
11325 __func__, dev->name, err);
11326 BUG();
11327 }
11328 }
11329 }
11330
11331 static void __net_exit default_device_exit_batch(struct list_head *net_list)
11332 {
	/* At exit all network devices must be removed from a network
	 * namespace. Do this in the reverse order of registration.
	 * Do this across as many network namespaces as possible to
	 * improve batching efficiency.
	 */
11338 struct net_device *dev;
11339 struct net *net;
11340 LIST_HEAD(dev_kill_list);
11341
11342 rtnl_lock();
11343 list_for_each_entry(net, net_list, exit_list) {
11344 default_device_exit_net(net);
11345 cond_resched();
11346 }
11347
11348 list_for_each_entry(net, net_list, exit_list) {
11349 for_each_netdev_reverse(net, dev) {
11350 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
11351 dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
11352 else
11353 unregister_netdevice_queue(dev, &dev_kill_list);
11354 }
11355 }
11356 unregister_netdevice_many(&dev_kill_list);
11357 rtnl_unlock();
11358 }
11359
11360 static struct pernet_operations __net_initdata default_device_ops = {
11361 .exit_batch = default_device_exit_batch,
11362 };
11363
11364
/*
 *	Initialize the DEV module: per-cpu softnet data, packet type lists,
 *	procfs/sysfs hooks, pernet operations and the NET_TX/NET_RX softirqs.
 *
 *	This is called single threaded during boot, so no need
 *	to take the rtnl semaphore.
 */
11375 static int __init net_dev_init(void)
11376 {
11377 int i, rc = -ENOMEM;
11378
11379 BUG_ON(!dev_boot_phase);
11380
11381 if (dev_proc_init())
11382 goto out;
11383
11384 if (netdev_kobject_init())
11385 goto out;
11386
11387 INIT_LIST_HEAD(&ptype_all);
11388 for (i = 0; i < PTYPE_HASH_SIZE; i++)
11389 INIT_LIST_HEAD(&ptype_base[i]);
11390
11391 if (register_pernet_subsys(&netdev_net_ops))
11392 goto out;
11393
	/*
	 *	Initialise the packet receive queues.
	 */
11398 for_each_possible_cpu(i) {
11399 struct work_struct *flush = per_cpu_ptr(&flush_works, i);
11400 struct softnet_data *sd = &per_cpu(softnet_data, i);
11401
11402 INIT_WORK(flush, flush_backlog);
11403
11404 skb_queue_head_init(&sd->input_pkt_queue);
11405 skb_queue_head_init(&sd->process_queue);
11406 #ifdef CONFIG_XFRM_OFFLOAD
11407 skb_queue_head_init(&sd->xfrm_backlog);
11408 #endif
11409 INIT_LIST_HEAD(&sd->poll_list);
11410 sd->output_queue_tailp = &sd->output_queue;
11411 #ifdef CONFIG_RPS
11412 INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
11413 sd->cpu = i;
11414 #endif
11415 INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
11416 spin_lock_init(&sd->defer_lock);
11417
11418 init_gro_hash(&sd->backlog);
11419 sd->backlog.poll = process_backlog;
11420 sd->backlog.weight = weight_p;
11421 }
11422
11423 dev_boot_phase = 0;
11424
	/* The loopback device is special: if any other network device
	 * is present in a network namespace, the loopback device must be
	 * present too. Since the loopback device is now dynamically
	 * allocated and freed, maintain this invariant by registering
	 * loopback_net_ops first, so the loopback device is the first
	 * device that appears and the last one that disappears.
	 */
11434 if (register_pernet_device(&loopback_net_ops))
11435 goto out;
11436
11437 if (register_pernet_device(&default_device_ops))
11438 goto out;
11439
11440 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
11441 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
11442
11443 rc = cpuhp_setup_state_nocalls(CPUHP_NET_DEV_DEAD, "net/dev:dead",
11444 NULL, dev_cpu_dead);
11445 WARN_ON(rc < 0);
11446 rc = 0;
11447 out:
11448 return rc;
11449 }
11450
11451 subsys_initcall(net_dev_init);