Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  *  NET3    IP device support routines.
0004  *
0005  *  Derived from the IP parts of dev.c 1.0.19
0006  *      Authors:    Ross Biro
0007  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
0008  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
0009  *
0010  *  Additional Authors:
0011  *      Alan Cox, <gw4pts@gw4pts.ampr.org>
0012  *      Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
0013  *
0014  *  Changes:
0015  *      Alexey Kuznetsov:   pa_* fields are replaced with ifaddr
0016  *                  lists.
0017  *      Cyrus Durgin:       updated for kmod
0018  *      Matthias Andree:    in devinet_ioctl, compare label and
0019  *                  address (4.4BSD alias style support),
0020  *                  fall back to comparing just the label
0021  *                  if no match found.
0022  */
0023 
0024 
0025 #include <linux/uaccess.h>
0026 #include <linux/bitops.h>
0027 #include <linux/capability.h>
0028 #include <linux/module.h>
0029 #include <linux/types.h>
0030 #include <linux/kernel.h>
0031 #include <linux/sched/signal.h>
0032 #include <linux/string.h>
0033 #include <linux/mm.h>
0034 #include <linux/socket.h>
0035 #include <linux/sockios.h>
0036 #include <linux/in.h>
0037 #include <linux/errno.h>
0038 #include <linux/interrupt.h>
0039 #include <linux/if_addr.h>
0040 #include <linux/if_ether.h>
0041 #include <linux/inet.h>
0042 #include <linux/netdevice.h>
0043 #include <linux/etherdevice.h>
0044 #include <linux/skbuff.h>
0045 #include <linux/init.h>
0046 #include <linux/notifier.h>
0047 #include <linux/inetdevice.h>
0048 #include <linux/igmp.h>
0049 #include <linux/slab.h>
0050 #include <linux/hash.h>
0051 #ifdef CONFIG_SYSCTL
0052 #include <linux/sysctl.h>
0053 #endif
0054 #include <linux/kmod.h>
0055 #include <linux/netconf.h>
0056 
0057 #include <net/arp.h>
0058 #include <net/ip.h>
0059 #include <net/route.h>
0060 #include <net/ip_fib.h>
0061 #include <net/rtnetlink.h>
0062 #include <net/net_namespace.h>
0063 #include <net/addrconf.h>
0064 
0065 #define IPV6ONLY_FLAGS  \
0066         (IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
0067          IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
0068          IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
0069 
0070 static struct ipv4_devconf ipv4_devconf = {
0071     .data = {
0072         [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
0073         [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
0074         [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
0075         [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
0076         [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
0077         [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
0078         [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
0079     },
0080 };
0081 
0082 static struct ipv4_devconf ipv4_devconf_dflt = {
0083     .data = {
0084         [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
0085         [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
0086         [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
0087         [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
0088         [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
0089         [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
0090         [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
0091         [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
0092     },
0093 };
0094 
/*
 * Read attribute @attr from the devconf_dflt block that @net points to.
 * @net is parenthesised so that any pointer expression (e.g. "&init_net"
 * or "p + 1") can be passed, not just a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
0097 
0098 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
0099     [IFA_LOCAL]         = { .type = NLA_U32 },
0100     [IFA_ADDRESS]       = { .type = NLA_U32 },
0101     [IFA_BROADCAST]     = { .type = NLA_U32 },
0102     [IFA_LABEL]         = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
0103     [IFA_CACHEINFO]     = { .len = sizeof(struct ifa_cacheinfo) },
0104     [IFA_FLAGS]     = { .type = NLA_U32 },
0105     [IFA_RT_PRIORITY]   = { .type = NLA_U32 },
0106     [IFA_TARGET_NETNSID]    = { .type = NLA_S32 },
0107     [IFA_PROTO]     = { .type = NLA_U8 },
0108 };
0109 
0110 struct inet_fill_args {
0111     u32 portid;
0112     u32 seq;
0113     int event;
0114     unsigned int flags;
0115     int netnsid;
0116     int ifindex;
0117 };
0118 
0119 #define IN4_ADDR_HSIZE_SHIFT    8
0120 #define IN4_ADDR_HSIZE      (1U << IN4_ADDR_HSIZE_SHIFT)
0121 
0122 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
0123 
0124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
0125 {
0126     u32 val = (__force u32) addr ^ net_hash_mix(net);
0127 
0128     return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
0129 }
0130 
0131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
0132 {
0133     u32 hash = inet_addr_hash(net, ifa->ifa_local);
0134 
0135     ASSERT_RTNL();
0136     hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
0137 }
0138 
0139 static void inet_hash_remove(struct in_ifaddr *ifa)
0140 {
0141     ASSERT_RTNL();
0142     hlist_del_init_rcu(&ifa->hash);
0143 }
0144 
0145 /**
0146  * __ip_dev_find - find the first device with a given source address.
0147  * @net: the net namespace
0148  * @addr: the source address
0149  * @devref: if true, take a reference on the found device
0150  *
0151  * If a caller uses devref=false, it should be protected by RCU, or RTNL
0152  */
0153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
0154 {
0155     struct net_device *result = NULL;
0156     struct in_ifaddr *ifa;
0157 
0158     rcu_read_lock();
0159     ifa = inet_lookup_ifaddr_rcu(net, addr);
0160     if (!ifa) {
0161         struct flowi4 fl4 = { .daddr = addr };
0162         struct fib_result res = { 0 };
0163         struct fib_table *local;
0164 
0165         /* Fallback to FIB local table so that communication
0166          * over loopback subnets work.
0167          */
0168         local = fib_get_table(net, RT_TABLE_LOCAL);
0169         if (local &&
0170             !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
0171             res.type == RTN_LOCAL)
0172             result = FIB_RES_DEV(res);
0173     } else {
0174         result = ifa->ifa_dev->dev;
0175     }
0176     if (result && devref)
0177         dev_hold(result);
0178     rcu_read_unlock();
0179     return result;
0180 }
0181 EXPORT_SYMBOL(__ip_dev_find);
0182 
0183 /* called under RCU lock */
0184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
0185 {
0186     u32 hash = inet_addr_hash(net, addr);
0187     struct in_ifaddr *ifa;
0188 
0189     hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
0190         if (ifa->ifa_local == addr &&
0191             net_eq(dev_net(ifa->ifa_dev->dev), net))
0192             return ifa;
0193 
0194     return NULL;
0195 }
0196 
0197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
0198 
0199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
0200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
0201 static void inet_del_ifa(struct in_device *in_dev,
0202              struct in_ifaddr __rcu **ifap,
0203              int destroy);
0204 #ifdef CONFIG_SYSCTL
0205 static int devinet_sysctl_register(struct in_device *idev);
0206 static void devinet_sysctl_unregister(struct in_device *idev);
0207 #else
0208 static int devinet_sysctl_register(struct in_device *idev)
0209 {
0210     return 0;
0211 }
0212 static void devinet_sysctl_unregister(struct in_device *idev)
0213 {
0214 }
0215 #endif
0216 
0217 /* Locks all the inet devices. */
0218 
0219 static struct in_ifaddr *inet_alloc_ifa(void)
0220 {
0221     return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
0222 }
0223 
0224 static void inet_rcu_free_ifa(struct rcu_head *head)
0225 {
0226     struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
0227     if (ifa->ifa_dev)
0228         in_dev_put(ifa->ifa_dev);
0229     kfree(ifa);
0230 }
0231 
0232 static void inet_free_ifa(struct in_ifaddr *ifa)
0233 {
0234     call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
0235 }
0236 
0237 void in_dev_finish_destroy(struct in_device *idev)
0238 {
0239     struct net_device *dev = idev->dev;
0240 
0241     WARN_ON(idev->ifa_list);
0242     WARN_ON(idev->mc_list);
0243     kfree(rcu_dereference_protected(idev->mc_hash, 1));
0244 #ifdef NET_REFCNT_DEBUG
0245     pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
0246 #endif
0247     netdev_put(dev, &idev->dev_tracker);
0248     if (!idev->dead)
0249         pr_err("Freeing alive in_device %p\n", idev);
0250     else
0251         kfree(idev);
0252 }
0253 EXPORT_SYMBOL(in_dev_finish_destroy);
0254 
0255 static struct in_device *inetdev_init(struct net_device *dev)
0256 {
0257     struct in_device *in_dev;
0258     int err = -ENOMEM;
0259 
0260     ASSERT_RTNL();
0261 
0262     in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
0263     if (!in_dev)
0264         goto out;
0265     memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
0266             sizeof(in_dev->cnf));
0267     in_dev->cnf.sysctl = NULL;
0268     in_dev->dev = dev;
0269     in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
0270     if (!in_dev->arp_parms)
0271         goto out_kfree;
0272     if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
0273         dev_disable_lro(dev);
0274     /* Reference in_dev->dev */
0275     netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
0276     /* Account for reference dev->ip_ptr (below) */
0277     refcount_set(&in_dev->refcnt, 1);
0278 
0279     err = devinet_sysctl_register(in_dev);
0280     if (err) {
0281         in_dev->dead = 1;
0282         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
0283         in_dev_put(in_dev);
0284         in_dev = NULL;
0285         goto out;
0286     }
0287     ip_mc_init_dev(in_dev);
0288     if (dev->flags & IFF_UP)
0289         ip_mc_up(in_dev);
0290 
0291     /* we can receive as soon as ip_ptr is set -- do this last */
0292     rcu_assign_pointer(dev->ip_ptr, in_dev);
0293 out:
0294     return in_dev ?: ERR_PTR(err);
0295 out_kfree:
0296     kfree(in_dev);
0297     in_dev = NULL;
0298     goto out;
0299 }
0300 
0301 static void in_dev_rcu_put(struct rcu_head *head)
0302 {
0303     struct in_device *idev = container_of(head, struct in_device, rcu_head);
0304     in_dev_put(idev);
0305 }
0306 
0307 static void inetdev_destroy(struct in_device *in_dev)
0308 {
0309     struct net_device *dev;
0310     struct in_ifaddr *ifa;
0311 
0312     ASSERT_RTNL();
0313 
0314     dev = in_dev->dev;
0315 
0316     in_dev->dead = 1;
0317 
0318     ip_mc_destroy_dev(in_dev);
0319 
0320     while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
0321         inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
0322         inet_free_ifa(ifa);
0323     }
0324 
0325     RCU_INIT_POINTER(dev->ip_ptr, NULL);
0326 
0327     devinet_sysctl_unregister(in_dev);
0328     neigh_parms_release(&arp_tbl, in_dev->arp_parms);
0329     arp_ifdown(dev);
0330 
0331     call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
0332 }
0333 
0334 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
0335 {
0336     const struct in_ifaddr *ifa;
0337 
0338     rcu_read_lock();
0339     in_dev_for_each_ifa_rcu(ifa, in_dev) {
0340         if (inet_ifa_match(a, ifa)) {
0341             if (!b || inet_ifa_match(b, ifa)) {
0342                 rcu_read_unlock();
0343                 return 1;
0344             }
0345         }
0346     }
0347     rcu_read_unlock();
0348     return 0;
0349 }
0350 
0351 static void __inet_del_ifa(struct in_device *in_dev,
0352                struct in_ifaddr __rcu **ifap,
0353                int destroy, struct nlmsghdr *nlh, u32 portid)
0354 {
0355     struct in_ifaddr *promote = NULL;
0356     struct in_ifaddr *ifa, *ifa1;
0357     struct in_ifaddr *last_prim;
0358     struct in_ifaddr *prev_prom = NULL;
0359     int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
0360 
0361     ASSERT_RTNL();
0362 
0363     ifa1 = rtnl_dereference(*ifap);
0364     last_prim = rtnl_dereference(in_dev->ifa_list);
0365     if (in_dev->dead)
0366         goto no_promotions;
0367 
0368     /* 1. Deleting primary ifaddr forces deletion all secondaries
0369      * unless alias promotion is set
0370      **/
0371 
0372     if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
0373         struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
0374 
0375         while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
0376             if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
0377                 ifa1->ifa_scope <= ifa->ifa_scope)
0378                 last_prim = ifa;
0379 
0380             if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
0381                 ifa1->ifa_mask != ifa->ifa_mask ||
0382                 !inet_ifa_match(ifa1->ifa_address, ifa)) {
0383                 ifap1 = &ifa->ifa_next;
0384                 prev_prom = ifa;
0385                 continue;
0386             }
0387 
0388             if (!do_promote) {
0389                 inet_hash_remove(ifa);
0390                 *ifap1 = ifa->ifa_next;
0391 
0392                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
0393                 blocking_notifier_call_chain(&inetaddr_chain,
0394                         NETDEV_DOWN, ifa);
0395                 inet_free_ifa(ifa);
0396             } else {
0397                 promote = ifa;
0398                 break;
0399             }
0400         }
0401     }
0402 
0403     /* On promotion all secondaries from subnet are changing
0404      * the primary IP, we must remove all their routes silently
0405      * and later to add them back with new prefsrc. Do this
0406      * while all addresses are on the device list.
0407      */
0408     for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
0409         if (ifa1->ifa_mask == ifa->ifa_mask &&
0410             inet_ifa_match(ifa1->ifa_address, ifa))
0411             fib_del_ifaddr(ifa, ifa1);
0412     }
0413 
0414 no_promotions:
0415     /* 2. Unlink it */
0416 
0417     *ifap = ifa1->ifa_next;
0418     inet_hash_remove(ifa1);
0419 
0420     /* 3. Announce address deletion */
0421 
0422     /* Send message first, then call notifier.
0423        At first sight, FIB update triggered by notifier
0424        will refer to already deleted ifaddr, that could confuse
0425        netlink listeners. It is not true: look, gated sees
0426        that route deleted and if it still thinks that ifaddr
0427        is valid, it will try to restore deleted routes... Grr.
0428        So that, this order is correct.
0429      */
0430     rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
0431     blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
0432 
0433     if (promote) {
0434         struct in_ifaddr *next_sec;
0435 
0436         next_sec = rtnl_dereference(promote->ifa_next);
0437         if (prev_prom) {
0438             struct in_ifaddr *last_sec;
0439 
0440             rcu_assign_pointer(prev_prom->ifa_next, next_sec);
0441 
0442             last_sec = rtnl_dereference(last_prim->ifa_next);
0443             rcu_assign_pointer(promote->ifa_next, last_sec);
0444             rcu_assign_pointer(last_prim->ifa_next, promote);
0445         }
0446 
0447         promote->ifa_flags &= ~IFA_F_SECONDARY;
0448         rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
0449         blocking_notifier_call_chain(&inetaddr_chain,
0450                 NETDEV_UP, promote);
0451         for (ifa = next_sec; ifa;
0452              ifa = rtnl_dereference(ifa->ifa_next)) {
0453             if (ifa1->ifa_mask != ifa->ifa_mask ||
0454                 !inet_ifa_match(ifa1->ifa_address, ifa))
0455                     continue;
0456             fib_add_ifaddr(ifa);
0457         }
0458 
0459     }
0460     if (destroy)
0461         inet_free_ifa(ifa1);
0462 }
0463 
0464 static void inet_del_ifa(struct in_device *in_dev,
0465              struct in_ifaddr __rcu **ifap,
0466              int destroy)
0467 {
0468     __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
0469 }
0470 
/* Work handler, defined further down in this file. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
0474 
0475 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
0476                  u32 portid, struct netlink_ext_ack *extack)
0477 {
0478     struct in_ifaddr __rcu **last_primary, **ifap;
0479     struct in_device *in_dev = ifa->ifa_dev;
0480     struct in_validator_info ivi;
0481     struct in_ifaddr *ifa1;
0482     int ret;
0483 
0484     ASSERT_RTNL();
0485 
0486     if (!ifa->ifa_local) {
0487         inet_free_ifa(ifa);
0488         return 0;
0489     }
0490 
0491     ifa->ifa_flags &= ~IFA_F_SECONDARY;
0492     last_primary = &in_dev->ifa_list;
0493 
0494     /* Don't set IPv6 only flags to IPv4 addresses */
0495     ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
0496 
0497     ifap = &in_dev->ifa_list;
0498     ifa1 = rtnl_dereference(*ifap);
0499 
0500     while (ifa1) {
0501         if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
0502             ifa->ifa_scope <= ifa1->ifa_scope)
0503             last_primary = &ifa1->ifa_next;
0504         if (ifa1->ifa_mask == ifa->ifa_mask &&
0505             inet_ifa_match(ifa1->ifa_address, ifa)) {
0506             if (ifa1->ifa_local == ifa->ifa_local) {
0507                 inet_free_ifa(ifa);
0508                 return -EEXIST;
0509             }
0510             if (ifa1->ifa_scope != ifa->ifa_scope) {
0511                 inet_free_ifa(ifa);
0512                 return -EINVAL;
0513             }
0514             ifa->ifa_flags |= IFA_F_SECONDARY;
0515         }
0516 
0517         ifap = &ifa1->ifa_next;
0518         ifa1 = rtnl_dereference(*ifap);
0519     }
0520 
0521     /* Allow any devices that wish to register ifaddr validtors to weigh
0522      * in now, before changes are committed.  The rntl lock is serializing
0523      * access here, so the state should not change between a validator call
0524      * and a final notify on commit.  This isn't invoked on promotion under
0525      * the assumption that validators are checking the address itself, and
0526      * not the flags.
0527      */
0528     ivi.ivi_addr = ifa->ifa_address;
0529     ivi.ivi_dev = ifa->ifa_dev;
0530     ivi.extack = extack;
0531     ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
0532                        NETDEV_UP, &ivi);
0533     ret = notifier_to_errno(ret);
0534     if (ret) {
0535         inet_free_ifa(ifa);
0536         return ret;
0537     }
0538 
0539     if (!(ifa->ifa_flags & IFA_F_SECONDARY))
0540         ifap = last_primary;
0541 
0542     rcu_assign_pointer(ifa->ifa_next, *ifap);
0543     rcu_assign_pointer(*ifap, ifa);
0544 
0545     inet_hash_insert(dev_net(in_dev->dev), ifa);
0546 
0547     cancel_delayed_work(&check_lifetime_work);
0548     queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
0549 
0550     /* Send message first, then call notifier.
0551        Notifier will trigger FIB update, so that
0552        listeners of netlink will know about new ifaddr */
0553     rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
0554     blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
0555 
0556     return 0;
0557 }
0558 
0559 static int inet_insert_ifa(struct in_ifaddr *ifa)
0560 {
0561     return __inet_insert_ifa(ifa, NULL, 0, NULL);
0562 }
0563 
0564 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
0565 {
0566     struct in_device *in_dev = __in_dev_get_rtnl(dev);
0567 
0568     ASSERT_RTNL();
0569 
0570     if (!in_dev) {
0571         inet_free_ifa(ifa);
0572         return -ENOBUFS;
0573     }
0574     ipv4_devconf_setall(in_dev);
0575     neigh_parms_data_state_setall(in_dev->arp_parms);
0576     if (ifa->ifa_dev != in_dev) {
0577         WARN_ON(ifa->ifa_dev);
0578         in_dev_hold(in_dev);
0579         ifa->ifa_dev = in_dev;
0580     }
0581     if (ipv4_is_loopback(ifa->ifa_local))
0582         ifa->ifa_scope = RT_SCOPE_HOST;
0583     return inet_insert_ifa(ifa);
0584 }
0585 
0586 /* Caller must hold RCU or RTNL :
0587  * We dont take a reference on found in_device
0588  */
0589 struct in_device *inetdev_by_index(struct net *net, int ifindex)
0590 {
0591     struct net_device *dev;
0592     struct in_device *in_dev = NULL;
0593 
0594     rcu_read_lock();
0595     dev = dev_get_by_index_rcu(net, ifindex);
0596     if (dev)
0597         in_dev = rcu_dereference_rtnl(dev->ip_ptr);
0598     rcu_read_unlock();
0599     return in_dev;
0600 }
0601 EXPORT_SYMBOL(inetdev_by_index);
0602 
0603 /* Called only from RTNL semaphored context. No locks. */
0604 
0605 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
0606                     __be32 mask)
0607 {
0608     struct in_ifaddr *ifa;
0609 
0610     ASSERT_RTNL();
0611 
0612     in_dev_for_each_ifa_rtnl(ifa, in_dev) {
0613         if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
0614             return ifa;
0615     }
0616     return NULL;
0617 }
0618 
0619 static int ip_mc_autojoin_config(struct net *net, bool join,
0620                  const struct in_ifaddr *ifa)
0621 {
0622 #if defined(CONFIG_IP_MULTICAST)
0623     struct ip_mreqn mreq = {
0624         .imr_multiaddr.s_addr = ifa->ifa_address,
0625         .imr_ifindex = ifa->ifa_dev->dev->ifindex,
0626     };
0627     struct sock *sk = net->ipv4.mc_autojoin_sk;
0628     int ret;
0629 
0630     ASSERT_RTNL();
0631 
0632     lock_sock(sk);
0633     if (join)
0634         ret = ip_mc_join_group(sk, &mreq);
0635     else
0636         ret = ip_mc_leave_group(sk, &mreq);
0637     release_sock(sk);
0638 
0639     return ret;
0640 #else
0641     return -EOPNOTSUPP;
0642 #endif
0643 }
0644 
0645 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
0646                 struct netlink_ext_ack *extack)
0647 {
0648     struct net *net = sock_net(skb->sk);
0649     struct in_ifaddr __rcu **ifap;
0650     struct nlattr *tb[IFA_MAX+1];
0651     struct in_device *in_dev;
0652     struct ifaddrmsg *ifm;
0653     struct in_ifaddr *ifa;
0654     int err;
0655 
0656     ASSERT_RTNL();
0657 
0658     err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
0659                      ifa_ipv4_policy, extack);
0660     if (err < 0)
0661         goto errout;
0662 
0663     ifm = nlmsg_data(nlh);
0664     in_dev = inetdev_by_index(net, ifm->ifa_index);
0665     if (!in_dev) {
0666         err = -ENODEV;
0667         goto errout;
0668     }
0669 
0670     for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
0671          ifap = &ifa->ifa_next) {
0672         if (tb[IFA_LOCAL] &&
0673             ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
0674             continue;
0675 
0676         if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
0677             continue;
0678 
0679         if (tb[IFA_ADDRESS] &&
0680             (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
0681             !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
0682             continue;
0683 
0684         if (ipv4_is_multicast(ifa->ifa_address))
0685             ip_mc_autojoin_config(net, false, ifa);
0686         __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
0687         return 0;
0688     }
0689 
0690     err = -EADDRNOTAVAIL;
0691 errout:
0692     return err;
0693 }
0694 
/* Lifetime value that check_lifetime() treats as "never expires". */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
0696 
0697 static void check_lifetime(struct work_struct *work)
0698 {
0699     unsigned long now, next, next_sec, next_sched;
0700     struct in_ifaddr *ifa;
0701     struct hlist_node *n;
0702     int i;
0703 
0704     now = jiffies;
0705     next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
0706 
0707     for (i = 0; i < IN4_ADDR_HSIZE; i++) {
0708         bool change_needed = false;
0709 
0710         rcu_read_lock();
0711         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
0712             unsigned long age;
0713 
0714             if (ifa->ifa_flags & IFA_F_PERMANENT)
0715                 continue;
0716 
0717             /* We try to batch several events at once. */
0718             age = (now - ifa->ifa_tstamp +
0719                    ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
0720 
0721             if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
0722                 age >= ifa->ifa_valid_lft) {
0723                 change_needed = true;
0724             } else if (ifa->ifa_preferred_lft ==
0725                    INFINITY_LIFE_TIME) {
0726                 continue;
0727             } else if (age >= ifa->ifa_preferred_lft) {
0728                 if (time_before(ifa->ifa_tstamp +
0729                         ifa->ifa_valid_lft * HZ, next))
0730                     next = ifa->ifa_tstamp +
0731                            ifa->ifa_valid_lft * HZ;
0732 
0733                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
0734                     change_needed = true;
0735             } else if (time_before(ifa->ifa_tstamp +
0736                            ifa->ifa_preferred_lft * HZ,
0737                            next)) {
0738                 next = ifa->ifa_tstamp +
0739                        ifa->ifa_preferred_lft * HZ;
0740             }
0741         }
0742         rcu_read_unlock();
0743         if (!change_needed)
0744             continue;
0745         rtnl_lock();
0746         hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
0747             unsigned long age;
0748 
0749             if (ifa->ifa_flags & IFA_F_PERMANENT)
0750                 continue;
0751 
0752             /* We try to batch several events at once. */
0753             age = (now - ifa->ifa_tstamp +
0754                    ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
0755 
0756             if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
0757                 age >= ifa->ifa_valid_lft) {
0758                 struct in_ifaddr __rcu **ifap;
0759                 struct in_ifaddr *tmp;
0760 
0761                 ifap = &ifa->ifa_dev->ifa_list;
0762                 tmp = rtnl_dereference(*ifap);
0763                 while (tmp) {
0764                     if (tmp == ifa) {
0765                         inet_del_ifa(ifa->ifa_dev,
0766                                  ifap, 1);
0767                         break;
0768                     }
0769                     ifap = &tmp->ifa_next;
0770                     tmp = rtnl_dereference(*ifap);
0771                 }
0772             } else if (ifa->ifa_preferred_lft !=
0773                    INFINITY_LIFE_TIME &&
0774                    age >= ifa->ifa_preferred_lft &&
0775                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
0776                 ifa->ifa_flags |= IFA_F_DEPRECATED;
0777                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
0778             }
0779         }
0780         rtnl_unlock();
0781     }
0782 
0783     next_sec = round_jiffies_up(next);
0784     next_sched = next;
0785 
0786     /* If rounded timeout is accurate enough, accept it. */
0787     if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
0788         next_sched = next_sec;
0789 
0790     now = jiffies;
0791     /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
0792     if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
0793         next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
0794 
0795     queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
0796             next_sched - now);
0797 }
0798 
0799 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
0800                  __u32 prefered_lft)
0801 {
0802     unsigned long timeout;
0803 
0804     ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
0805 
0806     timeout = addrconf_timeout_fixup(valid_lft, HZ);
0807     if (addrconf_finite_timeout(timeout))
0808         ifa->ifa_valid_lft = timeout;
0809     else
0810         ifa->ifa_flags |= IFA_F_PERMANENT;
0811 
0812     timeout = addrconf_timeout_fixup(prefered_lft, HZ);
0813     if (addrconf_finite_timeout(timeout)) {
0814         if (timeout == 0)
0815             ifa->ifa_flags |= IFA_F_DEPRECATED;
0816         ifa->ifa_preferred_lft = timeout;
0817     }
0818     ifa->ifa_tstamp = jiffies;
0819     if (!ifa->ifa_cstamp)
0820         ifa->ifa_cstamp = ifa->ifa_tstamp;
0821 }
0822 
0823 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
0824                        __u32 *pvalid_lft, __u32 *pprefered_lft,
0825                        struct netlink_ext_ack *extack)
0826 {
0827     struct nlattr *tb[IFA_MAX+1];
0828     struct in_ifaddr *ifa;
0829     struct ifaddrmsg *ifm;
0830     struct net_device *dev;
0831     struct in_device *in_dev;
0832     int err;
0833 
0834     err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
0835                      ifa_ipv4_policy, extack);
0836     if (err < 0)
0837         goto errout;
0838 
0839     ifm = nlmsg_data(nlh);
0840     err = -EINVAL;
0841     if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
0842         goto errout;
0843 
0844     dev = __dev_get_by_index(net, ifm->ifa_index);
0845     err = -ENODEV;
0846     if (!dev)
0847         goto errout;
0848 
0849     in_dev = __in_dev_get_rtnl(dev);
0850     err = -ENOBUFS;
0851     if (!in_dev)
0852         goto errout;
0853 
0854     ifa = inet_alloc_ifa();
0855     if (!ifa)
0856         /*
0857          * A potential indev allocation can be left alive, it stays
0858          * assigned to its device and is destroy with it.
0859          */
0860         goto errout;
0861 
0862     ipv4_devconf_setall(in_dev);
0863     neigh_parms_data_state_setall(in_dev->arp_parms);
0864     in_dev_hold(in_dev);
0865 
0866     if (!tb[IFA_ADDRESS])
0867         tb[IFA_ADDRESS] = tb[IFA_LOCAL];
0868 
0869     INIT_HLIST_NODE(&ifa->hash);
0870     ifa->ifa_prefixlen = ifm->ifa_prefixlen;
0871     ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
0872     ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
0873                      ifm->ifa_flags;
0874     ifa->ifa_scope = ifm->ifa_scope;
0875     ifa->ifa_dev = in_dev;
0876 
0877     ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
0878     ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
0879 
0880     if (tb[IFA_BROADCAST])
0881         ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
0882 
0883     if (tb[IFA_LABEL])
0884         nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
0885     else
0886         memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
0887 
0888     if (tb[IFA_RT_PRIORITY])
0889         ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
0890 
0891     if (tb[IFA_PROTO])
0892         ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
0893 
0894     if (tb[IFA_CACHEINFO]) {
0895         struct ifa_cacheinfo *ci;
0896 
0897         ci = nla_data(tb[IFA_CACHEINFO]);
0898         if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
0899             err = -EINVAL;
0900             goto errout_free;
0901         }
0902         *pvalid_lft = ci->ifa_valid;
0903         *pprefered_lft = ci->ifa_prefered;
0904     }
0905 
0906     return ifa;
0907 
0908 errout_free:
0909     inet_free_ifa(ifa);
0910 errout:
0911     return ERR_PTR(err);
0912 }
0913 
0914 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
0915 {
0916     struct in_device *in_dev = ifa->ifa_dev;
0917     struct in_ifaddr *ifa1;
0918 
0919     if (!ifa->ifa_local)
0920         return NULL;
0921 
0922     in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
0923         if (ifa1->ifa_mask == ifa->ifa_mask &&
0924             inet_ifa_match(ifa1->ifa_address, ifa) &&
0925             ifa1->ifa_local == ifa->ifa_local)
0926             return ifa1;
0927     }
0928     return NULL;
0929 }
0930 
0931 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
0932                 struct netlink_ext_ack *extack)
0933 {
0934     struct net *net = sock_net(skb->sk);
0935     struct in_ifaddr *ifa;
0936     struct in_ifaddr *ifa_existing;
0937     __u32 valid_lft = INFINITY_LIFE_TIME;
0938     __u32 prefered_lft = INFINITY_LIFE_TIME;
0939 
0940     ASSERT_RTNL();
0941 
0942     ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
0943     if (IS_ERR(ifa))
0944         return PTR_ERR(ifa);
0945 
0946     ifa_existing = find_matching_ifa(ifa);
0947     if (!ifa_existing) {
0948         /* It would be best to check for !NLM_F_CREATE here but
0949          * userspace already relies on not having to provide this.
0950          */
0951         set_ifa_lifetime(ifa, valid_lft, prefered_lft);
0952         if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
0953             int ret = ip_mc_autojoin_config(net, true, ifa);
0954 
0955             if (ret < 0) {
0956                 inet_free_ifa(ifa);
0957                 return ret;
0958             }
0959         }
0960         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
0961                      extack);
0962     } else {
0963         u32 new_metric = ifa->ifa_rt_priority;
0964 
0965         inet_free_ifa(ifa);
0966 
0967         if (nlh->nlmsg_flags & NLM_F_EXCL ||
0968             !(nlh->nlmsg_flags & NLM_F_REPLACE))
0969             return -EEXIST;
0970         ifa = ifa_existing;
0971 
0972         if (ifa->ifa_rt_priority != new_metric) {
0973             fib_modify_prefix_metric(ifa, new_metric);
0974             ifa->ifa_rt_priority = new_metric;
0975         }
0976 
0977         set_ifa_lifetime(ifa, valid_lft, prefered_lft);
0978         cancel_delayed_work(&check_lifetime_work);
0979         queue_delayed_work(system_power_efficient_wq,
0980                 &check_lifetime_work, 0);
0981         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
0982     }
0983     return 0;
0984 }
0985 
0986 /*
0987  *  Determine a default network mask, based on the IP address.
0988  */
0989 
0990 static int inet_abc_len(__be32 addr)
0991 {
0992     int rc = -1;    /* Something else, probably a multicast. */
0993 
0994     if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
0995         rc = 0;
0996     else {
0997         __u32 haddr = ntohl(addr);
0998         if (IN_CLASSA(haddr))
0999             rc = 8;
1000         else if (IN_CLASSB(haddr))
1001             rc = 16;
1002         else if (IN_CLASSC(haddr))
1003             rc = 24;
1004         else if (IN_CLASSE(haddr))
1005             rc = 32;
1006     }
1007 
1008     return rc;
1009 }
1010 
1011 
/*
 * devinet_ioctl - handle the IPv4 address ioctls for one interface.
 * @net: network namespace used for the device lookup and capability checks
 * @cmd: SIOC{G,S}IFADDR, SIOC{G,S}IFBRDADDR, SIOC{G,S}IFDSTADDR,
 *       SIOC{G,S}IFNETMASK or SIOCSIFFLAGS; any other value gives -EINVAL
 * @ifr: request; ifr_name names the device, optionally as "dev:alias" to
 *       pick an address by label, and ifr_addr carries the address in
 *       (set requests) or out (get requests)
 *
 * The set requests and SIOCSIFFLAGS require CAP_NET_ADMIN in the user
 * namespace of @net.  The device lookup and all list manipulation happen
 * under rtnl_lock().
 *
 * Returns 0 on success or a negative errno.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
{
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
	struct in_ifaddr __rcu **ifap = NULL;
	struct in_device *in_dev;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/* Make sure the name is NUL-terminated before it is parsed. */
	ifr->ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut "dev:alias" down to "dev" for the device lookup;
	 * the ':' is put back once the device has been found.
	 */
	colon = strchr(ifr->ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr->ifr_name);

	/* First pass over cmd: permission and argument checks only. */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		/* Only try label+address matching if the caller passed an
		 * AF_INET address; the reply buffer is then reset.
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK:	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr->ifr_name);
	if (!dev)
		goto done;

	/* Restore the full label for the comparisons below. */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */

			/* ifap follows the list slot that points at ifa so
			 * the entry can later be passed to inet_del_ifa().
			 */
			for (ifap = &in_dev->ifa_list;
			     (ifa = rtnl_dereference(*ifap)) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list;
			     (ifa = rtnl_dereference(*ifap)) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an
	 * existing address.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	/* Second pass over cmd: perform the request. */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_local;
		break;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		break;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_address;
		break;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_mask;
		break;

	case SIOCSIFFLAGS:
		/* With an alias name the flags apply to that address only:
		 * clearing IFF_UP deletes it, anything else is a no-op.
		 */
		if (colon) {
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr->ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		/* Reject addresses for which no default mask exists. */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			/* No address under this label yet: create one. */
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			/* Same address as before: nothing to do. */
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* Unlink the entry, then reuse it for the new
			 * address with broadcast and scope reset.
			 */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* Classful default mask; a broadcast address is only
			 * derived for prefixes shorter than /31.
			 */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		/* Changed fields are applied by unlinking the entry,
		 * updating it and inserting it again.
		 */
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK:	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
}
1249 
1250 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1251 {
1252     struct in_device *in_dev = __in_dev_get_rtnl(dev);
1253     const struct in_ifaddr *ifa;
1254     struct ifreq ifr;
1255     int done = 0;
1256 
1257     if (WARN_ON(size > sizeof(struct ifreq)))
1258         goto out;
1259 
1260     if (!in_dev)
1261         goto out;
1262 
1263     in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1264         if (!buf) {
1265             done += size;
1266             continue;
1267         }
1268         if (len < size)
1269             break;
1270         memset(&ifr, 0, sizeof(struct ifreq));
1271         strcpy(ifr.ifr_name, ifa->ifa_label);
1272 
1273         (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1274         (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1275                                 ifa->ifa_local;
1276 
1277         if (copy_to_user(buf + done, &ifr, size)) {
1278             done = -EFAULT;
1279             break;
1280         }
1281         len  -= size;
1282         done += size;
1283     }
1284 out:
1285     return done;
1286 }
1287 
1288 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1289                  int scope)
1290 {
1291     const struct in_ifaddr *ifa;
1292 
1293     in_dev_for_each_ifa_rcu(ifa, in_dev) {
1294         if (ifa->ifa_flags & IFA_F_SECONDARY)
1295             continue;
1296         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1297             ifa->ifa_scope <= scope)
1298             return ifa->ifa_local;
1299     }
1300 
1301     return 0;
1302 }
1303 
1304 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1305 {
1306     const struct in_ifaddr *ifa;
1307     __be32 addr = 0;
1308     unsigned char localnet_scope = RT_SCOPE_HOST;
1309     struct in_device *in_dev;
1310     struct net *net = dev_net(dev);
1311     int master_idx;
1312 
1313     rcu_read_lock();
1314     in_dev = __in_dev_get_rcu(dev);
1315     if (!in_dev)
1316         goto no_in_dev;
1317 
1318     if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1319         localnet_scope = RT_SCOPE_LINK;
1320 
1321     in_dev_for_each_ifa_rcu(ifa, in_dev) {
1322         if (ifa->ifa_flags & IFA_F_SECONDARY)
1323             continue;
1324         if (min(ifa->ifa_scope, localnet_scope) > scope)
1325             continue;
1326         if (!dst || inet_ifa_match(dst, ifa)) {
1327             addr = ifa->ifa_local;
1328             break;
1329         }
1330         if (!addr)
1331             addr = ifa->ifa_local;
1332     }
1333 
1334     if (addr)
1335         goto out_unlock;
1336 no_in_dev:
1337     master_idx = l3mdev_master_ifindex_rcu(dev);
1338 
1339     /* For VRFs, the VRF device takes the place of the loopback device,
1340      * with addresses on it being preferred.  Note in such cases the
1341      * loopback device will be among the devices that fail the master_idx
1342      * equality check in the loop below.
1343      */
1344     if (master_idx &&
1345         (dev = dev_get_by_index_rcu(net, master_idx)) &&
1346         (in_dev = __in_dev_get_rcu(dev))) {
1347         addr = in_dev_select_addr(in_dev, scope);
1348         if (addr)
1349             goto out_unlock;
1350     }
1351 
1352     /* Not loopback addresses on loopback should be preferred
1353        in this case. It is important that lo is the first interface
1354        in dev_base list.
1355      */
1356     for_each_netdev_rcu(net, dev) {
1357         if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1358             continue;
1359 
1360         in_dev = __in_dev_get_rcu(dev);
1361         if (!in_dev)
1362             continue;
1363 
1364         addr = in_dev_select_addr(in_dev, scope);
1365         if (addr)
1366             goto out_unlock;
1367     }
1368 out_unlock:
1369     rcu_read_unlock();
1370     return addr;
1371 }
1372 EXPORT_SYMBOL(inet_select_addr);
1373 
/*
 * confirm_addr_indev - per-device worker for inet_confirm_addr().
 * @in_dev: device whose address list is scanned (RCU iterator)
 * @dst:    only accept a subnet containing dst, 0 = any
 * @local:  local address to confirm, 0 = pick one
 * @scope:  largest acceptable scope value
 *
 * Two things are tracked while walking the list: @addr, the candidate
 * local address (first entry that equals @local, or any entry when @local
 * is 0, with an acceptable scope), and @same, whether some entry's subnet
 * contains both @local and @dst (each constraint is skipped when 0).
 *
 * Returns the candidate address if a subnet match was found, else 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
				 __be32 local, int scope)
{
	unsigned char localnet_scope = RT_SCOPE_HOST;
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	int same = 0;

	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
		localnet_scope = RT_SCOPE_LINK;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		/* Scope used for this entry is capped by localnet_scope. */
		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);

		/* Pick the candidate address once; stop right away if the
		 * subnet condition has already been satisfied.
		 */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    min_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* Only the "autoselect with dst" case needs
				 * the extra checks below.
				 */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (min_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	}

	return same ? addr : 0;
}
1417 
1418 /*
1419  * Confirm that local IP address exists using wildcards:
1420  * - net: netns to check, cannot be NULL
1421  * - in_dev: only on this interface, NULL=any interface
1422  * - dst: only in the same subnet as dst, 0=any dst
1423  * - local: address, 0=autoselect the local address
1424  * - scope: maximum allowed scope value for the local address
1425  */
1426 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1427              __be32 dst, __be32 local, int scope)
1428 {
1429     __be32 addr = 0;
1430     struct net_device *dev;
1431 
1432     if (in_dev)
1433         return confirm_addr_indev(in_dev, dst, local, scope);
1434 
1435     rcu_read_lock();
1436     for_each_netdev_rcu(net, dev) {
1437         in_dev = __in_dev_get_rcu(dev);
1438         if (in_dev) {
1439             addr = confirm_addr_indev(in_dev, dst, local, scope);
1440             if (addr)
1441                 break;
1442         }
1443     }
1444     rcu_read_unlock();
1445 
1446     return addr;
1447 }
1448 EXPORT_SYMBOL(inet_confirm_addr);
1449 
1450 /*
1451  *  Device notifier
1452  */
1453 
1454 int register_inetaddr_notifier(struct notifier_block *nb)
1455 {
1456     return blocking_notifier_chain_register(&inetaddr_chain, nb);
1457 }
1458 EXPORT_SYMBOL(register_inetaddr_notifier);
1459 
1460 int unregister_inetaddr_notifier(struct notifier_block *nb)
1461 {
1462     return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1463 }
1464 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1465 
1466 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1467 {
1468     return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1469 }
1470 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1471 
1472 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1473 {
1474     return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1475         nb);
1476 }
1477 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1478 
1479 /* Rename ifa_labels for a device name change. Make some effort to preserve
1480  * existing alias numbering and to create unique labels if possible.
1481 */
1482 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1483 {
1484     struct in_ifaddr *ifa;
1485     int named = 0;
1486 
1487     in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1488         char old[IFNAMSIZ], *dot;
1489 
1490         memcpy(old, ifa->ifa_label, IFNAMSIZ);
1491         memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1492         if (named++ == 0)
1493             goto skip;
1494         dot = strchr(old, ':');
1495         if (!dot) {
1496             sprintf(old, ":%d", named);
1497             dot = old;
1498         }
1499         if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1500             strcat(ifa->ifa_label, dot);
1501         else
1502             strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1503 skip:
1504         rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1505     }
1506 }
1507 
1508 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1509                     struct in_device *in_dev)
1510 
1511 {
1512     const struct in_ifaddr *ifa;
1513 
1514     in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1515         arp_send(ARPOP_REQUEST, ETH_P_ARP,
1516              ifa->ifa_local, dev,
1517              ifa->ifa_local, NULL,
1518              dev->dev_addr, NULL);
1519     }
1520 }
1521 
/* Called only under RTNL semaphore */

/*
 * inetdev_event - netdevice notifier callback for the IPv4 device layer.
 * @this:  notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the net_device is extracted
 *
 * Creates, updates or destroys the device's in_device in response to
 * device life-cycle events.  Returns NOTIFY_DONE, except that a failed
 * inetdev_init() on NETDEV_REGISTER is reported via notifier_from_errno().
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No in_device yet: only REGISTER and CHANGEMTU can create one. */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* REGISTER with an in_device already attached is unexpected;
		 * the pointer is cleared.
		 */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* Loopback gets 127.0.0.1/8 with host scope configured
		 * automatically when it comes up.
		 */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				/* The new ifa keeps a reference on in_dev. */
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		fallthrough;
	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR only announce when arp_notify is set. */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		fallthrough;
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		fallthrough;
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register the sysctl entries after the rename. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1617 
/* Notifier block whose callback is inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1621 
1622 static size_t inet_nlmsg_size(void)
1623 {
1624     return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1625            + nla_total_size(4) /* IFA_ADDRESS */
1626            + nla_total_size(4) /* IFA_LOCAL */
1627            + nla_total_size(4) /* IFA_BROADCAST */
1628            + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1629            + nla_total_size(4)  /* IFA_FLAGS */
1630            + nla_total_size(1)  /* IFA_PROTO */
1631            + nla_total_size(4)  /* IFA_RT_PRIORITY */
1632            + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1633 }
1634 
1635 static inline u32 cstamp_delta(unsigned long cstamp)
1636 {
1637     return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1638 }
1639 
1640 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1641              unsigned long tstamp, u32 preferred, u32 valid)
1642 {
1643     struct ifa_cacheinfo ci;
1644 
1645     ci.cstamp = cstamp_delta(cstamp);
1646     ci.tstamp = cstamp_delta(tstamp);
1647     ci.ifa_prefered = preferred;
1648     ci.ifa_valid = valid;
1649 
1650     return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1651 }
1652 
1653 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1654                 struct inet_fill_args *args)
1655 {
1656     struct ifaddrmsg *ifm;
1657     struct nlmsghdr  *nlh;
1658     u32 preferred, valid;
1659 
1660     nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1661             args->flags);
1662     if (!nlh)
1663         return -EMSGSIZE;
1664 
1665     ifm = nlmsg_data(nlh);
1666     ifm->ifa_family = AF_INET;
1667     ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1668     ifm->ifa_flags = ifa->ifa_flags;
1669     ifm->ifa_scope = ifa->ifa_scope;
1670     ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1671 
1672     if (args->netnsid >= 0 &&
1673         nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1674         goto nla_put_failure;
1675 
1676     if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1677         preferred = ifa->ifa_preferred_lft;
1678         valid = ifa->ifa_valid_lft;
1679         if (preferred != INFINITY_LIFE_TIME) {
1680             long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1681 
1682             if (preferred > tval)
1683                 preferred -= tval;
1684             else
1685                 preferred = 0;
1686             if (valid != INFINITY_LIFE_TIME) {
1687                 if (valid > tval)
1688                     valid -= tval;
1689                 else
1690                     valid = 0;
1691             }
1692         }
1693     } else {
1694         preferred = INFINITY_LIFE_TIME;
1695         valid = INFINITY_LIFE_TIME;
1696     }
1697     if ((ifa->ifa_address &&
1698          nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1699         (ifa->ifa_local &&
1700          nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1701         (ifa->ifa_broadcast &&
1702          nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1703         (ifa->ifa_label[0] &&
1704          nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1705         (ifa->ifa_proto &&
1706          nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1707         nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1708         (ifa->ifa_rt_priority &&
1709          nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1710         put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1711               preferred, valid))
1712         goto nla_put_failure;
1713 
1714     nlmsg_end(skb, nlh);
1715     return 0;
1716 
1717 nla_put_failure:
1718     nlmsg_cancel(skb, nlh);
1719     return -EMSGSIZE;
1720 }
1721 
1722 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1723                       struct inet_fill_args *fillargs,
1724                       struct net **tgt_net, struct sock *sk,
1725                       struct netlink_callback *cb)
1726 {
1727     struct netlink_ext_ack *extack = cb->extack;
1728     struct nlattr *tb[IFA_MAX+1];
1729     struct ifaddrmsg *ifm;
1730     int err, i;
1731 
1732     if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1733         NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1734         return -EINVAL;
1735     }
1736 
1737     ifm = nlmsg_data(nlh);
1738     if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1739         NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1740         return -EINVAL;
1741     }
1742 
1743     fillargs->ifindex = ifm->ifa_index;
1744     if (fillargs->ifindex) {
1745         cb->answer_flags |= NLM_F_DUMP_FILTERED;
1746         fillargs->flags |= NLM_F_DUMP_FILTERED;
1747     }
1748 
1749     err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1750                         ifa_ipv4_policy, extack);
1751     if (err < 0)
1752         return err;
1753 
1754     for (i = 0; i <= IFA_MAX; ++i) {
1755         if (!tb[i])
1756             continue;
1757 
1758         if (i == IFA_TARGET_NETNSID) {
1759             struct net *net;
1760 
1761             fillargs->netnsid = nla_get_s32(tb[i]);
1762 
1763             net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1764             if (IS_ERR(net)) {
1765                 fillargs->netnsid = -1;
1766                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1767                 return PTR_ERR(net);
1768             }
1769             *tgt_net = net;
1770         } else {
1771             NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1772             return -EINVAL;
1773         }
1774     }
1775 
1776     return 0;
1777 }
1778 
1779 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1780                 struct netlink_callback *cb, int s_ip_idx,
1781                 struct inet_fill_args *fillargs)
1782 {
1783     struct in_ifaddr *ifa;
1784     int ip_idx = 0;
1785     int err;
1786 
1787     in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1788         if (ip_idx < s_ip_idx) {
1789             ip_idx++;
1790             continue;
1791         }
1792         err = inet_fill_ifaddr(skb, ifa, fillargs);
1793         if (err < 0)
1794             goto done;
1795 
1796         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1797         ip_idx++;
1798     }
1799     err = 0;
1800 
1801 done:
1802     cb->args[2] = ip_idx;
1803 
1804     return err;
1805 }
1806 
/*
 * inet_dump_ifaddr - netlink dump callback listing IPv4 addresses.
 * @skb: buffer the RTM_NEWADDR messages are written into
 * @cb:  dump state; cb->args[0] is the device hash bucket to resume at,
 *       cb->args[1] the device index inside that bucket and cb->args[2]
 *       the address index inside that device (stored by in_dev_dump_addr())
 *
 * With strict checking the request is validated first and may restrict
 * the dump to a single ifindex and/or redirect it to another netns.
 *
 * Returns skb->len when anything was written, otherwise the last error
 * (0 when the dump is complete).
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct inet_fill_args fillargs = {
		.portid = NETLINK_CB(cb->skb).portid,
		.seq = nlh->nlmsg_seq,
		.event = RTM_NEWADDR,
		.flags = NLM_F_MULTI,
		.netnsid = -1,
	};
	struct net *net = sock_net(skb->sk);
	struct net *tgt_net = net;
	int h, s_h;
	int idx, s_idx;
	int s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;
	int err = 0;

	/* Pick up the resume cursor left by the previous invocation. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = cb->args[2];

	if (cb->strict_check) {
		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
						 skb->sk, cb);
		if (err < 0)
			goto put_tgt_net;

		err = 0;
		/* Single-device dump: the bucket walk below is skipped and
		 * only cb->args[2] is updated (by in_dev_dump_addr()).
		 */
		if (fillargs.ifindex) {
			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
			if (!dev) {
				err = -ENODEV;
				goto put_tgt_net;
			}

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev) {
				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
						       &fillargs);
			}
			goto put_tgt_net;
		}
	}

	/* Walk every bucket of the ifindex hash; s_idx only applies to the
	 * first bucket visited and is reset for the following ones.
	 */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &tgt_net->dev_index_head[h];
		rcu_read_lock();
		/* Value later checked by nl_dump_check_consistent(). */
		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
			  tgt_net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			/* Past the resume device: start at its first address. */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
					       &fillargs);
			if (err < 0) {
				rcu_read_unlock();
				goto done;
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Save the cursor for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;
put_tgt_net:
	/* netnsid >= 0 means tgt_net was obtained for this request. */
	if (fillargs.netnsid >= 0)
		put_net(tgt_net);

	return skb->len ? : err;
}
1890 
1891 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1892               u32 portid)
1893 {
1894     struct inet_fill_args fillargs = {
1895         .portid = portid,
1896         .seq = nlh ? nlh->nlmsg_seq : 0,
1897         .event = event,
1898         .flags = 0,
1899         .netnsid = -1,
1900     };
1901     struct sk_buff *skb;
1902     int err = -ENOBUFS;
1903     struct net *net;
1904 
1905     net = dev_net(ifa->ifa_dev->dev);
1906     skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1907     if (!skb)
1908         goto errout;
1909 
1910     err = inet_fill_ifaddr(skb, ifa, &fillargs);
1911     if (err < 0) {
1912         /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1913         WARN_ON(err == -EMSGSIZE);
1914         kfree_skb(skb);
1915         goto errout;
1916     }
1917     rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1918     return;
1919 errout:
1920     if (err < 0)
1921         rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1922 }
1923 
1924 static size_t inet_get_link_af_size(const struct net_device *dev,
1925                     u32 ext_filter_mask)
1926 {
1927     struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1928 
1929     if (!in_dev)
1930         return 0;
1931 
1932     return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1933 }
1934 
1935 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1936                  u32 ext_filter_mask)
1937 {
1938     struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1939     struct nlattr *nla;
1940     int i;
1941 
1942     if (!in_dev)
1943         return -ENODATA;
1944 
1945     nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1946     if (!nla)
1947         return -EMSGSIZE;
1948 
1949     for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1950         ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1951 
1952     return 0;
1953 }
1954 
1955 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1956     [IFLA_INET_CONF]    = { .type = NLA_NESTED },
1957 };
1958 
1959 static int inet_validate_link_af(const struct net_device *dev,
1960                  const struct nlattr *nla,
1961                  struct netlink_ext_ack *extack)
1962 {
1963     struct nlattr *a, *tb[IFLA_INET_MAX+1];
1964     int err, rem;
1965 
1966     if (dev && !__in_dev_get_rtnl(dev))
1967         return -EAFNOSUPPORT;
1968 
1969     err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1970                       inet_af_policy, extack);
1971     if (err < 0)
1972         return err;
1973 
1974     if (tb[IFLA_INET_CONF]) {
1975         nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1976             int cfgid = nla_type(a);
1977 
1978             if (nla_len(a) < 4)
1979                 return -EINVAL;
1980 
1981             if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1982                 return -EINVAL;
1983         }
1984     }
1985 
1986     return 0;
1987 }
1988 
1989 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
1990                 struct netlink_ext_ack *extack)
1991 {
1992     struct in_device *in_dev = __in_dev_get_rtnl(dev);
1993     struct nlattr *a, *tb[IFLA_INET_MAX+1];
1994     int rem;
1995 
1996     if (!in_dev)
1997         return -EAFNOSUPPORT;
1998 
1999     if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2000         return -EINVAL;
2001 
2002     if (tb[IFLA_INET_CONF]) {
2003         nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2004             ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2005     }
2006 
2007     return 0;
2008 }
2009 
2010 static int inet_netconf_msgsize_devconf(int type)
2011 {
2012     int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2013            + nla_total_size(4); /* NETCONFA_IFINDEX */
2014     bool all = false;
2015 
2016     if (type == NETCONFA_ALL)
2017         all = true;
2018 
2019     if (all || type == NETCONFA_FORWARDING)
2020         size += nla_total_size(4);
2021     if (all || type == NETCONFA_RP_FILTER)
2022         size += nla_total_size(4);
2023     if (all || type == NETCONFA_MC_FORWARDING)
2024         size += nla_total_size(4);
2025     if (all || type == NETCONFA_BC_FORWARDING)
2026         size += nla_total_size(4);
2027     if (all || type == NETCONFA_PROXY_NEIGH)
2028         size += nla_total_size(4);
2029     if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2030         size += nla_total_size(4);
2031 
2032     return size;
2033 }
2034 
2035 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2036                      struct ipv4_devconf *devconf, u32 portid,
2037                      u32 seq, int event, unsigned int flags,
2038                      int type)
2039 {
2040     struct nlmsghdr  *nlh;
2041     struct netconfmsg *ncm;
2042     bool all = false;
2043 
2044     nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2045             flags);
2046     if (!nlh)
2047         return -EMSGSIZE;
2048 
2049     if (type == NETCONFA_ALL)
2050         all = true;
2051 
2052     ncm = nlmsg_data(nlh);
2053     ncm->ncm_family = AF_INET;
2054 
2055     if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2056         goto nla_put_failure;
2057 
2058     if (!devconf)
2059         goto out;
2060 
2061     if ((all || type == NETCONFA_FORWARDING) &&
2062         nla_put_s32(skb, NETCONFA_FORWARDING,
2063             IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2064         goto nla_put_failure;
2065     if ((all || type == NETCONFA_RP_FILTER) &&
2066         nla_put_s32(skb, NETCONFA_RP_FILTER,
2067             IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2068         goto nla_put_failure;
2069     if ((all || type == NETCONFA_MC_FORWARDING) &&
2070         nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2071             IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2072         goto nla_put_failure;
2073     if ((all || type == NETCONFA_BC_FORWARDING) &&
2074         nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2075             IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2076         goto nla_put_failure;
2077     if ((all || type == NETCONFA_PROXY_NEIGH) &&
2078         nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2079             IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2080         goto nla_put_failure;
2081     if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2082         nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2083             IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2084         goto nla_put_failure;
2085 
2086 out:
2087     nlmsg_end(skb, nlh);
2088     return 0;
2089 
2090 nla_put_failure:
2091     nlmsg_cancel(skb, nlh);
2092     return -EMSGSIZE;
2093 }
2094 
2095 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2096                  int ifindex, struct ipv4_devconf *devconf)
2097 {
2098     struct sk_buff *skb;
2099     int err = -ENOBUFS;
2100 
2101     skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2102     if (!skb)
2103         goto errout;
2104 
2105     err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2106                     event, 0, type);
2107     if (err < 0) {
2108         /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2109         WARN_ON(err == -EMSGSIZE);
2110         kfree_skb(skb);
2111         goto errout;
2112     }
2113     rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2114     return;
2115 errout:
2116     if (err < 0)
2117         rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2118 }
2119 
2120 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2121     [NETCONFA_IFINDEX]  = { .len = sizeof(int) },
2122     [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
2123     [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
2124     [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
2125     [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
2126 };
2127 
2128 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2129                       const struct nlmsghdr *nlh,
2130                       struct nlattr **tb,
2131                       struct netlink_ext_ack *extack)
2132 {
2133     int i, err;
2134 
2135     if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2136         NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2137         return -EINVAL;
2138     }
2139 
2140     if (!netlink_strict_get_check(skb))
2141         return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2142                           tb, NETCONFA_MAX,
2143                           devconf_ipv4_policy, extack);
2144 
2145     err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2146                         tb, NETCONFA_MAX,
2147                         devconf_ipv4_policy, extack);
2148     if (err)
2149         return err;
2150 
2151     for (i = 0; i <= NETCONFA_MAX; i++) {
2152         if (!tb[i])
2153             continue;
2154 
2155         switch (i) {
2156         case NETCONFA_IFINDEX:
2157             break;
2158         default:
2159             NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2160             return -EINVAL;
2161         }
2162     }
2163 
2164     return 0;
2165 }
2166 
2167 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2168                     struct nlmsghdr *nlh,
2169                     struct netlink_ext_ack *extack)
2170 {
2171     struct net *net = sock_net(in_skb->sk);
2172     struct nlattr *tb[NETCONFA_MAX+1];
2173     struct sk_buff *skb;
2174     struct ipv4_devconf *devconf;
2175     struct in_device *in_dev;
2176     struct net_device *dev;
2177     int ifindex;
2178     int err;
2179 
2180     err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2181     if (err)
2182         goto errout;
2183 
2184     err = -EINVAL;
2185     if (!tb[NETCONFA_IFINDEX])
2186         goto errout;
2187 
2188     ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2189     switch (ifindex) {
2190     case NETCONFA_IFINDEX_ALL:
2191         devconf = net->ipv4.devconf_all;
2192         break;
2193     case NETCONFA_IFINDEX_DEFAULT:
2194         devconf = net->ipv4.devconf_dflt;
2195         break;
2196     default:
2197         dev = __dev_get_by_index(net, ifindex);
2198         if (!dev)
2199             goto errout;
2200         in_dev = __in_dev_get_rtnl(dev);
2201         if (!in_dev)
2202             goto errout;
2203         devconf = &in_dev->cnf;
2204         break;
2205     }
2206 
2207     err = -ENOBUFS;
2208     skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2209     if (!skb)
2210         goto errout;
2211 
2212     err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2213                     NETLINK_CB(in_skb).portid,
2214                     nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2215                     NETCONFA_ALL);
2216     if (err < 0) {
2217         /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2218         WARN_ON(err == -EMSGSIZE);
2219         kfree_skb(skb);
2220         goto errout;
2221     }
2222     err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2223 errout:
2224     return err;
2225 }
2226 
2227 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2228                      struct netlink_callback *cb)
2229 {
2230     const struct nlmsghdr *nlh = cb->nlh;
2231     struct net *net = sock_net(skb->sk);
2232     int h, s_h;
2233     int idx, s_idx;
2234     struct net_device *dev;
2235     struct in_device *in_dev;
2236     struct hlist_head *head;
2237 
2238     if (cb->strict_check) {
2239         struct netlink_ext_ack *extack = cb->extack;
2240         struct netconfmsg *ncm;
2241 
2242         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2243             NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2244             return -EINVAL;
2245         }
2246 
2247         if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2248             NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2249             return -EINVAL;
2250         }
2251     }
2252 
2253     s_h = cb->args[0];
2254     s_idx = idx = cb->args[1];
2255 
2256     for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2257         idx = 0;
2258         head = &net->dev_index_head[h];
2259         rcu_read_lock();
2260         cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2261               net->dev_base_seq;
2262         hlist_for_each_entry_rcu(dev, head, index_hlist) {
2263             if (idx < s_idx)
2264                 goto cont;
2265             in_dev = __in_dev_get_rcu(dev);
2266             if (!in_dev)
2267                 goto cont;
2268 
2269             if (inet_netconf_fill_devconf(skb, dev->ifindex,
2270                               &in_dev->cnf,
2271                               NETLINK_CB(cb->skb).portid,
2272                               nlh->nlmsg_seq,
2273                               RTM_NEWNETCONF,
2274                               NLM_F_MULTI,
2275                               NETCONFA_ALL) < 0) {
2276                 rcu_read_unlock();
2277                 goto done;
2278             }
2279             nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2280 cont:
2281             idx++;
2282         }
2283         rcu_read_unlock();
2284     }
2285     if (h == NETDEV_HASHENTRIES) {
2286         if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2287                           net->ipv4.devconf_all,
2288                           NETLINK_CB(cb->skb).portid,
2289                           nlh->nlmsg_seq,
2290                           RTM_NEWNETCONF, NLM_F_MULTI,
2291                           NETCONFA_ALL) < 0)
2292             goto done;
2293         else
2294             h++;
2295     }
2296     if (h == NETDEV_HASHENTRIES + 1) {
2297         if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2298                           net->ipv4.devconf_dflt,
2299                           NETLINK_CB(cb->skb).portid,
2300                           nlh->nlmsg_seq,
2301                           RTM_NEWNETCONF, NLM_F_MULTI,
2302                           NETCONFA_ALL) < 0)
2303             goto done;
2304         else
2305             h++;
2306     }
2307 done:
2308     cb->args[0] = h;
2309     cb->args[1] = idx;
2310 
2311     return skb->len;
2312 }
2313 
2314 #ifdef CONFIG_SYSCTL
2315 
2316 static void devinet_copy_dflt_conf(struct net *net, int i)
2317 {
2318     struct net_device *dev;
2319 
2320     rcu_read_lock();
2321     for_each_netdev_rcu(net, dev) {
2322         struct in_device *in_dev;
2323 
2324         in_dev = __in_dev_get_rcu(dev);
2325         if (in_dev && !test_bit(i, in_dev->cnf.state))
2326             in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2327     }
2328     rcu_read_unlock();
2329 }
2330 
2331 /* called with RTNL locked */
2332 static void inet_forward_change(struct net *net)
2333 {
2334     struct net_device *dev;
2335     int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2336 
2337     IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2338     IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2339     inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2340                     NETCONFA_FORWARDING,
2341                     NETCONFA_IFINDEX_ALL,
2342                     net->ipv4.devconf_all);
2343     inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2344                     NETCONFA_FORWARDING,
2345                     NETCONFA_IFINDEX_DEFAULT,
2346                     net->ipv4.devconf_dflt);
2347 
2348     for_each_netdev(net, dev) {
2349         struct in_device *in_dev;
2350 
2351         if (on)
2352             dev_disable_lro(dev);
2353 
2354         in_dev = __in_dev_get_rtnl(dev);
2355         if (in_dev) {
2356             IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2357             inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2358                             NETCONFA_FORWARDING,
2359                             dev->ifindex, &in_dev->cnf);
2360         }
2361     }
2362 }
2363 
2364 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2365 {
2366     if (cnf == net->ipv4.devconf_dflt)
2367         return NETCONFA_IFINDEX_DEFAULT;
2368     else if (cnf == net->ipv4.devconf_all)
2369         return NETCONFA_IFINDEX_ALL;
2370     else {
2371         struct in_device *idev
2372             = container_of(cnf, struct in_device, cnf);
2373         return idev->dev->ifindex;
2374     }
2375 }
2376 
2377 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2378                  void *buffer, size_t *lenp, loff_t *ppos)
2379 {
2380     int old_value = *(int *)ctl->data;
2381     int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2382     int new_value = *(int *)ctl->data;
2383 
2384     if (write) {
2385         struct ipv4_devconf *cnf = ctl->extra1;
2386         struct net *net = ctl->extra2;
2387         int i = (int *)ctl->data - cnf->data;
2388         int ifindex;
2389 
2390         set_bit(i, cnf->state);
2391 
2392         if (cnf == net->ipv4.devconf_dflt)
2393             devinet_copy_dflt_conf(net, i);
2394         if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2395             i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2396             if ((new_value == 0) && (old_value != 0))
2397                 rt_cache_flush(net);
2398 
2399         if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2400             new_value != old_value)
2401             rt_cache_flush(net);
2402 
2403         if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2404             new_value != old_value) {
2405             ifindex = devinet_conf_ifindex(net, cnf);
2406             inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2407                             NETCONFA_RP_FILTER,
2408                             ifindex, cnf);
2409         }
2410         if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2411             new_value != old_value) {
2412             ifindex = devinet_conf_ifindex(net, cnf);
2413             inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2414                             NETCONFA_PROXY_NEIGH,
2415                             ifindex, cnf);
2416         }
2417         if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2418             new_value != old_value) {
2419             ifindex = devinet_conf_ifindex(net, cnf);
2420             inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2421                             NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2422                             ifindex, cnf);
2423         }
2424     }
2425 
2426     return ret;
2427 }
2428 
2429 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2430                   void *buffer, size_t *lenp, loff_t *ppos)
2431 {
2432     int *valp = ctl->data;
2433     int val = *valp;
2434     loff_t pos = *ppos;
2435     struct net *net = ctl->extra2;
2436     int ret;
2437 
2438     if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2439         return -EPERM;
2440 
2441     ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2442 
2443     if (write && *valp != val) {
2444         if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2445             if (!rtnl_trylock()) {
2446                 /* Restore the original values before restarting */
2447                 *valp = val;
2448                 *ppos = pos;
2449                 return restart_syscall();
2450             }
2451             if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2452                 inet_forward_change(net);
2453             } else {
2454                 struct ipv4_devconf *cnf = ctl->extra1;
2455                 struct in_device *idev =
2456                     container_of(cnf, struct in_device, cnf);
2457                 if (*valp)
2458                     dev_disable_lro(idev->dev);
2459                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2460                                 NETCONFA_FORWARDING,
2461                                 idev->dev->ifindex,
2462                                 cnf);
2463             }
2464             rtnl_unlock();
2465             rt_cache_flush(net);
2466         } else
2467             inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2468                             NETCONFA_FORWARDING,
2469                             NETCONFA_IFINDEX_DEFAULT,
2470                             net->ipv4.devconf_dflt);
2471     }
2472 
2473     return ret;
2474 }
2475 
2476 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2477                 void *buffer, size_t *lenp, loff_t *ppos)
2478 {
2479     int *valp = ctl->data;
2480     int val = *valp;
2481     int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2482     struct net *net = ctl->extra2;
2483 
2484     if (write && *valp != val)
2485         rt_cache_flush(net);
2486 
2487     return ret;
2488 }
2489 
/*
 * Initializer for one template ctl_table entry.  The entry points into the
 * global ipv4_devconf block; __devinet_sysctl_register() later rebases
 * .data/.extra1 onto the block actually being registered.
 *
 *   attr: IPV4_DEVCONF_* suffix selecting the slot (constants are one-based)
 *   name: file name of the sysctl
 *   mval: file mode
 *   proc: proc handler
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* read-write knob handled by devinet_conf_proc() */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* read-only knob handled by devinet_conf_proc() */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* read-write knob with a caller-supplied handler */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* read-write knob handled by ipv4_doint_and_flush() */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2512 
2513 static struct devinet_sysctl_table {
2514     struct ctl_table_header *sysctl_header;
2515     struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2516 } devinet_sysctl = {
2517     .devinet_vars = {
2518         DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2519                          devinet_sysctl_forward),
2520         DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2521         DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2522 
2523         DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2524         DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2525         DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2526         DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2527         DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2528         DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2529                     "accept_source_route"),
2530         DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2531         DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2532         DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2533         DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2534         DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2535         DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2536         DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2537         DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2538         DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2539         DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2540         DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2541         DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2542         DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2543                     "arp_evict_nocarrier"),
2544         DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2545         DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2546                     "force_igmp_version"),
2547         DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2548                     "igmpv2_unsolicited_report_interval"),
2549         DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2550                     "igmpv3_unsolicited_report_interval"),
2551         DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2552                     "ignore_routes_with_linkdown"),
2553         DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2554                     "drop_gratuitous_arp"),
2555 
2556         DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2557         DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2558         DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2559                           "promote_secondaries"),
2560         DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2561                           "route_localnet"),
2562         DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2563                           "drop_unicast_in_l2_multicast"),
2564     },
2565 };
2566 
2567 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2568                      int ifindex, struct ipv4_devconf *p)
2569 {
2570     int i;
2571     struct devinet_sysctl_table *t;
2572     char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2573 
2574     t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2575     if (!t)
2576         goto out;
2577 
2578     for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2579         t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2580         t->devinet_vars[i].extra1 = p;
2581         t->devinet_vars[i].extra2 = net;
2582     }
2583 
2584     snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2585 
2586     t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2587     if (!t->sysctl_header)
2588         goto free;
2589 
2590     p->sysctl = t;
2591 
2592     inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2593                     ifindex, p);
2594     return 0;
2595 
2596 free:
2597     kfree(t);
2598 out:
2599     return -ENOMEM;
2600 }
2601 
2602 static void __devinet_sysctl_unregister(struct net *net,
2603                     struct ipv4_devconf *cnf, int ifindex)
2604 {
2605     struct devinet_sysctl_table *t = cnf->sysctl;
2606 
2607     if (t) {
2608         cnf->sysctl = NULL;
2609         unregister_net_sysctl_table(t->sysctl_header);
2610         kfree(t);
2611     }
2612 
2613     inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2614 }
2615 
2616 static int devinet_sysctl_register(struct in_device *idev)
2617 {
2618     int err;
2619 
2620     if (!sysctl_dev_name_is_allowed(idev->dev->name))
2621         return -EINVAL;
2622 
2623     err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2624     if (err)
2625         return err;
2626     err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2627                     idev->dev->ifindex, &idev->cnf);
2628     if (err)
2629         neigh_sysctl_unregister(idev->arp_parms);
2630     return err;
2631 }
2632 
2633 static void devinet_sysctl_unregister(struct in_device *idev)
2634 {
2635     struct net *net = dev_net(idev->dev);
2636 
2637     __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2638     neigh_sysctl_unregister(idev->arp_parms);
2639 }
2640 
2641 static struct ctl_table ctl_forward_entry[] = {
2642     {
2643         .procname   = "ip_forward",
2644         .data       = &ipv4_devconf.data[
2645                     IPV4_DEVCONF_FORWARDING - 1],
2646         .maxlen     = sizeof(int),
2647         .mode       = 0644,
2648         .proc_handler   = devinet_sysctl_forward,
2649         .extra1     = &ipv4_devconf,
2650         .extra2     = &init_net,
2651     },
2652     { },
2653 };
2654 #endif
2655 
2656 static __net_init int devinet_init_net(struct net *net)
2657 {
2658     int err;
2659     struct ipv4_devconf *all, *dflt;
2660 #ifdef CONFIG_SYSCTL
2661     struct ctl_table *tbl;
2662     struct ctl_table_header *forw_hdr;
2663 #endif
2664 
2665     err = -ENOMEM;
2666     all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2667     if (!all)
2668         goto err_alloc_all;
2669 
2670     dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2671     if (!dflt)
2672         goto err_alloc_dflt;
2673 
2674 #ifdef CONFIG_SYSCTL
2675     tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2676     if (!tbl)
2677         goto err_alloc_ctl;
2678 
2679     tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2680     tbl[0].extra1 = all;
2681     tbl[0].extra2 = net;
2682 #endif
2683 
2684     if (!net_eq(net, &init_net)) {
2685         switch (net_inherit_devconf()) {
2686         case 3:
2687             /* copy from the current netns */
2688             memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2689                    sizeof(ipv4_devconf));
2690             memcpy(dflt,
2691                    current->nsproxy->net_ns->ipv4.devconf_dflt,
2692                    sizeof(ipv4_devconf_dflt));
2693             break;
2694         case 0:
2695         case 1:
2696             /* copy from init_net */
2697             memcpy(all, init_net.ipv4.devconf_all,
2698                    sizeof(ipv4_devconf));
2699             memcpy(dflt, init_net.ipv4.devconf_dflt,
2700                    sizeof(ipv4_devconf_dflt));
2701             break;
2702         case 2:
2703             /* use compiled values */
2704             break;
2705         }
2706     }
2707 
2708 #ifdef CONFIG_SYSCTL
2709     err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2710     if (err < 0)
2711         goto err_reg_all;
2712 
2713     err = __devinet_sysctl_register(net, "default",
2714                     NETCONFA_IFINDEX_DEFAULT, dflt);
2715     if (err < 0)
2716         goto err_reg_dflt;
2717 
2718     err = -ENOMEM;
2719     forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2720     if (!forw_hdr)
2721         goto err_reg_ctl;
2722     net->ipv4.forw_hdr = forw_hdr;
2723 #endif
2724 
2725     net->ipv4.devconf_all = all;
2726     net->ipv4.devconf_dflt = dflt;
2727     return 0;
2728 
2729 #ifdef CONFIG_SYSCTL
2730 err_reg_ctl:
2731     __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2732 err_reg_dflt:
2733     __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2734 err_reg_all:
2735     kfree(tbl);
2736 err_alloc_ctl:
2737 #endif
2738     kfree(dflt);
2739 err_alloc_dflt:
2740     kfree(all);
2741 err_alloc_all:
2742     return err;
2743 }
2744 
2745 static __net_exit void devinet_exit_net(struct net *net)
2746 {
2747 #ifdef CONFIG_SYSCTL
2748     struct ctl_table *tbl;
2749 
2750     tbl = net->ipv4.forw_hdr->ctl_table_arg;
2751     unregister_net_sysctl_table(net->ipv4.forw_hdr);
2752     __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2753                     NETCONFA_IFINDEX_DEFAULT);
2754     __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2755                     NETCONFA_IFINDEX_ALL);
2756     kfree(tbl);
2757 #endif
2758     kfree(net->ipv4.devconf_dflt);
2759     kfree(net->ipv4.devconf_all);
2760 }
2761 
2762 static __net_initdata struct pernet_operations devinet_ops = {
2763     .init = devinet_init_net,
2764     .exit = devinet_exit_net,
2765 };
2766 
2767 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2768     .family       = AF_INET,
2769     .fill_link_af     = inet_fill_link_af,
2770     .get_link_af_size = inet_get_link_af_size,
2771     .validate_link_af = inet_validate_link_af,
2772     .set_link_af      = inet_set_link_af,
2773 };
2774 
2775 void __init devinet_init(void)
2776 {
2777     int i;
2778 
2779     for (i = 0; i < IN4_ADDR_HSIZE; i++)
2780         INIT_HLIST_HEAD(&inet_addr_lst[i]);
2781 
2782     register_pernet_subsys(&devinet_ops);
2783     register_netdevice_notifier(&ip_netdev_notifier);
2784 
2785     queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2786 
2787     rtnl_af_register(&inet_af_ops);
2788 
2789     rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2790     rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2791     rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2792     rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2793               inet_netconf_dump_devconf, 0);
2794 }