Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  *  Anycast support for IPv6
0004  *  Linux INET6 implementation
0005  *
0006  *  Authors:
0007  *  David L Stevens (dlstevens@us.ibm.com)
0008  *
0009  *  based heavily on net/ipv6/mcast.c
0010  */
0011 
0012 #include <linux/capability.h>
0013 #include <linux/module.h>
0014 #include <linux/errno.h>
0015 #include <linux/types.h>
0016 #include <linux/random.h>
0017 #include <linux/string.h>
0018 #include <linux/socket.h>
0019 #include <linux/sockios.h>
0020 #include <linux/net.h>
0021 #include <linux/in6.h>
0022 #include <linux/netdevice.h>
0023 #include <linux/if_arp.h>
0024 #include <linux/route.h>
0025 #include <linux/init.h>
0026 #include <linux/proc_fs.h>
0027 #include <linux/seq_file.h>
0028 #include <linux/slab.h>
0029 
0030 #include <net/net_namespace.h>
0031 #include <net/sock.h>
0032 #include <net/snmp.h>
0033 
0034 #include <net/ipv6.h>
0035 #include <net/protocol.h>
0036 #include <net/if_inet6.h>
0037 #include <net/ndisc.h>
0038 #include <net/addrconf.h>
0039 #include <net/ip6_route.h>
0040 
0041 #include <net/checksum.h>
0042 
/* log2 of the number of buckets in the anycast address hash table */
#define IN6_ADDR_HSIZE_SHIFT    8
/* number of buckets, i.e. BIT(8) == 256 */
#define IN6_ADDR_HSIZE      BIT(IN6_ADDR_HSIZE_SHIFT)
/*  anycast address hash table
 *
 *  One table shared by all network namespaces; the bucket for an address
 *  is chosen by inet6_acaddr_hash().  Insertions and removals are
 *  serialised by acaddr_hash_lock, while ipv6_chk_acast_addr() walks the
 *  buckets under rcu_read_lock().
 */
static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
static DEFINE_SPINLOCK(acaddr_hash_lock);

/* Defined further down; needed earlier by the socket drop/close paths. */
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
0051 
0052 static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
0053 {
0054     u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
0055 
0056     return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
0057 }
0058 
0059 /*
0060  *  socket join an anycast group
0061  */
0062 
0063 int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
0064 {
0065     struct ipv6_pinfo *np = inet6_sk(sk);
0066     struct net_device *dev = NULL;
0067     struct inet6_dev *idev;
0068     struct ipv6_ac_socklist *pac;
0069     struct net *net = sock_net(sk);
0070     int ishost = !net->ipv6.devconf_all->forwarding;
0071     int err = 0;
0072 
0073     ASSERT_RTNL();
0074 
0075     if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
0076         return -EPERM;
0077     if (ipv6_addr_is_multicast(addr))
0078         return -EINVAL;
0079 
0080     if (ifindex)
0081         dev = __dev_get_by_index(net, ifindex);
0082 
0083     if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
0084         return -EINVAL;
0085 
0086     pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
0087     if (!pac)
0088         return -ENOMEM;
0089     pac->acl_next = NULL;
0090     pac->acl_addr = *addr;
0091 
0092     if (ifindex == 0) {
0093         struct rt6_info *rt;
0094 
0095         rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
0096         if (rt) {
0097             dev = rt->dst.dev;
0098             ip6_rt_put(rt);
0099         } else if (ishost) {
0100             err = -EADDRNOTAVAIL;
0101             goto error;
0102         } else {
0103             /* router, no matching interface: just pick one */
0104             dev = __dev_get_by_flags(net, IFF_UP,
0105                          IFF_UP | IFF_LOOPBACK);
0106         }
0107     }
0108 
0109     if (!dev) {
0110         err = -ENODEV;
0111         goto error;
0112     }
0113 
0114     idev = __in6_dev_get(dev);
0115     if (!idev) {
0116         if (ifindex)
0117             err = -ENODEV;
0118         else
0119             err = -EADDRNOTAVAIL;
0120         goto error;
0121     }
0122     /* reset ishost, now that we have a specific device */
0123     ishost = !idev->cnf.forwarding;
0124 
0125     pac->acl_ifindex = dev->ifindex;
0126 
0127     /* XXX
0128      * For hosts, allow link-local or matching prefix anycasts.
0129      * This obviates the need for propagating anycast routes while
0130      * still allowing some non-router anycast participation.
0131      */
0132     if (!ipv6_chk_prefix(addr, dev)) {
0133         if (ishost)
0134             err = -EADDRNOTAVAIL;
0135         if (err)
0136             goto error;
0137     }
0138 
0139     err = __ipv6_dev_ac_inc(idev, addr);
0140     if (!err) {
0141         pac->acl_next = np->ipv6_ac_list;
0142         np->ipv6_ac_list = pac;
0143         pac = NULL;
0144     }
0145 
0146 error:
0147     if (pac)
0148         sock_kfree_s(sk, pac, sizeof(*pac));
0149     return err;
0150 }
0151 
0152 /*
0153  *  socket leave an anycast group
0154  */
0155 int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
0156 {
0157     struct ipv6_pinfo *np = inet6_sk(sk);
0158     struct net_device *dev;
0159     struct ipv6_ac_socklist *pac, *prev_pac;
0160     struct net *net = sock_net(sk);
0161 
0162     ASSERT_RTNL();
0163 
0164     prev_pac = NULL;
0165     for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
0166         if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
0167              ipv6_addr_equal(&pac->acl_addr, addr))
0168             break;
0169         prev_pac = pac;
0170     }
0171     if (!pac)
0172         return -ENOENT;
0173     if (prev_pac)
0174         prev_pac->acl_next = pac->acl_next;
0175     else
0176         np->ipv6_ac_list = pac->acl_next;
0177 
0178     dev = __dev_get_by_index(net, pac->acl_ifindex);
0179     if (dev)
0180         ipv6_dev_ac_dec(dev, &pac->acl_addr);
0181 
0182     sock_kfree_s(sk, pac, sizeof(*pac));
0183     return 0;
0184 }
0185 
0186 void __ipv6_sock_ac_close(struct sock *sk)
0187 {
0188     struct ipv6_pinfo *np = inet6_sk(sk);
0189     struct net_device *dev = NULL;
0190     struct ipv6_ac_socklist *pac;
0191     struct net *net = sock_net(sk);
0192     int prev_index;
0193 
0194     ASSERT_RTNL();
0195     pac = np->ipv6_ac_list;
0196     np->ipv6_ac_list = NULL;
0197 
0198     prev_index = 0;
0199     while (pac) {
0200         struct ipv6_ac_socklist *next = pac->acl_next;
0201 
0202         if (pac->acl_ifindex != prev_index) {
0203             dev = __dev_get_by_index(net, pac->acl_ifindex);
0204             prev_index = pac->acl_ifindex;
0205         }
0206         if (dev)
0207             ipv6_dev_ac_dec(dev, &pac->acl_addr);
0208         sock_kfree_s(sk, pac, sizeof(*pac));
0209         pac = next;
0210     }
0211 }
0212 
0213 void ipv6_sock_ac_close(struct sock *sk)
0214 {
0215     struct ipv6_pinfo *np = inet6_sk(sk);
0216 
0217     if (!np->ipv6_ac_list)
0218         return;
0219     rtnl_lock();
0220     __ipv6_sock_ac_close(sk);
0221     rtnl_unlock();
0222 }
0223 
0224 static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
0225 {
0226     unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr);
0227 
0228     spin_lock(&acaddr_hash_lock);
0229     hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]);
0230     spin_unlock(&acaddr_hash_lock);
0231 }
0232 
/*
 * Remove @aca from the global anycast hash table.  The removal is done
 * under acaddr_hash_lock using the RCU-aware hlist helper.
 */
static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca)
{
    spin_lock(&acaddr_hash_lock);
    hlist_del_init_rcu(&aca->aca_addr_lst);
    spin_unlock(&acaddr_hash_lock);
}
0239 
/* Take an additional reference on @aca; paired with aca_put(). */
static void aca_get(struct ifacaddr6 *aca)
{
    refcount_inc(&aca->aca_refcnt);
}
0244 
/*
 * RCU callback queued by aca_put(): release the route reference held in
 * aca_rt (taken in aca_alloc()) and free the entry itself.
 */
static void aca_free_rcu(struct rcu_head *h)
{
    struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu);

    fib6_info_release(aca->aca_rt);
    kfree(aca);
}
0252 
0253 static void aca_put(struct ifacaddr6 *ac)
0254 {
0255     if (refcount_dec_and_test(&ac->aca_refcnt)) {
0256         call_rcu(&ac->rcu, aca_free_rcu);
0257     }
0258 }
0259 
0260 static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
0261                    const struct in6_addr *addr)
0262 {
0263     struct ifacaddr6 *aca;
0264 
0265     aca = kzalloc(sizeof(*aca), GFP_ATOMIC);
0266     if (!aca)
0267         return NULL;
0268 
0269     aca->aca_addr = *addr;
0270     fib6_info_hold(f6i);
0271     aca->aca_rt = f6i;
0272     INIT_HLIST_NODE(&aca->aca_addr_lst);
0273     aca->aca_users = 1;
0274     /* aca_tstamp should be updated upon changes */
0275     aca->aca_cstamp = aca->aca_tstamp = jiffies;
0276     refcount_set(&aca->aca_refcnt, 1);
0277 
0278     return aca;
0279 }
0280 
0281 /*
0282  *  device anycast group inc (add if not found)
0283  */
0284 int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
0285 {
0286     struct ifacaddr6 *aca;
0287     struct fib6_info *f6i;
0288     struct net *net;
0289     int err;
0290 
0291     ASSERT_RTNL();
0292 
0293     write_lock_bh(&idev->lock);
0294     if (idev->dead) {
0295         err = -ENODEV;
0296         goto out;
0297     }
0298 
0299     for (aca = idev->ac_list; aca; aca = aca->aca_next) {
0300         if (ipv6_addr_equal(&aca->aca_addr, addr)) {
0301             aca->aca_users++;
0302             err = 0;
0303             goto out;
0304         }
0305     }
0306 
0307     net = dev_net(idev->dev);
0308     f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC);
0309     if (IS_ERR(f6i)) {
0310         err = PTR_ERR(f6i);
0311         goto out;
0312     }
0313     aca = aca_alloc(f6i, addr);
0314     if (!aca) {
0315         fib6_info_release(f6i);
0316         err = -ENOMEM;
0317         goto out;
0318     }
0319 
0320     aca->aca_next = idev->ac_list;
0321     idev->ac_list = aca;
0322 
0323     /* Hold this for addrconf_join_solict() below before we unlock,
0324      * it is already exposed via idev->ac_list.
0325      */
0326     aca_get(aca);
0327     write_unlock_bh(&idev->lock);
0328 
0329     ipv6_add_acaddr_hash(net, aca);
0330 
0331     ip6_ins_rt(net, f6i);
0332 
0333     addrconf_join_solict(idev->dev, &aca->aca_addr);
0334 
0335     aca_put(aca);
0336     return 0;
0337 out:
0338     write_unlock_bh(&idev->lock);
0339     return err;
0340 }
0341 
0342 /*
0343  *  device anycast group decrement
0344  */
0345 int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
0346 {
0347     struct ifacaddr6 *aca, *prev_aca;
0348 
0349     ASSERT_RTNL();
0350 
0351     write_lock_bh(&idev->lock);
0352     prev_aca = NULL;
0353     for (aca = idev->ac_list; aca; aca = aca->aca_next) {
0354         if (ipv6_addr_equal(&aca->aca_addr, addr))
0355             break;
0356         prev_aca = aca;
0357     }
0358     if (!aca) {
0359         write_unlock_bh(&idev->lock);
0360         return -ENOENT;
0361     }
0362     if (--aca->aca_users > 0) {
0363         write_unlock_bh(&idev->lock);
0364         return 0;
0365     }
0366     if (prev_aca)
0367         prev_aca->aca_next = aca->aca_next;
0368     else
0369         idev->ac_list = aca->aca_next;
0370     write_unlock_bh(&idev->lock);
0371     ipv6_del_acaddr_hash(aca);
0372     addrconf_leave_solict(idev, &aca->aca_addr);
0373 
0374     ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false);
0375 
0376     aca_put(aca);
0377     return 0;
0378 }
0379 
0380 /* called with rtnl_lock() */
0381 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
0382 {
0383     struct inet6_dev *idev = __in6_dev_get(dev);
0384 
0385     if (!idev)
0386         return -ENODEV;
0387     return __ipv6_dev_ac_dec(idev, addr);
0388 }
0389 
0390 void ipv6_ac_destroy_dev(struct inet6_dev *idev)
0391 {
0392     struct ifacaddr6 *aca;
0393 
0394     write_lock_bh(&idev->lock);
0395     while ((aca = idev->ac_list) != NULL) {
0396         idev->ac_list = aca->aca_next;
0397         write_unlock_bh(&idev->lock);
0398 
0399         ipv6_del_acaddr_hash(aca);
0400 
0401         addrconf_leave_solict(idev, &aca->aca_addr);
0402 
0403         ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false);
0404 
0405         aca_put(aca);
0406 
0407         write_lock_bh(&idev->lock);
0408     }
0409     write_unlock_bh(&idev->lock);
0410 }
0411 
0412 /*
0413  *  check if the interface has this anycast address
0414  *  called with rcu_read_lock()
0415  */
0416 static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr)
0417 {
0418     struct inet6_dev *idev;
0419     struct ifacaddr6 *aca;
0420 
0421     idev = __in6_dev_get(dev);
0422     if (idev) {
0423         read_lock_bh(&idev->lock);
0424         for (aca = idev->ac_list; aca; aca = aca->aca_next)
0425             if (ipv6_addr_equal(&aca->aca_addr, addr))
0426                 break;
0427         read_unlock_bh(&idev->lock);
0428         return aca != NULL;
0429     }
0430     return false;
0431 }
0432 
0433 /*
0434  *  check if given interface (or any, if dev==0) has this anycast address
0435  */
0436 bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
0437              const struct in6_addr *addr)
0438 {
0439     struct net_device *nh_dev;
0440     struct ifacaddr6 *aca;
0441     bool found = false;
0442 
0443     rcu_read_lock();
0444     if (dev)
0445         found = ipv6_chk_acast_dev(dev, addr);
0446     else {
0447         unsigned int hash = inet6_acaddr_hash(net, addr);
0448 
0449         hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash],
0450                      aca_addr_lst) {
0451             nh_dev = fib6_info_nh_dev(aca->aca_rt);
0452             if (!nh_dev || !net_eq(dev_net(nh_dev), net))
0453                 continue;
0454             if (ipv6_addr_equal(&aca->aca_addr, addr)) {
0455                 found = true;
0456                 break;
0457             }
0458         }
0459     }
0460     rcu_read_unlock();
0461     return found;
0462 }
0463 
0464 /*  check if this anycast address is link-local on given interface or
0465  *  is global
0466  */
0467 bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
0468                  const struct in6_addr *addr)
0469 {
0470     return ipv6_chk_acast_addr(net,
0471                    (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL ?
0472                     dev : NULL),
0473                    addr);
0474 }
0475 
0476 #ifdef CONFIG_PROC_FS
/*
 * Iterator state kept in seq->private while the per-net "anycast6" proc
 * file is being read.
 */
struct ac6_iter_state {
    struct seq_net_private p;
    struct net_device *dev;     /* device the iterator is currently on */
    struct inet6_dev *idev;     /* inet6_dev whose lock is read-held, or NULL */
};

/* Fetch the iterator state stored in a seq_file's private pointer. */
#define ac6_seq_private(seq)    ((struct ac6_iter_state *)(seq)->private)
0484 
0485 static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
0486 {
0487     struct ifacaddr6 *im = NULL;
0488     struct ac6_iter_state *state = ac6_seq_private(seq);
0489     struct net *net = seq_file_net(seq);
0490 
0491     state->idev = NULL;
0492     for_each_netdev_rcu(net, state->dev) {
0493         struct inet6_dev *idev;
0494         idev = __in6_dev_get(state->dev);
0495         if (!idev)
0496             continue;
0497         read_lock_bh(&idev->lock);
0498         im = idev->ac_list;
0499         if (im) {
0500             state->idev = idev;
0501             break;
0502         }
0503         read_unlock_bh(&idev->lock);
0504     }
0505     return im;
0506 }
0507 
0508 static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im)
0509 {
0510     struct ac6_iter_state *state = ac6_seq_private(seq);
0511 
0512     im = im->aca_next;
0513     while (!im) {
0514         if (likely(state->idev != NULL))
0515             read_unlock_bh(&state->idev->lock);
0516 
0517         state->dev = next_net_device_rcu(state->dev);
0518         if (!state->dev) {
0519             state->idev = NULL;
0520             break;
0521         }
0522         state->idev = __in6_dev_get(state->dev);
0523         if (!state->idev)
0524             continue;
0525         read_lock_bh(&state->idev->lock);
0526         im = state->idev->ac_list;
0527     }
0528     return im;
0529 }
0530 
0531 static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
0532 {
0533     struct ifacaddr6 *im = ac6_get_first(seq);
0534     if (im)
0535         while (pos && (im = ac6_get_next(seq, im)) != NULL)
0536             --pos;
0537     return pos ? NULL : im;
0538 }
0539 
/*
 * seq_file ->start: enter the RCU read-side section and position the
 * iterator on entry *pos.  Returns NULL when there is no such entry.
 * The RCU section is left again in ac6_seq_stop().
 */
static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
    __acquires(RCU)
{
    rcu_read_lock();
    return ac6_get_idx(seq, *pos);
}
0546 
0547 static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
0548 {
0549     struct ifacaddr6 *im = ac6_get_next(seq, v);
0550 
0551     ++*pos;
0552     return im;
0553 }
0554 
0555 static void ac6_seq_stop(struct seq_file *seq, void *v)
0556     __releases(RCU)
0557 {
0558     struct ac6_iter_state *state = ac6_seq_private(seq);
0559 
0560     if (likely(state->idev != NULL)) {
0561         read_unlock_bh(&state->idev->lock);
0562         state->idev = NULL;
0563     }
0564     rcu_read_unlock();
0565 }
0566 
0567 static int ac6_seq_show(struct seq_file *seq, void *v)
0568 {
0569     struct ifacaddr6 *im = (struct ifacaddr6 *)v;
0570     struct ac6_iter_state *state = ac6_seq_private(seq);
0571 
0572     seq_printf(seq, "%-4d %-15s %pi6 %5d\n",
0573            state->dev->ifindex, state->dev->name,
0574            &im->aca_addr, im->aca_users);
0575     return 0;
0576 }
0577 
/* seq_file callbacks registered for the "anycast6" proc file. */
static const struct seq_operations ac6_seq_ops = {
    .start  =   ac6_seq_start,
    .next   =   ac6_seq_next,
    .stop   =   ac6_seq_stop,
    .show   =   ac6_seq_show,
};
0584 
0585 int __net_init ac6_proc_init(struct net *net)
0586 {
0587     if (!proc_create_net("anycast6", 0444, net->proc_net, &ac6_seq_ops,
0588             sizeof(struct ac6_iter_state)))
0589         return -ENOMEM;
0590 
0591     return 0;
0592 }
0593 
/* Remove the "anycast6" entry created by ac6_proc_init() for @net. */
void ac6_proc_exit(struct net *net)
{
    remove_proc_entry("anycast6", net->proc_net);
}
0598 #endif
0599 
0600 /*  Init / cleanup code
0601  */
0602 int __init ipv6_anycast_init(void)
0603 {
0604     int i;
0605 
0606     for (i = 0; i < IN6_ADDR_HSIZE; i++)
0607         INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
0608     return 0;
0609 }
0610 
0611 void ipv6_anycast_cleanup(void)
0612 {
0613     int i;
0614 
0615     spin_lock(&acaddr_hash_lock);
0616     for (i = 0; i < IN6_ADDR_HSIZE; i++)
0617         WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
0618     spin_unlock(&acaddr_hash_lock);
0619 }