// SPDX-License-Identifier: GPL-2.0

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>

#include <net/netfilter/nf_nat_masquerade.h>

struct masq_dev_work {
    struct work_struct work;
    struct net *net;
    netns_tracker ns_tracker;
    union nf_inet_addr addr;
    int ifindex;
    int (*iter)(struct nf_conn *i, void *data);
};

#define MAX_MASQ_WORKER_COUNT   16

static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt __read_mostly;
static atomic_t masq_worker_count __read_mostly;

unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
               const struct nf_nat_range2 *range,
               const struct net_device *out)
{
    struct nf_conn *ct;
    struct nf_conn_nat *nat;
    enum ip_conntrack_info ctinfo;
    struct nf_nat_range2 newrange;
    const struct rtable *rt;
    __be32 newsrc, nh;

    WARN_ON(hooknum != NF_INET_POST_ROUTING);

    ct = nf_ct_get(skb, &ctinfo);

    WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
             ctinfo == IP_CT_RELATED_REPLY)));

    /* Source address is 0.0.0.0 - locally generated packet that is
     * probably not supposed to be masqueraded.
     */
    if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
        return NF_ACCEPT;

    rt = skb_rtable(skb);
    nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
    newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
    if (!newsrc) {
        pr_info("%s ate my IP address\n", out->name);
        return NF_DROP;
    }

    nat = nf_ct_nat_ext_add(ct);
    if (nat)
        nat->masq_index = out->ifindex;

    /* Transfer from original range. */
    memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
    memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
    newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
    newrange.min_addr.ip = newsrc;
    newrange.max_addr.ip = newsrc;
    newrange.min_proto   = range->min_proto;
    newrange.max_proto   = range->max_proto;

    /* Hand modified range to generic setup. */
    return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
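
/* Illustrative sketch (not part of this file): roughly how an
 * xtables-style MASQUERADE target could invoke the helper above from its
 * IPv4 target hook.  The function name masquerade_tg4() and the targinfo
 * layout are assumptions made for this example, not taken from this file.
 *
 *    static unsigned int
 *    masquerade_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 *    {
 *        const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 *        struct nf_nat_range2 range;
 *
 *        memset(&range, 0, sizeof(range));
 *        range.flags     = mr->range[0].flags;
 *        range.min_proto = mr->range[0].min;
 *        range.max_proto = mr->range[0].max;
 *
 *        return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range,
 *                                      xt_out(par));
 *    }
 */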

static void iterate_cleanup_work(struct work_struct *work)
{
    struct nf_ct_iter_data iter_data = {};
    struct masq_dev_work *w;

    w = container_of(work, struct masq_dev_work, work);

    iter_data.net = w->net;
    iter_data.data = (void *)w;
    nf_ct_iterate_cleanup_net(w->iter, &iter_data);

    put_net_track(w->net, &w->ns_tracker);
    kfree(w);
    atomic_dec(&masq_worker_count);
    module_put(THIS_MODULE);
}
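
/* Note on the teardown order above: the tracked netns reference pins the
 * namespace for the duration of the iteration, so it is dropped only after
 * nf_ct_iterate_cleanup_net() returns.  module_put() comes last because it
 * balances the try_module_get() in nf_nat_masq_schedule() that kept this
 * code loaded while the work item was pending.
 */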

/* Iterate the conntrack table in the background and remove conntrack
 * entries that use the device/address being removed.
 *
 * If too many work items have been queued already, or if memory allocation
 * fails, the iteration is skipped; the affected conntrack entries will
 * simply time out eventually.
 */
static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
                 int ifindex,
                 int (*iter)(struct nf_conn *i, void *data),
                 gfp_t gfp_flags)
{
    struct masq_dev_work *w;

    if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
        return;

    net = maybe_get_net(net);
    if (!net)
        return;

    if (!try_module_get(THIS_MODULE))
        goto err_module;

    w = kzalloc(sizeof(*w), gfp_flags);
    if (w) {
        /* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
        atomic_inc(&masq_worker_count);

        INIT_WORK(&w->work, iterate_cleanup_work);
        w->ifindex = ifindex;
        w->net = net;
        netns_tracker_alloc(net, &w->ns_tracker, gfp_flags);
        w->iter = iter;
        if (addr)
            w->addr = *addr;
        schedule_work(&w->work);
        return;
    }

    module_put(THIS_MODULE);
 err_module:
    put_net(net);
}
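
/* The worker-count check in nf_nat_masq_schedule() is racy by design:
 * several CPUs can pass the atomic_read() before any of them increments
 * the counter, so the limit may be overshot by a few items.  That is
 * harmless; the cap only bounds the amount of background iteration work,
 * it does not have to be exact.
 */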

static int device_cmp(struct nf_conn *i, void *arg)
{
    const struct nf_conn_nat *nat = nfct_nat(i);
    const struct masq_dev_work *w = arg;

    if (!nat)
        return 0;
    return nat->masq_index == w->ifindex;
}

static int masq_device_event(struct notifier_block *this,
                 unsigned long event,
                 void *ptr)
{
    const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
    struct net *net = dev_net(dev);

    if (event == NETDEV_DOWN) {
        /* Device was downed.  Search entire table for
         * conntracks which were associated with that device,
         * and forget them.
         */

        nf_nat_masq_schedule(net, NULL, dev->ifindex,
                     device_cmp, GFP_KERNEL);
    }

    return NOTIFY_DONE;
}

static int inet_cmp(struct nf_conn *ct, void *ptr)
{
    struct nf_conntrack_tuple *tuple;
    struct masq_dev_work *w = ptr;

    if (!device_cmp(ct, ptr))
        return 0;

    tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

    return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
}
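
/* Why inet_cmp() checks the reply direction: after source NAT, the
 * masqueraded address is recorded as the destination of the reply tuple
 * (it is where return traffic is addressed), so comparing the reply
 * tuple's dst.u3 against the address being removed identifies exactly the
 * connections that were masqueraded behind it.
 */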

static int masq_inet_event(struct notifier_block *this,
               unsigned long event,
               void *ptr)
{
    const struct in_ifaddr *ifa = ptr;
    const struct in_device *idev;
    const struct net_device *dev;
    union nf_inet_addr addr;

    if (event != NETDEV_DOWN)
        return NOTIFY_DONE;

    /* The masq_dev_notifier will catch the case of the device going
     * down.  So if the inetdev is dead and being destroyed we have
     * no work to do.  Otherwise this is an individual address removal
     * and we have to perform the flush.
     */
    idev = ifa->ifa_dev;
    if (idev->dead)
        return NOTIFY_DONE;

    memset(&addr, 0, sizeof(addr));

    addr.ip = ifa->ifa_address;

    dev = idev->dev;
    nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
                 inet_cmp, GFP_KERNEL);

    return NOTIFY_DONE;
}

static struct notifier_block masq_dev_notifier = {
    .notifier_call  = masq_device_event,
};

static struct notifier_block masq_inet_notifier = {
    .notifier_call  = masq_inet_event,
};

#if IS_ENABLED(CONFIG_IPV6)
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
               const struct in6_addr *daddr, unsigned int srcprefs,
               struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
    const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

    if (!v6_ops)
        return -EHOSTUNREACH;

    return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
    return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}
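
/* The CONFIG_IPV6_MODULE branch above exists because this code may be
 * built in while IPv6 is built as a module: calling ipv6_dev_get_saddr()
 * directly would then leave an unresolvable symbol, so the lookup goes
 * through the nf_ipv6_ops indirection, which is populated once the ipv6
 * module has loaded and is NULL until then.
 */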

unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
               const struct net_device *out)
{
    enum ip_conntrack_info ctinfo;
    struct nf_conn_nat *nat;
    struct in6_addr src;
    struct nf_conn *ct;
    struct nf_nat_range2 newrange;

    ct = nf_ct_get(skb, &ctinfo);
    WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
             ctinfo == IP_CT_RELATED_REPLY)));

    if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
                   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
        return NF_DROP;

    nat = nf_ct_nat_ext_add(ct);
    if (nat)
        nat->masq_index = out->ifindex;

    newrange.flags        = range->flags | NF_NAT_RANGE_MAP_IPS;
    newrange.min_addr.in6 = src;
    newrange.max_addr.in6 = src;
    newrange.min_proto    = range->min_proto;
    newrange.max_proto    = range->max_proto;

    return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
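
/* Illustrative sketch (not part of this file): an IPv6 target hook can
 * pass its nf_nat_range2 straight through, assuming the targinfo already
 * holds one.  masquerade_tg6() is an assumed name for this example.
 *
 *    static unsigned int
 *    masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 *    {
 *        return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par));
 *    }
 */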

/* Atomic notifier chain; we cannot call nf_ct_iterate_cleanup_net() from
 * here because it can sleep, so the cleanup is deferred to the system
 * workqueue.  For the same reason the allocation below must use
 * GFP_ATOMIC, unlike the IPv4 notifiers, which run in process context
 * and can use GFP_KERNEL.
 *
 * As we can get 'a lot' of these events (depending on the number of IPv6
 * addresses being deleted), we also need to limit the number of queued
 * work items.
 */
static int masq_inet6_event(struct notifier_block *this,
                unsigned long event, void *ptr)
{
    struct inet6_ifaddr *ifa = ptr;
    const struct net_device *dev;
    union nf_inet_addr addr;

    if (event != NETDEV_DOWN)
        return NOTIFY_DONE;

    dev = ifa->idev->dev;

    memset(&addr, 0, sizeof(addr));

    addr.in6 = ifa->addr;

    nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
                 GFP_ATOMIC);
    return NOTIFY_DONE;
}

static struct notifier_block masq_inet6_notifier = {
    .notifier_call  = masq_inet6_event,
};

static int nf_nat_masquerade_ipv6_register_notifier(void)
{
    return register_inet6addr_notifier(&masq_inet6_notifier);
}
#else
static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
#endif

int nf_nat_masquerade_inet_register_notifiers(void)
{
    int ret = 0;

    mutex_lock(&masq_mutex);
    if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
        ret = -EOVERFLOW;
        goto out_unlock;
    }

    /* check if the notifiers were already registered */
    if (++masq_refcnt > 1)
        goto out_unlock;

    /* Register for device down reports */
    ret = register_netdevice_notifier(&masq_dev_notifier);
    if (ret)
        goto err_dec;
    /* Register IP address change reports */
    ret = register_inetaddr_notifier(&masq_inet_notifier);
    if (ret)
        goto err_unregister;

    ret = nf_nat_masquerade_ipv6_register_notifier();
    if (ret)
        goto err_unreg_inet;

    mutex_unlock(&masq_mutex);
    return ret;
err_unreg_inet:
    unregister_inetaddr_notifier(&masq_inet_notifier);
err_unregister:
    unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
    masq_refcnt--;
out_unlock:
    mutex_unlock(&masq_mutex);
    return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);
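
/* Illustrative sketch (not part of this file): a hypothetical module
 * consuming this API pairs the two exported calls in its init/exit paths.
 * masq_example_init()/masq_example_exit() are made-up names.
 *
 *    static int __init masq_example_init(void)
 *    {
 *        return nf_nat_masquerade_inet_register_notifiers();
 *    }
 *
 *    static void __exit masq_example_exit(void)
 *    {
 *        nf_nat_masquerade_inet_unregister_notifiers();
 *    }
 *
 *    module_init(masq_example_init);
 *    module_exit(masq_example_exit);
 */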

void nf_nat_masquerade_inet_unregister_notifiers(void)
{
    mutex_lock(&masq_mutex);
    /* check if the notifiers still have clients */
    if (--masq_refcnt > 0)
        goto out_unlock;

    unregister_netdevice_notifier(&masq_dev_notifier);
    unregister_inetaddr_notifier(&masq_inet_notifier);
#if IS_ENABLED(CONFIG_IPV6)
    unregister_inet6addr_notifier(&masq_inet6_notifier);
#endif
out_unlock:
    mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);