Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /* (C) 1999-2001 Paul `Rusty' Russell
0003  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
0004  */
0005 
0006 #include <linux/types.h>
0007 #include <linux/export.h>
0008 #include <linux/init.h>
0009 #include <linux/udp.h>
0010 #include <linux/tcp.h>
0011 #include <linux/icmp.h>
0012 #include <linux/icmpv6.h>
0013 
0014 #include <linux/dccp.h>
0015 #include <linux/sctp.h>
0016 #include <net/sctp/checksum.h>
0017 
0018 #include <linux/netfilter.h>
0019 #include <net/netfilter/nf_nat.h>
0020 
0021 #include <linux/ipv6.h>
0022 #include <linux/netfilter_ipv6.h>
0023 #include <net/checksum.h>
0024 #include <net/ip6_checksum.h>
0025 #include <net/ip6_route.h>
0026 #include <net/xfrm.h>
0027 #include <net/ipv6.h>
0028 
0029 #include <net/netfilter/nf_conntrack_core.h>
0030 #include <net/netfilter/nf_conntrack.h>
0031 #include <linux/netfilter/nfnetlink_conntrack.h>
0032 
0033 static void nf_csum_update(struct sk_buff *skb,
0034                unsigned int iphdroff, __sum16 *check,
0035                const struct nf_conntrack_tuple *t,
0036                enum nf_nat_manip_type maniptype);
0037 
0038 static void
0039 __udp_manip_pkt(struct sk_buff *skb,
0040             unsigned int iphdroff, struct udphdr *hdr,
0041             const struct nf_conntrack_tuple *tuple,
0042             enum nf_nat_manip_type maniptype, bool do_csum)
0043 {
0044     __be16 *portptr, newport;
0045 
0046     if (maniptype == NF_NAT_MANIP_SRC) {
0047         /* Get rid of src port */
0048         newport = tuple->src.u.udp.port;
0049         portptr = &hdr->source;
0050     } else {
0051         /* Get rid of dst port */
0052         newport = tuple->dst.u.udp.port;
0053         portptr = &hdr->dest;
0054     }
0055     if (do_csum) {
0056         nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
0057         inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
0058                      false);
0059         if (!hdr->check)
0060             hdr->check = CSUM_MANGLED_0;
0061     }
0062     *portptr = newport;
0063 }
0064 
0065 static bool udp_manip_pkt(struct sk_buff *skb,
0066               unsigned int iphdroff, unsigned int hdroff,
0067               const struct nf_conntrack_tuple *tuple,
0068               enum nf_nat_manip_type maniptype)
0069 {
0070     struct udphdr *hdr;
0071 
0072     if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
0073         return false;
0074 
0075     hdr = (struct udphdr *)(skb->data + hdroff);
0076     __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, !!hdr->check);
0077 
0078     return true;
0079 }
0080 
0081 static bool udplite_manip_pkt(struct sk_buff *skb,
0082                   unsigned int iphdroff, unsigned int hdroff,
0083                   const struct nf_conntrack_tuple *tuple,
0084                   enum nf_nat_manip_type maniptype)
0085 {
0086 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
0087     struct udphdr *hdr;
0088 
0089     if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
0090         return false;
0091 
0092     hdr = (struct udphdr *)(skb->data + hdroff);
0093     __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true);
0094 #endif
0095     return true;
0096 }
0097 
0098 static bool
0099 sctp_manip_pkt(struct sk_buff *skb,
0100            unsigned int iphdroff, unsigned int hdroff,
0101            const struct nf_conntrack_tuple *tuple,
0102            enum nf_nat_manip_type maniptype)
0103 {
0104 #ifdef CONFIG_NF_CT_PROTO_SCTP
0105     struct sctphdr *hdr;
0106     int hdrsize = 8;
0107 
0108     /* This could be an inner header returned in imcp packet; in such
0109      * cases we cannot update the checksum field since it is outside
0110      * of the 8 bytes of transport layer headers we are guaranteed.
0111      */
0112     if (skb->len >= hdroff + sizeof(*hdr))
0113         hdrsize = sizeof(*hdr);
0114 
0115     if (skb_ensure_writable(skb, hdroff + hdrsize))
0116         return false;
0117 
0118     hdr = (struct sctphdr *)(skb->data + hdroff);
0119 
0120     if (maniptype == NF_NAT_MANIP_SRC) {
0121         /* Get rid of src port */
0122         hdr->source = tuple->src.u.sctp.port;
0123     } else {
0124         /* Get rid of dst port */
0125         hdr->dest = tuple->dst.u.sctp.port;
0126     }
0127 
0128     if (hdrsize < sizeof(*hdr))
0129         return true;
0130 
0131     if (skb->ip_summed != CHECKSUM_PARTIAL) {
0132         hdr->checksum = sctp_compute_cksum(skb, hdroff);
0133         skb->ip_summed = CHECKSUM_NONE;
0134     }
0135 
0136 #endif
0137     return true;
0138 }
0139 
0140 static bool
0141 tcp_manip_pkt(struct sk_buff *skb,
0142           unsigned int iphdroff, unsigned int hdroff,
0143           const struct nf_conntrack_tuple *tuple,
0144           enum nf_nat_manip_type maniptype)
0145 {
0146     struct tcphdr *hdr;
0147     __be16 *portptr, newport, oldport;
0148     int hdrsize = 8; /* TCP connection tracking guarantees this much */
0149 
0150     /* this could be a inner header returned in icmp packet; in such
0151        cases we cannot update the checksum field since it is outside of
0152        the 8 bytes of transport layer headers we are guaranteed */
0153     if (skb->len >= hdroff + sizeof(struct tcphdr))
0154         hdrsize = sizeof(struct tcphdr);
0155 
0156     if (skb_ensure_writable(skb, hdroff + hdrsize))
0157         return false;
0158 
0159     hdr = (struct tcphdr *)(skb->data + hdroff);
0160 
0161     if (maniptype == NF_NAT_MANIP_SRC) {
0162         /* Get rid of src port */
0163         newport = tuple->src.u.tcp.port;
0164         portptr = &hdr->source;
0165     } else {
0166         /* Get rid of dst port */
0167         newport = tuple->dst.u.tcp.port;
0168         portptr = &hdr->dest;
0169     }
0170 
0171     oldport = *portptr;
0172     *portptr = newport;
0173 
0174     if (hdrsize < sizeof(*hdr))
0175         return true;
0176 
0177     nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
0178     inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
0179     return true;
0180 }
0181 
0182 static bool
0183 dccp_manip_pkt(struct sk_buff *skb,
0184            unsigned int iphdroff, unsigned int hdroff,
0185            const struct nf_conntrack_tuple *tuple,
0186            enum nf_nat_manip_type maniptype)
0187 {
0188 #ifdef CONFIG_NF_CT_PROTO_DCCP
0189     struct dccp_hdr *hdr;
0190     __be16 *portptr, oldport, newport;
0191     int hdrsize = 8; /* DCCP connection tracking guarantees this much */
0192 
0193     if (skb->len >= hdroff + sizeof(struct dccp_hdr))
0194         hdrsize = sizeof(struct dccp_hdr);
0195 
0196     if (skb_ensure_writable(skb, hdroff + hdrsize))
0197         return false;
0198 
0199     hdr = (struct dccp_hdr *)(skb->data + hdroff);
0200 
0201     if (maniptype == NF_NAT_MANIP_SRC) {
0202         newport = tuple->src.u.dccp.port;
0203         portptr = &hdr->dccph_sport;
0204     } else {
0205         newport = tuple->dst.u.dccp.port;
0206         portptr = &hdr->dccph_dport;
0207     }
0208 
0209     oldport = *portptr;
0210     *portptr = newport;
0211 
0212     if (hdrsize < sizeof(*hdr))
0213         return true;
0214 
0215     nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype);
0216     inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
0217                  false);
0218 #endif
0219     return true;
0220 }
0221 
0222 static bool
0223 icmp_manip_pkt(struct sk_buff *skb,
0224            unsigned int iphdroff, unsigned int hdroff,
0225            const struct nf_conntrack_tuple *tuple,
0226            enum nf_nat_manip_type maniptype)
0227 {
0228     struct icmphdr *hdr;
0229 
0230     if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
0231         return false;
0232 
0233     hdr = (struct icmphdr *)(skb->data + hdroff);
0234     switch (hdr->type) {
0235     case ICMP_ECHO:
0236     case ICMP_ECHOREPLY:
0237     case ICMP_TIMESTAMP:
0238     case ICMP_TIMESTAMPREPLY:
0239     case ICMP_INFO_REQUEST:
0240     case ICMP_INFO_REPLY:
0241     case ICMP_ADDRESS:
0242     case ICMP_ADDRESSREPLY:
0243         break;
0244     default:
0245         return true;
0246     }
0247     inet_proto_csum_replace2(&hdr->checksum, skb,
0248                  hdr->un.echo.id, tuple->src.u.icmp.id, false);
0249     hdr->un.echo.id = tuple->src.u.icmp.id;
0250     return true;
0251 }
0252 
0253 static bool
0254 icmpv6_manip_pkt(struct sk_buff *skb,
0255          unsigned int iphdroff, unsigned int hdroff,
0256          const struct nf_conntrack_tuple *tuple,
0257          enum nf_nat_manip_type maniptype)
0258 {
0259     struct icmp6hdr *hdr;
0260 
0261     if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
0262         return false;
0263 
0264     hdr = (struct icmp6hdr *)(skb->data + hdroff);
0265     nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype);
0266     if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
0267         hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
0268         inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
0269                      hdr->icmp6_identifier,
0270                      tuple->src.u.icmp.id, false);
0271         hdr->icmp6_identifier = tuple->src.u.icmp.id;
0272     }
0273     return true;
0274 }
0275 
0276 /* manipulate a GRE packet according to maniptype */
0277 static bool
0278 gre_manip_pkt(struct sk_buff *skb,
0279           unsigned int iphdroff, unsigned int hdroff,
0280           const struct nf_conntrack_tuple *tuple,
0281           enum nf_nat_manip_type maniptype)
0282 {
0283 #if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
0284     const struct gre_base_hdr *greh;
0285     struct pptp_gre_header *pgreh;
0286 
0287     /* pgreh includes two optional 32bit fields which are not required
0288      * to be there.  That's where the magic '8' comes from */
0289     if (skb_ensure_writable(skb, hdroff + sizeof(*pgreh) - 8))
0290         return false;
0291 
0292     greh = (void *)skb->data + hdroff;
0293     pgreh = (struct pptp_gre_header *)greh;
0294 
0295     /* we only have destination manip of a packet, since 'source key'
0296      * is not present in the packet itself */
0297     if (maniptype != NF_NAT_MANIP_DST)
0298         return true;
0299 
0300     switch (greh->flags & GRE_VERSION) {
0301     case GRE_VERSION_0:
0302         /* We do not currently NAT any GREv0 packets.
0303          * Try to behave like "nf_nat_proto_unknown" */
0304         break;
0305     case GRE_VERSION_1:
0306         pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
0307         pgreh->call_id = tuple->dst.u.gre.key;
0308         break;
0309     default:
0310         pr_debug("can't nat unknown GRE version\n");
0311         return false;
0312     }
0313 #endif
0314     return true;
0315 }
0316 
0317 static bool l4proto_manip_pkt(struct sk_buff *skb,
0318                   unsigned int iphdroff, unsigned int hdroff,
0319                   const struct nf_conntrack_tuple *tuple,
0320                   enum nf_nat_manip_type maniptype)
0321 {
0322     switch (tuple->dst.protonum) {
0323     case IPPROTO_TCP:
0324         return tcp_manip_pkt(skb, iphdroff, hdroff,
0325                      tuple, maniptype);
0326     case IPPROTO_UDP:
0327         return udp_manip_pkt(skb, iphdroff, hdroff,
0328                      tuple, maniptype);
0329     case IPPROTO_UDPLITE:
0330         return udplite_manip_pkt(skb, iphdroff, hdroff,
0331                      tuple, maniptype);
0332     case IPPROTO_SCTP:
0333         return sctp_manip_pkt(skb, iphdroff, hdroff,
0334                       tuple, maniptype);
0335     case IPPROTO_ICMP:
0336         return icmp_manip_pkt(skb, iphdroff, hdroff,
0337                       tuple, maniptype);
0338     case IPPROTO_ICMPV6:
0339         return icmpv6_manip_pkt(skb, iphdroff, hdroff,
0340                     tuple, maniptype);
0341     case IPPROTO_DCCP:
0342         return dccp_manip_pkt(skb, iphdroff, hdroff,
0343                       tuple, maniptype);
0344     case IPPROTO_GRE:
0345         return gre_manip_pkt(skb, iphdroff, hdroff,
0346                      tuple, maniptype);
0347     }
0348 
0349     /* If we don't know protocol -- no error, pass it unmodified. */
0350     return true;
0351 }
0352 
0353 static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
0354                   unsigned int iphdroff,
0355                   const struct nf_conntrack_tuple *target,
0356                   enum nf_nat_manip_type maniptype)
0357 {
0358     struct iphdr *iph;
0359     unsigned int hdroff;
0360 
0361     if (skb_ensure_writable(skb, iphdroff + sizeof(*iph)))
0362         return false;
0363 
0364     iph = (void *)skb->data + iphdroff;
0365     hdroff = iphdroff + iph->ihl * 4;
0366 
0367     if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
0368         return false;
0369     iph = (void *)skb->data + iphdroff;
0370 
0371     if (maniptype == NF_NAT_MANIP_SRC) {
0372         csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
0373         iph->saddr = target->src.u3.ip;
0374     } else {
0375         csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
0376         iph->daddr = target->dst.u3.ip;
0377     }
0378     return true;
0379 }
0380 
0381 static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
0382                   unsigned int iphdroff,
0383                   const struct nf_conntrack_tuple *target,
0384                   enum nf_nat_manip_type maniptype)
0385 {
0386 #if IS_ENABLED(CONFIG_IPV6)
0387     struct ipv6hdr *ipv6h;
0388     __be16 frag_off;
0389     int hdroff;
0390     u8 nexthdr;
0391 
0392     if (skb_ensure_writable(skb, iphdroff + sizeof(*ipv6h)))
0393         return false;
0394 
0395     ipv6h = (void *)skb->data + iphdroff;
0396     nexthdr = ipv6h->nexthdr;
0397     hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
0398                   &nexthdr, &frag_off);
0399     if (hdroff < 0)
0400         goto manip_addr;
0401 
0402     if ((frag_off & htons(~0x7)) == 0 &&
0403         !l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
0404         return false;
0405 
0406     /* must reload, offset might have changed */
0407     ipv6h = (void *)skb->data + iphdroff;
0408 
0409 manip_addr:
0410     if (maniptype == NF_NAT_MANIP_SRC)
0411         ipv6h->saddr = target->src.u3.in6;
0412     else
0413         ipv6h->daddr = target->dst.u3.in6;
0414 
0415 #endif
0416     return true;
0417 }
0418 
0419 unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
0420                   enum nf_nat_manip_type mtype,
0421                   enum ip_conntrack_dir dir)
0422 {
0423     struct nf_conntrack_tuple target;
0424 
0425     /* We are aiming to look like inverse of other direction. */
0426     nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
0427 
0428     switch (target.src.l3num) {
0429     case NFPROTO_IPV6:
0430         if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype))
0431             return NF_ACCEPT;
0432         break;
0433     case NFPROTO_IPV4:
0434         if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype))
0435             return NF_ACCEPT;
0436         break;
0437     default:
0438         WARN_ON_ONCE(1);
0439         break;
0440     }
0441 
0442     return NF_DROP;
0443 }
0444 
0445 static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
0446                     unsigned int iphdroff, __sum16 *check,
0447                     const struct nf_conntrack_tuple *t,
0448                     enum nf_nat_manip_type maniptype)
0449 {
0450     struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
0451     __be32 oldip, newip;
0452 
0453     if (maniptype == NF_NAT_MANIP_SRC) {
0454         oldip = iph->saddr;
0455         newip = t->src.u3.ip;
0456     } else {
0457         oldip = iph->daddr;
0458         newip = t->dst.u3.ip;
0459     }
0460     inet_proto_csum_replace4(check, skb, oldip, newip, true);
0461 }
0462 
0463 static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
0464                     unsigned int iphdroff, __sum16 *check,
0465                     const struct nf_conntrack_tuple *t,
0466                     enum nf_nat_manip_type maniptype)
0467 {
0468 #if IS_ENABLED(CONFIG_IPV6)
0469     const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
0470     const struct in6_addr *oldip, *newip;
0471 
0472     if (maniptype == NF_NAT_MANIP_SRC) {
0473         oldip = &ipv6h->saddr;
0474         newip = &t->src.u3.in6;
0475     } else {
0476         oldip = &ipv6h->daddr;
0477         newip = &t->dst.u3.in6;
0478     }
0479     inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
0480                   newip->s6_addr32, true);
0481 #endif
0482 }
0483 
0484 static void nf_csum_update(struct sk_buff *skb,
0485                unsigned int iphdroff, __sum16 *check,
0486                const struct nf_conntrack_tuple *t,
0487                enum nf_nat_manip_type maniptype)
0488 {
0489     switch (t->src.l3num) {
0490     case NFPROTO_IPV4:
0491         nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype);
0492         return;
0493     case NFPROTO_IPV6:
0494         nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype);
0495         return;
0496     }
0497 }
0498 
0499 static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
0500                     u8 proto, void *data, __sum16 *check,
0501                     int datalen, int oldlen)
0502 {
0503     if (skb->ip_summed != CHECKSUM_PARTIAL) {
0504         const struct iphdr *iph = ip_hdr(skb);
0505 
0506         skb->ip_summed = CHECKSUM_PARTIAL;
0507         skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
0508             ip_hdrlen(skb);
0509         skb->csum_offset = (void *)check - data;
0510         *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
0511                         proto, 0);
0512     } else {
0513         inet_proto_csum_replace2(check, skb,
0514                      htons(oldlen), htons(datalen), true);
0515     }
0516 }
0517 
0518 #if IS_ENABLED(CONFIG_IPV6)
0519 static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
0520                     u8 proto, void *data, __sum16 *check,
0521                     int datalen, int oldlen)
0522 {
0523     if (skb->ip_summed != CHECKSUM_PARTIAL) {
0524         const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
0525 
0526         skb->ip_summed = CHECKSUM_PARTIAL;
0527         skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
0528             (data - (void *)skb->data);
0529         skb->csum_offset = (void *)check - data;
0530         *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
0531                       datalen, proto, 0);
0532     } else {
0533         inet_proto_csum_replace2(check, skb,
0534                      htons(oldlen), htons(datalen), true);
0535     }
0536 }
0537 #endif
0538 
0539 void nf_nat_csum_recalc(struct sk_buff *skb,
0540             u8 nfproto, u8 proto, void *data, __sum16 *check,
0541             int datalen, int oldlen)
0542 {
0543     switch (nfproto) {
0544     case NFPROTO_IPV4:
0545         nf_nat_ipv4_csum_recalc(skb, proto, data, check,
0546                     datalen, oldlen);
0547         return;
0548 #if IS_ENABLED(CONFIG_IPV6)
0549     case NFPROTO_IPV6:
0550         nf_nat_ipv6_csum_recalc(skb, proto, data, check,
0551                     datalen, oldlen);
0552         return;
0553 #endif
0554     }
0555 
0556     WARN_ON_ONCE(1);
0557 }
0558 
0559 int nf_nat_icmp_reply_translation(struct sk_buff *skb,
0560                   struct nf_conn *ct,
0561                   enum ip_conntrack_info ctinfo,
0562                   unsigned int hooknum)
0563 {
0564     struct {
0565         struct icmphdr  icmp;
0566         struct iphdr    ip;
0567     } *inside;
0568     enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
0569     enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
0570     unsigned int hdrlen = ip_hdrlen(skb);
0571     struct nf_conntrack_tuple target;
0572     unsigned long statusbit;
0573 
0574     WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
0575 
0576     if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
0577         return 0;
0578     if (nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_ICMP))
0579         return 0;
0580 
0581     inside = (void *)skb->data + hdrlen;
0582     if (inside->icmp.type == ICMP_REDIRECT) {
0583         if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
0584             return 0;
0585         if (ct->status & IPS_NAT_MASK)
0586             return 0;
0587     }
0588 
0589     if (manip == NF_NAT_MANIP_SRC)
0590         statusbit = IPS_SRC_NAT;
0591     else
0592         statusbit = IPS_DST_NAT;
0593 
0594     /* Invert if this is reply direction */
0595     if (dir == IP_CT_DIR_REPLY)
0596         statusbit ^= IPS_NAT_MASK;
0597 
0598     if (!(ct->status & statusbit))
0599         return 1;
0600 
0601     if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
0602                    &ct->tuplehash[!dir].tuple, !manip))
0603         return 0;
0604 
0605     if (skb->ip_summed != CHECKSUM_PARTIAL) {
0606         /* Reloading "inside" here since manip_pkt may reallocate */
0607         inside = (void *)skb->data + hdrlen;
0608         inside->icmp.checksum = 0;
0609         inside->icmp.checksum =
0610             csum_fold(skb_checksum(skb, hdrlen,
0611                            skb->len - hdrlen, 0));
0612     }
0613 
0614     /* Change outer to look like the reply to an incoming packet */
0615     nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
0616     target.dst.protonum = IPPROTO_ICMP;
0617     if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
0618         return 0;
0619 
0620     return 1;
0621 }
0622 EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
0623 
0624 static unsigned int
0625 nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
0626            const struct nf_hook_state *state)
0627 {
0628     struct nf_conn *ct;
0629     enum ip_conntrack_info ctinfo;
0630 
0631     ct = nf_ct_get(skb, &ctinfo);
0632     if (!ct)
0633         return NF_ACCEPT;
0634 
0635     if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
0636         if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
0637             if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
0638                                state->hook))
0639                 return NF_DROP;
0640             else
0641                 return NF_ACCEPT;
0642         }
0643     }
0644 
0645     return nf_nat_inet_fn(priv, skb, state);
0646 }
0647 
0648 static unsigned int
0649 nf_nat_ipv4_pre_routing(void *priv, struct sk_buff *skb,
0650             const struct nf_hook_state *state)
0651 {
0652     unsigned int ret;
0653     __be32 daddr = ip_hdr(skb)->daddr;
0654 
0655     ret = nf_nat_ipv4_fn(priv, skb, state);
0656     if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr)
0657         skb_dst_drop(skb);
0658 
0659     return ret;
0660 }
0661 
0662 #ifdef CONFIG_XFRM
0663 static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
0664 {
0665     struct sock *sk = skb->sk;
0666     struct dst_entry *dst;
0667     unsigned int hh_len;
0668     struct flowi fl;
0669     int err;
0670 
0671     err = xfrm_decode_session(skb, &fl, family);
0672     if (err < 0)
0673         return err;
0674 
0675     dst = skb_dst(skb);
0676     if (dst->xfrm)
0677         dst = ((struct xfrm_dst *)dst)->route;
0678     if (!dst_hold_safe(dst))
0679         return -EHOSTUNREACH;
0680 
0681     if (sk && !net_eq(net, sock_net(sk)))
0682         sk = NULL;
0683 
0684     dst = xfrm_lookup(net, dst, &fl, sk, 0);
0685     if (IS_ERR(dst))
0686         return PTR_ERR(dst);
0687 
0688     skb_dst_drop(skb);
0689     skb_dst_set(skb, dst);
0690 
0691     /* Change in oif may mean change in hh_len. */
0692     hh_len = skb_dst(skb)->dev->hard_header_len;
0693     if (skb_headroom(skb) < hh_len &&
0694         pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
0695         return -ENOMEM;
0696     return 0;
0697 }
0698 #endif
0699 
0700 static unsigned int
0701 nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
0702              const struct nf_hook_state *state)
0703 {
0704     __be32 saddr = ip_hdr(skb)->saddr;
0705     struct sock *sk = skb->sk;
0706     unsigned int ret;
0707 
0708     ret = nf_nat_ipv4_fn(priv, skb, state);
0709 
0710     if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr &&
0711         !inet_sk_transparent(sk))
0712         skb_orphan(skb); /* TCP edemux obtained wrong socket */
0713 
0714     return ret;
0715 }
0716 
/* IPv4 hook wrapper used for POST_ROUTING.
 *
 * Runs the common hook body.  With CONFIG_XFRM, an accepted packet that is
 * not yet marked IPSKB_XFRM_TRANSFORMED gets a fresh xfrm lookup when this
 * direction's source address differs from the other direction's
 * destination address, or (for non-ICMP) when the corresponding protocol
 * ids differ.  A failing lookup turns the verdict into NF_DROP_ERR().
 */
static unsigned int
nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
                const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
    const struct nf_conntrack_tuple *cur, *rev;
    const struct nf_conn *ct;
    enum ip_conntrack_info ctinfo;
    enum ip_conntrack_dir dir;
    bool addr_differs, id_differs;
    int err;
#endif
    unsigned int ret;

    ret = nf_nat_ipv4_fn(priv, skb, state);
#ifdef CONFIG_XFRM
    if (ret != NF_ACCEPT)
        return ret;

    if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
        return ret;

    ct = nf_ct_get(skb, &ctinfo);
    if (!ct)
        return ret;

    dir = CTINFO2DIR(ctinfo);
    cur = &ct->tuplehash[dir].tuple;
    rev = &ct->tuplehash[!dir].tuple;

    addr_differs = cur->src.u3.ip != rev->dst.u3.ip;
    id_differs = cur->dst.protonum != IPPROTO_ICMP &&
                 cur->src.u.all != rev->dst.u.all;

    if (addr_differs || id_differs) {
        err = nf_xfrm_me_harder(state->net, skb, AF_INET);
        if (err < 0)
            ret = NF_DROP_ERR(err);
    }
#endif
    return ret;
}
0753 
0754 static unsigned int
0755 nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
0756              const struct nf_hook_state *state)
0757 {
0758     const struct nf_conn *ct;
0759     enum ip_conntrack_info ctinfo;
0760     unsigned int ret;
0761     int err;
0762 
0763     ret = nf_nat_ipv4_fn(priv, skb, state);
0764     if (ret != NF_ACCEPT)
0765         return ret;
0766 
0767     ct = nf_ct_get(skb, &ctinfo);
0768     if (ct) {
0769         enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
0770 
0771         if (ct->tuplehash[dir].tuple.dst.u3.ip !=
0772             ct->tuplehash[!dir].tuple.src.u3.ip) {
0773             err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
0774             if (err < 0)
0775                 ret = NF_DROP_ERR(err);
0776         }
0777 #ifdef CONFIG_XFRM
0778         else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
0779              ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
0780              ct->tuplehash[dir].tuple.dst.u.all !=
0781              ct->tuplehash[!dir].tuple.src.u.all) {
0782             err = nf_xfrm_me_harder(state->net, skb, AF_INET);
0783             if (err < 0)
0784                 ret = NF_DROP_ERR(err);
0785         }
0786 #endif
0787     }
0788     return ret;
0789 }
0790 
0791 static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
0792     /* Before packet filtering, change destination */
0793     {
0794         .hook       = nf_nat_ipv4_pre_routing,
0795         .pf     = NFPROTO_IPV4,
0796         .hooknum    = NF_INET_PRE_ROUTING,
0797         .priority   = NF_IP_PRI_NAT_DST,
0798     },
0799     /* After packet filtering, change source */
0800     {
0801         .hook       = nf_nat_ipv4_out,
0802         .pf     = NFPROTO_IPV4,
0803         .hooknum    = NF_INET_POST_ROUTING,
0804         .priority   = NF_IP_PRI_NAT_SRC,
0805     },
0806     /* Before packet filtering, change destination */
0807     {
0808         .hook       = nf_nat_ipv4_local_fn,
0809         .pf     = NFPROTO_IPV4,
0810         .hooknum    = NF_INET_LOCAL_OUT,
0811         .priority   = NF_IP_PRI_NAT_DST,
0812     },
0813     /* After packet filtering, change source */
0814     {
0815         .hook       = nf_nat_ipv4_local_in,
0816         .pf     = NFPROTO_IPV4,
0817         .hooknum    = NF_INET_LOCAL_IN,
0818         .priority   = NF_IP_PRI_NAT_SRC,
0819     },
0820 };
0821 
0822 int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
0823 {
0824     return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops,
0825                   ARRAY_SIZE(nf_nat_ipv4_ops));
0826 }
0827 EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn);
0828 
0829 void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
0830 {
0831     nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
0832 }
0833 EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn);
0834 
0835 #if IS_ENABLED(CONFIG_IPV6)
0836 int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
0837                     struct nf_conn *ct,
0838                     enum ip_conntrack_info ctinfo,
0839                     unsigned int hooknum,
0840                     unsigned int hdrlen)
0841 {
0842     struct {
0843         struct icmp6hdr icmp6;
0844         struct ipv6hdr  ip6;
0845     } *inside;
0846     enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
0847     enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
0848     struct nf_conntrack_tuple target;
0849     unsigned long statusbit;
0850 
0851     WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
0852 
0853     if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
0854         return 0;
0855     if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
0856         return 0;
0857 
0858     inside = (void *)skb->data + hdrlen;
0859     if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
0860         if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
0861             return 0;
0862         if (ct->status & IPS_NAT_MASK)
0863             return 0;
0864     }
0865 
0866     if (manip == NF_NAT_MANIP_SRC)
0867         statusbit = IPS_SRC_NAT;
0868     else
0869         statusbit = IPS_DST_NAT;
0870 
0871     /* Invert if this is reply direction */
0872     if (dir == IP_CT_DIR_REPLY)
0873         statusbit ^= IPS_NAT_MASK;
0874 
0875     if (!(ct->status & statusbit))
0876         return 1;
0877 
0878     if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
0879                    &ct->tuplehash[!dir].tuple, !manip))
0880         return 0;
0881 
0882     if (skb->ip_summed != CHECKSUM_PARTIAL) {
0883         struct ipv6hdr *ipv6h = ipv6_hdr(skb);
0884 
0885         inside = (void *)skb->data + hdrlen;
0886         inside->icmp6.icmp6_cksum = 0;
0887         inside->icmp6.icmp6_cksum =
0888             csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
0889                     skb->len - hdrlen, IPPROTO_ICMPV6,
0890                     skb_checksum(skb, hdrlen,
0891                              skb->len - hdrlen, 0));
0892     }
0893 
0894     nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
0895     target.dst.protonum = IPPROTO_ICMPV6;
0896     if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
0897         return 0;
0898 
0899     return 1;
0900 }
0901 EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
0902 
0903 static unsigned int
0904 nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
0905            const struct nf_hook_state *state)
0906 {
0907     struct nf_conn *ct;
0908     enum ip_conntrack_info ctinfo;
0909     __be16 frag_off;
0910     int hdrlen;
0911     u8 nexthdr;
0912 
0913     ct = nf_ct_get(skb, &ctinfo);
0914     /* Can't track?  It's not due to stress, or conntrack would
0915      * have dropped it.  Hence it's the user's responsibilty to
0916      * packet filter it out, or implement conntrack/NAT for that
0917      * protocol. 8) --RR
0918      */
0919     if (!ct)
0920         return NF_ACCEPT;
0921 
0922     if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
0923         nexthdr = ipv6_hdr(skb)->nexthdr;
0924         hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
0925                       &nexthdr, &frag_off);
0926 
0927         if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
0928             if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
0929                                  state->hook,
0930                                  hdrlen))
0931                 return NF_DROP;
0932             else
0933                 return NF_ACCEPT;
0934         }
0935     }
0936 
0937     return nf_nat_inet_fn(priv, skb, state);
0938 }
0939 
0940 static unsigned int
0941 nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
0942            const struct nf_hook_state *state)
0943 {
0944     unsigned int ret;
0945     struct in6_addr daddr = ipv6_hdr(skb)->daddr;
0946 
0947     ret = nf_nat_ipv6_fn(priv, skb, state);
0948     if (ret != NF_DROP && ret != NF_STOLEN &&
0949         ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
0950         skb_dst_drop(skb);
0951 
0952     return ret;
0953 }
0954 
/*
 * Hook wrapper around nf_nat_ipv6_fn().
 *
 * Without CONFIG_XFRM this simply returns the verdict of nf_nat_ipv6_fn().
 * With CONFIG_XFRM, an accepted packet that is not flagged
 * IP6SKB_XFRM_TRANSFORMED and has a conntrack entry is passed to
 * nf_xfrm_me_harder() when the source side of the tuple for the packet's
 * direction does not match the destination side of the opposite tuple
 * (address check failing, or for non-ICMPv6 the u.all value differing).
 * A negative result from nf_xfrm_me_harder() turns the verdict into
 * NF_DROP_ERR(err).
 */
static unsigned int
nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
                const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
    const struct nf_conntrack_tuple *cur, *rev;
    enum ip_conntrack_info ctinfo;
    const struct nf_conn *ct;
    enum ip_conntrack_dir dir;
    int err;
#endif
    unsigned int verdict;

    verdict = nf_nat_ipv6_fn(priv, skb, state);
#ifdef CONFIG_XFRM
    if (verdict != NF_ACCEPT ||
        (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED))
        return verdict;

    ct = nf_ct_get(skb, &ctinfo);
    if (!ct)
        return verdict;

    dir = CTINFO2DIR(ctinfo);
    cur = &ct->tuplehash[dir].tuple;
    rev = &ct->tuplehash[!dir].tuple;

    /* Source side still mirrors the opposite tuple: nothing to redo. */
    if (nf_inet_addr_cmp(&cur->src.u3, &rev->dst.u3) &&
        (cur->dst.protonum == IPPROTO_ICMPV6 ||
         cur->src.u.all == rev->dst.u.all))
        return verdict;

    err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
    if (err < 0)
        verdict = NF_DROP_ERR(err);
#endif

    return verdict;
}
0991 
0992 static unsigned int
0993 nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
0994              const struct nf_hook_state *state)
0995 {
0996     const struct nf_conn *ct;
0997     enum ip_conntrack_info ctinfo;
0998     unsigned int ret;
0999     int err;
1000 
1001     ret = nf_nat_ipv6_fn(priv, skb, state);
1002     if (ret != NF_ACCEPT)
1003         return ret;
1004 
1005     ct = nf_ct_get(skb, &ctinfo);
1006     if (ct) {
1007         enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
1008 
1009         if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
1010                       &ct->tuplehash[!dir].tuple.src.u3)) {
1011             err = nf_ip6_route_me_harder(state->net, state->sk, skb);
1012             if (err < 0)
1013                 ret = NF_DROP_ERR(err);
1014         }
1015 #ifdef CONFIG_XFRM
1016         else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
1017              ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
1018              ct->tuplehash[dir].tuple.dst.u.all !=
1019              ct->tuplehash[!dir].tuple.src.u.all) {
1020             err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
1021             if (err < 0)
1022                 ret = NF_DROP_ERR(err);
1023         }
1024 #endif
1025     }
1026 
1027     return ret;
1028 }
1029 
1030 static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
1031     /* Before packet filtering, change destination */
1032     {
1033         .hook       = nf_nat_ipv6_in,
1034         .pf     = NFPROTO_IPV6,
1035         .hooknum    = NF_INET_PRE_ROUTING,
1036         .priority   = NF_IP6_PRI_NAT_DST,
1037     },
1038     /* After packet filtering, change source */
1039     {
1040         .hook       = nf_nat_ipv6_out,
1041         .pf     = NFPROTO_IPV6,
1042         .hooknum    = NF_INET_POST_ROUTING,
1043         .priority   = NF_IP6_PRI_NAT_SRC,
1044     },
1045     /* Before packet filtering, change destination */
1046     {
1047         .hook       = nf_nat_ipv6_local_fn,
1048         .pf     = NFPROTO_IPV6,
1049         .hooknum    = NF_INET_LOCAL_OUT,
1050         .priority   = NF_IP6_PRI_NAT_DST,
1051     },
1052     /* After packet filtering, change source */
1053     {
1054         .hook       = nf_nat_ipv6_fn,
1055         .pf     = NFPROTO_IPV6,
1056         .hooknum    = NF_INET_LOCAL_IN,
1057         .priority   = NF_IP6_PRI_NAT_SRC,
1058     },
1059 };
1060 
1061 int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
1062 {
1063     return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops,
1064                   ARRAY_SIZE(nf_nat_ipv6_ops));
1065 }
1066 EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn);
1067 
1068 void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
1069 {
1070     nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
1071 }
1072 EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn);
1073 #endif /* CONFIG_IPV6 */
1074 
1075 #if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT)
1076 int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops)
1077 {
1078     int ret;
1079 
1080     if (WARN_ON_ONCE(ops->pf != NFPROTO_INET))
1081         return -EINVAL;
1082 
1083     ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops,
1084                  ARRAY_SIZE(nf_nat_ipv6_ops));
1085     if (ret)
1086         return ret;
1087 
1088     ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops,
1089                  ARRAY_SIZE(nf_nat_ipv4_ops));
1090     if (ret)
1091         nf_nat_unregister_fn(net, NFPROTO_IPV6, ops,
1092                     ARRAY_SIZE(nf_nat_ipv6_ops));
1093     return ret;
1094 }
1095 EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn);
1096 
1097 void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
1098 {
1099     nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
1100     nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
1101 }
1102 EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn);
1103 #endif /* NFT INET NAT */