0001 // SPDX-License-Identifier: GPL-2.0-only
0002 #include <linux/module.h>
0003 #include <linux/errno.h>
0004 #include <linux/socket.h>
0005 #include <linux/skbuff.h>
0006 #include <linux/ip.h>
0007 #include <linux/icmp.h>
0008 #include <linux/udp.h>
0009 #include <linux/types.h>
0010 #include <linux/kernel.h>
0011 #include <net/genetlink.h>
0012 #include <net/gro.h>
0013 #include <net/gue.h>
0014 #include <net/fou.h>
0015 #include <net/ip.h>
0016 #include <net/protocol.h>
0017 #include <net/udp.h>
0018 #include <net/udp_tunnel.h>
0019 #include <uapi/linux/fou.h>
0020 #include <uapi/linux/genetlink.h>
0021 
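     /* Per-port FOU state: one instance per listening UDP socket, linked into
      * the per-network-namespace fou_list and freed via RCU.
      */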
0022 struct fou {
0023     struct socket *sock;
0024     u8 protocol;
0025     u8 flags;
0026     __be16 port;
0027     u8 family;
0028     u16 type;
0029     struct list_head list;
0030     struct rcu_head rcu;
0031 };
0032 
0033 #define FOU_F_REMCSUM_NOPARTIAL BIT(0)
0034 
0035 struct fou_cfg {
0036     u16 type;
0037     u8 protocol;
0038     u8 flags;
0039     struct udp_port_cfg udp_config;
0040 };
0041 
0042 static unsigned int fou_net_id;
0043 
0044 struct fou_net {
0045     struct list_head fou_list;
0046     struct mutex fou_lock;
0047 };
0048 
0049 static inline struct fou *fou_from_sock(struct sock *sk)
0050 {
0051     return sk->sk_user_data;
0052 }
0053 
0054 static int fou_recv_pull(struct sk_buff *skb, struct fou *fou, size_t len)
0055 {
0056     /* Remove 'len' bytes from the packet (UDP header and
0057      * FOU header if present).
0058      */
0059     if (fou->family == AF_INET)
0060         ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);
0061     else
0062         ipv6_hdr(skb)->payload_len =
0063             htons(ntohs(ipv6_hdr(skb)->payload_len) - len);
0064 
0065     __skb_pull(skb, len);
0066     skb_postpull_rcsum(skb, udp_hdr(skb), len);
0067     skb_reset_transport_header(skb);
0068     return iptunnel_pull_offloads(skb);
0069 }
0070 
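     /* UDP encap_rcv handler for plain FOU. Per the encap_rcv convention,
      * returning 1 hands the skb back to the normal UDP stack, 0 means the
      * packet was consumed (here: dropped), and a negative value asks the IP
      * layer to resubmit the packet as that protocol number (the configured
      * inner protocol).
      */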
0071 static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
0072 {
0073     struct fou *fou = fou_from_sock(sk);
0074 
0075     if (!fou)
0076         return 1;
0077 
0078     if (fou_recv_pull(skb, fou, sizeof(struct udphdr)))
0079         goto drop;
0080 
0081     return -fou->protocol;
0082 
0083 drop:
0084     kfree_skb(skb);
0085     return 0;
0086 }
0087 
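     /* Apply GUE remote checksum offload on receive: the private option data
      * carries the checksum start and offset (relative to the end of the GUE
      * header), which skb_remcsum_process() uses to patch up the inner
      * checksum.
      */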
0088 static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
0089                   void *data, size_t hdrlen, u8 ipproto,
0090                   bool nopartial)
0091 {
0092     __be16 *pd = data;
0093     size_t start = ntohs(pd[0]);
0094     size_t offset = ntohs(pd[1]);
0095     size_t plen = sizeof(struct udphdr) + hdrlen +
0096         max_t(size_t, offset + sizeof(u16), start);
0097 
0098     if (skb->remcsum_offload)
0099         return guehdr;
0100 
0101     if (!pskb_may_pull(skb, plen))
0102         return NULL;
0103     guehdr = (struct guehdr *)&udp_hdr(skb)[1];
0104 
0105     skb_remcsum_process(skb, (void *)guehdr + hdrlen,
0106                 start, offset, nopartial);
0107 
0108     return guehdr;
0109 }
0110 
0111 static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr)
0112 {
0113     /* No support yet */
0114     kfree_skb(skb);
0115     return 0;
0116 }
0117 
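     /* UDP encap_rcv handler for GUE. Validates the GUE header (version 0
      * with optional fields, or version 1 direct IP encapsulation), handles
      * the remote checksum offload option if present, strips the UDP and GUE
      * headers, and returns the negative inner protocol number so the IP
      * layer resubmits the inner packet. Control messages and malformed
      * packets are dropped.
      */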
0118 static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
0119 {
0120     struct fou *fou = fou_from_sock(sk);
0121     size_t len, optlen, hdrlen;
0122     struct guehdr *guehdr;
0123     void *data;
0124     u16 doffset = 0;
0125     u8 proto_ctype;
0126 
0127     if (!fou)
0128         return 1;
0129 
0130     len = sizeof(struct udphdr) + sizeof(struct guehdr);
0131     if (!pskb_may_pull(skb, len))
0132         goto drop;
0133 
0134     guehdr = (struct guehdr *)&udp_hdr(skb)[1];
0135 
0136     switch (guehdr->version) {
0137     case 0: /* Full GUE header present */
0138         break;
0139 
0140     case 1: {
0141         /* Direct encapsulation of IPv4 or IPv6 */
0142 
0143         int prot;
0144 
0145         switch (((struct iphdr *)guehdr)->version) {
0146         case 4:
0147             prot = IPPROTO_IPIP;
0148             break;
0149         case 6:
0150             prot = IPPROTO_IPV6;
0151             break;
0152         default:
0153             goto drop;
0154         }
0155 
0156         if (fou_recv_pull(skb, fou, sizeof(struct udphdr)))
0157             goto drop;
0158 
0159         return -prot;
0160     }
0161 
0162     default: /* Undefined version */
0163         goto drop;
0164     }
0165 
0166     optlen = guehdr->hlen << 2;
0167     len += optlen;
0168 
0169     if (!pskb_may_pull(skb, len))
0170         goto drop;
0171 
0172     /* guehdr may change after pull */
0173     guehdr = (struct guehdr *)&udp_hdr(skb)[1];
0174 
0175     if (validate_gue_flags(guehdr, optlen))
0176         goto drop;
0177 
0178     hdrlen = sizeof(struct guehdr) + optlen;
0179 
0180     if (fou->family == AF_INET)
0181         ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);
0182     else
0183         ipv6_hdr(skb)->payload_len =
0184             htons(ntohs(ipv6_hdr(skb)->payload_len) - len);
0185 
0186     /* Pull csum through the guehdr now. This can be used if
0187      * there is a remote checksum offload.
0188      */
0189     skb_postpull_rcsum(skb, udp_hdr(skb), len);
0190 
0191     data = &guehdr[1];
0192 
0193     if (guehdr->flags & GUE_FLAG_PRIV) {
0194         __be32 flags = *(__be32 *)(data + doffset);
0195 
0196         doffset += GUE_LEN_PRIV;
0197 
0198         if (flags & GUE_PFLAG_REMCSUM) {
0199             guehdr = gue_remcsum(skb, guehdr, data + doffset,
0200                          hdrlen, guehdr->proto_ctype,
0201                          !!(fou->flags &
0202                         FOU_F_REMCSUM_NOPARTIAL));
0203             if (!guehdr)
0204                 goto drop;
0205 
0206             data = &guehdr[1];
0207 
0208             doffset += GUE_PLEN_REMCSUM;
0209         }
0210     }
0211 
0212     if (unlikely(guehdr->control))
0213         return gue_control_message(skb, guehdr);
0214 
0215     proto_ctype = guehdr->proto_ctype;
0216     __skb_pull(skb, sizeof(struct udphdr) + hdrlen);
0217     skb_reset_transport_header(skb);
0218 
0219     if (iptunnel_pull_offloads(skb))
0220         goto drop;
0221 
0222     return -proto_ctype;
0223 
0224 drop:
0225     kfree_skb(skb);
0226     return 0;
0227 }
0228 
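     /* GRO receive for plain FOU: FOU adds no header of its own beyond UDP,
      * so simply hand the aggregation decision to the inner protocol's
      * gro_receive callback.
      */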
0229 static struct sk_buff *fou_gro_receive(struct sock *sk,
0230                        struct list_head *head,
0231                        struct sk_buff *skb)
0232 {
0233     const struct net_offload __rcu **offloads;
0234     u8 proto = fou_from_sock(sk)->protocol;
0235     const struct net_offload *ops;
0236     struct sk_buff *pp = NULL;
0237 
0238     /* We can clear the encap_mark for FOU as we are essentially doing
0239      * one of two possible things.  We are either adding an L4 tunnel
0240      * header to the outer L3 tunnel header, or we are simply
0241      * treating the UDP tunnel header as though it is a UDP protocol
0242      * specific header such as VXLAN or GENEVE.
0243      */
0244     NAPI_GRO_CB(skb)->encap_mark = 0;
0245 
0246     /* Flag this frame as already having an outer encap header */
0247     NAPI_GRO_CB(skb)->is_fou = 1;
0248 
0249     offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
0250     ops = rcu_dereference(offloads[proto]);
0251     if (!ops || !ops->callbacks.gro_receive)
0252         goto out;
0253 
0254     pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
0255 
0256 out:
0257     return pp;
0258 }
0259 
0260 static int fou_gro_complete(struct sock *sk, struct sk_buff *skb,
0261                 int nhoff)
0262 {
0263     const struct net_offload __rcu **offloads;
0264     u8 proto = fou_from_sock(sk)->protocol;
0265     const struct net_offload *ops;
0266     int err = -ENOSYS;
0267 
0268     offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
0269     ops = rcu_dereference(offloads[proto]);
0270     if (WARN_ON(!ops || !ops->callbacks.gro_complete))
0271         goto out;
0272 
0273     err = ops->callbacks.gro_complete(skb, nhoff);
0274 
0275     skb_set_inner_mac_header(skb, nhoff);
0276 
0277 out:
0278     return err;
0279 }
0280 
0281 static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
0282                       struct guehdr *guehdr, void *data,
0283                       size_t hdrlen, struct gro_remcsum *grc,
0284                       bool nopartial)
0285 {
0286     __be16 *pd = data;
0287     size_t start = ntohs(pd[0]);
0288     size_t offset = ntohs(pd[1]);
0289 
0290     if (skb->remcsum_offload)
0291         return guehdr;
0292 
0293     if (!NAPI_GRO_CB(skb)->csum_valid)
0294         return NULL;
0295 
0296     guehdr = skb_gro_remcsum_process(skb, (void *)guehdr, off, hdrlen,
0297                      start, offset, grc, nopartial);
0298 
0299     skb->remcsum_offload = 1;
0300 
0301     return guehdr;
0302 }
0303 
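     /* GRO receive for GUE: parse and validate the GUE header, undo remote
      * checksum offload if requested, require matching GUE headers (base
      * word plus optional fields) for packets to share a flow, then defer to
      * the inner protocol's gro_receive callback.
      */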
0304 static struct sk_buff *gue_gro_receive(struct sock *sk,
0305                        struct list_head *head,
0306                        struct sk_buff *skb)
0307 {
0308     const struct net_offload __rcu **offloads;
0309     const struct net_offload *ops;
0310     struct sk_buff *pp = NULL;
0311     struct sk_buff *p;
0312     struct guehdr *guehdr;
0313     size_t len, optlen, hdrlen, off;
0314     void *data;
0315     u16 doffset = 0;
0316     int flush = 1;
0317     struct fou *fou = fou_from_sock(sk);
0318     struct gro_remcsum grc;
0319     u8 proto;
0320 
0321     skb_gro_remcsum_init(&grc);
0322 
0323     off = skb_gro_offset(skb);
0324     len = off + sizeof(*guehdr);
0325 
0326     guehdr = skb_gro_header_fast(skb, off);
0327     if (skb_gro_header_hard(skb, len)) {
0328         guehdr = skb_gro_header_slow(skb, len, off);
0329         if (unlikely(!guehdr))
0330             goto out;
0331     }
0332 
0333     switch (guehdr->version) {
0334     case 0:
0335         break;
0336     case 1:
0337         switch (((struct iphdr *)guehdr)->version) {
0338         case 4:
0339             proto = IPPROTO_IPIP;
0340             break;
0341         case 6:
0342             proto = IPPROTO_IPV6;
0343             break;
0344         default:
0345             goto out;
0346         }
0347         goto next_proto;
0348     default:
0349         goto out;
0350     }
0351 
0352     optlen = guehdr->hlen << 2;
0353     len += optlen;
0354 
0355     if (skb_gro_header_hard(skb, len)) {
0356         guehdr = skb_gro_header_slow(skb, len, off);
0357         if (unlikely(!guehdr))
0358             goto out;
0359     }
0360 
0361     if (unlikely(guehdr->control) || guehdr->version != 0 ||
0362         validate_gue_flags(guehdr, optlen))
0363         goto out;
0364 
0365     hdrlen = sizeof(*guehdr) + optlen;
0366 
0367     /* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr;
0368      * this is needed if there is a remote checksum offload.
0369      */
0370     skb_gro_postpull_rcsum(skb, guehdr, hdrlen);
0371 
0372     data = &guehdr[1];
0373 
0374     if (guehdr->flags & GUE_FLAG_PRIV) {
0375         __be32 flags = *(__be32 *)(data + doffset);
0376 
0377         doffset += GUE_LEN_PRIV;
0378 
0379         if (flags & GUE_PFLAG_REMCSUM) {
0380             guehdr = gue_gro_remcsum(skb, off, guehdr,
0381                          data + doffset, hdrlen, &grc,
0382                          !!(fou->flags &
0383                             FOU_F_REMCSUM_NOPARTIAL));
0384 
0385             if (!guehdr)
0386                 goto out;
0387 
0388             data = &guehdr[1];
0389 
0390             doffset += GUE_PLEN_REMCSUM;
0391         }
0392     }
0393 
0394     skb_gro_pull(skb, hdrlen);
0395 
0396     list_for_each_entry(p, head, list) {
0397         const struct guehdr *guehdr2;
0398 
0399         if (!NAPI_GRO_CB(p)->same_flow)
0400             continue;
0401 
0402         guehdr2 = (struct guehdr *)(p->data + off);
0403 
0404         /* Compare base GUE headers for equality (covers
0405          * hlen, version, proto_ctype, and flags).
0406          */
0407         if (guehdr->word != guehdr2->word) {
0408             NAPI_GRO_CB(p)->same_flow = 0;
0409             continue;
0410         }
0411 
0412         /* Check that the optional fields are the same. */
0413         if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1],
0414                        guehdr->hlen << 2)) {
0415             NAPI_GRO_CB(p)->same_flow = 0;
0416             continue;
0417         }
0418     }
0419 
0420     proto = guehdr->proto_ctype;
0421 
0422 next_proto:
0423 
0424     /* We can clear the encap_mark for GUE as we are essentially doing
0425      * one of two possible things.  We are either adding an L4 tunnel
0426      * header to the outer L3 tunnel header, or we are simply
0427      * treating the UDP tunnel header as though it is a UDP protocol
0428      * specific header such as VXLAN or GENEVE.
0429      */
0430     NAPI_GRO_CB(skb)->encap_mark = 0;
0431 
0432     /* Flag this frame as already having an outer encap header */
0433     NAPI_GRO_CB(skb)->is_fou = 1;
0434 
0435     offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
0436     ops = rcu_dereference(offloads[proto]);
0437     if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
0438         goto out;
0439 
0440     pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
0441     flush = 0;
0442 
0443 out:
0444     skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
0445 
0446     return pp;
0447 }
0448 
0449 static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
0450 {
0451     struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
0452     const struct net_offload __rcu **offloads;
0453     const struct net_offload *ops;
0454     unsigned int guehlen = 0;
0455     u8 proto;
0456     int err = -ENOENT;
0457 
0458     switch (guehdr->version) {
0459     case 0:
0460         proto = guehdr->proto_ctype;
0461         guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
0462         break;
0463     case 1:
0464         switch (((struct iphdr *)guehdr)->version) {
0465         case 4:
0466             proto = IPPROTO_IPIP;
0467             break;
0468         case 6:
0469             proto = IPPROTO_IPV6;
0470             break;
0471         default:
0472             return err;
0473         }
0474         break;
0475     default:
0476         return err;
0477     }
0478 
0479     offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
0480     ops = rcu_dereference(offloads[proto]);
0481     if (WARN_ON(!ops || !ops->callbacks.gro_complete))
0482         goto out;
0483 
0484     err = ops->callbacks.gro_complete(skb, nhoff + guehlen);
0485 
0486     skb_set_inner_mac_header(skb, nhoff + guehlen);
0487 
0488 out:
0489     return err;
0490 }
0491 
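     /* Return true if an existing FOU socket matches the given configuration
      * (family, local/peer ports, bound interface and addresses).
      */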
0492 static bool fou_cfg_cmp(struct fou *fou, struct fou_cfg *cfg)
0493 {
0494     struct sock *sk = fou->sock->sk;
0495     struct udp_port_cfg *udp_cfg = &cfg->udp_config;
0496 
0497     if (fou->family != udp_cfg->family ||
0498         fou->port != udp_cfg->local_udp_port ||
0499         sk->sk_dport != udp_cfg->peer_udp_port ||
0500         sk->sk_bound_dev_if != udp_cfg->bind_ifindex)
0501         return false;
0502 
0503     if (fou->family == AF_INET) {
0504         if (sk->sk_rcv_saddr != udp_cfg->local_ip.s_addr ||
0505             sk->sk_daddr != udp_cfg->peer_ip.s_addr)
0506             return false;
0507         else
0508             return true;
0509 #if IS_ENABLED(CONFIG_IPV6)
0510     } else {
0511         if (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, &udp_cfg->local_ip6) ||
0512             ipv6_addr_cmp(&sk->sk_v6_daddr, &udp_cfg->peer_ip6))
0513             return false;
0514         else
0515             return true;
0516 #endif
0517     }
0518 
0519     return false;
0520 }
0521 
0522 static int fou_add_to_port_list(struct net *net, struct fou *fou,
0523                 struct fou_cfg *cfg)
0524 {
0525     struct fou_net *fn = net_generic(net, fou_net_id);
0526     struct fou *fout;
0527 
0528     mutex_lock(&fn->fou_lock);
0529     list_for_each_entry(fout, &fn->fou_list, list) {
0530         if (fou_cfg_cmp(fout, cfg)) {
0531             mutex_unlock(&fn->fou_lock);
0532             return -EALREADY;
0533         }
0534     }
0535 
0536     list_add(&fou->list, &fn->fou_list);
0537     mutex_unlock(&fn->fou_lock);
0538 
0539     return 0;
0540 }
0541 
0542 static void fou_release(struct fou *fou)
0543 {
0544     struct socket *sock = fou->sock;
0545 
0546     list_del(&fou->list);
0547     udp_tunnel_sock_release(sock);
0548 
0549     kfree_rcu(fou, rcu);
0550 }
0551 
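     /* Create a FOU listener: open a kernel UDP socket on the configured
      * port, attach the FOU or GUE encap/GRO callbacks with
      * setup_udp_tunnel_sock(), and add the entry to the per-netns port list
      * (rejecting duplicate configurations).
      */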
0552 static int fou_create(struct net *net, struct fou_cfg *cfg,
0553               struct socket **sockp)
0554 {
0555     struct socket *sock = NULL;
0556     struct fou *fou = NULL;
0557     struct sock *sk;
0558     struct udp_tunnel_sock_cfg tunnel_cfg;
0559     int err;
0560 
0561     /* Open UDP socket */
0562     err = udp_sock_create(net, &cfg->udp_config, &sock);
0563     if (err < 0)
0564         goto error;
0565 
0566     /* Allocate FOU port structure */
0567     fou = kzalloc(sizeof(*fou), GFP_KERNEL);
0568     if (!fou) {
0569         err = -ENOMEM;
0570         goto error;
0571     }
0572 
0573     sk = sock->sk;
0574 
0575     fou->port = cfg->udp_config.local_udp_port;
0576     fou->family = cfg->udp_config.family;
0577     fou->flags = cfg->flags;
0578     fou->type = cfg->type;
0579     fou->sock = sock;
0580 
0581     memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
0582     tunnel_cfg.encap_type = 1;
0583     tunnel_cfg.sk_user_data = fou;
0584     tunnel_cfg.encap_destroy = NULL;
0585 
0586     /* Initialize callbacks according to the FOU encapsulation type */
0587     switch (cfg->type) {
0588     case FOU_ENCAP_DIRECT:
0589         tunnel_cfg.encap_rcv = fou_udp_recv;
0590         tunnel_cfg.gro_receive = fou_gro_receive;
0591         tunnel_cfg.gro_complete = fou_gro_complete;
0592         fou->protocol = cfg->protocol;
0593         break;
0594     case FOU_ENCAP_GUE:
0595         tunnel_cfg.encap_rcv = gue_udp_recv;
0596         tunnel_cfg.gro_receive = gue_gro_receive;
0597         tunnel_cfg.gro_complete = gue_gro_complete;
0598         break;
0599     default:
0600         err = -EINVAL;
0601         goto error;
0602     }
0603 
0604     setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
0605 
0606     sk->sk_allocation = GFP_ATOMIC;
0607 
0608     err = fou_add_to_port_list(net, fou, cfg);
0609     if (err)
0610         goto error;
0611 
0612     if (sockp)
0613         *sockp = sock;
0614 
0615     return 0;
0616 
0617 error:
0618     kfree(fou);
0619     if (sock)
0620         udp_tunnel_sock_release(sock);
0621 
0622     return err;
0623 }
0624 
0625 static int fou_destroy(struct net *net, struct fou_cfg *cfg)
0626 {
0627     struct fou_net *fn = net_generic(net, fou_net_id);
0628     int err = -EINVAL;
0629     struct fou *fou;
0630 
0631     mutex_lock(&fn->fou_lock);
0632     list_for_each_entry(fou, &fn->fou_list, list) {
0633         if (fou_cfg_cmp(fou, cfg)) {
0634             fou_release(fou);
0635             err = 0;
0636             break;
0637         }
0638     }
0639     mutex_unlock(&fn->fou_lock);
0640 
0641     return err;
0642 }
0643 
0644 static struct genl_family fou_nl_family;
0645 
0646 static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
0647     [FOU_ATTR_PORT]         = { .type = NLA_U16, },
0648     [FOU_ATTR_AF]           = { .type = NLA_U8, },
0649     [FOU_ATTR_IPPROTO]      = { .type = NLA_U8, },
0650     [FOU_ATTR_TYPE]         = { .type = NLA_U8, },
0651     [FOU_ATTR_REMCSUM_NOPARTIAL]    = { .type = NLA_FLAG, },
0652     [FOU_ATTR_LOCAL_V4]     = { .type = NLA_U32, },
0653     [FOU_ATTR_PEER_V4]      = { .type = NLA_U32, },
0654     [FOU_ATTR_LOCAL_V6]     = { .len = sizeof(struct in6_addr), },
0655     [FOU_ATTR_PEER_V6]      = { .len = sizeof(struct in6_addr), },
0656     [FOU_ATTR_PEER_PORT]        = { .type = NLA_U16, },
0657     [FOU_ATTR_IFINDEX]      = { .type = NLA_S32, },
0658 };
0659 
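     /* Translate FOU_ATTR_* netlink attributes into a struct fou_cfg. A peer
      * address requires a peer port, and binding to an interface requires a
      * local address.
      */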
0660 static int parse_nl_config(struct genl_info *info,
0661                struct fou_cfg *cfg)
0662 {
0663     bool has_local = false, has_peer = false;
0664     struct nlattr *attr;
0665     int ifindex;
0666     __be16 port;
0667 
0668     memset(cfg, 0, sizeof(*cfg));
0669 
0670     cfg->udp_config.family = AF_INET;
0671 
0672     if (info->attrs[FOU_ATTR_AF]) {
0673         u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]);
0674 
0675         switch (family) {
0676         case AF_INET:
0677             break;
0678         case AF_INET6:
0679             cfg->udp_config.ipv6_v6only = 1;
0680             break;
0681         default:
0682             return -EAFNOSUPPORT;
0683         }
0684 
0685         cfg->udp_config.family = family;
0686     }
0687 
0688     if (info->attrs[FOU_ATTR_PORT]) {
0689         port = nla_get_be16(info->attrs[FOU_ATTR_PORT]);
0690         cfg->udp_config.local_udp_port = port;
0691     }
0692 
0693     if (info->attrs[FOU_ATTR_IPPROTO])
0694         cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);
0695 
0696     if (info->attrs[FOU_ATTR_TYPE])
0697         cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);
0698 
0699     if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL])
0700         cfg->flags |= FOU_F_REMCSUM_NOPARTIAL;
0701 
0702     if (cfg->udp_config.family == AF_INET) {
0703         if (info->attrs[FOU_ATTR_LOCAL_V4]) {
0704             attr = info->attrs[FOU_ATTR_LOCAL_V4];
0705             cfg->udp_config.local_ip.s_addr = nla_get_in_addr(attr);
0706             has_local = true;
0707         }
0708 
0709         if (info->attrs[FOU_ATTR_PEER_V4]) {
0710             attr = info->attrs[FOU_ATTR_PEER_V4];
0711             cfg->udp_config.peer_ip.s_addr = nla_get_in_addr(attr);
0712             has_peer = true;
0713         }
0714 #if IS_ENABLED(CONFIG_IPV6)
0715     } else {
0716         if (info->attrs[FOU_ATTR_LOCAL_V6]) {
0717             attr = info->attrs[FOU_ATTR_LOCAL_V6];
0718             cfg->udp_config.local_ip6 = nla_get_in6_addr(attr);
0719             has_local = true;
0720         }
0721 
0722         if (info->attrs[FOU_ATTR_PEER_V6]) {
0723             attr = info->attrs[FOU_ATTR_PEER_V6];
0724             cfg->udp_config.peer_ip6 = nla_get_in6_addr(attr);
0725             has_peer = true;
0726         }
0727 #endif
0728     }
0729 
0730     if (has_peer) {
0731         if (info->attrs[FOU_ATTR_PEER_PORT]) {
0732             port = nla_get_be16(info->attrs[FOU_ATTR_PEER_PORT]);
0733             cfg->udp_config.peer_udp_port = port;
0734         } else {
0735             return -EINVAL;
0736         }
0737     }
0738 
0739     if (info->attrs[FOU_ATTR_IFINDEX]) {
0740         if (!has_local)
0741             return -EINVAL;
0742 
0743         ifindex = nla_get_s32(info->attrs[FOU_ATTR_IFINDEX]);
0744 
0745         cfg->udp_config.bind_ifindex = ifindex;
0746     }
0747 
0748     return 0;
0749 }
0750 
0751 static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
0752 {
0753     struct net *net = genl_info_net(info);
0754     struct fou_cfg cfg;
0755     int err;
0756 
0757     err = parse_nl_config(info, &cfg);
0758     if (err)
0759         return err;
0760 
0761     return fou_create(net, &cfg, NULL);
0762 }
0763 
0764 static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
0765 {
0766     struct net *net = genl_info_net(info);
0767     struct fou_cfg cfg;
0768     int err;
0769 
0770     err = parse_nl_config(info, &cfg);
0771     if (err)
0772         return err;
0773 
0774     return fou_destroy(net, &cfg);
0775 }
0776 
0777 static int fou_fill_info(struct fou *fou, struct sk_buff *msg)
0778 {
0779     struct sock *sk = fou->sock->sk;
0780 
0781     if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) ||
0782         nla_put_be16(msg, FOU_ATTR_PORT, fou->port) ||
0783         nla_put_be16(msg, FOU_ATTR_PEER_PORT, sk->sk_dport) ||
0784         nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) ||
0785         nla_put_u8(msg, FOU_ATTR_TYPE, fou->type) ||
0786         nla_put_s32(msg, FOU_ATTR_IFINDEX, sk->sk_bound_dev_if))
0787         return -1;
0788 
0789     if (fou->flags & FOU_F_REMCSUM_NOPARTIAL)
0790         if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL))
0791             return -1;
0792 
0793     if (fou->sock->sk->sk_family == AF_INET) {
0794         if (nla_put_in_addr(msg, FOU_ATTR_LOCAL_V4, sk->sk_rcv_saddr))
0795             return -1;
0796 
0797         if (nla_put_in_addr(msg, FOU_ATTR_PEER_V4, sk->sk_daddr))
0798             return -1;
0799 #if IS_ENABLED(CONFIG_IPV6)
0800     } else {
0801         if (nla_put_in6_addr(msg, FOU_ATTR_LOCAL_V6,
0802                      &sk->sk_v6_rcv_saddr))
0803             return -1;
0804 
0805         if (nla_put_in6_addr(msg, FOU_ATTR_PEER_V6, &sk->sk_v6_daddr))
0806             return -1;
0807 #endif
0808     }
0809 
0810     return 0;
0811 }
0812 
0813 static int fou_dump_info(struct fou *fou, u32 portid, u32 seq,
0814              u32 flags, struct sk_buff *skb, u8 cmd)
0815 {
0816     void *hdr;
0817 
0818     hdr = genlmsg_put(skb, portid, seq, &fou_nl_family, flags, cmd);
0819     if (!hdr)
0820         return -ENOMEM;
0821 
0822     if (fou_fill_info(fou, skb) < 0)
0823         goto nla_put_failure;
0824 
0825     genlmsg_end(skb, hdr);
0826     return 0;
0827 
0828 nla_put_failure:
0829     genlmsg_cancel(skb, hdr);
0830     return -EMSGSIZE;
0831 }
0832 
0833 static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info)
0834 {
0835     struct net *net = genl_info_net(info);
0836     struct fou_net *fn = net_generic(net, fou_net_id);
0837     struct sk_buff *msg;
0838     struct fou_cfg cfg;
0839     struct fou *fout;
0840     __be16 port;
0841     u8 family;
0842     int ret;
0843 
0844     ret = parse_nl_config(info, &cfg);
0845     if (ret)
0846         return ret;
0847     port = cfg.udp_config.local_udp_port;
0848     if (port == 0)
0849         return -EINVAL;
0850 
0851     family = cfg.udp_config.family;
0852     if (family != AF_INET && family != AF_INET6)
0853         return -EINVAL;
0854 
0855     msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
0856     if (!msg)
0857         return -ENOMEM;
0858 
0859     ret = -ESRCH;
0860     mutex_lock(&fn->fou_lock);
0861     list_for_each_entry(fout, &fn->fou_list, list) {
0862         if (fou_cfg_cmp(fout, &cfg)) {
0863             ret = fou_dump_info(fout, info->snd_portid,
0864                         info->snd_seq, 0, msg,
0865                         info->genlhdr->cmd);
0866             break;
0867         }
0868     }
0869     mutex_unlock(&fn->fou_lock);
0870     if (ret < 0)
0871         goto out_free;
0872 
0873     return genlmsg_reply(msg, info);
0874 
0875 out_free:
0876     nlmsg_free(msg);
0877     return ret;
0878 }
0879 
0880 static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
0881 {
0882     struct net *net = sock_net(skb->sk);
0883     struct fou_net *fn = net_generic(net, fou_net_id);
0884     struct fou *fout;
0885     int idx = 0, ret;
0886 
0887     mutex_lock(&fn->fou_lock);
0888     list_for_each_entry(fout, &fn->fou_list, list) {
0889         if (idx++ < cb->args[0])
0890             continue;
0891         ret = fou_dump_info(fout, NETLINK_CB(cb->skb).portid,
0892                     cb->nlh->nlmsg_seq, NLM_F_MULTI,
0893                     skb, FOU_CMD_GET);
0894         if (ret)
0895             break;
0896     }
0897     mutex_unlock(&fn->fou_lock);
0898 
0899     cb->args[0] = idx;
0900     return skb->len;
0901 }
0902 
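     /* Generic netlink operations for the FOU family. These back the
      * iproute2 "ip fou" command; as a rough illustration (exact syntax per
      * the iproute2 documentation): "ip fou add port 5555 gue" or
      * "ip fou add port 7777 ipproto 47".
      */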
0903 static const struct genl_small_ops fou_nl_ops[] = {
0904     {
0905         .cmd = FOU_CMD_ADD,
0906         .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
0907         .doit = fou_nl_cmd_add_port,
0908         .flags = GENL_ADMIN_PERM,
0909     },
0910     {
0911         .cmd = FOU_CMD_DEL,
0912         .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
0913         .doit = fou_nl_cmd_rm_port,
0914         .flags = GENL_ADMIN_PERM,
0915     },
0916     {
0917         .cmd = FOU_CMD_GET,
0918         .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
0919         .doit = fou_nl_cmd_get_port,
0920         .dumpit = fou_nl_dump,
0921     },
0922 };
0923 
0924 static struct genl_family fou_nl_family __ro_after_init = {
0925     .hdrsize    = 0,
0926     .name       = FOU_GENL_NAME,
0927     .version    = FOU_GENL_VERSION,
0928     .maxattr    = FOU_ATTR_MAX,
0929     .policy = fou_nl_policy,
0930     .netnsok    = true,
0931     .module     = THIS_MODULE,
0932     .small_ops  = fou_nl_ops,
0933     .n_small_ops    = ARRAY_SIZE(fou_nl_ops),
0934 };
0935 
0936 size_t fou_encap_hlen(struct ip_tunnel_encap *e)
0937 {
0938     return sizeof(struct udphdr);
0939 }
0940 EXPORT_SYMBOL(fou_encap_hlen);
0941 
0942 size_t gue_encap_hlen(struct ip_tunnel_encap *e)
0943 {
0944     size_t len;
0945     bool need_priv = false;
0946 
0947     len = sizeof(struct udphdr) + sizeof(struct guehdr);
0948 
0949     if (e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) {
0950         len += GUE_PLEN_REMCSUM;
0951         need_priv = true;
0952     }
0953 
0954     len += need_priv ? GUE_LEN_PRIV : 0;
0955 
0956     return len;
0957 }
0958 EXPORT_SYMBOL(gue_encap_hlen);
0959 
0960 int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
0961                u8 *protocol, __be16 *sport, int type)
0962 {
0963     int err;
0964 
0965     err = iptunnel_handle_offloads(skb, type);
0966     if (err)
0967         return err;
0968 
0969     *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
0970                         skb, 0, 0, false);
0971 
0972     return 0;
0973 }
0974 EXPORT_SYMBOL(__fou_build_header);
0975 
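     /* Build the GUE header on transmit: push sizeof(struct guehdr) plus any
      * private options, and when remote checksum offload is used record the
      * checksum start/offset (relative to the end of the GUE header) in the
      * GUE_PFLAG_REMCSUM option data.
      */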
0976 int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
0977                u8 *protocol, __be16 *sport, int type)
0978 {
0979     struct guehdr *guehdr;
0980     size_t hdrlen, optlen = 0;
0981     void *data;
0982     bool need_priv = false;
0983     int err;
0984 
0985     if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
0986         skb->ip_summed == CHECKSUM_PARTIAL) {
0987         optlen += GUE_PLEN_REMCSUM;
0988         type |= SKB_GSO_TUNNEL_REMCSUM;
0989         need_priv = true;
0990     }
0991 
0992     optlen += need_priv ? GUE_LEN_PRIV : 0;
0993 
0994     err = iptunnel_handle_offloads(skb, type);
0995     if (err)
0996         return err;
0997 
0998     /* Get source port (based on flow hash) before skb_push */
0999     *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
1000                         skb, 0, 0, false);
1001 
1002     hdrlen = sizeof(struct guehdr) + optlen;
1003 
1004     skb_push(skb, hdrlen);
1005 
1006     guehdr = (struct guehdr *)skb->data;
1007 
1008     guehdr->control = 0;
1009     guehdr->version = 0;
1010     guehdr->hlen = optlen >> 2;
1011     guehdr->flags = 0;
1012     guehdr->proto_ctype = *protocol;
1013 
1014     data = &guehdr[1];
1015 
1016     if (need_priv) {
1017         __be32 *flags = data;
1018 
1019         guehdr->flags |= GUE_FLAG_PRIV;
1020         *flags = 0;
1021         data += GUE_LEN_PRIV;
1022 
1023         if (type & SKB_GSO_TUNNEL_REMCSUM) {
1024             u16 csum_start = skb_checksum_start_offset(skb);
1025             __be16 *pd = data;
1026 
1027             if (csum_start < hdrlen)
1028                 return -EINVAL;
1029 
1030             csum_start -= hdrlen;
1031             pd[0] = htons(csum_start);
1032             pd[1] = htons(csum_start + skb->csum_offset);
1033 
1034             if (!skb_is_gso(skb)) {
1035                 skb->ip_summed = CHECKSUM_NONE;
1036                 skb->encapsulation = 0;
1037             }
1038 
1039             *flags |= GUE_PFLAG_REMCSUM;
1040             data += GUE_PLEN_REMCSUM;
1041         }
1042 
1043     }
1044 
1045     return 0;
1046 }
1047 EXPORT_SYMBOL(__gue_build_header);
1048 
1049 #ifdef CONFIG_NET_FOU_IP_TUNNELS
1050 
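     /* Append the outer UDP header for FOU/GUE transmit and set the outer IP
      * protocol to UDP; the UDP checksum is filled in or left at zero
      * according to TUNNEL_ENCAP_FLAG_CSUM.
      */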
1051 static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
1052               struct flowi4 *fl4, u8 *protocol, __be16 sport)
1053 {
1054     struct udphdr *uh;
1055 
1056     skb_push(skb, sizeof(struct udphdr));
1057     skb_reset_transport_header(skb);
1058 
1059     uh = udp_hdr(skb);
1060 
1061     uh->dest = e->dport;
1062     uh->source = sport;
1063     uh->len = htons(skb->len);
1064     udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
1065              fl4->saddr, fl4->daddr, skb->len);
1066 
1067     *protocol = IPPROTO_UDP;
1068 }
1069 
1070 static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
1071                 u8 *protocol, struct flowi4 *fl4)
1072 {
1073     int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
1074                                SKB_GSO_UDP_TUNNEL;
1075     __be16 sport;
1076     int err;
1077 
1078     err = __fou_build_header(skb, e, protocol, &sport, type);
1079     if (err)
1080         return err;
1081 
1082     fou_build_udp(skb, e, fl4, protocol, sport);
1083 
1084     return 0;
1085 }
1086 
1087 static int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
1088                 u8 *protocol, struct flowi4 *fl4)
1089 {
1090     int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
1091                                SKB_GSO_UDP_TUNNEL;
1092     __be16 sport;
1093     int err;
1094 
1095     err = __gue_build_header(skb, e, protocol, &sport, type);
1096     if (err)
1097         return err;
1098 
1099     fou_build_udp(skb, e, fl4, protocol, sport);
1100 
1101     return 0;
1102 }
1103 
1104 static int gue_err_proto_handler(int proto, struct sk_buff *skb, u32 info)
1105 {
1106     const struct net_protocol *ipprot = rcu_dereference(inet_protos[proto]);
1107 
1108     if (ipprot && ipprot->err_handler) {
1109         if (!ipprot->err_handler(skb, info))
1110             return 0;
1111     }
1112 
1113     return -ENOENT;
1114 }
1115 
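     /* ICMP error handler for GUE: parse the GUE header of the offending
      * packet quoted in the ICMP message and forward the error to the inner
      * protocol's err_handler.
      */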
1116 static int gue_err(struct sk_buff *skb, u32 info)
1117 {
1118     int transport_offset = skb_transport_offset(skb);
1119     struct guehdr *guehdr;
1120     size_t len, optlen;
1121     int ret;
1122 
1123     len = sizeof(struct udphdr) + sizeof(struct guehdr);
1124     if (!pskb_may_pull(skb, transport_offset + len))
1125         return -EINVAL;
1126 
1127     guehdr = (struct guehdr *)&udp_hdr(skb)[1];
1128 
1129     switch (guehdr->version) {
1130     case 0: /* Full GUE header present */
1131         break;
1132     case 1: {
1133         /* Direct encapsulation of IPv4 or IPv6 */
1134         skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
1135 
1136         switch (((struct iphdr *)guehdr)->version) {
1137         case 4:
1138             ret = gue_err_proto_handler(IPPROTO_IPIP, skb, info);
1139             goto out;
1140 #if IS_ENABLED(CONFIG_IPV6)
1141         case 6:
1142             ret = gue_err_proto_handler(IPPROTO_IPV6, skb, info);
1143             goto out;
1144 #endif
1145         default:
1146             ret = -EOPNOTSUPP;
1147             goto out;
1148         }
1149     }
1150     default: /* Undefined version */
1151         return -EOPNOTSUPP;
1152     }
1153 
1154     if (guehdr->control)
1155         return -ENOENT;
1156 
1157     optlen = guehdr->hlen << 2;
1158 
1159     if (!pskb_may_pull(skb, transport_offset + len + optlen))
1160         return -EINVAL;
1161 
1162     guehdr = (struct guehdr *)&udp_hdr(skb)[1];
1163     if (validate_gue_flags(guehdr, optlen))
1164         return -EINVAL;
1165 
1166     /* Handling exceptions for direct UDP encapsulation in GUE would lead to
1167      * recursion. Besides, this kind of encapsulation can't even be
1168      * configured currently. Discard this.
1169      */
1170     if (guehdr->proto_ctype == IPPROTO_UDP ||
1171         guehdr->proto_ctype == IPPROTO_UDPLITE)
1172         return -EOPNOTSUPP;
1173 
1174     skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
1175     ret = gue_err_proto_handler(guehdr->proto_ctype, skb, info);
1176 
1177 out:
1178     skb_set_transport_header(skb, transport_offset);
1179     return ret;
1180 }
1181 
1182 
1183 static const struct ip_tunnel_encap_ops fou_iptun_ops = {
1184     .encap_hlen = fou_encap_hlen,
1185     .build_header = fou_build_header,
1186     .err_handler = gue_err,
1187 };
1188 
1189 static const struct ip_tunnel_encap_ops gue_iptun_ops = {
1190     .encap_hlen = gue_encap_hlen,
1191     .build_header = gue_build_header,
1192     .err_handler = gue_err,
1193 };
1194 
1195 static int ip_tunnel_encap_add_fou_ops(void)
1196 {
1197     int ret;
1198 
1199     ret = ip_tunnel_encap_add_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
1200     if (ret < 0) {
1201         pr_err("can't add fou ops\n");
1202         return ret;
1203     }
1204 
1205     ret = ip_tunnel_encap_add_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
1206     if (ret < 0) {
1207         pr_err("can't add gue ops\n");
1208         ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
1209         return ret;
1210     }
1211 
1212     return 0;
1213 }
1214 
1215 static void ip_tunnel_encap_del_fou_ops(void)
1216 {
1217     ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
1218     ip_tunnel_encap_del_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
1219 }
1220 
1221 #else
1222 
1223 static int ip_tunnel_encap_add_fou_ops(void)
1224 {
1225     return 0;
1226 }
1227 
1228 static void ip_tunnel_encap_del_fou_ops(void)
1229 {
1230 }
1231 
1232 #endif
1233 
1234 static __net_init int fou_init_net(struct net *net)
1235 {
1236     struct fou_net *fn = net_generic(net, fou_net_id);
1237 
1238     INIT_LIST_HEAD(&fn->fou_list);
1239     mutex_init(&fn->fou_lock);
1240     return 0;
1241 }
1242 
1243 static __net_exit void fou_exit_net(struct net *net)
1244 {
1245     struct fou_net *fn = net_generic(net, fou_net_id);
1246     struct fou *fou, *next;
1247 
1248     /* Close all the FOU sockets */
1249     mutex_lock(&fn->fou_lock);
1250     list_for_each_entry_safe(fou, next, &fn->fou_list, list)
1251         fou_release(fou);
1252     mutex_unlock(&fn->fou_lock);
1253 }
1254 
1255 static struct pernet_operations fou_net_ops = {
1256     .init = fou_init_net,
1257     .exit = fou_exit_net,
1258     .id   = &fou_net_id,
1259     .size = sizeof(struct fou_net),
1260 };
1261 
1262 static int __init fou_init(void)
1263 {
1264     int ret;
1265 
1266     ret = register_pernet_device(&fou_net_ops);
1267     if (ret)
1268         goto exit;
1269 
1270     ret = genl_register_family(&fou_nl_family);
1271     if (ret < 0)
1272         goto unregister;
1273 
1274     ret = ip_tunnel_encap_add_fou_ops();
1275     if (ret == 0)
1276         return 0;
1277 
1278     genl_unregister_family(&fou_nl_family);
1279 unregister:
1280     unregister_pernet_device(&fou_net_ops);
1281 exit:
1282     return ret;
1283 }
1284 
1285 static void __exit fou_fini(void)
1286 {
1287     ip_tunnel_encap_del_fou_ops();
1288     genl_unregister_family(&fou_nl_family);
1289     unregister_pernet_device(&fou_net_ops);
1290 }
1291 
1292 module_init(fou_init);
1293 module_exit(fou_fini);
1294 MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
1295 MODULE_LICENSE("GPL");
1296 MODULE_DESCRIPTION("Foo over UDP");