/* (Navigation chrome from the LXR source browser removed from this listing.) */

0001 // SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
0002 /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
0003 
0004 #include "vmlinux.h"
0005 
0006 #include <bpf/bpf_helpers.h>
0007 #include <bpf/bpf_endian.h>
0008 #include <asm/errno.h>
0009 
0010 #define TC_ACT_OK 0
0011 #define TC_ACT_SHOT 2
0012 
0013 #define NSEC_PER_SEC 1000000000L
0014 
0015 #define ETH_ALEN 6
0016 #define ETH_P_IP 0x0800
0017 #define ETH_P_IPV6 0x86DD
0018 
0019 #define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3])
0020 
0021 #define IP_DF 0x4000
0022 #define IP_MF 0x2000
0023 #define IP_OFFSET 0x1fff
0024 
0025 #define NEXTHDR_TCP 6
0026 
0027 #define TCPOPT_NOP 1
0028 #define TCPOPT_EOL 0
0029 #define TCPOPT_MSS 2
0030 #define TCPOPT_WINDOW 3
0031 #define TCPOPT_SACK_PERM 4
0032 #define TCPOPT_TIMESTAMP 8
0033 
0034 #define TCPOLEN_MSS 4
0035 #define TCPOLEN_WINDOW 3
0036 #define TCPOLEN_SACK_PERM 2
0037 #define TCPOLEN_TIMESTAMP 10
0038 
0039 #define TCP_TS_HZ 1000
0040 #define TS_OPT_WSCALE_MASK 0xf
0041 #define TS_OPT_SACK (1 << 4)
0042 #define TS_OPT_ECN (1 << 5)
0043 #define TSBITS 6
0044 #define TSMASK (((__u32)1 << TSBITS) - 1)
0045 #define TCP_MAX_WSCALE 14U
0046 
0047 #define IPV4_MAXLEN 60
0048 #define TCP_MAXLEN 60
0049 
0050 #define DEFAULT_MSS4 1460
0051 #define DEFAULT_MSS6 1440
0052 #define DEFAULT_WSCALE 7
0053 #define DEFAULT_TTL 64
0054 #define MAX_ALLOWED_PORTS 8
0055 
0056 #define swap(a, b) \
0057     do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
0058 
0059 #define __get_unaligned_t(type, ptr) ({                     \
0060     const struct { type x; } __attribute__((__packed__)) *__pptr = (typeof(__pptr))(ptr); \
0061     __pptr->x;                              \
0062 })
0063 
0064 #define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))
0065 
/* Configuration and statistics shared with the userspace loader:
 * key 0: packed TCP/IP options for generated SYNACKs (MSS, window scale,
 *        TTL) — see values_get_tcpipopts(); 0 means "use defaults".
 * key 1: count of SYNACKs generated, bumped by values_inc_synacks().
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, __u32);
	__type(value, __u64);
	__uint(max_entries, 2);
} values SEC(".maps");
0072 
/* Destination ports (host byte order) for which SYN cookies are issued.
 * The list is terminated by the first zero entry — see check_port_allowed().
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, __u32);
	__type(value, __u16);
	__uint(max_entries, MAX_ALLOWED_PORTS);
} allowed_ports SEC(".maps");
0079 
0080 /* Some symbols defined in net/netfilter/nf_conntrack_bpf.c are unavailable in
0081  * vmlinux.h if CONFIG_NF_CONNTRACK=m, so they are redefined locally.
0082  */
0083 
/* Local mirror of the kernel's struct bpf_ct_opts; preserve_access_index
 * makes CO-RE relocate field accesses against the kernel's layout.
 */
struct bpf_ct_opts___local {
	s32 netns_id;	/* BPF_F_CURRENT_NETNS or a specific netns id */
	s32 error;	/* set by the lookup kfunc on failure */
	u8 l4proto;	/* IPPROTO_* of the tuple being looked up */
	u8 dir;
	u8 reserved[2];
} __attribute__((preserve_access_index));
0091 
0092 #define BPF_F_CURRENT_NETNS (-1)
0093 
0094 extern struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx,
0095                      struct bpf_sock_tuple *bpf_tuple,
0096                      __u32 len_tuple,
0097                      struct bpf_ct_opts___local *opts,
0098                      __u32 len_opts) __ksym;
0099 
0100 extern struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *skb_ctx,
0101                      struct bpf_sock_tuple *bpf_tuple,
0102                      u32 len_tuple,
0103                      struct bpf_ct_opts___local *opts,
0104                      u32 len_opts) __ksym;
0105 
0106 extern void bpf_ct_release(struct nf_conn *ct) __ksym;
0107 
0108 static __always_inline void swap_eth_addr(__u8 *a, __u8 *b)
0109 {
0110     __u8 tmp[ETH_ALEN];
0111 
0112     __builtin_memcpy(tmp, a, ETH_ALEN);
0113     __builtin_memcpy(a, b, ETH_ALEN);
0114     __builtin_memcpy(b, tmp, ETH_ALEN);
0115 }
0116 
0117 static __always_inline __u16 csum_fold(__u32 csum)
0118 {
0119     csum = (csum & 0xffff) + (csum >> 16);
0120     csum = (csum & 0xffff) + (csum >> 16);
0121     return (__u16)~csum;
0122 }
0123 
/* Finish a TCP/UDP checksum over the IPv4 pseudo-header: fold the source and
 * destination addresses, protocol and length into the payload sum already
 * accumulated in @csum, and return the final 16-bit checksum.
 */
static __always_inline __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
					       __u32 len, __u8 proto,
					       __u32 csum)
{
	__u64 s = csum;

	s += (__u32)saddr;
	s += (__u32)daddr;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	s += proto + len;
#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* On little-endian, pre-shifting by 8 puts proto and len at the byte
	 * positions they occupy in the big-endian pseudo-header.
	 */
	s += (proto + len) << 8;
#else
#error Unknown endian
#endif
	/* Fold the 64-bit accumulator into 32 bits; done twice because the
	 * first fold may itself carry.
	 */
	s = (s & 0xffffffff) + (s >> 32);
	s = (s & 0xffffffff) + (s >> 32);

	return csum_fold((__u32)s);
}
0144 
/* Finish a TCP checksum over the IPv6 pseudo-header: fold both 128-bit
 * addresses, the upper-layer length and the next-header value into the
 * payload sum accumulated in @csum.
 */
static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr,
					     const struct in6_addr *daddr,
					     __u32 len, __u8 proto, __u32 csum)
{
	__u64 sum = csum;
	int i;

#pragma unroll
	for (i = 0; i < 4; i++)
		sum += (__u32)saddr->in6_u.u6_addr32[i];

#pragma unroll
	for (i = 0; i < 4; i++)
		sum += (__u32)daddr->in6_u.u6_addr32[i];

	/* Don't combine additions to avoid 32-bit overflow. */
	sum += bpf_htonl(len);
	sum += bpf_htonl(proto);

	/* Fold the 64-bit accumulator into 32 bits (twice, for the carry). */
	sum = (sum & 0xffffffff) + (sum >> 32);
	sum = (sum & 0xffffffff) + (sum >> 32);

	return csum_fold((__u32)sum);
}
0169 
/* Nanosecond timestamp source used for cookie timestamps. */
static __always_inline __u64 tcp_clock_ns(void)
{
	return bpf_ktime_get_ns();
}
0174 
/* Convert nanoseconds to TCP timestamp ticks (TCP_TS_HZ ticks per second). */
static __always_inline __u32 tcp_ns_to_ts(__u64 ns)
{
	return ns / (NSEC_PER_SEC / TCP_TS_HZ);
}
0179 
/* Current time in TCP timestamp units (mirrors the kernel helper of the
 * same name).
 */
static __always_inline __u32 tcp_time_stamp_raw(void)
{
	return tcp_ns_to_ts(tcp_clock_ns());
}
0184 
/* Cursor and results for TCP option parsing in tscookie_tcpopt_parse(). */
struct tcpopt_context {
	__u8 *ptr;		/* current position within the options area */
	__u8 *end;		/* end of the TCP header (start + doff * 4) */
	void *data_end;		/* end of packet data, for verifier bounds */
	__be32 *tsecr;		/* out: client's TSval, echoed as our TSecr */
	__u8 wscale;		/* parsed window scale, capped at TCP_MAX_WSCALE */
	bool option_timestamp;	/* timestamp option was present */
	bool option_sack;	/* SACK-permitted option was present */
};
0194 
/* Parse one TCP option at ctx->ptr and advance the cursor past it.
 *
 * Returns 1 to stop parsing (end of options, truncated packet, or a
 * malformed option) and 0 to continue; results accumulate in @ctx. Every
 * read is guarded by both an end-of-options (ctx->end) check and an
 * end-of-packet (ctx->data_end) check so the BPF verifier can prove the
 * accesses in bounds.
 */
static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
{
	__u8 opcode, opsize;

	if (ctx->ptr >= ctx->end)
		return 1;
	if (ctx->ptr >= ctx->data_end)
		return 1;

	opcode = ctx->ptr[0];

	/* EOL ends the option list; NOP is a one-byte filler. */
	if (opcode == TCPOPT_EOL)
		return 1;
	if (opcode == TCPOPT_NOP) {
		++ctx->ptr;
		return 0;
	}

	/* All other options carry a length byte; re-check bounds for it. */
	if (ctx->ptr + 1 >= ctx->end)
		return 1;
	if (ctx->ptr + 1 >= ctx->data_end)
		return 1;
	opsize = ctx->ptr[1];
	if (opsize < 2)
		return 1;

	if (ctx->ptr + opsize > ctx->end)
		return 1;

	switch (opcode) {
	case TCPOPT_WINDOW:
		if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end)
			ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE;
		break;
	case TCPOPT_TIMESTAMP:
		if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) {
			ctx->option_timestamp = true;
			/* Client's tsval becomes our tsecr. */
			*ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2));
		}
		break;
	case TCPOPT_SACK_PERM:
		if (opsize == TCPOLEN_SACK_PERM)
			ctx->option_sack = true;
		break;
	}

	ctx->ptr += opsize;

	return 0;
}
0246 
0247 static int tscookie_tcpopt_parse_batch(__u32 index, void *context)
0248 {
0249     int i;
0250 
0251     for (i = 0; i < 7; i++)
0252         if (tscookie_tcpopt_parse(context))
0253             return 1;
0254     return 0;
0255 }
0256 
/* Parse the client's TCP options and build the timestamp cookie for the
 * SYNACK: the high bits carry the current TCP timestamp, the low TSBITS
 * encode the client's window scale plus the SACK and ECN flags. The
 * client's TSval is stored to *tsecr by the parser. Returns false when the
 * client sent no timestamp option (then no tsopt can be used).
 */
static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
					  __u16 tcp_len, __be32 *tsval,
					  __be32 *tsecr, void *data_end)
{
	struct tcpopt_context loop_ctx = {
		.ptr = (__u8 *)(tcp_header + 1),
		.end = (__u8 *)tcp_header + tcp_len,
		.data_end = data_end,
		.tsecr = tsecr,
		.wscale = TS_OPT_WSCALE_MASK,	/* default when no wscale option is seen */
		.option_timestamp = false,
		.option_sack = false,
	};
	u32 cookie;

	/* 6 batches x 7 options >= the maximal 40 bytes of TCP options. */
	bpf_loop(6, tscookie_tcpopt_parse_batch, &loop_ctx, 0);

	if (!loop_ctx.option_timestamp)
		return false;

	cookie = tcp_time_stamp_raw() & ~TSMASK;
	cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK;
	if (loop_ctx.option_sack)
		cookie |= TS_OPT_SACK;
	if (tcp_header->ece && tcp_header->cwr)
		cookie |= TS_OPT_ECN;
	*tsval = bpf_htonl(cookie);

	return true;
}
0287 
/* Read the MSS, window scale and TTL to use in generated SYNACKs from key 0
 * of the values map, falling back to defaults when the entry is missing or
 * zero.
 *
 * Packed layout of the 64-bit value (written by the userspace loader):
 *   bits  0-15: IPv4 MSS
 *   bits 16-19: window scale
 *   bits 24-31: TTL / hop limit
 *   bits 32-47: IPv6 MSS
 */
static __always_inline void values_get_tcpipopts(__u16 *mss, __u8 *wscale,
						 __u8 *ttl, bool ipv6)
{
	__u32 key = 0;
	__u64 *value;

	value = bpf_map_lookup_elem(&values, &key);
	if (value && *value != 0) {
		if (ipv6)
			*mss = (*value >> 32) & 0xffff;
		else
			*mss = *value & 0xffff;
		*wscale = (*value >> 16) & 0xf;
		*ttl = (*value >> 24) & 0xff;
		return;
	}

	*mss = ipv6 ? DEFAULT_MSS6 : DEFAULT_MSS4;
	*wscale = DEFAULT_WSCALE;
	*ttl = DEFAULT_TTL;
}
0309 
0310 static __always_inline void values_inc_synacks(void)
0311 {
0312     __u32 key = 1;
0313     __u32 *value;
0314 
0315     value = bpf_map_lookup_elem(&values, &key);
0316     if (value)
0317         __sync_fetch_and_add(value, 1);
0318 }
0319 
0320 static __always_inline bool check_port_allowed(__u16 port)
0321 {
0322     __u32 i;
0323 
0324     for (i = 0; i < MAX_ALLOWED_PORTS; i++) {
0325         __u32 key = i;
0326         __u16 *value;
0327 
0328         value = bpf_map_lookup_elem(&allowed_ports, &key);
0329 
0330         if (!value)
0331             break;
0332         /* 0 is a terminator value. Check it first to avoid matching on
0333          * a forbidden port == 0 and returning true.
0334          */
0335         if (*value == 0)
0336             break;
0337 
0338         if (*value == port)
0339             return true;
0340     }
0341 
0342     return false;
0343 }
0344 
/* Parsed packet headers, filled by tcp_dissect(). For a TCP packet exactly
 * one of ipv4 and ipv6 is non-NULL; the other is set to NULL.
 */
struct header_pointers {
	struct ethhdr *eth;
	struct iphdr *ipv4;
	struct ipv6hdr *ipv6;
	struct tcphdr *tcp;
	__u16 tcp_len;	/* TCP header length in bytes (doff * 4) */
};
0352 
/* Locate the Ethernet, IP and TCP headers and fill @hdr.
 *
 * Returns XDP_TX for a well-formed TCP segment that should be processed
 * further, XDP_PASS for traffic this program does not handle (non-TCP,
 * IPv6 extension headers, VLANs), and XDP_DROP for malformed packets.
 * Each pointer is bounds-checked against @data_end for the verifier.
 */
static __always_inline int tcp_dissect(void *data, void *data_end,
				       struct header_pointers *hdr)
{
	hdr->eth = data;
	if (hdr->eth + 1 > data_end)
		return XDP_DROP;

	switch (bpf_ntohs(hdr->eth->h_proto)) {
	case ETH_P_IP:
		hdr->ipv6 = NULL;

		hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth);
		if (hdr->ipv4 + 1 > data_end)
			return XDP_DROP;
		if (hdr->ipv4->ihl * 4 < sizeof(*hdr->ipv4))
			return XDP_DROP;
		if (hdr->ipv4->version != 4)
			return XDP_DROP;

		if (hdr->ipv4->protocol != IPPROTO_TCP)
			return XDP_PASS;

		/* TCP header starts after the (possibly option-bearing) IPv4
		 * header.
		 */
		hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4;
		break;
	case ETH_P_IPV6:
		hdr->ipv4 = NULL;

		hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth);
		if (hdr->ipv6 + 1 > data_end)
			return XDP_DROP;
		if (hdr->ipv6->version != 6)
			return XDP_DROP;

		/* XXX: Extension headers are not supported and could circumvent
		 * XDP SYN flood protection.
		 */
		if (hdr->ipv6->nexthdr != NEXTHDR_TCP)
			return XDP_PASS;

		hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6);
		break;
	default:
		/* XXX: VLANs will circumvent XDP SYN flood protection. */
		return XDP_PASS;
	}

	if (hdr->tcp + 1 > data_end)
		return XDP_DROP;
	hdr->tcp_len = hdr->tcp->doff * 4;
	if (hdr->tcp_len < sizeof(*hdr->tcp))
		return XDP_DROP;

	return XDP_TX;
}
0407 
0408 static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bool xdp)
0409 {
0410     struct bpf_ct_opts___local ct_lookup_opts = {
0411         .netns_id = BPF_F_CURRENT_NETNS,
0412         .l4proto = IPPROTO_TCP,
0413     };
0414     struct bpf_sock_tuple tup = {};
0415     struct nf_conn *ct;
0416     __u32 tup_size;
0417 
0418     if (hdr->ipv4) {
0419         /* TCP doesn't normally use fragments, and XDP can't reassemble
0420          * them.
0421          */
0422         if ((hdr->ipv4->frag_off & bpf_htons(IP_DF | IP_MF | IP_OFFSET)) != bpf_htons(IP_DF))
0423             return XDP_DROP;
0424 
0425         tup.ipv4.saddr = hdr->ipv4->saddr;
0426         tup.ipv4.daddr = hdr->ipv4->daddr;
0427         tup.ipv4.sport = hdr->tcp->source;
0428         tup.ipv4.dport = hdr->tcp->dest;
0429         tup_size = sizeof(tup.ipv4);
0430     } else if (hdr->ipv6) {
0431         __builtin_memcpy(tup.ipv6.saddr, &hdr->ipv6->saddr, sizeof(tup.ipv6.saddr));
0432         __builtin_memcpy(tup.ipv6.daddr, &hdr->ipv6->daddr, sizeof(tup.ipv6.daddr));
0433         tup.ipv6.sport = hdr->tcp->source;
0434         tup.ipv6.dport = hdr->tcp->dest;
0435         tup_size = sizeof(tup.ipv6);
0436     } else {
0437         /* The verifier can't track that either ipv4 or ipv6 is not
0438          * NULL.
0439          */
0440         return XDP_ABORTED;
0441     }
0442     if (xdp)
0443         ct = bpf_xdp_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts));
0444     else
0445         ct = bpf_skb_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts));
0446     if (ct) {
0447         unsigned long status = ct->status;
0448 
0449         bpf_ct_release(ct);
0450         if (status & IPS_CONFIRMED_BIT)
0451             return XDP_PASS;
0452     } else if (ct_lookup_opts.error != -ENOENT) {
0453         return XDP_ABORTED;
0454     }
0455 
0456     /* error == -ENOENT || !(status & IPS_CONFIRMED_BIT) */
0457     return XDP_TX;
0458 }
0459 
0460 static __always_inline __u8 tcp_mkoptions(__be32 *buf, __be32 *tsopt, __u16 mss,
0461                       __u8 wscale)
0462 {
0463     __be32 *start = buf;
0464 
0465     *buf++ = bpf_htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
0466 
0467     if (!tsopt)
0468         return buf - start;
0469 
0470     if (tsopt[0] & bpf_htonl(1 << 4))
0471         *buf++ = bpf_htonl((TCPOPT_SACK_PERM << 24) |
0472                    (TCPOLEN_SACK_PERM << 16) |
0473                    (TCPOPT_TIMESTAMP << 8) |
0474                    TCPOLEN_TIMESTAMP);
0475     else
0476         *buf++ = bpf_htonl((TCPOPT_NOP << 24) |
0477                    (TCPOPT_NOP << 16) |
0478                    (TCPOPT_TIMESTAMP << 8) |
0479                    TCPOLEN_TIMESTAMP);
0480     *buf++ = tsopt[0];
0481     *buf++ = tsopt[1];
0482 
0483     if ((tsopt[0] & bpf_htonl(0xf)) != bpf_htonl(0xf))
0484         *buf++ = bpf_htonl((TCPOPT_NOP << 24) |
0485                    (TCPOPT_WINDOW << 16) |
0486                    (TCPOLEN_WINDOW << 8) |
0487                    wscale);
0488 
0489     return buf - start;
0490 }
0491 
0492 static __always_inline void tcp_gen_synack(struct tcphdr *tcp_header,
0493                        __u32 cookie, __be32 *tsopt,
0494                        __u16 mss, __u8 wscale)
0495 {
0496     void *tcp_options;
0497 
0498     tcp_flag_word(tcp_header) = TCP_FLAG_SYN | TCP_FLAG_ACK;
0499     if (tsopt && (tsopt[0] & bpf_htonl(1 << 5)))
0500         tcp_flag_word(tcp_header) |= TCP_FLAG_ECE;
0501     tcp_header->doff = 5; /* doff is part of tcp_flag_word. */
0502     swap(tcp_header->source, tcp_header->dest);
0503     tcp_header->ack_seq = bpf_htonl(bpf_ntohl(tcp_header->seq) + 1);
0504     tcp_header->seq = bpf_htonl(cookie);
0505     tcp_header->window = 0;
0506     tcp_header->urg_ptr = 0;
0507     tcp_header->check = 0; /* Calculate checksum later. */
0508 
0509     tcp_options = (void *)(tcp_header + 1);
0510     tcp_header->doff += tcp_mkoptions(tcp_options, tsopt, mss, wscale);
0511 }
0512 
/* Turn the received IPv4 SYN packet in place into a SYNACK: swap the MAC
 * and IP addresses, reset TOS/ID and set the configured TTL, rewrite the
 * TCP header via tcp_gen_synack(), and update tot_len for the new size.
 * Checksums are zeroed here and recomputed by the caller.
 */
static __always_inline void tcpv4_gen_synack(struct header_pointers *hdr,
					     __u32 cookie, __be32 *tsopt)
{
	__u8 wscale;
	__u16 mss;
	__u8 ttl;

	values_get_tcpipopts(&mss, &wscale, &ttl, false);

	swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest);

	swap(hdr->ipv4->saddr, hdr->ipv4->daddr);
	hdr->ipv4->check = 0; /* Calculate checksum later. */
	hdr->ipv4->tos = 0;
	hdr->ipv4->id = 0;
	hdr->ipv4->ttl = ttl;

	tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale);

	/* tcp_gen_synack() changed doff; recompute the header length. */
	hdr->tcp_len = hdr->tcp->doff * 4;
	hdr->ipv4->tot_len = bpf_htons(sizeof(*hdr->ipv4) + hdr->tcp_len);
}
0535 
/* Turn the received IPv6 SYN packet in place into a SYNACK: swap the MAC
 * and IP addresses, reset the first header word, set the configured hop
 * limit, rewrite the TCP header via tcp_gen_synack(), and update
 * payload_len. The TCP checksum is recomputed by the caller.
 */
static __always_inline void tcpv6_gen_synack(struct header_pointers *hdr,
					     __u32 cookie, __be32 *tsopt)
{
	__u8 wscale;
	__u16 mss;
	__u8 ttl;

	values_get_tcpipopts(&mss, &wscale, &ttl, true);

	swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest);

	swap(hdr->ipv6->saddr, hdr->ipv6->daddr);
	/* First 32-bit word: version 6, traffic class 0, flow label 0. */
	*(__be32 *)hdr->ipv6 = bpf_htonl(0x60000000);
	hdr->ipv6->hop_limit = ttl;

	tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale);

	/* tcp_gen_synack() changed doff; recompute the header length. */
	hdr->tcp_len = hdr->tcp->doff * 4;
	hdr->ipv6->payload_len = bpf_htons(hdr->tcp_len);
}
0556 
/* Respond to a SYN with a SYN-cookie SYNACK, rewriting the packet in place.
 *
 * Steps: validate flags and the allowed-ports list, verify the IP/TCP
 * checksums, generate the cookie with bpf_tcp_raw_gen_syncookie_ipv{4,6}(),
 * optionally build a timestamp cookie, rewrite the headers into a SYNACK,
 * recompute the checksums, and trim the packet to its new length.
 * Returns XDP_TX to bounce the SYNACK back, XDP_DROP for invalid packets,
 * XDP_ABORTED on helper failures.
 */
static __always_inline int syncookie_handle_syn(struct header_pointers *hdr,
						void *ctx,
						void *data, void *data_end,
						bool xdp)
{
	__u32 old_pkt_size, new_pkt_size;
	/* Unlike clang 10, clang 11 and 12 generate code that doesn't pass the
	 * BPF verifier if tsopt is not volatile. Volatile forces it to store
	 * the pointer value and use it directly, otherwise tcp_mkoptions is
	 * (mis)compiled like this:
	 *   if (!tsopt)
	 *       return buf - start;
	 *   reg = stored_return_value_of_tscookie_init;
	 *   if (reg)
	 *       tsopt = tsopt_buf;
	 *   else
	 *       tsopt = NULL;
	 *   ...
	 *   *buf++ = tsopt[1];
	 * It creates a dead branch where tsopt is assigned NULL, but the
	 * verifier can't prove it's dead and blocks the program.
	 */
	__be32 * volatile tsopt = NULL;
	__be32 tsopt_buf[2] = {};
	__u16 ip_len;
	__u32 cookie;
	__s64 value;

	/* Checksum is not yet verified, but both checksum failure and TCP
	 * header checks return XDP_DROP, so the order doesn't matter.
	 */
	if (hdr->tcp->fin || hdr->tcp->rst)
		return XDP_DROP;

	/* Issue SYN cookies on allowed ports, drop SYN packets on blocked
	 * ports.
	 */
	if (!check_port_allowed(bpf_ntohs(hdr->tcp->dest)))
		return XDP_DROP;

	if (hdr->ipv4) {
		/* Check the IPv4 and TCP checksums before creating a SYNACK. */
		value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, hdr->ipv4->ihl * 4, 0);
		if (value < 0)
			return XDP_ABORTED;
		if (csum_fold(value) != 0)
			return XDP_DROP; /* Bad IPv4 checksum. */

		value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
		if (value < 0)
			return XDP_ABORTED;
		if (csum_tcpudp_magic(hdr->ipv4->saddr, hdr->ipv4->daddr,
				      hdr->tcp_len, IPPROTO_TCP, value) != 0)
			return XDP_DROP; /* Bad TCP checksum. */

		ip_len = sizeof(*hdr->ipv4);

		value = bpf_tcp_raw_gen_syncookie_ipv4(hdr->ipv4, hdr->tcp,
						       hdr->tcp_len);
	} else if (hdr->ipv6) {
		/* Check the TCP checksum before creating a SYNACK. */
		value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
		if (value < 0)
			return XDP_ABORTED;
		if (csum_ipv6_magic(&hdr->ipv6->saddr, &hdr->ipv6->daddr,
				    hdr->tcp_len, IPPROTO_TCP, value) != 0)
			return XDP_DROP; /* Bad TCP checksum. */

		ip_len = sizeof(*hdr->ipv6);

		value = bpf_tcp_raw_gen_syncookie_ipv6(hdr->ipv6, hdr->tcp,
						       hdr->tcp_len);
	} else {
		return XDP_ABORTED;
	}

	if (value < 0)
		return XDP_ABORTED;
	cookie = (__u32)value;

	/* If the client sent a timestamp option, encode the connection
	 * parameters into a timestamp cookie as well.
	 */
	if (tscookie_init((void *)hdr->tcp, hdr->tcp_len,
			  &tsopt_buf[0], &tsopt_buf[1], data_end))
		tsopt = tsopt_buf;

	/* Check that there is enough space for a SYNACK. It also covers
	 * the check that the destination of the __builtin_memmove below
	 * doesn't overflow.
	 */
	if (data + sizeof(*hdr->eth) + ip_len + TCP_MAXLEN > data_end)
		return XDP_ABORTED;

	if (hdr->ipv4) {
		if (hdr->ipv4->ihl * 4 > sizeof(*hdr->ipv4)) {
			/* Strip IPv4 options: move the TCP header up so it
			 * directly follows the fixed 20-byte IPv4 header.
			 */
			struct tcphdr *new_tcp_header;

			new_tcp_header = data + sizeof(*hdr->eth) + sizeof(*hdr->ipv4);
			__builtin_memmove(new_tcp_header, hdr->tcp, sizeof(*hdr->tcp));
			hdr->tcp = new_tcp_header;

			hdr->ipv4->ihl = sizeof(*hdr->ipv4) / 4;
		}

		tcpv4_gen_synack(hdr, cookie, tsopt);
	} else if (hdr->ipv6) {
		tcpv6_gen_synack(hdr, cookie, tsopt);
	} else {
		return XDP_ABORTED;
	}

	/* Recalculate checksums. */
	hdr->tcp->check = 0;
	value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
	if (value < 0)
		return XDP_ABORTED;
	if (hdr->ipv4) {
		hdr->tcp->check = csum_tcpudp_magic(hdr->ipv4->saddr,
						    hdr->ipv4->daddr,
						    hdr->tcp_len,
						    IPPROTO_TCP,
						    value);

		hdr->ipv4->check = 0;
		value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, sizeof(*hdr->ipv4), 0);
		if (value < 0)
			return XDP_ABORTED;
		hdr->ipv4->check = csum_fold(value);
	} else if (hdr->ipv6) {
		hdr->tcp->check = csum_ipv6_magic(&hdr->ipv6->saddr,
						  &hdr->ipv6->daddr,
						  hdr->tcp_len,
						  IPPROTO_TCP,
						  value);
	} else {
		return XDP_ABORTED;
	}

	/* Set the new packet size. */
	old_pkt_size = data_end - data;
	new_pkt_size = sizeof(*hdr->eth) + ip_len + hdr->tcp->doff * 4;
	if (xdp) {
		if (bpf_xdp_adjust_tail(ctx, new_pkt_size - old_pkt_size))
			return XDP_ABORTED;
	} else {
		if (bpf_skb_change_tail(ctx, new_pkt_size, 0))
			return XDP_ABORTED;
	}

	values_inc_synacks();

	return XDP_TX;
}
0708 
0709 static __always_inline int syncookie_handle_ack(struct header_pointers *hdr)
0710 {
0711     int err;
0712 
0713     if (hdr->tcp->rst)
0714         return XDP_DROP;
0715 
0716     if (hdr->ipv4)
0717         err = bpf_tcp_raw_check_syncookie_ipv4(hdr->ipv4, hdr->tcp);
0718     else if (hdr->ipv6)
0719         err = bpf_tcp_raw_check_syncookie_ipv6(hdr->ipv6, hdr->tcp);
0720     else
0721         return XDP_ABORTED;
0722     if (err)
0723         return XDP_DROP;
0724 
0725     return XDP_PASS;
0726 }
0727 
/* First processing stage, shared by the XDP and TC entry points: dissect
 * the packet, let established conntrack flows pass, drop anything that is
 * not exactly a SYN or a bare ACK, then grow the packet so that the second
 * stage can access up to TCP_MAXLEN bytes of TCP header. Returns XDP_TX
 * when processing should continue in syncookie_part2(); any tail
 * adjustment here invalidates packet pointers, so the caller must re-derive
 * data/data_end afterwards.
 */
static __always_inline int syncookie_part1(void *ctx, void *data, void *data_end,
					   struct header_pointers *hdr, bool xdp)
{
	int ret;

	ret = tcp_dissect(data, data_end, hdr);
	if (ret != XDP_TX)
		return ret;

	ret = tcp_lookup(ctx, hdr, xdp);
	if (ret != XDP_TX)
		return ret;

	/* Packet is TCP and doesn't belong to an established connection. */

	/* Require exactly one of SYN and ACK: a SYN (new connection) or a
	 * bare ACK (cookie echo); everything else is dropped.
	 */
	if ((hdr->tcp->syn ^ hdr->tcp->ack) != 1)
		return XDP_DROP;

	/* Grow the TCP header to TCP_MAXLEN to be able to pass any hdr->tcp_len
	 * to bpf_tcp_raw_gen_syncookie_ipv{4,6} and pass the verifier.
	 */
	if (xdp) {
		if (bpf_xdp_adjust_tail(ctx, TCP_MAXLEN - hdr->tcp_len))
			return XDP_ABORTED;
	} else {
		/* Without volatile the verifier throws this error:
		 * R9 32-bit pointer arithmetic prohibited
		 */
		volatile u64 old_len = data_end - data;

		if (bpf_skb_change_tail(ctx, old_len + TCP_MAXLEN - hdr->tcp_len, 0))
			return XDP_ABORTED;
	}

	return XDP_TX;
}
0764 
/* Second processing stage, entered after syncookie_part1() resized the
 * packet (which invalidated all packet pointers): re-derive the header
 * pointers with bounds checks the verifier accepts, then dispatch SYNs to
 * syncookie_handle_syn() and ACKs to syncookie_handle_ack().
 */
static __always_inline int syncookie_part2(void *ctx, void *data, void *data_end,
					   struct header_pointers *hdr, bool xdp)
{
	if (hdr->ipv4) {
		hdr->eth = data;
		hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth);
		/* IPV4_MAXLEN is needed when calculating checksum.
		 * At least sizeof(struct iphdr) is needed here to access ihl.
		 */
		if ((void *)hdr->ipv4 + IPV4_MAXLEN > data_end)
			return XDP_ABORTED;
		hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4;
	} else if (hdr->ipv6) {
		hdr->eth = data;
		hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth);
		hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6);
	} else {
		return XDP_ABORTED;
	}

	if ((void *)hdr->tcp + TCP_MAXLEN > data_end)
		return XDP_ABORTED;

	/* We run out of registers, tcp_len gets spilled to the stack, and the
	 * verifier forgets its min and max values checked above in tcp_dissect.
	 */
	hdr->tcp_len = hdr->tcp->doff * 4;
	if (hdr->tcp_len < sizeof(*hdr->tcp))
		return XDP_ABORTED;

	return hdr->tcp->syn ? syncookie_handle_syn(hdr, ctx, data, data_end, xdp) :
			       syncookie_handle_ack(hdr);
}
0798 
0799 SEC("xdp")
0800 int syncookie_xdp(struct xdp_md *ctx)
0801 {
0802     void *data_end = (void *)(long)ctx->data_end;
0803     void *data = (void *)(long)ctx->data;
0804     struct header_pointers hdr;
0805     int ret;
0806 
0807     ret = syncookie_part1(ctx, data, data_end, &hdr, true);
0808     if (ret != XDP_TX)
0809         return ret;
0810 
0811     data_end = (void *)(long)ctx->data_end;
0812     data = (void *)(long)ctx->data;
0813 
0814     return syncookie_part2(ctx, data, data_end, &hdr, true);
0815 }
0816 
0817 SEC("tc")
0818 int syncookie_tc(struct __sk_buff *skb)
0819 {
0820     void *data_end = (void *)(long)skb->data_end;
0821     void *data = (void *)(long)skb->data;
0822     struct header_pointers hdr;
0823     int ret;
0824 
0825     ret = syncookie_part1(skb, data, data_end, &hdr, false);
0826     if (ret != XDP_TX)
0827         return ret == XDP_PASS ? TC_ACT_OK : TC_ACT_SHOT;
0828 
0829     data_end = (void *)(long)skb->data_end;
0830     data = (void *)(long)skb->data;
0831 
0832     ret = syncookie_part2(skb, data, data_end, &hdr, false);
0833     switch (ret) {
0834     case XDP_PASS:
0835         return TC_ACT_OK;
0836     case XDP_TX:
0837         return bpf_redirect(skb->ifindex, 0);
0838     default:
0839         return TC_ACT_SHOT;
0840     }
0841 }
0842 
0843 char _license[] SEC("license") = "GPL";