0001
0002
0003 #include <stddef.h>
0004 #include <stdbool.h>
0005 #include <string.h>
0006 #include <linux/pkt_cls.h>
0007 #include <linux/bpf.h>
0008 #include <linux/in.h>
0009 #include <linux/if_ether.h>
0010 #include <linux/ip.h>
0011 #include <linux/ipv6.h>
0012 #include <linux/icmp.h>
0013 #include <linux/icmpv6.h>
0014 #include <linux/tcp.h>
0015 #include <linux/udp.h>
0016 #include <bpf/bpf_helpers.h>
0017 #include <bpf/bpf_endian.h>
0018
/*
 * rol32 - rotate a 32-bit word to the left.
 * @word:  value to rotate
 * @shift: rotation count; only the low five bits are used, so a count of
 *         32 behaves like 0, 33 like 1, and so on
 *
 * Returns @word rotated left by (@shift % 32) bit positions.
 *
 * Both shift amounts are masked into 0..31.  Shifting a 32-bit value by
 * 32 or more is undefined behaviour in C, so an unmasked left shift would
 * only be valid for counts below 32.  Results for counts 0..31 are the
 * same as with an unmasked left shift.
 */
static __always_inline __u32 rol32(__u32 word, unsigned int shift)
{
	shift &= 31;
	return (word << shift) | (word >> ((-shift) & 31));
}
0023
0024
0025
0026
/*
 * __jhash_mix - mix three 32-bit state words in place.
 * @a, @b, @c: modifiable lvalues; each is read and written several times
 *             by the expansion, so the arguments must be plain variables
 *             without side effects.
 *
 * Runs six subtract / xor-with-rotate / add rounds using rol32().
 *
 * The body is wrapped in do { } while (0) so that the expansion is exactly
 * one statement.  A bare { } block followed by the caller's ';' becomes two
 * statements and breaks when the macro is the body of an unbraced
 * if ... else.  Callers terminate the invocation with ';'.
 */
#define __jhash_mix(a, b, c)			\
do {						\
	a -= c;  a ^= rol32(c, 4);  c += b;	\
	b -= a;  b ^= rol32(a, 6);  a += c;	\
	c -= b;  c ^= rol32(b, 8);  b += a;	\
	a -= c;  a ^= rol32(c, 16); c += b;	\
	b -= a;  b ^= rol32(a, 19); a += c;	\
	c -= b;  c ^= rol32(b, 4);  b += a;	\
} while (0)
0036
/*
 * __jhash_final - final scrambling of three 32-bit state words, in place.
 * @a, @b, @c: modifiable lvalues; each is read and written several times
 *             by the expansion, so the arguments must be plain variables
 *             without side effects.
 *
 * Runs seven xor / subtract-rotate rounds using rol32().  The hash
 * functions in this file return @c after this step.
 *
 * The body is wrapped in do { } while (0) so that the expansion is exactly
 * one statement and remains correct as the body of an unbraced
 * if ... else.  Callers terminate the invocation with ';'.
 */
#define __jhash_final(a, b, c)			\
do {						\
	c ^= b; c -= rol32(b, 14);		\
	a ^= c; a -= rol32(c, 11);		\
	b ^= a; b -= rol32(a, 25);		\
	c ^= b; c -= rol32(b, 16);		\
	a ^= c; a -= rol32(c, 4);		\
	b ^= a; b -= rol32(a, 14);		\
	c ^= b; c -= rol32(b, 24);		\
} while (0)
0047
/* Constant folded into the starting state of the hash helpers below. */
#define JHASH_INITVAL 0xdeadbeef

/* Short alias used by the hash helpers for their state words. */
typedef unsigned int u32;
0051
0052 static __noinline
0053 u32 jhash(const void *key, u32 length, u32 initval)
0054 {
0055 u32 a, b, c;
0056 const unsigned char *k = key;
0057
0058 a = b = c = JHASH_INITVAL + length + initval;
0059
0060 while (length > 12) {
0061 a += *(u32 *)(k);
0062 b += *(u32 *)(k + 4);
0063 c += *(u32 *)(k + 8);
0064 __jhash_mix(a, b, c);
0065 length -= 12;
0066 k += 12;
0067 }
0068 switch (length) {
0069 case 12: c += (u32)k[11]<<24;
0070 case 11: c += (u32)k[10]<<16;
0071 case 10: c += (u32)k[9]<<8;
0072 case 9: c += k[8];
0073 case 8: b += (u32)k[7]<<24;
0074 case 7: b += (u32)k[6]<<16;
0075 case 6: b += (u32)k[5]<<8;
0076 case 5: b += k[4];
0077 case 4: a += (u32)k[3]<<24;
0078 case 3: a += (u32)k[2]<<16;
0079 case 2: a += (u32)k[1]<<8;
0080 case 1: a += k[0];
0081 __jhash_final(a, b, c);
0082 case 0:
0083 break;
0084 }
0085
0086 return c;
0087 }
0088
0089 __noinline
0090 u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
0091 {
0092 a += initval;
0093 b += initval;
0094 c += initval;
0095 __jhash_final(a, b, c);
0096 return c;
0097 }
0098
0099 __noinline
0100 u32 jhash_2words(u32 a, u32 b, u32 initval)
0101 {
0102 return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
0103 }
0104
/*
 * Identifies one flow.  Used as the key type of the lru_cache map and as
 * the input to get_packet_hash().
 */
struct flow_key {
	/* Source address; the IPv4 form overlays the first IPv6 word. */
	union {
		__be32 src;
		__be32 srcv6[4];
	};
	/* Destination address, same overlay as the source. */
	union {
		__be32 dst;
		__be32 dstv6[4];
	};
	/*
	 * L4 ports, viewed as one 32-bit word (for hashing) or two 16-bit
	 * halves.  parse_tcp()/parse_udp() store the header's source port in
	 * port16[0] and its destination port in port16[1]; the two are
	 * swapped when the header was found inside an ICMP payload.
	 */
	union {
		__u32 ports;
		__u16 port16[2];
	};
	/* Protocol number copied from the IP header. */
	__u8 proto;
};
0120
/* Per-packet parsing state built up while a packet is processed. */
struct packet_description {
	struct flow_key flow;
	/*
	 * Bit 0: set by parse_icmp()/parse_icmpv6() when the flow was taken
	 *        from a header embedded in an ICMP message.
	 * Bit 1: set by parse_tcp() when the TCP SYN flag is present.
	 */
	__u8 flags;
};
0125
/*
 * Value type of the ctl_array map.  The three members share storage; the
 * code in this file only reads the mac view (as the outer destination MAC
 * when encapsulating).
 */
struct ctl_value {
	union {
		__u64 value;
		__u32 ifindex;
		__u8 mac[6];
	};
};
0133
/*
 * Key type of the vip_map map.  process_packet() fills it from the
 * packet's destination address, destination port and protocol; a second
 * lookup with port 0 is tried when the exact one misses.
 */
struct vip_definition {
	/* Destination address; IPv4 overlays the first IPv6 word. */
	union {
		__be32 vip;
		__be32 vipv6[4];
	};
	/* Copied from flow.port16[1]; 0 for the fallback lookup. */
	__u16 port;
	/* Never written in this file; stays zero from the initializer. */
	__u16 family;
	/* Copied from flow.proto. */
	__u8 proto;
};
0143
/* Value type of the vip_map map. */
struct vip_meta {
	/*
	 * Bits tested in this file:
	 *   bit 0 - clear flow.port16[0] before the connection lookup
	 *   bit 1 - skip the connection-table lookup and the LRU insert
	 *   bit 2 - always hash with the 16-byte source address
	 *   bit 3 - copy port16[1] over port16[0] and zero the source
	 *           address before hashing
	 *   bit 4 - keep flow.port16[1] when the VIP matched via port 0
	 */
	__u32 flags;
	/* Index of this VIP's stats slot; also selects its ch_rings pair. */
	__u32 vip_num;
};
0148
/* Value type of the lru_cache map: the backend chosen for a flow. */
struct real_pos_lru {
	/* Index into the reals map. */
	__u32 pos;
	/*
	 * bpf_ktime_get_ns() timestamp.  Only written and checked for UDP
	 * flows; left zero for other protocols on insert.
	 */
	__u64 atime;
};
0153
/* Value type of the reals map: one backend ("real") address. */
struct real_definition {
	/* Backend address; IPv4 overlays the first IPv6 word. */
	union {
		__be32 dst;
		__be32 dstv6[4];
	};
	/* Bit 0 set: encapsulate with encap_v6(); clear: encap_v4(). */
	__u8 flags;
};
0161
/*
 * Value type of the stats map: a pair of 64-bit counters.  What each one
 * counts depends on the slot; note that v2 is declared before v1.
 */
struct lb_stats {
	__u64 v2;
	__u64 v1;
};
0166
/*
 * vip_map: hash map from a VIP description to its metadata, up to 512
 * entries.  Only looked up by this program.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 512);
	__type(key, struct vip_definition);
	__type(value, struct vip_meta);
} vip_map SEC(".maps");
0173
/*
 * lru_cache: LRU hash map from a flow to the backend chosen for it, up to
 * 300 entries.  Read by connection_table_lookup() and written by
 * get_packet_dst().
 * NOTE(review): map_flags 1U << 1 is presumably BPF_F_NO_COMMON_LRU from
 * linux/bpf.h - confirm against the header in use.
 */
struct {
	__uint(type, BPF_MAP_TYPE_LRU_HASH);
	__uint(max_entries, 300);
	__uint(map_flags, 1U << 1);
	__type(key, struct flow_key);
	__type(value, struct real_pos_lru);
} lru_cache SEC(".maps");
0181
/*
 * ch_rings: array of 12 * 655 entries.  get_packet_dst() indexes it with
 * 2 * vip_num + hash % 2 and uses the stored value as an index into the
 * reals map.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 12 * 655);
	__type(key, __u32);
	__type(value, __u32);
} ch_rings SEC(".maps");
0188
/* reals: array of up to 40 backend definitions, indexed by position. */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 40);
	__type(key, __u32);
	__type(value, struct real_definition);
} reals SEC(".maps");
0195
/*
 * stats: per-CPU array of 515 counter pairs.  This file uses slot vip_num
 * for per-VIP counters plus three fixed slots: 512 (process_packet totals),
 * 513 (TCP connection-table misses) and 514 (get_packet_dst rate window).
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 515);
	__type(key, __u32);
	__type(value, struct lb_stats);
} stats SEC(".maps");
0202
/*
 * ctl_array: array of 16 control values.  process_packet() reads slot 0
 * and passes it to the encap helpers, which use its mac field.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 16);
	__type(key, __u32);
	__type(value, struct ctl_value);
} ctl_array SEC(".maps");
0209
/* Ethernet header as laid out at the start of the packet (14 bytes). */
struct eth_hdr {
	unsigned char eth_dest[6];
	unsigned char eth_source[6];
	/* EtherType, kept in the byte order found on the wire. */
	unsigned short eth_proto;
};
0215
0216 static __noinline __u64 calc_offset(bool is_ipv6, bool is_icmp)
0217 {
0218 __u64 off = sizeof(struct eth_hdr);
0219 if (is_ipv6) {
0220 off += sizeof(struct ipv6hdr);
0221 if (is_icmp)
0222 off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr);
0223 } else {
0224 off += sizeof(struct iphdr);
0225 if (is_icmp)
0226 off += sizeof(struct icmphdr) + sizeof(struct iphdr);
0227 }
0228 return off;
0229 }
0230
0231 static __attribute__ ((noinline))
0232 bool parse_udp(void *data, void *data_end,
0233 bool is_ipv6, struct packet_description *pckt)
0234 {
0235
0236 bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
0237 __u64 off = calc_offset(is_ipv6, is_icmp);
0238 struct udphdr *udp;
0239 udp = data + off;
0240
0241 if (udp + 1 > data_end)
0242 return false;
0243 if (!is_icmp) {
0244 pckt->flow.port16[0] = udp->source;
0245 pckt->flow.port16[1] = udp->dest;
0246 } else {
0247 pckt->flow.port16[0] = udp->dest;
0248 pckt->flow.port16[1] = udp->source;
0249 }
0250 return true;
0251 }
0252
0253 static __attribute__ ((noinline))
0254 bool parse_tcp(void *data, void *data_end,
0255 bool is_ipv6, struct packet_description *pckt)
0256 {
0257
0258 bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
0259 __u64 off = calc_offset(is_ipv6, is_icmp);
0260 struct tcphdr *tcp;
0261
0262 tcp = data + off;
0263 if (tcp + 1 > data_end)
0264 return false;
0265 if (tcp->syn)
0266 pckt->flags |= (1 << 1);
0267 if (!is_icmp) {
0268 pckt->flow.port16[0] = tcp->source;
0269 pckt->flow.port16[1] = tcp->dest;
0270 } else {
0271 pckt->flow.port16[0] = tcp->dest;
0272 pckt->flow.port16[1] = tcp->source;
0273 }
0274 return true;
0275 }
0276
/*
 * encap_v6 - grow the packet at the front and write a new Ethernet header
 * plus an outer IPv6 header in front of the original L3 packet.
 * @xdp:       XDP context of the packet
 * @cval:      control value whose mac field becomes the new destination MAC
 * @pckt:      parsed flow; its source address and port seed the outer
 *             source address
 * @dst:       backend whose dstv6 becomes the outer destination address
 * @pkt_bytes: length reported by the original IP header
 *
 * Returns false if the head cannot be moved or the headers do not fit.
 * Unlike encap_v4(), the head is left at its new position on success.
 */
static __attribute__ ((noinline))
bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
	      struct packet_description *pckt,
	      struct real_definition *dst, __u32 pkt_bytes)
{
	struct eth_hdr *new_eth;
	struct eth_hdr *old_eth;
	struct ipv6hdr *ip6h;
	__u32 ip_suffix;
	void *data_end;
	void *data;

	/* Negative delta: make room for one IPv6 header at the front. */
	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
		return false;
	/* The packet pointers must be re-read after the head moved. */
	data = (void *)(long)xdp->data;
	data_end = (void *)(long)xdp->data_end;
	new_eth = data;
	ip6h = data + sizeof(struct eth_hdr);
	/* The original Ethernet header now sits one IPv6 header further in. */
	old_eth = data + sizeof(struct ipv6hdr);
	if (new_eth + 1 > data_end ||
	    old_eth + 1 > data_end || ip6h + 1 > data_end)
		return false;
	memcpy(new_eth->eth_dest, cval->mac, 6);
	/* The old destination MAC becomes the new source MAC. */
	memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
	/* 56710 == 0xDD86: stored as bytes 86 DD on a little-endian host. */
	new_eth->eth_proto = 56710;
	ip6h->version = 6;
	ip6h->priority = 0;
	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));

	ip6h->nexthdr = IPPROTO_IPV6;
	/* Last source word: low word of the flow source xor first port. */
	ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0];
	ip6h->payload_len =
	    bpf_htons(pkt_bytes + sizeof(struct ipv6hdr));
	ip6h->hop_limit = 4;

	/* Fixed words 1, 2, 3 followed by the per-flow suffix. */
	ip6h->saddr.in6_u.u6_addr32[0] = 1;
	ip6h->saddr.in6_u.u6_addr32[1] = 2;
	ip6h->saddr.in6_u.u6_addr32[2] = 3;
	ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
	memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
	return true;
}
0319
/*
 * encap_v4 - grow the packet at the front, write a new Ethernet header and
 * most of an outer IPv4 (IPIP) header, then move the head back again.
 * @xdp:       XDP context of the packet
 * @cval:      control value whose mac field becomes the new destination MAC
 * @pckt:      parsed flow; its source address and first port seed the
 *             outer source address
 * @dst:       backend whose dst is mixed into the outer source address
 * @pkt_bytes: length reported by the original IP header
 *
 * Returns false if either head adjustment fails or the headers do not fit.
 */
static __attribute__ ((noinline))
bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
	      struct packet_description *pckt,
	      struct real_definition *dst, __u32 pkt_bytes)
{

	__u32 ip_suffix = bpf_ntohs(pckt->flow.port16[0]);
	struct eth_hdr *new_eth;
	struct eth_hdr *old_eth;
	__u16 *next_iph_u16;
	struct iphdr *iph;
	__u32 csum = 0;
	void *data_end;
	void *data;

	ip_suffix <<= 15;
	ip_suffix ^= pckt->flow.src;
	/* Negative delta: make room for one IPv4 header at the front. */
	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
		return false;
	/* The packet pointers must be re-read after the head moved. */
	data = (void *)(long)xdp->data;
	data_end = (void *)(long)xdp->data_end;
	new_eth = data;
	iph = data + sizeof(struct eth_hdr);
	/* The original Ethernet header now sits one IPv4 header further in. */
	old_eth = data + sizeof(struct iphdr);
	if (new_eth + 1 > data_end ||
	    old_eth + 1 > data_end || iph + 1 > data_end)
		return false;
	memcpy(new_eth->eth_dest, cval->mac, 6);
	/* The old destination MAC becomes the new source MAC. */
	memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
	/* 8 == 0x0008: stored as bytes 08 00 on a little-endian host. */
	new_eth->eth_proto = 8;
	iph->version = 4;
	iph->ihl = 5;
	iph->frag_off = 0;
	iph->protocol = IPPROTO_IPIP;
	iph->check = 0;
	iph->tos = 1;
	iph->tot_len = bpf_htons(pkt_bytes + sizeof(struct iphdr));
	/*
	 * iph->daddr is intentionally not written: the new IP header starts
	 * 14 bytes in and the old Ethernet header 20 bytes in, so the last
	 * two bytes of daddr are the same bytes as old_eth->eth_proto.
	 * Whatever those bytes already hold is what the checksum below sees.
	 */
	iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst;
	iph->ttl = 4;

	/* Sum the header as 16-bit words, fold the carry once, invert. */
	next_iph_u16 = (__u16 *) iph;
#pragma clang loop unroll(full)
	for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
		csum += *next_iph_u16++;
	iph->check = ~((csum & 0xffff) + (csum >> 16));
	/* Positive delta: undo the earlier head adjustment. */
	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
		return false;
	return true;
}
0373
0374 static __attribute__ ((noinline))
0375 bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
0376 {
0377 struct eth_hdr *new_eth;
0378 struct eth_hdr *old_eth;
0379
0380 old_eth = *data;
0381 new_eth = *data + sizeof(struct ipv6hdr);
0382 memcpy(new_eth->eth_source, old_eth->eth_source, 6);
0383 memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
0384 if (inner_v4)
0385 new_eth->eth_proto = 8;
0386 else
0387 new_eth->eth_proto = 56710;
0388 if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
0389 return false;
0390 *data = (void *)(long)xdp->data;
0391 *data_end = (void *)(long)xdp->data_end;
0392 return true;
0393 }
0394
0395 static __attribute__ ((noinline))
0396 bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
0397 {
0398 struct eth_hdr *new_eth;
0399 struct eth_hdr *old_eth;
0400
0401 old_eth = *data;
0402 new_eth = *data + sizeof(struct iphdr);
0403 memcpy(new_eth->eth_source, old_eth->eth_source, 6);
0404 memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
0405 new_eth->eth_proto = 8;
0406 if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
0407 return false;
0408 *data = (void *)(long)xdp->data;
0409 *data_end = (void *)(long)xdp->data_end;
0410 return true;
0411 }
0412
0413 static __attribute__ ((noinline))
0414 int swap_mac_and_send(void *data, void *data_end)
0415 {
0416 unsigned char tmp_mac[6];
0417 struct eth_hdr *eth;
0418
0419 eth = data;
0420 memcpy(tmp_mac, eth->eth_source, 6);
0421 memcpy(eth->eth_source, eth->eth_dest, 6);
0422 memcpy(eth->eth_dest, tmp_mac, 6);
0423 return XDP_TX;
0424 }
0425
0426 static __attribute__ ((noinline))
0427 int send_icmp_reply(void *data, void *data_end)
0428 {
0429 struct icmphdr *icmp_hdr;
0430 __u16 *next_iph_u16;
0431 __u32 tmp_addr = 0;
0432 struct iphdr *iph;
0433 __u32 csum1 = 0;
0434 __u32 csum = 0;
0435 __u64 off = 0;
0436
0437 if (data + sizeof(struct eth_hdr)
0438 + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end)
0439 return XDP_DROP;
0440 off += sizeof(struct eth_hdr);
0441 iph = data + off;
0442 off += sizeof(struct iphdr);
0443 icmp_hdr = data + off;
0444 icmp_hdr->type = 0;
0445 icmp_hdr->checksum += 0x0007;
0446 iph->ttl = 4;
0447 tmp_addr = iph->daddr;
0448 iph->daddr = iph->saddr;
0449 iph->saddr = tmp_addr;
0450 iph->check = 0;
0451 next_iph_u16 = (__u16 *) iph;
0452 #pragma clang loop unroll(full)
0453 for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
0454 csum += *next_iph_u16++;
0455 iph->check = ~((csum & 0xffff) + (csum >> 16));
0456 return swap_mac_and_send(data, data_end);
0457 }
0458
0459 static __attribute__ ((noinline))
0460 int send_icmp6_reply(void *data, void *data_end)
0461 {
0462 struct icmp6hdr *icmp_hdr;
0463 struct ipv6hdr *ip6h;
0464 __be32 tmp_addr[4];
0465 __u64 off = 0;
0466
0467 if (data + sizeof(struct eth_hdr)
0468 + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end)
0469 return XDP_DROP;
0470 off += sizeof(struct eth_hdr);
0471 ip6h = data + off;
0472 off += sizeof(struct ipv6hdr);
0473 icmp_hdr = data + off;
0474 icmp_hdr->icmp6_type = 129;
0475 icmp_hdr->icmp6_cksum -= 0x0001;
0476 ip6h->hop_limit = 4;
0477 memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16);
0478 memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16);
0479 memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16);
0480 return swap_mac_and_send(data, data_end);
0481 }
0482
0483 static __attribute__ ((noinline))
0484 int parse_icmpv6(void *data, void *data_end, __u64 off,
0485 struct packet_description *pckt)
0486 {
0487 struct icmp6hdr *icmp_hdr;
0488 struct ipv6hdr *ip6h;
0489
0490 icmp_hdr = data + off;
0491 if (icmp_hdr + 1 > data_end)
0492 return XDP_DROP;
0493 if (icmp_hdr->icmp6_type == 128)
0494 return send_icmp6_reply(data, data_end);
0495 if (icmp_hdr->icmp6_type != 3)
0496 return XDP_PASS;
0497 off += sizeof(struct icmp6hdr);
0498 ip6h = data + off;
0499 if (ip6h + 1 > data_end)
0500 return XDP_DROP;
0501 pckt->flow.proto = ip6h->nexthdr;
0502 pckt->flags |= (1 << 0);
0503 memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16);
0504 memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16);
0505 return -1;
0506 }
0507
0508 static __attribute__ ((noinline))
0509 int parse_icmp(void *data, void *data_end, __u64 off,
0510 struct packet_description *pckt)
0511 {
0512 struct icmphdr *icmp_hdr;
0513 struct iphdr *iph;
0514
0515 icmp_hdr = data + off;
0516 if (icmp_hdr + 1 > data_end)
0517 return XDP_DROP;
0518 if (icmp_hdr->type == 8)
0519 return send_icmp_reply(data, data_end);
0520 if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4))
0521 return XDP_PASS;
0522 off += sizeof(struct icmphdr);
0523 iph = data + off;
0524 if (iph + 1 > data_end)
0525 return XDP_DROP;
0526 if (iph->ihl != 5)
0527 return XDP_DROP;
0528 pckt->flow.proto = iph->protocol;
0529 pckt->flags |= (1 << 0);
0530 pckt->flow.src = iph->daddr;
0531 pckt->flow.dst = iph->saddr;
0532 return -1;
0533 }
0534
0535 static __attribute__ ((noinline))
0536 __u32 get_packet_hash(struct packet_description *pckt,
0537 bool hash_16bytes)
0538 {
0539 if (hash_16bytes)
0540 return jhash_2words(jhash(pckt->flow.srcv6, 16, 12),
0541 pckt->flow.ports, 24);
0542 else
0543 return jhash_2words(pckt->flow.src, pckt->flow.ports,
0544 24);
0545 }
0546
/*
 * get_packet_dst - pick a backend for a flow by hashing it.
 * @real:     out: set to the chosen entry of the reals map
 * @pckt:     parsed packet; may be modified (see VIP flag bit 3)
 * @vip_info: metadata of the matched VIP
 * @is_ipv6:  the packet is IPv6 (selects the 16-byte hash by default)
 * @lru_map:  connection table that may receive the new flow
 *
 * Returns false when the hash is not one of the two accepted values or a
 * map lookup fails; true once *@real has been set.
 */
__attribute__ ((noinline))
static bool get_packet_dst(struct real_definition **real,
			   struct packet_description *pckt,
			   struct vip_meta *vip_info,
			   bool is_ipv6, void *lru_map)
{
	struct real_pos_lru new_dst_lru = { };
	bool hash_16bytes = is_ipv6;
	__u32 *real_pos, hash, key;
	__u64 cur_time;

	/* VIP flag bit 2 forces the 16-byte hash even for IPv4. */
	if (vip_info->flags & (1 << 2))
		hash_16bytes = 1;
	/* VIP flag bit 3: hash on port16[1] only, with a zeroed source. */
	if (vip_info->flags & (1 << 3)) {
		pckt->flow.port16[0] = pckt->flow.port16[1];
		memset(pckt->flow.srcv6, 0, 16);
	}
	hash = get_packet_hash(pckt, hash_16bytes);
	/* Only these two exact hash values are accepted. */
	if (hash != 0x358459b7 &&
	    hash != 0x2f4bc6bb )
		return false;
	/* Each VIP owns two consecutive ch_rings slots; the hash picks one. */
	key = 2 * vip_info->vip_num + hash % 2;
	real_pos = bpf_map_lookup_elem(&ch_rings, &key);
	if (!real_pos)
		return false;
	/* From here on key is the index into the reals map. */
	key = *real_pos;
	*real = bpf_map_lookup_elem(&reals, &key);
	if (!(*real))
		return false;
	/* Unless VIP flag bit 1 is set, consider caching the decision. */
	if (!(vip_info->flags & (1 << 1))) {
		__u32 conn_rate_key = 512 + 2;
		struct lb_stats *conn_rate_stats =
			bpf_map_lookup_elem(&stats, &conn_rate_key);

		if (!conn_rate_stats)
			return true;
		cur_time = bpf_ktime_get_ns();
		/* v2 holds the window start time, v1 the count within it. */
		if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
			conn_rate_stats->v1 = 1;
			conn_rate_stats->v2 = cur_time;
		} else {
			conn_rate_stats->v1 += 1;
			/*
			 * True for every non-zero count, so within a window
			 * the insert below is skipped unless v1 wrapped to 0.
			 */
			if (conn_rate_stats->v1 >= 1)
				return true;
		}
		/* atime is only stamped for UDP; otherwise it stays zero. */
		if (pckt->flow.proto == IPPROTO_UDP)
			new_dst_lru.atime = cur_time;
		new_dst_lru.pos = key;
		/* The update's return value is not checked. */
		bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
	}
	return true;
}
0599
0600 __attribute__ ((noinline))
0601 static void connection_table_lookup(struct real_definition **real,
0602 struct packet_description *pckt,
0603 void *lru_map)
0604 {
0605
0606 struct real_pos_lru *dst_lru;
0607 __u64 cur_time;
0608 __u32 key;
0609
0610 dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow);
0611 if (!dst_lru)
0612 return;
0613 if (pckt->flow.proto == IPPROTO_UDP) {
0614 cur_time = bpf_ktime_get_ns();
0615 if (cur_time - dst_lru->atime > 300000)
0616 return;
0617 dst_lru->atime = cur_time;
0618 }
0619 key = dst_lru->pos;
0620 *real = bpf_map_lookup_elem(&reals, &key);
0621 }
0622
0623
0624
0625
0626
0627 __attribute__ ((noinline))
0628 static int process_l3_headers_v6(struct packet_description *pckt,
0629 __u8 *protocol, __u64 off,
0630 __u16 *pkt_bytes, void *data,
0631 void *data_end)
0632 {
0633 struct ipv6hdr *ip6h;
0634 __u64 iph_len;
0635 int action;
0636
0637 ip6h = data + off;
0638 if (ip6h + 1 > data_end)
0639 return XDP_DROP;
0640 iph_len = sizeof(struct ipv6hdr);
0641 *protocol = ip6h->nexthdr;
0642 pckt->flow.proto = *protocol;
0643 *pkt_bytes = bpf_ntohs(ip6h->payload_len);
0644 off += iph_len;
0645 if (*protocol == 45) {
0646 return XDP_DROP;
0647 } else if (*protocol == 59) {
0648 action = parse_icmpv6(data, data_end, off, pckt);
0649 if (action >= 0)
0650 return action;
0651 } else {
0652 memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16);
0653 memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16);
0654 }
0655 return -1;
0656 }
0657
0658 __attribute__ ((noinline))
0659 static int process_l3_headers_v4(struct packet_description *pckt,
0660 __u8 *protocol, __u64 off,
0661 __u16 *pkt_bytes, void *data,
0662 void *data_end)
0663 {
0664 struct iphdr *iph;
0665 __u64 iph_len;
0666 int action;
0667
0668 iph = data + off;
0669 if (iph + 1 > data_end)
0670 return XDP_DROP;
0671 if (iph->ihl != 5)
0672 return XDP_DROP;
0673 *protocol = iph->protocol;
0674 pckt->flow.proto = *protocol;
0675 *pkt_bytes = bpf_ntohs(iph->tot_len);
0676 off += 20;
0677 if (iph->frag_off & 65343)
0678 return XDP_DROP;
0679 if (*protocol == IPPROTO_ICMP) {
0680 action = parse_icmp(data, data_end, off, pckt);
0681 if (action >= 0)
0682 return action;
0683 } else {
0684 pckt->flow.src = iph->saddr;
0685 pckt->flow.dst = iph->daddr;
0686 }
0687 return -1;
0688 }
0689
/*
 * process_packet - main per-packet path shared by both XDP entry points.
 * @data, @data_end: packet bounds
 * @off:             offset of the L3 header from @data
 * @is_ipv6:         the packet is IPv6 rather than IPv4
 * @xdp:             XDP context, needed for head adjustments
 *
 * Parses L3 and L4, looks the destination up in vip_map, chooses a backend
 * (connection table first, hashing second), encapsulates towards it and
 * updates the counters.
 *
 * Returns an XDP action.  Note that the fully processed path ends in
 * XDP_DROP after writing the backend address to the start of the packet.
 */
__attribute__ ((noinline))
static int process_packet(void *data, __u64 off, void *data_end,
			  bool is_ipv6, struct xdp_md *xdp)
{

	struct real_definition *dst = NULL;
	struct packet_description pckt = { };
	struct vip_definition vip = { };
	struct lb_stats *data_stats;
	struct eth_hdr *eth = data;
	void *lru_map = &lru_cache;
	struct vip_meta *vip_info;
	__u32 lru_stats_key = 513;
	__u32 mac_addr_pos = 0;
	__u32 stats_key = 512;
	struct ctl_value *cval;
	__u16 pkt_bytes;
	__u64 iph_len;
	__u8 protocol;
	__u32 vip_num;
	int action;

	if (is_ipv6)
		action = process_l3_headers_v6(&pckt, &protocol, off,
					       &pkt_bytes, data, data_end);
	else
		action = process_l3_headers_v4(&pckt, &protocol, off,
					       &pkt_bytes, data, data_end);
	/* A non-negative result is a final verdict from the L3/ICMP code. */
	if (action >= 0)
		return action;
	/* Re-read: the ICMP parsers replace flow.proto with the inner one. */
	protocol = pckt.flow.proto;
	if (protocol == IPPROTO_TCP) {
		if (!parse_tcp(data, data_end, is_ipv6, &pckt))
			return XDP_DROP;
	} else if (protocol == IPPROTO_UDP) {
		if (!parse_udp(data, data_end, is_ipv6, &pckt))
			return XDP_DROP;
	} else {
		/* Anything that is neither TCP nor UDP is sent back out. */
		return XDP_TX;
	}

	/* Build the VIP key; vip.family is left at zero. */
	if (is_ipv6)
		memcpy(vip.vipv6, pckt.flow.dstv6, 16);
	else
		vip.vip = pckt.flow.dst;
	vip.port = pckt.flow.port16[1];
	vip.proto = pckt.flow.proto;
	vip_info = bpf_map_lookup_elem(&vip_map, &vip);
	if (!vip_info) {
		/* No exact match: retry with port 0 as a wildcard entry. */
		vip.port = 0;
		vip_info = bpf_map_lookup_elem(&vip_map, &vip);
		if (!vip_info)
			return XDP_PASS;
		/* Unless VIP flag bit 4 is set, drop the port from the flow. */
		if (!(vip_info->flags & (1 << 4)))
			pckt.flow.port16[1] = 0;
	}
	/* Frames longer than 1400 bytes are not handled. */
	if (data_end - data > 1400)
		return XDP_DROP;
	/* Slot 512: v1 counts packets that matched a VIP. */
	data_stats = bpf_map_lookup_elem(&stats, &stats_key);
	if (!data_stats)
		return XDP_DROP;
	data_stats->v1 += 1;
	/* dst is still NULL at this point, so this branch is always taken. */
	if (!dst) {
		if (vip_info->flags & (1 << 0))
			pckt.flow.port16[0] = 0;
		/* Skip the connection table for SYNs and for flag bit 1. */
		if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1)))
			connection_table_lookup(&dst, &pckt, lru_map);
		if (dst)
			goto out;
		/* Slot 513: v1 counts TCP SYNs, v2 other TCP misses. */
		if (pckt.flow.proto == IPPROTO_TCP) {
			struct lb_stats *lru_stats =
				bpf_map_lookup_elem(&stats, &lru_stats_key);

			if (!lru_stats)
				return XDP_DROP;
			if (pckt.flags & (1 << 1))
				lru_stats->v1 += 1;
			else
				lru_stats->v2 += 1;
		}
		if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map))
			return XDP_DROP;
		/* Slot 512: v2 counts packets resolved by hashing. */
		data_stats->v2 += 1;
	}
out:
	/* Slot 0 of ctl_array supplies the MAC used by the encap helpers. */
	cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos);
	if (!cval)
		return XDP_DROP;
	/* Backend flag bit 0 selects IPv6 encapsulation. */
	if (dst->flags & (1 << 0)) {
		if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes))
			return XDP_DROP;
	} else {
		if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
			return XDP_DROP;
	}
	/* Per-VIP slot: v1 counts packets, v2 accumulates pkt_bytes. */
	vip_num = vip_info->vip_num;
	data_stats = bpf_map_lookup_elem(&stats, &vip_num);
	if (!data_stats)
		return XDP_DROP;
	data_stats->v1 += 1;
	data_stats->v2 += pkt_bytes;

	/* The encap helpers moved the head, so reload the packet bounds. */
	data = (void *)(long)xdp->data;
	data_end = (void *)(long)xdp->data_end;
	if (data + 4 > data_end)
		return XDP_DROP;
	/* Stamp the chosen backend address over the first four bytes. */
	*(u32 *)data = dst->dst;
	return XDP_DROP;
}
0799
0800 SEC("xdp")
0801 int balancer_ingress_v4(struct xdp_md *ctx)
0802 {
0803 void *data = (void *)(long)ctx->data;
0804 void *data_end = (void *)(long)ctx->data_end;
0805 struct eth_hdr *eth = data;
0806 __u32 eth_proto;
0807 __u32 nh_off;
0808
0809 nh_off = sizeof(struct eth_hdr);
0810 if (data + nh_off > data_end)
0811 return XDP_DROP;
0812 eth_proto = bpf_ntohs(eth->eth_proto);
0813 if (eth_proto == ETH_P_IP)
0814 return process_packet(data, nh_off, data_end, 0, ctx);
0815 else
0816 return XDP_DROP;
0817 }
0818
0819 SEC("xdp")
0820 int balancer_ingress_v6(struct xdp_md *ctx)
0821 {
0822 void *data = (void *)(long)ctx->data;
0823 void *data_end = (void *)(long)ctx->data_end;
0824 struct eth_hdr *eth = data;
0825 __u32 eth_proto;
0826 __u32 nh_off;
0827
0828 nh_off = sizeof(struct eth_hdr);
0829 if (data + nh_off > data_end)
0830 return XDP_DROP;
0831 eth_proto = bpf_ntohs(eth->eth_proto);
0832 if (eth_proto == ETH_P_IPV6)
0833 return process_packet(data, nh_off, data_end, 1, ctx);
0834 else
0835 return XDP_DROP;
0836 }
0837
/* License string placed in the object's "license" section. */
char _license[] SEC("license") = "GPL";