/*  XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 *  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include "vmlinux.h"
#include "xdp_sample.bpf.h"
#include "xdp_sample_shared.h"
#include "hash_func01.h"

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
    __uint(type, BPF_MAP_TYPE_CPUMAP);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(struct bpf_cpumap_val));
} cpu_map SEC(".maps");
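
/* Note: max_entries for cpu_map is deliberately absent; the loader is
 * expected to size the map (typically to the number of possible CPUs)
 * before the object is loaded. A minimal user-space sketch, assuming a
 * libbpf skeleton handle 'skel' and an optional second-level program fd
 * 'cprog_fd' (both hypothetical names, not part of this object):
 *
 *   bpf_map__set_max_entries(skel->maps.cpu_map,
 *                            libbpf_num_possible_cpus());
 *   // ...after load:
 *   struct bpf_cpumap_val val = {
 *       .qsize       = 2048,     // per-CPU ring size, in frames
 *       .bpf_prog.fd = cprog_fd, // prog run on the remote CPU (optional)
 *   };
 *   __u32 cpu = 2;
 *   bpf_map_update_elem(bpf_map__fd(skel->maps.cpu_map), &cpu, &val, 0);
 */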

/* Set of maps controlling the available CPUs, and for iterating through
 * the selectable redirect CPUs.
 */
struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __type(key, u32);
    __type(value, u32);
} cpus_available SEC(".maps");

struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __type(key, u32);
    __type(value, u32);
    __uint(max_entries, 1);
} cpus_count SEC(".maps");

struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __type(key, u32);
    __type(value, u32);
    __uint(max_entries, 1);
} cpus_iterator SEC(".maps");
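
/* User-space sketch for filling the three maps above — a minimal,
 * hypothetical example (fds 'avail_fd' and 'count_fd' are assumed to
 * have been obtained via bpf_map__fd()):
 *
 *   __u32 cpus[] = { 0, 1, 2 };                   // chosen redirect CPUs
 *   __u32 i, n = sizeof(cpus) / sizeof(cpus[0]);
 *   for (i = 0; i < n; i++)
 *       bpf_map_update_elem(avail_fd, &i, &cpus[i], 0);
 *   __u32 key0 = 0;
 *   bpf_map_update_elem(count_fd, &key0, &n, 0);  // used by round-robin/hash
 */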

struct {
    __uint(type, BPF_MAP_TYPE_DEVMAP);
    __uint(key_size, sizeof(int));
    __uint(value_size, sizeof(struct bpf_devmap_val));
    __uint(max_entries, 1);
} tx_port SEC(".maps");
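
/* As with cpu_map, tx_port is populated from user space. A sketch,
 * assuming an egress interface index 'ifindex_out' and an optional
 * devmap egress program fd 'egress_prog_fd' (hypothetical names):
 *
 *   struct bpf_devmap_val dev_val = {
 *       .ifindex     = ifindex_out,
 *       .bpf_prog.fd = egress_prog_fd,
 *   };
 *   int key = 0;
 *   bpf_map_update_elem(tx_port_fd, &key, &dev_val, 0);
 */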

char tx_mac_addr[ETH_ALEN];

/* Helper parse functions */

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
           u16 *eth_proto, u64 *l3_offset)
{
    u16 eth_type;
    u64 offset;

    offset = sizeof(*eth);
    if ((void *)eth + offset > data_end)
        return false;

    eth_type = eth->h_proto;

    /* Skip non 802.3 Ethertypes */
    if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
        return false;

    /* Handle VLAN tagged packet */
    if (eth_type == bpf_htons(ETH_P_8021Q) ||
        eth_type == bpf_htons(ETH_P_8021AD)) {
        struct vlan_hdr *vlan_hdr;

        vlan_hdr = (void *)eth + offset;
        offset += sizeof(*vlan_hdr);
        if ((void *)eth + offset > data_end)
            return false;
        eth_type = vlan_hdr->h_vlan_encapsulated_proto;
    }
    /* Handle double VLAN tagged packet */
    if (eth_type == bpf_htons(ETH_P_8021Q) ||
        eth_type == bpf_htons(ETH_P_8021AD)) {
        struct vlan_hdr *vlan_hdr;

        vlan_hdr = (void *)eth + offset;
        offset += sizeof(*vlan_hdr);
        if ((void *)eth + offset > data_end)
            return false;
        eth_type = vlan_hdr->h_vlan_encapsulated_proto;
    }

    *eth_proto = bpf_ntohs(eth_type);
    *l3_offset = offset;
    return true;
}

static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data     = (void *)(long)ctx->data;
    struct iphdr *iph = data + nh_off;
    struct udphdr *udph;

    if (iph + 1 > data_end)
        return 0;
    if (iph->protocol != IPPROTO_UDP)
        return 0;

    /* Assumes no IPv4 options: UDP header follows sizeof(*iph) */
    udph = (void *)(iph + 1);
    if (udph + 1 > data_end)
        return 0;

    return bpf_ntohs(udph->dest);
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data     = (void *)(long)ctx->data;
    struct iphdr *iph = data + nh_off;

    if (iph + 1 > data_end)
        return 0;
    return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data     = (void *)(long)ctx->data;
    struct ipv6hdr *ip6h = data + nh_off;

    if (ip6h + 1 > data_end)
        return 0;
    return ip6h->nexthdr;
}

SEC("xdp")
int  xdp_prognum0_no_touch(struct xdp_md *ctx)
{
    u32 key = bpf_get_smp_processor_id();
    struct datarec *rec;
    u32 *cpu_selected;
    u32 cpu_dest = 0;
    u32 key0 = 0;

    /* Only use first entry in cpus_available */
    cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
    if (!cpu_selected)
        return XDP_ABORTED;
    cpu_dest = *cpu_selected;

    rec = bpf_map_lookup_elem(&rx_cnt, &key);
    if (!rec)
        return XDP_PASS;
    NO_TEAR_INC(rec->processed);

    if (cpu_dest >= nr_cpus) {
        NO_TEAR_INC(rec->issue);
        return XDP_ABORTED;
    }
    return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp")
int  xdp_prognum1_touch_data(struct xdp_md *ctx)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data     = (void *)(long)ctx->data;
    u32 key = bpf_get_smp_processor_id();
    struct ethhdr *eth = data;
    struct datarec *rec;
    u32 *cpu_selected;
    u32 cpu_dest = 0;
    u32 key0 = 0;
    u16 eth_type;

    /* Only use first entry in cpus_available */
    cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
    if (!cpu_selected)
        return XDP_ABORTED;
    cpu_dest = *cpu_selected;

    /* Validate packet length is minimum Eth header size */
    if (eth + 1 > data_end)
        return XDP_ABORTED;

    rec = bpf_map_lookup_elem(&rx_cnt, &key);
    if (!rec)
        return XDP_PASS;
    NO_TEAR_INC(rec->processed);

    /* Read packet data, and use it (drop non 802.3 Ethertypes) */
    eth_type = eth->h_proto;
    if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
        NO_TEAR_INC(rec->dropped);
        return XDP_DROP;
    }

    if (cpu_dest >= nr_cpus) {
        NO_TEAR_INC(rec->issue);
        return XDP_ABORTED;
    }
    return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp")
int  xdp_prognum2_round_robin(struct xdp_md *ctx)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data     = (void *)(long)ctx->data;
    u32 key = bpf_get_smp_processor_id();
    struct datarec *rec;
    u32 cpu_dest = 0;
    u32 key0 = 0;

    u32 *cpu_selected;
    u32 *cpu_iterator;
    u32 *cpu_max;
    u32 cpu_idx;

    cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
    if (!cpu_max)
        return XDP_ABORTED;

    cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
    if (!cpu_iterator)
        return XDP_ABORTED;
    cpu_idx = *cpu_iterator;

    *cpu_iterator += 1;
    if (*cpu_iterator == *cpu_max)
        *cpu_iterator = 0;

    cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
    if (!cpu_selected)
        return XDP_ABORTED;
    cpu_dest = *cpu_selected;

    rec = bpf_map_lookup_elem(&rx_cnt, &key);
    if (!rec)
        return XDP_PASS;
    NO_TEAR_INC(rec->processed);

    if (cpu_dest >= nr_cpus) {
        NO_TEAR_INC(rec->issue);
        return XDP_ABORTED;
    }
    return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
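
/* Design note: cpus_iterator is a PERCPU_ARRAY, so each RX CPU advances
 * its own cursor without atomic operations. The rotation is therefore
 * round-robin per RX CPU, not a single global round-robin across all
 * receiving CPUs.
 */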

SEC("xdp")
int  xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data     = (void *)(long)ctx->data;
    u32 key = bpf_get_smp_processor_id();
    struct ethhdr *eth = data;
    u8 ip_proto = 0; /* 0 acts as "no L4 proto": hits default case below */
    struct datarec *rec;
    u16 eth_proto = 0;
    u64 l3_offset = 0;
    u32 cpu_dest = 0;
    u32 *cpu_lookup;
    u32 cpu_idx = 0;

    rec = bpf_map_lookup_elem(&rx_cnt, &key);
    if (!rec)
        return XDP_PASS;
    NO_TEAR_INC(rec->processed);

    if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
        return XDP_PASS; /* Just skip */

    /* Extract L4 protocol */
    switch (eth_proto) {
    case ETH_P_IP:
        ip_proto = get_proto_ipv4(ctx, l3_offset);
        break;
    case ETH_P_IPV6:
        ip_proto = get_proto_ipv6(ctx, l3_offset);
        break;
    case ETH_P_ARP:
        cpu_idx = 0; /* ARP packet handled on separate CPU */
        break;
    default:
        cpu_idx = 0;
    }

    /* Choose CPU based on L4 protocol */
    switch (ip_proto) {
    case IPPROTO_ICMP:
    case IPPROTO_ICMPV6:
        cpu_idx = 2;
        break;
    case IPPROTO_TCP:
        cpu_idx = 0;
        break;
    case IPPROTO_UDP:
        cpu_idx = 1;
        break;
    default:
        cpu_idx = 0;
    }

    cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
    if (!cpu_lookup)
        return XDP_ABORTED;
    cpu_dest = *cpu_lookup;

    if (cpu_dest >= nr_cpus) {
        NO_TEAR_INC(rec->issue);
        return XDP_ABORTED;
    }
    return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp")
int  xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data     = (void *)(long)ctx->data;
    u32 key = bpf_get_smp_processor_id();
    struct ethhdr *eth = data;
    u8 ip_proto = 0; /* 0 acts as "no L4 proto": hits default case below */
    struct datarec *rec;
    u16 eth_proto = 0;
    u64 l3_offset = 0;
    u32 cpu_dest = 0;
    u32 *cpu_lookup;
    u32 cpu_idx = 0;
    u16 dest_port;

    rec = bpf_map_lookup_elem(&rx_cnt, &key);
    if (!rec)
        return XDP_PASS;
    NO_TEAR_INC(rec->processed);

    if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
        return XDP_PASS; /* Just skip */

    /* Extract L4 protocol */
    switch (eth_proto) {
    case ETH_P_IP:
        ip_proto = get_proto_ipv4(ctx, l3_offset);
        break;
    case ETH_P_IPV6:
        ip_proto = get_proto_ipv6(ctx, l3_offset);
        break;
    case ETH_P_ARP:
        cpu_idx = 0; /* ARP packet handled on separate CPU */
        break;
    default:
        cpu_idx = 0;
    }

    /* Choose CPU based on L4 protocol */
    switch (ip_proto) {
    case IPPROTO_ICMP:
    case IPPROTO_ICMPV6:
        cpu_idx = 2;
        break;
    case IPPROTO_TCP:
        cpu_idx = 0;
        break;
    case IPPROTO_UDP:
        cpu_idx = 1;
        /* DDoS filter UDP port 9 (pktgen); note the helper parses
         * IPv4 only, so IPv6 UDP is not filtered here */
        dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
        if (dest_port == 9) {
            NO_TEAR_INC(rec->dropped);
            return XDP_DROP;
        }
        break;
    default:
        cpu_idx = 0;
    }

    cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
    if (!cpu_lookup)
        return XDP_ABORTED;
    cpu_dest = *cpu_lookup;

    if (cpu_dest >= nr_cpus) {
        NO_TEAR_INC(rec->issue);
        return XDP_ABORTED;
    }
    return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Hashing initval */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data     = (void *)(long)ctx->data;
    struct iphdr *iph = data + nh_off;
    u32 cpu_hash;

    if (iph + 1 > data_end)
        return 0;

    cpu_hash = iph->saddr + iph->daddr;
    cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

    return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data     = (void *)(long)ctx->data;
    struct ipv6hdr *ip6h = data + nh_off;
    u32 cpu_hash;

    if (ip6h + 1 > data_end)
        return 0;

    cpu_hash  = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
    cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
    cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
    cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
    cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

    return cpu_hash;
}

/* Load-balance traffic based on hashing IP-addrs + L4-proto. The
 * hashing scheme is symmetric: swapping IP src/dest still hits the
 * same CPU, because the address sum is commutative.
 */
SEC("xdp")
int  xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data     = (void *)(long)ctx->data;
    u32 key = bpf_get_smp_processor_id();
    struct ethhdr *eth = data;
    struct datarec *rec;
    u16 eth_proto = 0;
    u64 l3_offset = 0;
    u32 cpu_dest = 0;
    u32 cpu_idx = 0;
    u32 *cpu_lookup;
    u32 key0 = 0;
    u32 *cpu_max;
    u32 cpu_hash;

    rec = bpf_map_lookup_elem(&rx_cnt, &key);
    if (!rec)
        return XDP_PASS;
    NO_TEAR_INC(rec->processed);

    cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
    if (!cpu_max)
        return XDP_ABORTED;

    if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
        return XDP_PASS; /* Just skip */

    /* Hash for IPv4 and IPv6 */
    switch (eth_proto) {
    case ETH_P_IP:
        cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
        break;
    case ETH_P_IPV6:
        cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
        break;
    case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
    default:
        cpu_hash = 0;
    }

    /* Choose CPU based on hash */
    cpu_idx = cpu_hash % *cpu_max;

    cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
    if (!cpu_lookup)
        return XDP_ABORTED;
    cpu_dest = *cpu_lookup;

    if (cpu_dest >= nr_cpus) {
        NO_TEAR_INC(rec->issue);
        return XDP_ABORTED;
    }
    return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp/cpumap")
int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data = (void *)(long)ctx->data;
    struct ethhdr *eth = data;
    u64 nh_off;

    nh_off = sizeof(*eth);
    if (data + nh_off > data_end)
        return XDP_DROP;

    swap_src_dst_mac(data);
    return bpf_redirect_map(&tx_port, 0, 0);
}

SEC("xdp/cpumap")
int xdp_redirect_cpu_pass(struct xdp_md *ctx)
{
    return XDP_PASS;
}

SEC("xdp/cpumap")
int xdp_redirect_cpu_drop(struct xdp_md *ctx)
{
    return XDP_DROP;
}
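
/* Note: the SEC("xdp/cpumap") programs above are not attached to a
 * netdev. They run on the remote CPU, against frames that arrive
 * through cpu_map, once user space installs one of them via
 * bpf_cpumap_val.bpf_prog.fd (see the cpu_map sketch near the top of
 * this file).
 */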

SEC("xdp/devmap")
int xdp_redirect_egress_prog(struct xdp_md *ctx)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data = (void *)(long)ctx->data;
    struct ethhdr *eth = data;
    u64 nh_off;

    nh_off = sizeof(*eth);
    if (data + nh_off > data_end)
        return XDP_DROP;

    __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);

    return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
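
/* Attaching the entry program — a minimal user-space sketch with libbpf,
 * not part of this object. 'ifindex' and the program choice are
 * illustrative; any of the SEC("xdp") programs above can serve as the
 * entry point:
 *
 *   int prog_fd = bpf_program__fd(skel->progs.xdp_prognum5_lb_hash_ip_pairs);
 *   int err = bpf_xdp_attach(ifindex, prog_fd, XDP_FLAGS_DRV_MODE, NULL);
 *   if (err)
 *       fprintf(stderr, "attach failed: %d\n", err);
 */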