#include <stdbool.h>
#include <string.h>

#include <linux/stddef.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/mpls.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/pkt_cls.h>
#include <linux/types.h>

#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>

static const int cfg_port = 8000;

static const int cfg_udp_src = 20000;

#define L2_PAD_SZ	(sizeof(struct vxlanhdr) + ETH_HLEN)

#define UDP_PORT		5555
#define MPLS_OVER_UDP_PORT	6635
#define ETH_OVER_UDP_PORT	7777
#define VXLAN_UDP_PORT		8472

#define EXTPROTO_VXLAN	0x1

#define VXLAN_N_VID	(1u << 24)
#define VXLAN_VNI_MASK	bpf_htonl((VXLAN_N_VID - 1) << 8)
#define VXLAN_FLAGS	0x8
#define VXLAN_VNI	1

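/* MPLS label 1000 with the bottom-of-stack (S) bit set and a TTL of 0xff. */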
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
						     MPLS_LS_S_MASK | 0xff);

struct vxlanhdr {
	__be32 vx_flags;
	__be32 vx_vni;
} __attribute__((packed));

struct gre_hdr {
	__be16 flags;
	__be16 protocol;
} __attribute__((packed));

union l4hdr {
	struct udphdr udp;
	struct gre_hdr gre;
};

struct v4hdr {
	struct iphdr ip;
	union l4hdr l4hdr;
	__u8 pad[L2_PAD_SZ];
} __attribute__((packed));

struct v6hdr {
	struct ipv6hdr ip;
	union l4hdr l4hdr;
	__u8 pad[L2_PAD_SZ];
} __attribute__((packed));

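/* Recompute the IPv4 header checksum: sum the 20-byte header as 16-bit
 * words, fold the carry into the low 16 bits and store the complement.
 */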
static __always_inline void set_ipv4_csum(struct iphdr *iph)
{
	__u16 *iph16 = (__u16 *)iph;
	__u32 csum;
	int i;

	iph->check = 0;

#pragma clang loop unroll(full)
	for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
		csum += *iph16++;

	iph->check = ~((csum & 0xffff) + (csum >> 16));
}

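/* Encapsulate the inner packet behind a new outer IPv4 header, optionally
 * followed by a GRE or UDP header and an L2 header (MPLS label, Ethernet,
 * or VXLAN + Ethernet). Room is reserved with bpf_skb_adjust_room() and the
 * outer headers are written with bpf_skb_store_bytes().
 */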
static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
					__u16 l2_proto, __u16 ext_proto)
{
	__u16 udp_dst = UDP_PORT;
	struct iphdr iph_inner;
	struct v4hdr h_outer;
	struct tcphdr tcph;
	int olen, l2_len;
	__u8 *l2_hdr = NULL;
	int tcp_off;
	__u64 flags;

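	/* Most programs here keep the same network protocol inside and
	 * outside, so the inner header can seed the outer one directly.
	 * The 6in4 (SIT) case carries IPv6 inside IPv4, so the outer IPv4
	 * header has to be built by hand from the inner IPv6 header.
	 */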
	if (encap_proto == IPPROTO_IPV6) {
		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
		struct ipv6hdr iph6_inner;

		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
				       sizeof(iph6_inner)) < 0)
			return TC_ACT_OK;

		memset(&iph_inner, 0, sizeof(iph_inner));
		iph_inner.version = 4;
		iph_inner.ihl = 5;
		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
					      bpf_ntohs(iph6_inner.payload_len));
		iph_inner.ttl = iph6_inner.hop_limit - 1;
		iph_inner.protocol = iph6_inner.nexthdr;
		iph_inner.saddr = __bpf_constant_htonl(saddr);
		iph_inner.daddr = __bpf_constant_htonl(daddr);

		tcp_off = sizeof(iph6_inner);
	} else {
		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
				       sizeof(iph_inner)) < 0)
			return TC_ACT_OK;

		tcp_off = sizeof(iph_inner);
	}

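	/* filter: only handle option-less TCP packets destined to cfg_port */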
	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
		return TC_ACT_OK;

	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	olen = sizeof(h_outer.ip);
	l2_len = 0;

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;

	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		l2_len = sizeof(mpls_label);
		udp_dst = MPLS_OVER_UDP_PORT;
		break;
	case ETH_P_TEB:
		l2_len = ETH_HLEN;
		if (ext_proto & EXTPROTO_VXLAN) {
			udp_dst = VXLAN_UDP_PORT;
			l2_len += sizeof(struct vxlanhdr);
		} else
			udp_dst = ETH_OVER_UDP_PORT;
		break;
	}
	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);

	switch (encap_proto) {
	case IPPROTO_GRE:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen += sizeof(h_outer.l4hdr.gre);
		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
		h_outer.l4hdr.gre.flags = 0;
		break;
	case IPPROTO_UDP:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
		olen += sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
		h_outer.l4hdr.udp.check = 0;
		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
						  sizeof(h_outer.l4hdr.udp) +
						  l2_len);
		break;
	case IPPROTO_IPIP:
	case IPPROTO_IPV6:
		break;
	default:
		return TC_ACT_OK;
	}

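	/* add L2 encap (if any) right after the outer L3/L4 headers */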
	l2_hdr = (__u8 *)&h_outer + olen;
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		*(__u32 *)l2_hdr = mpls_label;
		break;
	case ETH_P_TEB:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;

		if (ext_proto & EXTPROTO_VXLAN) {
			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;

			vxlan_hdr->vx_flags = VXLAN_FLAGS;
			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);

			l2_hdr += sizeof(struct vxlanhdr);
		}

		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
			return TC_ACT_SHOT;

		break;
	}
	olen += l2_len;

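	/* open room for the outer headers between the MAC and network header */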
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

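	/* prepare new outer network header */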
	h_outer.ip = iph_inner;
	h_outer.ip.tot_len = bpf_htons(olen +
				       bpf_ntohs(h_outer.ip.tot_len));
	h_outer.ip.protocol = encap_proto;

	set_ipv4_csum((void *)&h_outer.ip);

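	/* store new outer network header */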
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

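	/* 6in4: the outer protocol changed, so rewrite eth->h_proto as well */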
	if (encap_proto == IPPROTO_IPV6) {
		struct ethhdr eth;

		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
			return TC_ACT_SHOT;
		eth.h_proto = bpf_htons(ETH_P_IP);
		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
			return TC_ACT_SHOT;
	}

	return TC_ACT_OK;
}

static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
				      __u16 l2_proto)
{
	return __encap_ipv4(skb, encap_proto, l2_proto, 0);
}

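/* Same as __encap_ipv4(), but the outer header is IPv6: prepend an outer
 * IPv6 header, optionally followed by GRE/UDP and an L2 header.
 */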
static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
					__u16 l2_proto, __u16 ext_proto)
{
	__u16 udp_dst = UDP_PORT;
	struct ipv6hdr iph_inner;
	struct v6hdr h_outer;
	struct tcphdr tcph;
	int olen, l2_len;
	__u8 *l2_hdr = NULL;
	__u16 tot_len;
	__u64 flags;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
			       sizeof(iph_inner)) < 0)
		return TC_ACT_OK;

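	/* filter: only handle TCP packets destined to cfg_port */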
	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	olen = sizeof(h_outer.ip);
	l2_len = 0;

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;

	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		l2_len = sizeof(mpls_label);
		udp_dst = MPLS_OVER_UDP_PORT;
		break;
	case ETH_P_TEB:
		l2_len = ETH_HLEN;
		if (ext_proto & EXTPROTO_VXLAN) {
			udp_dst = VXLAN_UDP_PORT;
			l2_len += sizeof(struct vxlanhdr);
		} else
			udp_dst = ETH_OVER_UDP_PORT;
		break;
	}
	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);

	switch (encap_proto) {
	case IPPROTO_GRE:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen += sizeof(h_outer.l4hdr.gre);
		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
		h_outer.l4hdr.gre.flags = 0;
		break;
	case IPPROTO_UDP:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
		olen += sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
			  sizeof(h_outer.l4hdr.udp) + l2_len;
		h_outer.l4hdr.udp.check = 0;
		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
		break;
	case IPPROTO_IPV6:
		break;
	default:
		return TC_ACT_OK;
	}

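	/* add L2 encap (if any) right after the outer L3/L4 headers */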
	l2_hdr = (__u8 *)&h_outer + olen;
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		*(__u32 *)l2_hdr = mpls_label;
		break;
	case ETH_P_TEB:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;

		if (ext_proto & EXTPROTO_VXLAN) {
			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;

			vxlan_hdr->vx_flags = VXLAN_FLAGS;
			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);

			l2_hdr += sizeof(struct vxlanhdr);
		}

		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
			return TC_ACT_SHOT;
		break;
	}
	olen += l2_len;

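	/* open room for the outer headers between the MAC and network header */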
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

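	/* prepare new outer network header */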
	h_outer.ip = iph_inner;
	h_outer.ip.payload_len = bpf_htons(olen +
					   bpf_ntohs(h_outer.ip.payload_len));

	h_outer.ip.nexthdr = encap_proto;

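	/* store new outer network header */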
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}

static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
				      __u16 l2_proto)
{
	return __encap_ipv6(skb, encap_proto, l2_proto, 0);
}

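/* Each section below is a separate tc BPF program: it checks the expected
 * outer protocol family and applies one specific encapsulation variant.
 */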
SEC("encap_ipip_none")
int __encap_ipip_none(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
	else
		return TC_ACT_OK;
}

SEC("encap_gre_none")
int __encap_gre_none(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
	else
		return TC_ACT_OK;
}

SEC("encap_gre_mpls")
int __encap_gre_mpls(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
	else
		return TC_ACT_OK;
}

SEC("encap_gre_eth")
int __encap_gre_eth(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
	else
		return TC_ACT_OK;
}

SEC("encap_udp_none")
int __encap_udp_none(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
	else
		return TC_ACT_OK;
}

SEC("encap_udp_mpls")
int __encap_udp_mpls(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
	else
		return TC_ACT_OK;
}

SEC("encap_udp_eth")
int __encap_udp_eth(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
	else
		return TC_ACT_OK;
}

SEC("encap_vxlan_eth")
int __encap_vxlan_eth(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return __encap_ipv4(skb, IPPROTO_UDP,
				    ETH_P_TEB,
				    EXTPROTO_VXLAN);
	else
		return TC_ACT_OK;
}

SEC("encap_sit_none")
int __encap_sit_none(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
	else
		return TC_ACT_OK;
}

SEC("encap_ip6tnl_none")
int __encap_ip6tnl_none(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
	else
		return TC_ACT_OK;
}

SEC("encap_ip6gre_none")
int __encap_ip6gre_none(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
	else
		return TC_ACT_OK;
}

SEC("encap_ip6gre_mpls")
int __encap_ip6gre_mpls(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
	else
		return TC_ACT_OK;
}

SEC("encap_ip6gre_eth")
int __encap_ip6gre_eth(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
	else
		return TC_ACT_OK;
}

SEC("encap_ip6udp_none")
int __encap_ip6udp_none(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
	else
		return TC_ACT_OK;
}

SEC("encap_ip6udp_mpls")
int __encap_ip6udp_mpls(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
	else
		return TC_ACT_OK;
}

SEC("encap_ip6udp_eth")
int __encap_ip6udp_eth(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
	else
		return TC_ACT_OK;
}

SEC("encap_ip6vxlan_eth")
int __encap_ip6vxlan_eth(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return __encap_ipv6(skb, IPPROTO_UDP,
				    ETH_P_TEB,
				    EXTPROTO_VXLAN);
	else
		return TC_ACT_OK;
}

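/* Work out the total length of the outer headers (outer L3 of length 'len'
 * at offset 'off', plus any GRE/UDP and MPLS/Ethernet/VXLAN encapsulation)
 * and shrink the packet by that amount to restore the inner packet.
 */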
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
	struct gre_hdr greh;
	struct udphdr udph;
	int olen = len;

	switch (proto) {
	case IPPROTO_IPIP:
	case IPPROTO_IPV6:
		break;
	case IPPROTO_GRE:
		olen += sizeof(struct gre_hdr);
		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
			return TC_ACT_OK;
		switch (bpf_ntohs(greh.protocol)) {
		case ETH_P_MPLS_UC:
			olen += sizeof(mpls_label);
			break;
		case ETH_P_TEB:
			olen += ETH_HLEN;
			break;
		}
		break;
	case IPPROTO_UDP:
		olen += sizeof(struct udphdr);
		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
			return TC_ACT_OK;
		switch (bpf_ntohs(udph.dest)) {
		case MPLS_OVER_UDP_PORT:
			olen += sizeof(mpls_label);
			break;
		case ETH_OVER_UDP_PORT:
			olen += ETH_HLEN;
			break;
		case VXLAN_UDP_PORT:
			olen += ETH_HLEN + sizeof(struct vxlanhdr);
			break;
		}
		break;
	default:
		return TC_ACT_OK;
	}

	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC,
				BPF_F_ADJ_ROOM_FIXED_GSO))
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}

static int decap_ipv4(struct __sk_buff *skb)
{
	struct iphdr iph_outer;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
			       sizeof(iph_outer)) < 0)
		return TC_ACT_OK;

	if (iph_outer.ihl != 5)
		return TC_ACT_OK;

	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
			      iph_outer.protocol);
}

static int decap_ipv6(struct __sk_buff *skb)
{
	struct ipv6hdr iph_outer;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
			       sizeof(iph_outer)) < 0)
		return TC_ACT_OK;

	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
			      iph_outer.nexthdr);
}

SEC("decap")
int decap_f(struct __sk_buff *skb)
{
	switch (skb->protocol) {
	case __bpf_constant_htons(ETH_P_IP):
		return decap_ipv4(skb);
	case __bpf_constant_htons(ETH_P_IPV6):
		return decap_ipv6(skb);
	default:
		/* does not match, ignore */
		return TC_ACT_OK;
	}
}

char __license[] SEC("license") = "GPL";