Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 // Copyright (c) 2022 Meta
0003 
0004 #include <stddef.h>
0005 #include <stdint.h>
0006 #include <stdbool.h>
0007 #include <linux/bpf.h>
0008 #include <linux/stddef.h>
0009 #include <linux/pkt_cls.h>
0010 #include <linux/if_ether.h>
0011 #include <linux/in.h>
0012 #include <linux/ip.h>
0013 #include <linux/ipv6.h>
0014 #include <linux/tcp.h>
0015 #include <linux/udp.h>
0016 #include <bpf/bpf_helpers.h>
0017 #include <bpf/bpf_endian.h>
0018 #include <sys/socket.h>
0019 
/* veth_src --- veth_src_fwd --- veth_dst_fwd --- veth_dst
 *           |                                 |
 *  ns_src   |              ns_fwd             |   ns_dst
 *
 * ns_src and ns_dst: ENDHOST namespace
 *            ns_fwd: Forwarding namespace
 */
0027 
/* Turn an integer __sk_buff field (e.g. skb->data) into a pointer.
 * The whole expansion is parenthesized so it composes safely with
 * surrounding operators.
 */
#define ctx_ptr(field)      ((void *)(long)(field))

/* IPv4 source addresses (network byte order) that skb_get_type() maps to
 * SRC_NS and DST_NS respectively.
 */
#define ip4_src         __bpf_htonl(0xac100164) /* 172.16.1.100 */
#define ip4_dst         __bpf_htonl(0xac100264) /* 172.16.2.100 */

/* IPv6 source addresses as 16-byte initializers:
 * ip6_src is ::1:dead:beef:cafe, ip6_dst is ::2:dead:beef:cafe.
 */
#define ip6_src         { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
                  0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
#define ip6_dst         { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
                  0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }

/* Compare two IPv6 addresses 32 bits at a time.
 * Both arguments are parenthesized so that expressions such as *ptr or a
 * compound literal can be passed.  Each argument is evaluated up to four
 * times, so it must not have side effects.
 */
#define v6_equal(a, b)      ((a).s6_addr32[0] == (b).s6_addr32[0] && \
                 (a).s6_addr32[1] == (b).s6_addr32[1] && \
                 (a).s6_addr32[2] == (b).s6_addr32[2] && \
                 (a).s6_addr32[3] == (b).s6_addr32[3])
0042 
/* Interface indexes handed to bpf_redirect_neigh() by
 * ingress_fwdns_prio101().
 * NOTE(review): presumably filled in by the userspace loader before the
 * programs are loaded -- confirm against the test runner.
 */
volatile const __u32 IFINDEX_SRC;
volatile const __u32 IFINDEX_DST;

/* Marker values written into skb->tstamp by one hop and checked by a
 * later one: egress_host() writes EGRESS_ENDHOST_MAGIC,
 * ingress_fwdns_prio101() writes INGRESS_FWDNS_MAGIC and
 * egress_fwdns_prio101() writes EGRESS_FWDNS_MAGIC.
 */
#define EGRESS_ENDHOST_MAGIC    0x0b9fbeef
#define INGRESS_FWDNS_MAGIC 0x1b9fbeef
#define EGRESS_FWDNS_MAGIC  0x2b9fbeef
0049 
/* Column index into dtimes[][] and errs[][]: one slot per hook that
 * inspects the skb, plus SET_DTIME for unexpected bpf_skb_set_tstamp()
 * results.  __MAX_CNT is the number of columns.
 */
enum {
    INGRESS_FWDNS_P100,
    INGRESS_FWDNS_P101,
    EGRESS_FWDNS_P100,
    EGRESS_FWDNS_P101,
    INGRESS_ENDHOST,
    EGRESS_ENDHOST,
    SET_DTIME,
    __MAX_CNT,
};
0060 
/* Values of the global 'test' and row index into dtimes[][] and errs[][].
 *
 * The order matters: bpf_fwd() is true for every value below
 * TCP_IP4_RT_FWD, and get_proto() selects UDP only for the UDP_* values.
 * TCP_IP6_CLEAR_DTIME additionally makes the forwarding-namespace
 * prio100 programs zero skb->tstamp (see fwdns_clear_dtime()).
 * UKN_TEST is the row used by inc_dtimes()/inc_errs() when 'test' is out
 * of range.  __NR_TESTS is the number of rows.
 */
enum {
    TCP_IP6_CLEAR_DTIME,
    TCP_IP4,
    TCP_IP6,
    UDP_IP4,
    UDP_IP6,
    TCP_IP4_RT_FWD,
    TCP_IP6_RT_FWD,
    UDP_IP4_RT_FWD,
    UDP_IP6_RT_FWD,
    UKN_TEST,
    __NR_TESTS,
};
0074 
/* Sender namespace as encoded in the second byte of skb_get_type()'s
 * return value.  Numbering starts at 1 because skb_get_type() uses 0 for
 * "source address matched neither namespace".
 */
enum {
    SRC_NS = 1,
    DST_NS,
};
0079 
/* Per-test, per-hook counters: dtimes[][] is bumped when a hook observes
 * the timestamp it expects, errs[][] when it does not.
 */
__u32 dtimes[__NR_TESTS][__MAX_CNT] = {};
__u32 errs[__NR_TESTS][__MAX_CNT] = {};
/* Currently selected test (one of the test-id enum values).
 * NOTE(review): presumably written by userspace before traffic is
 * generated -- no program in this file assigns it.
 */
__u32 test = 0;
0083 
0084 static void inc_dtimes(__u32 idx)
0085 {
0086     if (test < __NR_TESTS)
0087         dtimes[test][idx]++;
0088     else
0089         dtimes[UKN_TEST][idx]++;
0090 }
0091 
0092 static void inc_errs(__u32 idx)
0093 {
0094     if (test < __NR_TESTS)
0095         errs[test][idx]++;
0096     else
0097         errs[UKN_TEST][idx]++;
0098 }
0099 
/* Extract the inet protocol (lowest byte) from a value returned by
 * skb_get_type().
 */
static int skb_proto(int type)
{
    const int low_byte = 0xff;

    return type & low_byte;
}
0104 
/* Extract the sender netns (second byte) from a value returned by
 * skb_get_type().
 */
static int skb_ns(int type)
{
    int upper = type >> 8;

    return upper & 0xff;
}
0109 
0110 static bool fwdns_clear_dtime(void)
0111 {
0112     return test == TCP_IP6_CLEAR_DTIME;
0113 }
0114 
0115 static bool bpf_fwd(void)
0116 {
0117     return test < TCP_IP4_RT_FWD;
0118 }
0119 
0120 static __u8 get_proto(void)
0121 {
0122     switch (test) {
0123     case UDP_IP4:
0124     case UDP_IP6:
0125     case UDP_IP4_RT_FWD:
0126     case UDP_IP6_RT_FWD:
0127         return IPPROTO_UDP;
0128     default:
0129         return IPPROTO_TCP;
0130     }
0131 }
0132 
0133 /* -1: parse error: TC_ACT_SHOT
0134  *  0: not testing traffic: TC_ACT_OK
0135  * >0: first byte is the inet_proto, second byte has the netns
0136  *     of the sender
0137  */
0138 static int skb_get_type(struct __sk_buff *skb)
0139 {
0140     __u16 dst_ns_port = __bpf_htons(50000 + test);
0141     void *data_end = ctx_ptr(skb->data_end);
0142     void *data = ctx_ptr(skb->data);
0143     __u8 inet_proto = 0, ns = 0;
0144     struct ipv6hdr *ip6h;
0145     __u16 sport, dport;
0146     struct iphdr *iph;
0147     struct tcphdr *th;
0148     struct udphdr *uh;
0149     void *trans;
0150 
0151     switch (skb->protocol) {
0152     case __bpf_htons(ETH_P_IP):
0153         iph = data + sizeof(struct ethhdr);
0154         if (iph + 1 > data_end)
0155             return -1;
0156         if (iph->saddr == ip4_src)
0157             ns = SRC_NS;
0158         else if (iph->saddr == ip4_dst)
0159             ns = DST_NS;
0160         inet_proto = iph->protocol;
0161         trans = iph + 1;
0162         break;
0163     case __bpf_htons(ETH_P_IPV6):
0164         ip6h = data + sizeof(struct ethhdr);
0165         if (ip6h + 1 > data_end)
0166             return -1;
0167         if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_src))
0168             ns = SRC_NS;
0169         else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst))
0170             ns = DST_NS;
0171         inet_proto = ip6h->nexthdr;
0172         trans = ip6h + 1;
0173         break;
0174     default:
0175         return 0;
0176     }
0177 
0178     /* skb is not from src_ns or dst_ns.
0179      * skb is not the testing IPPROTO.
0180      */
0181     if (!ns || inet_proto != get_proto())
0182         return 0;
0183 
0184     switch (inet_proto) {
0185     case IPPROTO_TCP:
0186         th = trans;
0187         if (th + 1 > data_end)
0188             return -1;
0189         sport = th->source;
0190         dport = th->dest;
0191         break;
0192     case IPPROTO_UDP:
0193         uh = trans;
0194         if (uh + 1 > data_end)
0195             return -1;
0196         sport = uh->source;
0197         dport = uh->dest;
0198         break;
0199     default:
0200         return 0;
0201     }
0202 
0203     /* The skb is the testing traffic */
0204     if ((ns == SRC_NS && dport == dst_ns_port) ||
0205         (ns == DST_NS && sport == dst_ns_port))
0206         return (ns << 8 | inet_proto);
0207 
0208     return 0;
0209 }
0210 
0211 /* format: direction@iface@netns
0212  * egress@veth_(src|dst)@ns_(src|dst)
0213  */
0214 SEC("tc")
0215 int egress_host(struct __sk_buff *skb)
0216 {
0217     int skb_type;
0218 
0219     skb_type = skb_get_type(skb);
0220     if (skb_type == -1)
0221         return TC_ACT_SHOT;
0222     if (!skb_type)
0223         return TC_ACT_OK;
0224 
0225     if (skb_proto(skb_type) == IPPROTO_TCP) {
0226         if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
0227             skb->tstamp)
0228             inc_dtimes(EGRESS_ENDHOST);
0229         else
0230             inc_errs(EGRESS_ENDHOST);
0231     } else {
0232         if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC &&
0233             skb->tstamp)
0234             inc_dtimes(EGRESS_ENDHOST);
0235         else
0236             inc_errs(EGRESS_ENDHOST);
0237     }
0238 
0239     skb->tstamp = EGRESS_ENDHOST_MAGIC;
0240 
0241     return TC_ACT_OK;
0242 }
0243 
0244 /* ingress@veth_(src|dst)@ns_(src|dst) */
0245 SEC("tc")
0246 int ingress_host(struct __sk_buff *skb)
0247 {
0248     int skb_type;
0249 
0250     skb_type = skb_get_type(skb);
0251     if (skb_type == -1)
0252         return TC_ACT_SHOT;
0253     if (!skb_type)
0254         return TC_ACT_OK;
0255 
0256     if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
0257         skb->tstamp == EGRESS_FWDNS_MAGIC)
0258         inc_dtimes(INGRESS_ENDHOST);
0259     else
0260         inc_errs(INGRESS_ENDHOST);
0261 
0262     return TC_ACT_OK;
0263 }
0264 
0265 /* ingress@veth_(src|dst)_fwd@ns_fwd priority 100 */
0266 SEC("tc")
0267 int ingress_fwdns_prio100(struct __sk_buff *skb)
0268 {
0269     int skb_type;
0270 
0271     skb_type = skb_get_type(skb);
0272     if (skb_type == -1)
0273         return TC_ACT_SHOT;
0274     if (!skb_type)
0275         return TC_ACT_OK;
0276 
0277     /* delivery_time is only available to the ingress
0278      * if the tc-bpf checks the skb->tstamp_type.
0279      */
0280     if (skb->tstamp == EGRESS_ENDHOST_MAGIC)
0281         inc_errs(INGRESS_FWDNS_P100);
0282 
0283     if (fwdns_clear_dtime())
0284         skb->tstamp = 0;
0285 
0286     return TC_ACT_UNSPEC;
0287 }
0288 
0289 /* egress@veth_(src|dst)_fwd@ns_fwd priority 100 */
0290 SEC("tc")
0291 int egress_fwdns_prio100(struct __sk_buff *skb)
0292 {
0293     int skb_type;
0294 
0295     skb_type = skb_get_type(skb);
0296     if (skb_type == -1)
0297         return TC_ACT_SHOT;
0298     if (!skb_type)
0299         return TC_ACT_OK;
0300 
0301     /* delivery_time is always available to egress even
0302      * the tc-bpf did not use the tstamp_type.
0303      */
0304     if (skb->tstamp == INGRESS_FWDNS_MAGIC)
0305         inc_dtimes(EGRESS_FWDNS_P100);
0306     else
0307         inc_errs(EGRESS_FWDNS_P100);
0308 
0309     if (fwdns_clear_dtime())
0310         skb->tstamp = 0;
0311 
0312     return TC_ACT_UNSPEC;
0313 }
0314 
0315 /* ingress@veth_(src|dst)_fwd@ns_fwd priority 101 */
0316 SEC("tc")
0317 int ingress_fwdns_prio101(struct __sk_buff *skb)
0318 {
0319     __u64 expected_dtime = EGRESS_ENDHOST_MAGIC;
0320     int skb_type;
0321 
0322     skb_type = skb_get_type(skb);
0323     if (skb_type == -1 || !skb_type)
0324         /* Should have handled in prio100 */
0325         return TC_ACT_SHOT;
0326 
0327     if (skb_proto(skb_type) == IPPROTO_UDP)
0328         expected_dtime = 0;
0329 
0330     if (skb->tstamp_type) {
0331         if (fwdns_clear_dtime() ||
0332             skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
0333             skb->tstamp != expected_dtime)
0334             inc_errs(INGRESS_FWDNS_P101);
0335         else
0336             inc_dtimes(INGRESS_FWDNS_P101);
0337     } else {
0338         if (!fwdns_clear_dtime() && expected_dtime)
0339             inc_errs(INGRESS_FWDNS_P101);
0340     }
0341 
0342     if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
0343         skb->tstamp = INGRESS_FWDNS_MAGIC;
0344     } else {
0345         if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
0346                        BPF_SKB_TSTAMP_DELIVERY_MONO))
0347             inc_errs(SET_DTIME);
0348         if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
0349                     BPF_SKB_TSTAMP_UNSPEC))
0350             inc_errs(SET_DTIME);
0351     }
0352 
0353     if (skb_ns(skb_type) == SRC_NS)
0354         return bpf_fwd() ?
0355             bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0) : TC_ACT_OK;
0356     else
0357         return bpf_fwd() ?
0358             bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0) : TC_ACT_OK;
0359 }
0360 
0361 /* egress@veth_(src|dst)_fwd@ns_fwd priority 101 */
0362 SEC("tc")
0363 int egress_fwdns_prio101(struct __sk_buff *skb)
0364 {
0365     int skb_type;
0366 
0367     skb_type = skb_get_type(skb);
0368     if (skb_type == -1 || !skb_type)
0369         /* Should have handled in prio100 */
0370         return TC_ACT_SHOT;
0371 
0372     if (skb->tstamp_type) {
0373         if (fwdns_clear_dtime() ||
0374             skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
0375             skb->tstamp != INGRESS_FWDNS_MAGIC)
0376             inc_errs(EGRESS_FWDNS_P101);
0377         else
0378             inc_dtimes(EGRESS_FWDNS_P101);
0379     } else {
0380         if (!fwdns_clear_dtime())
0381             inc_errs(EGRESS_FWDNS_P101);
0382     }
0383 
0384     if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
0385         skb->tstamp = EGRESS_FWDNS_MAGIC;
0386     } else {
0387         if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC,
0388                        BPF_SKB_TSTAMP_DELIVERY_MONO))
0389             inc_errs(SET_DTIME);
0390         if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
0391                     BPF_SKB_TSTAMP_UNSPEC))
0392             inc_errs(SET_DTIME);
0393     }
0394 
0395     return TC_ACT_OK;
0396 }
0397 
/* License string emitted into the "license" section of the object. */
char __license[] SEC("license") = "GPL";