Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /* Copyright (c) 2019 Facebook */
0003 
0004 #include <linux/bpf.h>
0005 #include <netinet/in.h>
0006 #include <stdbool.h>
0007 
0008 #include <bpf/bpf_helpers.h>
0009 #include <bpf/bpf_endian.h>
0010 #include "bpf_tcp_helpers.h"
0011 
/* Index of each program's slot in linum_map, plus the total slot count. */
enum bpf_linum_array_idx {
	EGRESS_LINUM_IDX = 0,		/* slot used by egress_read_sock_fields() */
	INGRESS_LINUM_IDX,		/* slot used by ingress_read_sock_fields() */
	READ_SK_DST_PORT_LINUM_IDX,	/* slot used by read_sk_dst_port() */
	__NR_BPF_LINUM_ARRAY_IDX,	/* number of slots; sizes linum_map */
};
0018 
0019 struct {
0020     __uint(type, BPF_MAP_TYPE_ARRAY);
0021     __uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
0022     __type(key, __u32);
0023     __type(value, __u32);
0024 } linum_map SEC(".maps");
0025 
0026 struct bpf_spinlock_cnt {
0027     struct bpf_spin_lock lock;
0028     __u32 cnt;
0029 };
0030 
0031 struct {
0032     __uint(type, BPF_MAP_TYPE_SK_STORAGE);
0033     __uint(map_flags, BPF_F_NO_PREALLOC);
0034     __type(key, int);
0035     __type(value, struct bpf_spinlock_cnt);
0036 } sk_pkt_out_cnt SEC(".maps");
0037 
0038 struct {
0039     __uint(type, BPF_MAP_TYPE_SK_STORAGE);
0040     __uint(map_flags, BPF_F_NO_PREALLOC);
0041     __type(key, int);
0042     __type(value, struct bpf_spinlock_cnt);
0043 } sk_pkt_out_cnt10 SEC(".maps");
0044 
0045 struct bpf_tcp_sock listen_tp = {};
0046 struct sockaddr_in6 srv_sa6 = {};
0047 struct bpf_tcp_sock cli_tp = {};
0048 struct bpf_tcp_sock srv_tp = {};
0049 struct bpf_sock listen_sk = {};
0050 struct bpf_sock srv_sk = {};
0051 struct bpf_sock cli_sk = {};
0052 __u64 parent_cg_id = 0;
0053 __u64 child_cg_id = 0;
0054 __u64 lsndtime = 0;
0055 
0056 static bool is_loopback6(__u32 *a6)
0057 {
0058     return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
0059 }
0060 
0061 static void skcpy(struct bpf_sock *dst,
0062           const struct bpf_sock *src)
0063 {
0064     dst->bound_dev_if = src->bound_dev_if;
0065     dst->family = src->family;
0066     dst->type = src->type;
0067     dst->protocol = src->protocol;
0068     dst->mark = src->mark;
0069     dst->priority = src->priority;
0070     dst->src_ip4 = src->src_ip4;
0071     dst->src_ip6[0] = src->src_ip6[0];
0072     dst->src_ip6[1] = src->src_ip6[1];
0073     dst->src_ip6[2] = src->src_ip6[2];
0074     dst->src_ip6[3] = src->src_ip6[3];
0075     dst->src_port = src->src_port;
0076     dst->dst_ip4 = src->dst_ip4;
0077     dst->dst_ip6[0] = src->dst_ip6[0];
0078     dst->dst_ip6[1] = src->dst_ip6[1];
0079     dst->dst_ip6[2] = src->dst_ip6[2];
0080     dst->dst_ip6[3] = src->dst_ip6[3];
0081     dst->dst_port = src->dst_port;
0082     dst->state = src->state;
0083 }
0084 
0085 static void tpcpy(struct bpf_tcp_sock *dst,
0086           const struct bpf_tcp_sock *src)
0087 {
0088     dst->snd_cwnd = src->snd_cwnd;
0089     dst->srtt_us = src->srtt_us;
0090     dst->rtt_min = src->rtt_min;
0091     dst->snd_ssthresh = src->snd_ssthresh;
0092     dst->rcv_nxt = src->rcv_nxt;
0093     dst->snd_nxt = src->snd_nxt;
0094     dst->snd_una = src->snd_una;
0095     dst->mss_cache = src->mss_cache;
0096     dst->ecn_flags = src->ecn_flags;
0097     dst->rate_delivered = src->rate_delivered;
0098     dst->rate_interval_us = src->rate_interval_us;
0099     dst->packets_out = src->packets_out;
0100     dst->retrans_out = src->retrans_out;
0101     dst->total_retrans = src->total_retrans;
0102     dst->segs_in = src->segs_in;
0103     dst->data_segs_in = src->data_segs_in;
0104     dst->segs_out = src->segs_out;
0105     dst->data_segs_out = src->data_segs_out;
0106     dst->lost_out = src->lost_out;
0107     dst->sacked_out = src->sacked_out;
0108     dst->bytes_received = src->bytes_received;
0109     dst->bytes_acked = src->bytes_acked;
0110 }
0111 
/* Verdict returned on every path, so that no packet is ever filtered out. */
#define CG_OK 1

/*
 * Store the line number of the call site into linum_map[linum_idx] and
 * return CG_OK from the calling function.  The caller must have __u32
 * locals named `linum` and `linum_idx` in scope.  Use as a statement:
 * "RET_LOG();".
 */
#define RET_LOG() do {							\
	linum = __LINE__;						\
	bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY);	\
	return CG_OK;							\
} while (0)
0120 
0121 SEC("cgroup_skb/egress")
0122 int egress_read_sock_fields(struct __sk_buff *skb)
0123 {
0124     struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F };
0125     struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
0126     struct bpf_tcp_sock *tp, *tp_ret;
0127     struct bpf_sock *sk, *sk_ret;
0128     __u32 linum, linum_idx;
0129     struct tcp_sock *ktp;
0130 
0131     linum_idx = EGRESS_LINUM_IDX;
0132 
0133     sk = skb->sk;
0134     if (!sk)
0135         RET_LOG();
0136 
0137     /* Not testing the egress traffic or the listening socket,
0138      * which are covered by the cgroup_skb/ingress test program.
0139      */
0140     if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
0141         sk->state == BPF_TCP_LISTEN)
0142         return CG_OK;
0143 
0144     if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
0145         /* Server socket */
0146         sk_ret = &srv_sk;
0147         tp_ret = &srv_tp;
0148     } else if (sk->dst_port == srv_sa6.sin6_port) {
0149         /* Client socket */
0150         sk_ret = &cli_sk;
0151         tp_ret = &cli_tp;
0152     } else {
0153         /* Not the testing egress traffic */
0154         return CG_OK;
0155     }
0156 
0157     /* It must be a fullsock for cgroup_skb/egress prog */
0158     sk = bpf_sk_fullsock(sk);
0159     if (!sk)
0160         RET_LOG();
0161 
0162     /* Not the testing egress traffic */
0163     if (sk->protocol != IPPROTO_TCP)
0164         return CG_OK;
0165 
0166     tp = bpf_tcp_sock(sk);
0167     if (!tp)
0168         RET_LOG();
0169 
0170     skcpy(sk_ret, sk);
0171     tpcpy(tp_ret, tp);
0172 
0173     if (sk_ret == &srv_sk) {
0174         ktp = bpf_skc_to_tcp_sock(sk);
0175 
0176         if (!ktp)
0177             RET_LOG();
0178 
0179         lsndtime = ktp->lsndtime;
0180 
0181         child_cg_id = bpf_sk_cgroup_id(ktp);
0182         if (!child_cg_id)
0183             RET_LOG();
0184 
0185         parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2);
0186         if (!parent_cg_id)
0187             RET_LOG();
0188 
0189         /* The userspace has created it for srv sk */
0190         pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0);
0191         pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp,
0192                            0, 0);
0193     } else {
0194         pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
0195                          &cli_cnt_init,
0196                          BPF_SK_STORAGE_GET_F_CREATE);
0197         pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10,
0198                            sk, &cli_cnt_init,
0199                            BPF_SK_STORAGE_GET_F_CREATE);
0200     }
0201 
0202     if (!pkt_out_cnt || !pkt_out_cnt10)
0203         RET_LOG();
0204 
0205     /* Even both cnt and cnt10 have lock defined in their BTF,
0206      * intentionally one cnt takes lock while one does not
0207      * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE.
0208      */
0209     pkt_out_cnt->cnt += 1;
0210     bpf_spin_lock(&pkt_out_cnt10->lock);
0211     pkt_out_cnt10->cnt += 10;
0212     bpf_spin_unlock(&pkt_out_cnt10->lock);
0213 
0214     return CG_OK;
0215 }
0216 
0217 SEC("cgroup_skb/ingress")
0218 int ingress_read_sock_fields(struct __sk_buff *skb)
0219 {
0220     struct bpf_tcp_sock *tp;
0221     __u32 linum, linum_idx;
0222     struct bpf_sock *sk;
0223 
0224     linum_idx = INGRESS_LINUM_IDX;
0225 
0226     sk = skb->sk;
0227     if (!sk)
0228         RET_LOG();
0229 
0230     /* Not the testing ingress traffic to the server */
0231     if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
0232         sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
0233         return CG_OK;
0234 
0235     /* Only interested in the listening socket */
0236     if (sk->state != BPF_TCP_LISTEN)
0237         return CG_OK;
0238 
0239     /* It must be a fullsock for cgroup_skb/ingress prog */
0240     sk = bpf_sk_fullsock(sk);
0241     if (!sk)
0242         RET_LOG();
0243 
0244     tp = bpf_tcp_sock(sk);
0245     if (!tp)
0246         RET_LOG();
0247 
0248     skcpy(&listen_sk, sk);
0249     tpcpy(&listen_tp, tp);
0250 
0251     return CG_OK;
0252 }
0253 
0254 /*
0255  * NOTE: 4-byte load from bpf_sock at dst_port offset is quirky. It
0256  * gets rewritten by the access converter to a 2-byte load for
0257  * backward compatibility. Treating the load result as a be16 value
0258  * makes the code portable across little- and big-endian platforms.
0259  */
0260 static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
0261 {
0262     __u32 *word = (__u32 *)&sk->dst_port;
0263     return word[0] == bpf_htons(0xcafe);
0264 }
0265 
0266 static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
0267 {
0268     __u16 *half = (__u16 *)&sk->dst_port;
0269     return half[0] == bpf_htons(0xcafe);
0270 }
0271 
0272 static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk)
0273 {
0274     __u8 *byte = (__u8 *)&sk->dst_port;
0275     return byte[0] == 0xca && byte[1] == 0xfe;
0276 }
0277 
0278 SEC("cgroup_skb/egress")
0279 int read_sk_dst_port(struct __sk_buff *skb)
0280 {
0281     __u32 linum, linum_idx;
0282     struct bpf_sock *sk;
0283 
0284     linum_idx = READ_SK_DST_PORT_LINUM_IDX;
0285 
0286     sk = skb->sk;
0287     if (!sk)
0288         RET_LOG();
0289 
0290     /* Ignore everything but the SYN from the client socket */
0291     if (sk->state != BPF_TCP_SYN_SENT)
0292         return CG_OK;
0293 
0294     if (!sk_dst_port__load_word(sk))
0295         RET_LOG();
0296     if (!sk_dst_port__load_half(sk))
0297         RET_LOG();
0298     if (!sk_dst_port__load_byte(sk))
0299         RET_LOG();
0300 
0301     return CG_OK;
0302 }
0303 
0304 char _license[] SEC("license") = "GPL";