Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /* Copyright (c) 2019 Facebook */
0003 
0004 #define _GNU_SOURCE
0005 #include <netinet/in.h>
0006 #include <arpa/inet.h>
0007 #include <unistd.h>
0008 #include <sched.h>
0009 #include <stdlib.h>
0010 #include <string.h>
0011 #include <errno.h>
0012 
0013 #include <bpf/bpf.h>
0014 #include <bpf/libbpf.h>
0015 #include <linux/compiler.h>
0016 
0017 #include "network_helpers.h"
0018 #include "cgroup_helpers.h"
0019 #include "test_progs.h"
0020 #include "test_sock_fields.skel.h"
0021 
/*
 * Keys used by this test when reading the map behind linum_map_fd.
 * Presumably each slot holds a source line number reported by one of the
 * BPF programs -- confirm against the BPF side.
 */
enum bpf_linum_array_idx {
    EGRESS_LINUM_IDX = 0,
    INGRESS_LINUM_IDX,
    READ_SK_DST_PORT_LINUM_IDX,
    __NR_BPF_LINUM_ARRAY_IDX,    /* number of slots; keep last */
};
0028 
/*
 * Value type read from and written to the sk_pkt_out_cnt and
 * sk_pkt_out_cnt10 maps by this test.
 * NOTE(review): the layout presumably has to match the value type declared
 * by the BPF program -- confirm before changing it.
 */
struct bpf_spinlock_cnt {
    struct bpf_spin_lock lock;
    __u32 cnt;    /* packet counter; the test seeds it with 0xeB9F */
};
0033 
/* cgroup paths joined by the test; the child lives below the parent. */
#define PARENT_CGROUP   "/test-bpf-sock-fields"
#define CHILD_CGROUP    PARENT_CGROUP "/child"

/* Payload sent per round; DATA_LEN includes the terminating NUL. */
#define DATA            "Hello BPF!"
#define DATA_LEN        (sizeof(DATA))
0038 
/* Local addresses of the listening and the connecting socket. */
static struct sockaddr_in6 srv_sa6, cli_sa6;

/* Loaded skeleton and the fds of the maps this test inspects. */
static struct test_sock_fields *skel;
static int linum_map_fd;
static int sk_pkt_out_cnt_fd;
static int sk_pkt_out_cnt10_fd;

/* cgroup ids compared against the ids stored in the skeleton's bss. */
static __u64 parent_cg_id;
static __u64 child_cg_id;

/* Not referenced directly here; presumably consumed by the CHECK() macro. */
static __u32 duration;
0047 
/*
 * Move the calling process into a new network namespace and bring its
 * loopback device up.
 *
 * Returns true when both steps succeeded, false otherwise (the failing
 * step has already been reported through ASSERT_OK()).
 */
static bool create_netns(void)
{
    bool ok;

    ok = ASSERT_OK(unshare(CLONE_NEWNET), "create netns");
    if (ok)
        ok = ASSERT_OK(system("ip link set dev lo up"), "bring up lo");

    return ok;
}
0058 
0059 static void print_sk(const struct bpf_sock *sk, const char *prefix)
0060 {
0061     char src_ip4[24], dst_ip4[24];
0062     char src_ip6[64], dst_ip6[64];
0063 
0064     inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
0065     inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
0066     inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
0067     inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
0068 
0069     printf("%s: state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
0070            "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
0071            "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
0072            prefix,
0073            sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
0074            sk->mark, sk->priority,
0075            sk->src_ip4, src_ip4,
0076            sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
0077            src_ip6, sk->src_port,
0078            sk->dst_ip4, dst_ip4,
0079            sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
0080            dst_ip6, ntohs(sk->dst_port));
0081 }
0082 
0083 static void print_tp(const struct bpf_tcp_sock *tp, const char *prefix)
0084 {
0085     printf("%s: snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
0086            "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
0087            "rate_delivered:%u rate_interval_us:%u packets_out:%u "
0088            "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
0089            "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
0090            "bytes_received:%llu bytes_acked:%llu\n",
0091            prefix,
0092            tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
0093            tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
0094            tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
0095            tp->packets_out, tp->retrans_out, tp->total_retrans,
0096            tp->segs_in, tp->data_segs_in, tp->segs_out,
0097            tp->data_segs_out, tp->lost_out, tp->sacked_out,
0098            tp->bytes_received, tp->bytes_acked);
0099 }
0100 
0101 static void check_result(void)
0102 {
0103     struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
0104     struct bpf_sock srv_sk, cli_sk, listen_sk;
0105     __u32 idx, ingress_linum, egress_linum, linum;
0106     int err;
0107 
0108     idx = EGRESS_LINUM_IDX;
0109     err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum);
0110     CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
0111           "err:%d errno:%d\n", err, errno);
0112 
0113     idx = INGRESS_LINUM_IDX;
0114     err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum);
0115     CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
0116           "err:%d errno:%d\n", err, errno);
0117 
0118     idx = READ_SK_DST_PORT_LINUM_IDX;
0119     err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum);
0120     ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)");
0121     ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line");
0122 
0123     memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
0124     memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
0125     memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
0126     memcpy(&cli_tp, &skel->bss->cli_tp, sizeof(cli_tp));
0127     memcpy(&listen_sk, &skel->bss->listen_sk, sizeof(listen_sk));
0128     memcpy(&listen_tp, &skel->bss->listen_tp, sizeof(listen_tp));
0129 
0130     print_sk(&listen_sk, "listen_sk");
0131     print_sk(&srv_sk, "srv_sk");
0132     print_sk(&cli_sk, "cli_sk");
0133     print_tp(&listen_tp, "listen_tp");
0134     print_tp(&srv_tp, "srv_tp");
0135     print_tp(&cli_tp, "cli_tp");
0136 
0137     CHECK(listen_sk.state != 10 ||
0138           listen_sk.family != AF_INET6 ||
0139           listen_sk.protocol != IPPROTO_TCP ||
0140           memcmp(listen_sk.src_ip6, &in6addr_loopback,
0141              sizeof(listen_sk.src_ip6)) ||
0142           listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
0143           listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
0144           listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
0145           listen_sk.dst_port,
0146           "listen_sk",
0147           "Unexpected. Check listen_sk output. ingress_linum:%u\n",
0148           ingress_linum);
0149 
0150     CHECK(srv_sk.state == 10 ||
0151           !srv_sk.state ||
0152           srv_sk.family != AF_INET6 ||
0153           srv_sk.protocol != IPPROTO_TCP ||
0154           memcmp(srv_sk.src_ip6, &in6addr_loopback,
0155              sizeof(srv_sk.src_ip6)) ||
0156           memcmp(srv_sk.dst_ip6, &in6addr_loopback,
0157              sizeof(srv_sk.dst_ip6)) ||
0158           srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
0159           srv_sk.dst_port != cli_sa6.sin6_port,
0160           "srv_sk", "Unexpected. Check srv_sk output. egress_linum:%u\n",
0161           egress_linum);
0162 
0163     CHECK(!skel->bss->lsndtime, "srv_tp", "Unexpected lsndtime:0\n");
0164 
0165     CHECK(cli_sk.state == 10 ||
0166           !cli_sk.state ||
0167           cli_sk.family != AF_INET6 ||
0168           cli_sk.protocol != IPPROTO_TCP ||
0169           memcmp(cli_sk.src_ip6, &in6addr_loopback,
0170              sizeof(cli_sk.src_ip6)) ||
0171           memcmp(cli_sk.dst_ip6, &in6addr_loopback,
0172              sizeof(cli_sk.dst_ip6)) ||
0173           cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
0174           cli_sk.dst_port != srv_sa6.sin6_port,
0175           "cli_sk", "Unexpected. Check cli_sk output. egress_linum:%u\n",
0176           egress_linum);
0177 
0178     CHECK(listen_tp.data_segs_out ||
0179           listen_tp.data_segs_in ||
0180           listen_tp.total_retrans ||
0181           listen_tp.bytes_acked,
0182           "listen_tp",
0183           "Unexpected. Check listen_tp output. ingress_linum:%u\n",
0184           ingress_linum);
0185 
0186     CHECK(srv_tp.data_segs_out != 2 ||
0187           srv_tp.data_segs_in ||
0188           srv_tp.snd_cwnd != 10 ||
0189           srv_tp.total_retrans ||
0190           srv_tp.bytes_acked < 2 * DATA_LEN,
0191           "srv_tp", "Unexpected. Check srv_tp output. egress_linum:%u\n",
0192           egress_linum);
0193 
0194     CHECK(cli_tp.data_segs_out ||
0195           cli_tp.data_segs_in != 2 ||
0196           cli_tp.snd_cwnd != 10 ||
0197           cli_tp.total_retrans ||
0198           cli_tp.bytes_received < 2 * DATA_LEN,
0199           "cli_tp", "Unexpected. Check cli_tp output. egress_linum:%u\n",
0200           egress_linum);
0201 
0202     CHECK(skel->bss->parent_cg_id != parent_cg_id,
0203           "parent_cg_id", "%zu != %zu\n",
0204           (size_t)skel->bss->parent_cg_id, (size_t)parent_cg_id);
0205 
0206     CHECK(skel->bss->child_cg_id != child_cg_id,
0207           "child_cg_id", "%zu != %zu\n",
0208            (size_t)skel->bss->child_cg_id, (size_t)child_cg_id);
0209 }
0210 
0211 static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
0212 {
0213     struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {};
0214     int err;
0215 
0216     pkt_out_cnt.cnt = ~0;
0217     pkt_out_cnt10.cnt = ~0;
0218     err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt);
0219     if (!err)
0220         err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd,
0221                       &pkt_out_cnt10);
0222 
0223     /* The bpf prog only counts for fullsock and
0224      * passive connection did not become fullsock until 3WHS
0225      * had been finished, so the bpf prog only counted two data
0226      * packet out.
0227      */
0228     CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 2 ||
0229           pkt_out_cnt10.cnt < 0xeB9F + 20,
0230           "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)",
0231           "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
0232           err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
0233 
0234     pkt_out_cnt.cnt = ~0;
0235     pkt_out_cnt10.cnt = ~0;
0236     err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt);
0237     if (!err)
0238         err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd,
0239                       &pkt_out_cnt10);
0240     /* Active connection is fullsock from the beginning.
0241      * 1 SYN and 1 ACK during 3WHS
0242      * 2 Acks on data packet.
0243      *
0244      * The bpf_prog initialized it to 0xeB9F.
0245      */
0246     CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 4 ||
0247           pkt_out_cnt10.cnt < 0xeB9F + 40,
0248           "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)",
0249           "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
0250           err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
0251 }
0252 
0253 static int init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
0254 {
0255     struct bpf_spinlock_cnt scnt = {};
0256     int err;
0257 
0258     scnt.cnt = pkt_out_cnt;
0259     err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt,
0260                   BPF_NOEXIST);
0261     if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
0262           "err:%d errno:%d\n", err, errno))
0263         return err;
0264 
0265     err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt,
0266                   BPF_NOEXIST);
0267     if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
0268           "err:%d errno:%d\n", err, errno))
0269         return err;
0270 
0271     return 0;
0272 }
0273 
0274 static void test(void)
0275 {
0276     int listen_fd = -1, cli_fd = -1, accept_fd = -1, err, i;
0277     socklen_t addrlen = sizeof(struct sockaddr_in6);
0278     char buf[DATA_LEN];
0279 
0280     /* Prepare listen_fd */
0281     listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0);
0282     /* start_server() has logged the error details */
0283     if (CHECK_FAIL(listen_fd == -1))
0284         goto done;
0285 
0286     err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
0287     if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
0288           errno))
0289         goto done;
0290     memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
0291 
0292     cli_fd = connect_to_fd(listen_fd, 0);
0293     if (CHECK_FAIL(cli_fd == -1))
0294         goto done;
0295 
0296     err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
0297     if (CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d\n",
0298           err, errno))
0299         goto done;
0300 
0301     accept_fd = accept(listen_fd, NULL, NULL);
0302     if (CHECK(accept_fd == -1, "accept(listen_fd)",
0303           "accept_fd:%d errno:%d\n",
0304           accept_fd, errno))
0305         goto done;
0306 
0307     if (init_sk_storage(accept_fd, 0xeB9F))
0308         goto done;
0309 
0310     for (i = 0; i < 2; i++) {
0311         /* Send some data from accept_fd to cli_fd.
0312          * MSG_EOR to stop kernel from coalescing two pkts.
0313          */
0314         err = send(accept_fd, DATA, DATA_LEN, MSG_EOR);
0315         if (CHECK(err != DATA_LEN, "send(accept_fd)",
0316               "err:%d errno:%d\n", err, errno))
0317             goto done;
0318 
0319         err = recv(cli_fd, buf, DATA_LEN, 0);
0320         if (CHECK(err != DATA_LEN, "recv(cli_fd)", "err:%d errno:%d\n",
0321               err, errno))
0322             goto done;
0323     }
0324 
0325     shutdown(cli_fd, SHUT_WR);
0326     err = recv(accept_fd, buf, 1, 0);
0327     if (CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n",
0328           err, errno))
0329         goto done;
0330     shutdown(accept_fd, SHUT_WR);
0331     err = recv(cli_fd, buf, 1, 0);
0332     if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n",
0333           err, errno))
0334         goto done;
0335     check_sk_pkt_out_cnt(accept_fd, cli_fd);
0336     check_result();
0337 
0338 done:
0339     if (accept_fd != -1)
0340         close(accept_fd);
0341     if (cli_fd != -1)
0342         close(cli_fd);
0343     if (listen_fd != -1)
0344         close(listen_fd);
0345 }
0346 
0347 void serial_test_sock_fields(void)
0348 {
0349     int parent_cg_fd = -1, child_cg_fd = -1;
0350     struct bpf_link *link;
0351 
0352     /* Use a dedicated netns to have a fixed listen port */
0353     if (!create_netns())
0354         return;
0355 
0356     /* Create a cgroup, get fd, and join it */
0357     parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
0358     if (CHECK_FAIL(parent_cg_fd < 0))
0359         return;
0360     parent_cg_id = get_cgroup_id(PARENT_CGROUP);
0361     if (CHECK_FAIL(!parent_cg_id))
0362         goto done;
0363 
0364     child_cg_fd = test__join_cgroup(CHILD_CGROUP);
0365     if (CHECK_FAIL(child_cg_fd < 0))
0366         goto done;
0367     child_cg_id = get_cgroup_id(CHILD_CGROUP);
0368     if (CHECK_FAIL(!child_cg_id))
0369         goto done;
0370 
0371     skel = test_sock_fields__open_and_load();
0372     if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
0373         goto done;
0374 
0375     link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd);
0376     if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)"))
0377         goto done;
0378     skel->links.egress_read_sock_fields = link;
0379 
0380     link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd);
0381     if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)"))
0382         goto done;
0383     skel->links.ingress_read_sock_fields = link;
0384 
0385     link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd);
0386     if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port"))
0387         goto done;
0388     skel->links.read_sk_dst_port = link;
0389 
0390     linum_map_fd = bpf_map__fd(skel->maps.linum_map);
0391     sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
0392     sk_pkt_out_cnt10_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt10);
0393 
0394     test();
0395 
0396 done:
0397     test_sock_fields__destroy(skel);
0398     if (child_cg_fd >= 0)
0399         close(child_cg_fd);
0400     if (parent_cg_fd >= 0)
0401         close(parent_cg_fd);
0402 }