Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Check if we can migrate child sockets.
0004  *
0005  *   1. call listen() for 4 server sockets.
0006  *   2. call connect() for 25 client sockets.
0007  *   3. call listen() for 1 server socket. (migration target)
0008  *   4. update a map to migrate all child sockets
0009  *        to the last server socket (migrate_map[cookie] = 4)
0010  *   5. call shutdown() for first 4 server sockets
0011  *        and migrate the requests in the accept queue
0012  *        to the last server socket.
0013  *   6. call listen() for the second server socket.
0014  *   7. call shutdown() for the last server
0015  *        and migrate the requests in the accept queue
0016  *        to the second server socket.
0017  *   8. call listen() for the last server.
0018  *   9. call shutdown() for the second server
0019  *        and migrate the requests in the accept queue
0020  *        to the last server socket.
0021  *  10. call accept() for the last server socket.
0022  *
0023  * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
0024  */
0025 
0026 #include <bpf/bpf.h>
0027 #include <bpf/libbpf.h>
0028 
0029 #include "test_progs.h"
0030 #include "test_migrate_reuseport.skel.h"
0031 #include "network_helpers.h"
0032 
/* Fallback definition for toolchains whose headers do not provide
 * TCP_FASTOPEN_CONNECT.
 */
#ifndef TCP_FASTOPEN_CONNECT
#define TCP_FASTOPEN_CONNECT 30
#endif

/* Interface index the drop_ack XDP program is attached to
 * (presumably the loopback device - confirm for the test environment).
 */
#define IFINDEX_LO 1

/* NR_SERVERS:  number of listening sockets sharing one address.
 * NR_CLIENTS:  number of client connections created per test case.
 * MIGRATED_TO: index of the last listener, the migration target.
 */
#define NR_SERVERS 5
#define NR_CLIENTS (NR_SERVERS * 5)
#define MIGRATED_TO (NR_SERVERS - 1)

/* fastopenq->max_qlen and sk->sk_max_ack_backlog */
#define QLEN (NR_CLIENTS * 5)

/* Payload each client sends; MSGLEN covers the 11 characters plus the
 * explicit trailing NUL, so the receiver gets a terminated string.
 */
#define MSG "Hello World\0"
#define MSGLEN 12
0048 
0049 static struct migrate_reuseport_test_case {
0050     const char *name;
0051     __s64 servers[NR_SERVERS];
0052     __s64 clients[NR_CLIENTS];
0053     struct sockaddr_storage addr;
0054     socklen_t addrlen;
0055     int family;
0056     int state;
0057     bool drop_ack;
0058     bool expire_synack_timer;
0059     bool fastopen;
0060     struct bpf_link *link;
0061 } test_cases[] = {
0062     {
0063         .name = "IPv4 TCP_ESTABLISHED  inet_csk_listen_stop",
0064         .family = AF_INET,
0065         .state = BPF_TCP_ESTABLISHED,
0066         .drop_ack = false,
0067         .expire_synack_timer = false,
0068         .fastopen = false,
0069     },
0070     {
0071         .name = "IPv4 TCP_SYN_RECV     inet_csk_listen_stop",
0072         .family = AF_INET,
0073         .state = BPF_TCP_SYN_RECV,
0074         .drop_ack = true,
0075         .expire_synack_timer = false,
0076         .fastopen = true,
0077     },
0078     {
0079         .name = "IPv4 TCP_NEW_SYN_RECV reqsk_timer_handler",
0080         .family = AF_INET,
0081         .state = BPF_TCP_NEW_SYN_RECV,
0082         .drop_ack = true,
0083         .expire_synack_timer = true,
0084         .fastopen = false,
0085     },
0086     {
0087         .name = "IPv4 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
0088         .family = AF_INET,
0089         .state = BPF_TCP_NEW_SYN_RECV,
0090         .drop_ack = true,
0091         .expire_synack_timer = false,
0092         .fastopen = false,
0093     },
0094     {
0095         .name = "IPv6 TCP_ESTABLISHED  inet_csk_listen_stop",
0096         .family = AF_INET6,
0097         .state = BPF_TCP_ESTABLISHED,
0098         .drop_ack = false,
0099         .expire_synack_timer = false,
0100         .fastopen = false,
0101     },
0102     {
0103         .name = "IPv6 TCP_SYN_RECV     inet_csk_listen_stop",
0104         .family = AF_INET6,
0105         .state = BPF_TCP_SYN_RECV,
0106         .drop_ack = true,
0107         .expire_synack_timer = false,
0108         .fastopen = true,
0109     },
0110     {
0111         .name = "IPv6 TCP_NEW_SYN_RECV reqsk_timer_handler",
0112         .family = AF_INET6,
0113         .state = BPF_TCP_NEW_SYN_RECV,
0114         .drop_ack = true,
0115         .expire_synack_timer = true,
0116         .fastopen = false,
0117     },
0118     {
0119         .name = "IPv6 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
0120         .family = AF_INET6,
0121         .state = BPF_TCP_NEW_SYN_RECV,
0122         .drop_ack = true,
0123         .expire_synack_timer = false,
0124         .fastopen = false,
0125     }
0126 };
0127 
/* Mark all @len slots of @fds as "no descriptor" by storing -1. */
static void init_fds(__s64 fds[], int len)
{
    int idx = 0;

    while (idx < len)
        fds[idx++] = -1;
}
0135 
/* Close every open descriptor in @fds (@len slots) and reset its slot
 * to -1; slots that already hold -1 are left alone.
 */
static void close_fds(__s64 fds[], int len)
{
    int idx = 0;

    while (idx < len) {
        __s64 *slot = &fds[idx++];

        if (*slot == -1)
            continue;

        close(*slot);
        *slot = -1;
    }
}
0147 
/* Save-and-override, or restore, the net.ipv4.tcp_fastopen sysctl.
 *
 * @buf/@size:  buffer that receives (setup) or supplies (restore) the
 *              textual sysctl value.
 * @saved_len:  out: number of bytes read during setup;
 *              in: number of bytes to write back during restore.
 * @restore:    false to save the current value and write "519",
 *              true to write the saved value back.
 *
 * Returns 0 on success and a non-zero value on failure.
 */
static int setup_fastopen(char *buf, int size, int *saved_len, bool restore)
{
    int ret = 0, nbytes, sysctl_fd;

    sysctl_fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
    if (!ASSERT_NEQ(sysctl_fd, -1, "open"))
        return -1;

    if (restore) {
        nbytes = write(sysctl_fd, buf, *saved_len);
        if (!ASSERT_EQ(nbytes, *saved_len, "write - restore"))
            ret = -1;

        goto out;
    }

    /* Remember the current setting so it can be restored later. */
    *saved_len = read(sysctl_fd, buf, size);
    if (!ASSERT_GE(*saved_len, 1, "read")) {
        ret = -1;
        goto out;
    }

    /* Rewind before overwriting the value we have just read. */
    ret = lseek(sysctl_fd, 0, SEEK_SET);
    if (!ASSERT_OK(ret, "lseek"))
        goto out;

    /* (TFO_CLIENT_ENABLE | TFO_SERVER_ENABLE |
     *  TFO_CLIENT_NO_COOKIE | TFO_SERVER_COOKIE_NOT_REQD)
     */
    nbytes = write(sysctl_fd, "519", 3);
    if (!ASSERT_EQ(nbytes, 3, "write - setup"))
        ret = -1;

out:
    close(sysctl_fd);

    return ret;
}
0184 
0185 static int drop_ack(struct migrate_reuseport_test_case *test_case,
0186             struct test_migrate_reuseport *skel)
0187 {
0188     if (test_case->family == AF_INET)
0189         skel->bss->server_port = ((struct sockaddr_in *)
0190                       &test_case->addr)->sin_port;
0191     else
0192         skel->bss->server_port = ((struct sockaddr_in6 *)
0193                       &test_case->addr)->sin6_port;
0194 
0195     test_case->link = bpf_program__attach_xdp(skel->progs.drop_ack,
0196                           IFINDEX_LO);
0197     if (!ASSERT_OK_PTR(test_case->link, "bpf_program__attach_xdp"))
0198         return -1;
0199 
0200     return 0;
0201 }
0202 
0203 static int pass_ack(struct migrate_reuseport_test_case *test_case)
0204 {
0205     int err;
0206 
0207     err = bpf_link__destroy(test_case->link);
0208     if (!ASSERT_OK(err, "bpf_link__destroy"))
0209         return -1;
0210 
0211     test_case->link = NULL;
0212 
0213     return 0;
0214 }
0215 
0216 static int start_servers(struct migrate_reuseport_test_case *test_case,
0217              struct test_migrate_reuseport *skel)
0218 {
0219     int i, err, prog_fd, reuseport = 1, qlen = QLEN;
0220 
0221     prog_fd = bpf_program__fd(skel->progs.migrate_reuseport);
0222 
0223     make_sockaddr(test_case->family,
0224               test_case->family == AF_INET ? "127.0.0.1" : "::1", 0,
0225               &test_case->addr, &test_case->addrlen);
0226 
0227     for (i = 0; i < NR_SERVERS; i++) {
0228         test_case->servers[i] = socket(test_case->family, SOCK_STREAM,
0229                            IPPROTO_TCP);
0230         if (!ASSERT_NEQ(test_case->servers[i], -1, "socket"))
0231             return -1;
0232 
0233         err = setsockopt(test_case->servers[i], SOL_SOCKET,
0234                  SO_REUSEPORT, &reuseport, sizeof(reuseport));
0235         if (!ASSERT_OK(err, "setsockopt - SO_REUSEPORT"))
0236             return -1;
0237 
0238         err = bind(test_case->servers[i],
0239                (struct sockaddr *)&test_case->addr,
0240                test_case->addrlen);
0241         if (!ASSERT_OK(err, "bind"))
0242             return -1;
0243 
0244         if (i == 0) {
0245             err = setsockopt(test_case->servers[i], SOL_SOCKET,
0246                      SO_ATTACH_REUSEPORT_EBPF,
0247                      &prog_fd, sizeof(prog_fd));
0248             if (!ASSERT_OK(err,
0249                        "setsockopt - SO_ATTACH_REUSEPORT_EBPF"))
0250                 return -1;
0251 
0252             err = getsockname(test_case->servers[i],
0253                       (struct sockaddr *)&test_case->addr,
0254                       &test_case->addrlen);
0255             if (!ASSERT_OK(err, "getsockname"))
0256                 return -1;
0257         }
0258 
0259         if (test_case->fastopen) {
0260             err = setsockopt(test_case->servers[i],
0261                      SOL_TCP, TCP_FASTOPEN,
0262                      &qlen, sizeof(qlen));
0263             if (!ASSERT_OK(err, "setsockopt - TCP_FASTOPEN"))
0264                 return -1;
0265         }
0266 
0267         /* All requests will be tied to the first four listeners */
0268         if (i != MIGRATED_TO) {
0269             err = listen(test_case->servers[i], qlen);
0270             if (!ASSERT_OK(err, "listen"))
0271                 return -1;
0272         }
0273     }
0274 
0275     return 0;
0276 }
0277 
0278 static int start_clients(struct migrate_reuseport_test_case *test_case)
0279 {
0280     char buf[MSGLEN] = MSG;
0281     int i, err;
0282 
0283     for (i = 0; i < NR_CLIENTS; i++) {
0284         test_case->clients[i] = socket(test_case->family, SOCK_STREAM,
0285                            IPPROTO_TCP);
0286         if (!ASSERT_NEQ(test_case->clients[i], -1, "socket"))
0287             return -1;
0288 
0289         /* The attached XDP program drops only the final ACK, so
0290          * clients will transition to TCP_ESTABLISHED immediately.
0291          */
0292         err = settimeo(test_case->clients[i], 100);
0293         if (!ASSERT_OK(err, "settimeo"))
0294             return -1;
0295 
0296         if (test_case->fastopen) {
0297             int fastopen = 1;
0298 
0299             err = setsockopt(test_case->clients[i], IPPROTO_TCP,
0300                      TCP_FASTOPEN_CONNECT, &fastopen,
0301                      sizeof(fastopen));
0302             if (!ASSERT_OK(err,
0303                        "setsockopt - TCP_FASTOPEN_CONNECT"))
0304                 return -1;
0305         }
0306 
0307         err = connect(test_case->clients[i],
0308                   (struct sockaddr *)&test_case->addr,
0309                   test_case->addrlen);
0310         if (!ASSERT_OK(err, "connect"))
0311             return -1;
0312 
0313         err = write(test_case->clients[i], buf, MSGLEN);
0314         if (!ASSERT_EQ(err, MSGLEN, "write"))
0315             return -1;
0316     }
0317 
0318     return 0;
0319 }
0320 
0321 static int update_maps(struct migrate_reuseport_test_case *test_case,
0322                struct test_migrate_reuseport *skel)
0323 {
0324     int i, err, migrated_to = MIGRATED_TO;
0325     int reuseport_map_fd, migrate_map_fd;
0326     __u64 value;
0327 
0328     reuseport_map_fd = bpf_map__fd(skel->maps.reuseport_map);
0329     migrate_map_fd = bpf_map__fd(skel->maps.migrate_map);
0330 
0331     for (i = 0; i < NR_SERVERS; i++) {
0332         value = (__u64)test_case->servers[i];
0333         err = bpf_map_update_elem(reuseport_map_fd, &i, &value,
0334                       BPF_NOEXIST);
0335         if (!ASSERT_OK(err, "bpf_map_update_elem - reuseport_map"))
0336             return -1;
0337 
0338         err = bpf_map_lookup_elem(reuseport_map_fd, &i, &value);
0339         if (!ASSERT_OK(err, "bpf_map_lookup_elem - reuseport_map"))
0340             return -1;
0341 
0342         err = bpf_map_update_elem(migrate_map_fd, &value, &migrated_to,
0343                       BPF_NOEXIST);
0344         if (!ASSERT_OK(err, "bpf_map_update_elem - migrate_map"))
0345             return -1;
0346     }
0347 
0348     return 0;
0349 }
0350 
0351 static int migrate_dance(struct migrate_reuseport_test_case *test_case)
0352 {
0353     int i, err;
0354 
0355     /* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests
0356      * to the last listener based on eBPF.
0357      */
0358     for (i = 0; i < MIGRATED_TO; i++) {
0359         err = shutdown(test_case->servers[i], SHUT_RDWR);
0360         if (!ASSERT_OK(err, "shutdown"))
0361             return -1;
0362     }
0363 
0364     /* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */
0365     if (test_case->state == BPF_TCP_NEW_SYN_RECV)
0366         return 0;
0367 
0368     /* Note that we use the second listener instead of the
0369      * first one here.
0370      *
0371      * The fist listener is bind()ed with port 0 and,
0372      * SOCK_BINDPORT_LOCK is not set to sk_userlocks, so
0373      * calling listen() again will bind() the first listener
0374      * on a new ephemeral port and detach it from the existing
0375      * reuseport group.  (See: __inet_bind(), tcp_set_state())
0376      *
0377      * OTOH, the second one is bind()ed with a specific port,
0378      * and SOCK_BINDPORT_LOCK is set. Thus, re-listen() will
0379      * resurrect the listener on the existing reuseport group.
0380      */
0381     err = listen(test_case->servers[1], QLEN);
0382     if (!ASSERT_OK(err, "listen"))
0383         return -1;
0384 
0385     /* Migrate from the last listener to the second one.
0386      *
0387      * All listeners were detached out of the reuseport_map,
0388      * so migration will be done by kernel random pick from here.
0389      */
0390     err = shutdown(test_case->servers[MIGRATED_TO], SHUT_RDWR);
0391     if (!ASSERT_OK(err, "shutdown"))
0392         return -1;
0393 
0394     /* Back to the existing reuseport group */
0395     err = listen(test_case->servers[MIGRATED_TO], QLEN);
0396     if (!ASSERT_OK(err, "listen"))
0397         return -1;
0398 
0399     /* Migrate back to the last one from the second one */
0400     err = shutdown(test_case->servers[1], SHUT_RDWR);
0401     if (!ASSERT_OK(err, "shutdown"))
0402         return -1;
0403 
0404     return 0;
0405 }
0406 
0407 static void count_requests(struct migrate_reuseport_test_case *test_case,
0408                struct test_migrate_reuseport *skel)
0409 {
0410     struct sockaddr_storage addr;
0411     socklen_t len = sizeof(addr);
0412     int err, cnt = 0, client;
0413     char buf[MSGLEN];
0414 
0415     err = settimeo(test_case->servers[MIGRATED_TO], 4000);
0416     if (!ASSERT_OK(err, "settimeo"))
0417         goto out;
0418 
0419     for (; cnt < NR_CLIENTS; cnt++) {
0420         client = accept(test_case->servers[MIGRATED_TO],
0421                 (struct sockaddr *)&addr, &len);
0422         if (!ASSERT_NEQ(client, -1, "accept"))
0423             goto out;
0424 
0425         memset(buf, 0, MSGLEN);
0426         read(client, &buf, MSGLEN);
0427         close(client);
0428 
0429         if (!ASSERT_STREQ(buf, MSG, "read"))
0430             goto out;
0431     }
0432 
0433 out:
0434     ASSERT_EQ(cnt, NR_CLIENTS, "count in userspace");
0435 
0436     switch (test_case->state) {
0437     case BPF_TCP_ESTABLISHED:
0438         cnt = skel->bss->migrated_at_close;
0439         break;
0440     case BPF_TCP_SYN_RECV:
0441         cnt = skel->bss->migrated_at_close_fastopen;
0442         break;
0443     case BPF_TCP_NEW_SYN_RECV:
0444         if (test_case->expire_synack_timer)
0445             cnt = skel->bss->migrated_at_send_synack;
0446         else
0447             cnt = skel->bss->migrated_at_recv_ack;
0448         break;
0449     default:
0450         cnt = 0;
0451     }
0452 
0453     ASSERT_EQ(cnt, NR_CLIENTS, "count in BPF prog");
0454 }
0455 
0456 static void run_test(struct migrate_reuseport_test_case *test_case,
0457              struct test_migrate_reuseport *skel)
0458 {
0459     int err, saved_len;
0460     char buf[16];
0461 
0462     skel->bss->migrated_at_close = 0;
0463     skel->bss->migrated_at_close_fastopen = 0;
0464     skel->bss->migrated_at_send_synack = 0;
0465     skel->bss->migrated_at_recv_ack = 0;
0466 
0467     init_fds(test_case->servers, NR_SERVERS);
0468     init_fds(test_case->clients, NR_CLIENTS);
0469 
0470     if (test_case->fastopen) {
0471         memset(buf, 0, sizeof(buf));
0472 
0473         err = setup_fastopen(buf, sizeof(buf), &saved_len, false);
0474         if (!ASSERT_OK(err, "setup_fastopen - setup"))
0475             return;
0476     }
0477 
0478     err = start_servers(test_case, skel);
0479     if (!ASSERT_OK(err, "start_servers"))
0480         goto close_servers;
0481 
0482     if (test_case->drop_ack) {
0483         /* Drop the final ACK of the 3-way handshake and stick the
0484          * in-flight requests on TCP_SYN_RECV or TCP_NEW_SYN_RECV.
0485          */
0486         err = drop_ack(test_case, skel);
0487         if (!ASSERT_OK(err, "drop_ack"))
0488             goto close_servers;
0489     }
0490 
0491     /* Tie requests to the first four listners */
0492     err = start_clients(test_case);
0493     if (!ASSERT_OK(err, "start_clients"))
0494         goto close_clients;
0495 
0496     err = listen(test_case->servers[MIGRATED_TO], QLEN);
0497     if (!ASSERT_OK(err, "listen"))
0498         goto close_clients;
0499 
0500     err = update_maps(test_case, skel);
0501     if (!ASSERT_OK(err, "fill_maps"))
0502         goto close_clients;
0503 
0504     /* Migrate the requests in the accept queue only.
0505      * TCP_NEW_SYN_RECV requests are not migrated at this point.
0506      */
0507     err = migrate_dance(test_case);
0508     if (!ASSERT_OK(err, "migrate_dance"))
0509         goto close_clients;
0510 
0511     if (test_case->expire_synack_timer) {
0512         /* Wait for SYN+ACK timers to expire so that
0513          * reqsk_timer_handler() migrates TCP_NEW_SYN_RECV requests.
0514          */
0515         sleep(1);
0516     }
0517 
0518     if (test_case->link) {
0519         /* Resume 3WHS and migrate TCP_NEW_SYN_RECV requests */
0520         err = pass_ack(test_case);
0521         if (!ASSERT_OK(err, "pass_ack"))
0522             goto close_clients;
0523     }
0524 
0525     count_requests(test_case, skel);
0526 
0527 close_clients:
0528     close_fds(test_case->clients, NR_CLIENTS);
0529 
0530     if (test_case->link) {
0531         err = pass_ack(test_case);
0532         ASSERT_OK(err, "pass_ack - clean up");
0533     }
0534 
0535 close_servers:
0536     close_fds(test_case->servers, NR_SERVERS);
0537 
0538     if (test_case->fastopen) {
0539         err = setup_fastopen(buf, sizeof(buf), &saved_len, true);
0540         ASSERT_OK(err, "setup_fastopen - restore");
0541     }
0542 }
0543 
0544 void serial_test_migrate_reuseport(void)
0545 {
0546     struct test_migrate_reuseport *skel;
0547     int i;
0548 
0549     skel = test_migrate_reuseport__open_and_load();
0550     if (!ASSERT_OK_PTR(skel, "open_and_load"))
0551         return;
0552 
0553     for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
0554         test__start_subtest(test_cases[i].name);
0555         run_test(&test_cases[i], skel);
0556     }
0557 
0558     test_migrate_reuseport__destroy(skel);
0559 }