Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /* Copyright (c) 2020 Facebook */
0003 
0004 #include <stddef.h>
0005 #include <errno.h>
0006 #include <stdbool.h>
0007 #include <sys/types.h>
0008 #include <sys/socket.h>
0009 #include <linux/tcp.h>
0010 #include <linux/socket.h>
0011 #include <linux/bpf.h>
0012 #include <linux/types.h>
0013 #include <bpf/bpf_helpers.h>
0014 #include <bpf/bpf_endian.h>
0015 #define BPF_PROG_TEST_TCP_HDR_OPTIONS
0016 #include "test_tcp_hdr_options.h"
0017 
/* Size in bytes of MEMBER within TYPE.  Only defined here when an
 * earlier header has not already provided it.
 */
#ifndef sizeof_field
#define sizeof_field(TYPE, MEMBER) sizeof(((TYPE *)0)->MEMBER)
#endif
0021 
0022 __u8 test_kind = TCPOPT_EXP;
0023 __u16 test_magic = 0xeB9F;
0024 __u32 inherit_cb_flags = 0;
0025 
0026 struct bpf_test_option passive_synack_out = {};
0027 struct bpf_test_option passive_fin_out  = {};
0028 
0029 struct bpf_test_option passive_estab_in = {};
0030 struct bpf_test_option passive_fin_in   = {};
0031 
0032 struct bpf_test_option active_syn_out   = {};
0033 struct bpf_test_option active_fin_out   = {};
0034 
0035 struct bpf_test_option active_estab_in  = {};
0036 struct bpf_test_option active_fin_in    = {};
0037 
0038 struct {
0039     __uint(type, BPF_MAP_TYPE_SK_STORAGE);
0040     __uint(map_flags, BPF_F_NO_PREALLOC);
0041     __type(key, int);
0042     __type(value, struct hdr_stg);
0043 } hdr_stg_map SEC(".maps");
0044 
0045 static bool skops_want_cookie(const struct bpf_sock_ops *skops)
0046 {
0047     return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
0048 }
0049 
0050 static bool skops_current_mss(const struct bpf_sock_ops *skops)
0051 {
0052     return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS;
0053 }
0054 
0055 static __u8 option_total_len(__u8 flags)
0056 {
0057     __u8 i, len = 1; /* +1 for flags */
0058 
0059     if (!flags)
0060         return 0;
0061 
0062     /* RESEND bit does not use a byte */
0063     for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++)
0064         len += !!TEST_OPTION_FLAGS(flags, i);
0065 
0066     if (test_kind == TCPOPT_EXP)
0067         return len + TCP_BPF_EXPOPT_BASE_LEN;
0068     else
0069         return len + 2; /* +1 kind, +1 kind-len */
0070 }
0071 
0072 static void write_test_option(const struct bpf_test_option *test_opt,
0073                   __u8 *data)
0074 {
0075     __u8 offset = 0;
0076 
0077     data[offset++] = test_opt->flags;
0078     if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS))
0079         data[offset++] = test_opt->max_delack_ms;
0080 
0081     if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND))
0082         data[offset++] = test_opt->rand;
0083 }
0084 
0085 static int store_option(struct bpf_sock_ops *skops,
0086             const struct bpf_test_option *test_opt)
0087 {
0088     union {
0089         struct tcp_exprm_opt exprm;
0090         struct tcp_opt regular;
0091     } write_opt;
0092     int err;
0093 
0094     if (test_kind == TCPOPT_EXP) {
0095         write_opt.exprm.kind = TCPOPT_EXP;
0096         write_opt.exprm.len = option_total_len(test_opt->flags);
0097         write_opt.exprm.magic = __bpf_htons(test_magic);
0098         write_opt.exprm.data32 = 0;
0099         write_test_option(test_opt, write_opt.exprm.data);
0100         err = bpf_store_hdr_opt(skops, &write_opt.exprm,
0101                     sizeof(write_opt.exprm), 0);
0102     } else {
0103         write_opt.regular.kind = test_kind;
0104         write_opt.regular.len = option_total_len(test_opt->flags);
0105         write_opt.regular.data32 = 0;
0106         write_test_option(test_opt, write_opt.regular.data);
0107         err = bpf_store_hdr_opt(skops, &write_opt.regular,
0108                     sizeof(write_opt.regular), 0);
0109     }
0110 
0111     if (err)
0112         RET_CG_ERR(err);
0113 
0114     return CG_OK;
0115 }
0116 
0117 static int parse_test_option(struct bpf_test_option *opt, const __u8 *start)
0118 {
0119     opt->flags = *start++;
0120 
0121     if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS))
0122         opt->max_delack_ms = *start++;
0123 
0124     if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND))
0125         opt->rand = *start++;
0126 
0127     return 0;
0128 }
0129 
0130 static int load_option(struct bpf_sock_ops *skops,
0131                struct bpf_test_option *test_opt, bool from_syn)
0132 {
0133     union {
0134         struct tcp_exprm_opt exprm;
0135         struct tcp_opt regular;
0136     } search_opt;
0137     int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
0138 
0139     if (test_kind == TCPOPT_EXP) {
0140         search_opt.exprm.kind = TCPOPT_EXP;
0141         search_opt.exprm.len = 4;
0142         search_opt.exprm.magic = __bpf_htons(test_magic);
0143         search_opt.exprm.data32 = 0;
0144         ret = bpf_load_hdr_opt(skops, &search_opt.exprm,
0145                        sizeof(search_opt.exprm), load_flags);
0146         if (ret < 0)
0147             return ret;
0148         return parse_test_option(test_opt, search_opt.exprm.data);
0149     } else {
0150         search_opt.regular.kind = test_kind;
0151         search_opt.regular.len = 0;
0152         search_opt.regular.data32 = 0;
0153         ret = bpf_load_hdr_opt(skops, &search_opt.regular,
0154                        sizeof(search_opt.regular), load_flags);
0155         if (ret < 0)
0156             return ret;
0157         return parse_test_option(test_opt, search_opt.regular.data);
0158     }
0159 }
0160 
0161 static int synack_opt_len(struct bpf_sock_ops *skops)
0162 {
0163     struct bpf_test_option test_opt = {};
0164     __u8 optlen;
0165     int err;
0166 
0167     if (!passive_synack_out.flags)
0168         return CG_OK;
0169 
0170     err = load_option(skops, &test_opt, true);
0171 
0172     /* bpf_test_option is not found */
0173     if (err == -ENOMSG)
0174         return CG_OK;
0175 
0176     if (err)
0177         RET_CG_ERR(err);
0178 
0179     optlen = option_total_len(passive_synack_out.flags);
0180     if (optlen) {
0181         err = bpf_reserve_hdr_opt(skops, optlen, 0);
0182         if (err)
0183             RET_CG_ERR(err);
0184     }
0185 
0186     return CG_OK;
0187 }
0188 
0189 static int write_synack_opt(struct bpf_sock_ops *skops)
0190 {
0191     struct bpf_test_option opt;
0192 
0193     if (!passive_synack_out.flags)
0194         /* We should not even be called since no header
0195          * space has been reserved.
0196          */
0197         RET_CG_ERR(0);
0198 
0199     opt = passive_synack_out;
0200     if (skops_want_cookie(skops))
0201         SET_OPTION_FLAGS(opt.flags, OPTION_RESEND);
0202 
0203     return store_option(skops, &opt);
0204 }
0205 
0206 static int syn_opt_len(struct bpf_sock_ops *skops)
0207 {
0208     __u8 optlen;
0209     int err;
0210 
0211     if (!active_syn_out.flags)
0212         return CG_OK;
0213 
0214     optlen = option_total_len(active_syn_out.flags);
0215     if (optlen) {
0216         err = bpf_reserve_hdr_opt(skops, optlen, 0);
0217         if (err)
0218             RET_CG_ERR(err);
0219     }
0220 
0221     return CG_OK;
0222 }
0223 
0224 static int write_syn_opt(struct bpf_sock_ops *skops)
0225 {
0226     if (!active_syn_out.flags)
0227         RET_CG_ERR(0);
0228 
0229     return store_option(skops, &active_syn_out);
0230 }
0231 
0232 static int fin_opt_len(struct bpf_sock_ops *skops)
0233 {
0234     struct bpf_test_option *opt;
0235     struct hdr_stg *hdr_stg;
0236     __u8 optlen;
0237     int err;
0238 
0239     if (!skops->sk)
0240         RET_CG_ERR(0);
0241 
0242     hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
0243     if (!hdr_stg)
0244         RET_CG_ERR(0);
0245 
0246     if (hdr_stg->active)
0247         opt = &active_fin_out;
0248     else
0249         opt = &passive_fin_out;
0250 
0251     optlen = option_total_len(opt->flags);
0252     if (optlen) {
0253         err = bpf_reserve_hdr_opt(skops, optlen, 0);
0254         if (err)
0255             RET_CG_ERR(err);
0256     }
0257 
0258     return CG_OK;
0259 }
0260 
0261 static int write_fin_opt(struct bpf_sock_ops *skops)
0262 {
0263     struct bpf_test_option *opt;
0264     struct hdr_stg *hdr_stg;
0265 
0266     if (!skops->sk)
0267         RET_CG_ERR(0);
0268 
0269     hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
0270     if (!hdr_stg)
0271         RET_CG_ERR(0);
0272 
0273     if (hdr_stg->active)
0274         opt = &active_fin_out;
0275     else
0276         opt = &passive_fin_out;
0277 
0278     if (!opt->flags)
0279         RET_CG_ERR(0);
0280 
0281     return store_option(skops, opt);
0282 }
0283 
0284 static int resend_in_ack(struct bpf_sock_ops *skops)
0285 {
0286     struct hdr_stg *hdr_stg;
0287 
0288     if (!skops->sk)
0289         return -1;
0290 
0291     hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
0292     if (!hdr_stg)
0293         return -1;
0294 
0295     return !!hdr_stg->resend_syn;
0296 }
0297 
0298 static int nodata_opt_len(struct bpf_sock_ops *skops)
0299 {
0300     int resend;
0301 
0302     resend = resend_in_ack(skops);
0303     if (resend < 0)
0304         RET_CG_ERR(0);
0305 
0306     if (resend)
0307         return syn_opt_len(skops);
0308 
0309     return CG_OK;
0310 }
0311 
0312 static int write_nodata_opt(struct bpf_sock_ops *skops)
0313 {
0314     int resend;
0315 
0316     resend = resend_in_ack(skops);
0317     if (resend < 0)
0318         RET_CG_ERR(0);
0319 
0320     if (resend)
0321         return write_syn_opt(skops);
0322 
0323     return CG_OK;
0324 }
0325 
/* Length callback for packets carrying data.  It behaves exactly like
 * the no-data version; it exists as a separate function mostly so that
 * handle_hdr_opt_len() can show an example use of skops->skb_len when
 * choosing between the two.
 */
static int data_opt_len(struct bpf_sock_ops *skops)
{
    int ret = nodata_opt_len(skops);

    return ret;
}
0333 
/* Write callback for packets carrying data; identical to the no-data
 * version.
 */
static int write_data_opt(struct bpf_sock_ops *skops)
{
    int ret = write_nodata_opt(skops);

    return ret;
}
0338 
0339 static int current_mss_opt_len(struct bpf_sock_ops *skops)
0340 {
0341     /* Reserve maximum that may be needed */
0342     int err;
0343 
0344     err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0);
0345     if (err)
0346         RET_CG_ERR(err);
0347 
0348     return CG_OK;
0349 }
0350 
0351 static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
0352 {
0353     __u8 tcp_flags = skops_tcp_flags(skops);
0354 
0355     if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
0356         return synack_opt_len(skops);
0357 
0358     if (tcp_flags & TCPHDR_SYN)
0359         return syn_opt_len(skops);
0360 
0361     if (tcp_flags & TCPHDR_FIN)
0362         return fin_opt_len(skops);
0363 
0364     if (skops_current_mss(skops))
0365         /* The kernel is calculating the MSS */
0366         return current_mss_opt_len(skops);
0367 
0368     if (skops->skb_len)
0369         return data_opt_len(skops);
0370 
0371     return nodata_opt_len(skops);
0372 }
0373 
0374 static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
0375 {
0376     __u8 tcp_flags = skops_tcp_flags(skops);
0377     struct tcphdr *th;
0378 
0379     if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
0380         return write_synack_opt(skops);
0381 
0382     if (tcp_flags & TCPHDR_SYN)
0383         return write_syn_opt(skops);
0384 
0385     if (tcp_flags & TCPHDR_FIN)
0386         return write_fin_opt(skops);
0387 
0388     th = skops->skb_data;
0389     if (th + 1 > skops->skb_data_end)
0390         RET_CG_ERR(0);
0391 
0392     if (skops->skb_len > tcp_hdrlen(th))
0393         return write_data_opt(skops);
0394 
0395     return write_nodata_opt(skops);
0396 }
0397 
0398 static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms)
0399 {
0400     __u32 max_delack_us = max_delack_ms * 1000;
0401 
0402     return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX,
0403                   &max_delack_us, sizeof(max_delack_us));
0404 }
0405 
0406 static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms)
0407 {
0408     __u32 min_rto_us = peer_max_delack_ms * 1000;
0409 
0410     return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us,
0411                   sizeof(min_rto_us));
0412 }
0413 
0414 static int handle_active_estab(struct bpf_sock_ops *skops)
0415 {
0416     struct hdr_stg init_stg = {
0417         .active = true,
0418     };
0419     int err;
0420 
0421     err = load_option(skops, &active_estab_in, false);
0422     if (err && err != -ENOMSG)
0423         RET_CG_ERR(err);
0424 
0425     init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags,
0426                         OPTION_RESEND);
0427     if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk,
0428                           &init_stg,
0429                           BPF_SK_STORAGE_GET_F_CREATE))
0430         RET_CG_ERR(0);
0431 
0432     if (init_stg.resend_syn)
0433         /* Don't clear the write_hdr cb now because
0434          * the ACK may get lost and retransmit may
0435          * be needed.
0436          *
0437          * PARSE_ALL_HDR cb flag is set to learn if this
0438          * resend_syn option has received by the peer.
0439          *
0440          * The header option will be resent until a valid
0441          * packet is received at handle_parse_hdr()
0442          * and all hdr cb flags will be cleared in
0443          * handle_parse_hdr().
0444          */
0445         set_parse_all_hdr_cb_flags(skops);
0446     else if (!active_fin_out.flags)
0447         /* No options will be written from now */
0448         clear_hdr_cb_flags(skops);
0449 
0450     if (active_syn_out.max_delack_ms) {
0451         err = set_delack_max(skops, active_syn_out.max_delack_ms);
0452         if (err)
0453             RET_CG_ERR(err);
0454     }
0455 
0456     if (active_estab_in.max_delack_ms) {
0457         err = set_rto_min(skops, active_estab_in.max_delack_ms);
0458         if (err)
0459             RET_CG_ERR(err);
0460     }
0461 
0462     return CG_OK;
0463 }
0464 
0465 static int handle_passive_estab(struct bpf_sock_ops *skops)
0466 {
0467     struct hdr_stg init_stg = {};
0468     struct tcphdr *th;
0469     int err;
0470 
0471     inherit_cb_flags = skops->bpf_sock_ops_cb_flags;
0472 
0473     err = load_option(skops, &passive_estab_in, true);
0474     if (err == -ENOENT) {
0475         /* saved_syn is not found. It was in syncookie mode.
0476          * We have asked the active side to resend the options
0477          * in ACK, so try to find the bpf_test_option from ACK now.
0478          */
0479         err = load_option(skops, &passive_estab_in, false);
0480         init_stg.syncookie = true;
0481     }
0482 
0483     /* ENOMSG: The bpf_test_option is not found which is fine.
0484      * Bail out now for all other errors.
0485      */
0486     if (err && err != -ENOMSG)
0487         RET_CG_ERR(err);
0488 
0489     th = skops->skb_data;
0490     if (th + 1 > skops->skb_data_end)
0491         RET_CG_ERR(0);
0492 
0493     if (th->syn) {
0494         /* Fastopen */
0495 
0496         /* Cannot clear cb_flags to stop write_hdr cb.
0497          * synack is not sent yet for fast open.
0498          * Even it was, the synack may need to be retransmitted.
0499          *
0500          * PARSE_ALL_HDR cb flag is set to learn
0501          * if synack has reached the peer.
0502          * All cb_flags will be cleared in handle_parse_hdr().
0503          */
0504         set_parse_all_hdr_cb_flags(skops);
0505         init_stg.fastopen = true;
0506     } else if (!passive_fin_out.flags) {
0507         /* No options will be written from now */
0508         clear_hdr_cb_flags(skops);
0509     }
0510 
0511     if (!skops->sk ||
0512         !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg,
0513                 BPF_SK_STORAGE_GET_F_CREATE))
0514         RET_CG_ERR(0);
0515 
0516     if (passive_synack_out.max_delack_ms) {
0517         err = set_delack_max(skops, passive_synack_out.max_delack_ms);
0518         if (err)
0519             RET_CG_ERR(err);
0520     }
0521 
0522     if (passive_estab_in.max_delack_ms) {
0523         err = set_rto_min(skops, passive_estab_in.max_delack_ms);
0524         if (err)
0525             RET_CG_ERR(err);
0526     }
0527 
0528     return CG_OK;
0529 }
0530 
0531 static int handle_parse_hdr(struct bpf_sock_ops *skops)
0532 {
0533     struct hdr_stg *hdr_stg;
0534     struct tcphdr *th;
0535 
0536     if (!skops->sk)
0537         RET_CG_ERR(0);
0538 
0539     th = skops->skb_data;
0540     if (th + 1 > skops->skb_data_end)
0541         RET_CG_ERR(0);
0542 
0543     hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
0544     if (!hdr_stg)
0545         RET_CG_ERR(0);
0546 
0547     if (hdr_stg->resend_syn || hdr_stg->fastopen)
0548         /* The PARSE_ALL_HDR cb flag was turned on
0549          * to ensure that the previously written
0550          * options have reached the peer.
0551          * Those previously written option includes:
0552          *     - Active side: resend_syn in ACK during syncookie
0553          *      or
0554          *     - Passive side: SYNACK during fastopen
0555          *
0556          * A valid packet has been received here after
0557          * the 3WHS, so the PARSE_ALL_HDR cb flag
0558          * can be cleared now.
0559          */
0560         clear_parse_all_hdr_cb_flags(skops);
0561 
0562     if (hdr_stg->resend_syn && !active_fin_out.flags)
0563         /* Active side resent the syn option in ACK
0564          * because the server was in syncookie mode.
0565          * A valid packet has been received, so
0566          * clear header cb flags if there is no
0567          * more option to send.
0568          */
0569         clear_hdr_cb_flags(skops);
0570 
0571     if (hdr_stg->fastopen && !passive_fin_out.flags)
0572         /* Passive side was in fastopen.
0573          * A valid packet has been received, so
0574          * the SYNACK has reached the peer.
0575          * Clear header cb flags if there is no more
0576          * option to send.
0577          */
0578         clear_hdr_cb_flags(skops);
0579 
0580     if (th->fin) {
0581         struct bpf_test_option *fin_opt;
0582         int err;
0583 
0584         if (hdr_stg->active)
0585             fin_opt = &active_fin_in;
0586         else
0587             fin_opt = &passive_fin_in;
0588 
0589         err = load_option(skops, fin_opt, false);
0590         if (err && err != -ENOMSG)
0591             RET_CG_ERR(err);
0592     }
0593 
0594     return CG_OK;
0595 }
0596 
0597 SEC("sockops")
0598 int estab(struct bpf_sock_ops *skops)
0599 {
0600     int true_val = 1;
0601 
0602     switch (skops->op) {
0603     case BPF_SOCK_OPS_TCP_LISTEN_CB:
0604         bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
0605                    &true_val, sizeof(true_val));
0606         set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
0607         break;
0608     case BPF_SOCK_OPS_TCP_CONNECT_CB:
0609         set_hdr_cb_flags(skops, 0);
0610         break;
0611     case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
0612         return handle_parse_hdr(skops);
0613     case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
0614         return handle_hdr_opt_len(skops);
0615     case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
0616         return handle_write_hdr_opt(skops);
0617     case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
0618         return handle_passive_estab(skops);
0619     case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
0620         return handle_active_estab(skops);
0621     }
0622 
0623     return CG_OK;
0624 }
0625 
0626 char _license[] SEC("license") = "GPL";