0001
0002
0003
0004 #include <string.h>
0005
0006 #include <linux/stddef.h>
0007 #include <linux/bpf.h>
0008 #include <linux/in.h>
0009 #include <linux/in6.h>
0010 #include <sys/socket.h>
0011 #include <netinet/tcp.h>
0012 #include <linux/if.h>
0013 #include <errno.h>
0014
0015 #include <bpf/bpf_helpers.h>
0016 #include <bpf/bpf_endian.h>
0017
/*
 * Addresses and port used by the rewrite logic below. They are written in
 * host byte order here; every use site converts them with
 * bpf_htonl()/bpf_htons().
 */
#define SRC_REWRITE_IP4		0x7f000004U	/* 127.0.0.4 */
#define DST_REWRITE_IP4		0x7f000001U	/* 127.0.0.1 */
#define DST_REWRITE_PORT4	4444

/*
 * Fallback values, defined only when the headers included above did not
 * already provide these names.
 */
#if !defined(TCP_CA_NAME_MAX)
# define TCP_CA_NAME_MAX	16
#endif

#if !defined(TCP_NOTSENT_LOWAT)
# define TCP_NOTSENT_LOWAT	25
#endif

#if !defined(IFNAMSIZ)
# define IFNAMSIZ		16
#endif
0033
0034 __attribute__ ((noinline))
0035 int do_bind(struct bpf_sock_addr *ctx)
0036 {
0037 struct sockaddr_in sa = {};
0038
0039 sa.sin_family = AF_INET;
0040 sa.sin_port = bpf_htons(0);
0041 sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
0042
0043 if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
0044 return 0;
0045
0046 return 1;
0047 }
0048
0049 static __inline int verify_cc(struct bpf_sock_addr *ctx,
0050 char expected[TCP_CA_NAME_MAX])
0051 {
0052 char buf[TCP_CA_NAME_MAX];
0053 int i;
0054
0055 if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
0056 return 1;
0057
0058 for (i = 0; i < TCP_CA_NAME_MAX; i++) {
0059 if (buf[i] != expected[i])
0060 return 1;
0061 if (buf[i] == 0)
0062 break;
0063 }
0064
0065 return 0;
0066 }
0067
0068 static __inline int set_cc(struct bpf_sock_addr *ctx)
0069 {
0070 char reno[TCP_CA_NAME_MAX] = "reno";
0071 char cubic[TCP_CA_NAME_MAX] = "cubic";
0072
0073 if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno)))
0074 return 1;
0075 if (verify_cc(ctx, reno))
0076 return 1;
0077
0078 if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
0079 return 1;
0080 if (verify_cc(ctx, cubic))
0081 return 1;
0082
0083 return 0;
0084 }
0085
0086 static __inline int bind_to_device(struct bpf_sock_addr *ctx)
0087 {
0088 char veth1[IFNAMSIZ] = "test_sock_addr1";
0089 char veth2[IFNAMSIZ] = "test_sock_addr2";
0090 char missing[IFNAMSIZ] = "nonexistent_dev";
0091 char del_bind[IFNAMSIZ] = "";
0092
0093 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
0094 &veth1, sizeof(veth1)))
0095 return 1;
0096 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
0097 &veth2, sizeof(veth2)))
0098 return 1;
0099 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
0100 &missing, sizeof(missing)) != -ENODEV)
0101 return 1;
0102 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
0103 &del_bind, sizeof(del_bind)))
0104 return 1;
0105
0106 return 0;
0107 }
0108
0109 static __inline int set_keepalive(struct bpf_sock_addr *ctx)
0110 {
0111 int zero = 0, one = 1;
0112
0113 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)))
0114 return 1;
0115 if (ctx->type == SOCK_STREAM) {
0116 if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one)))
0117 return 1;
0118 if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one)))
0119 return 1;
0120 if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one)))
0121 return 1;
0122 if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one)))
0123 return 1;
0124 if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one)))
0125 return 1;
0126 }
0127 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero)))
0128 return 1;
0129
0130 return 0;
0131 }
0132
0133 static __inline int set_notsent_lowat(struct bpf_sock_addr *ctx)
0134 {
0135 int lowat = 65535;
0136
0137 if (ctx->type == SOCK_STREAM) {
0138 if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat)))
0139 return 1;
0140 }
0141
0142 return 0;
0143 }
0144
0145 SEC("cgroup/connect4")
0146 int connect_v4_prog(struct bpf_sock_addr *ctx)
0147 {
0148 struct bpf_sock_tuple tuple = {};
0149 struct bpf_sock *sk;
0150
0151
0152 memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr));
0153 memset(&tuple.ipv4.sport, 0, sizeof(tuple.ipv4.sport));
0154
0155 tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
0156 tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
0157
0158
0159 if (bind_to_device(ctx))
0160 return 0;
0161
0162 if (set_keepalive(ctx))
0163 return 0;
0164
0165 if (set_notsent_lowat(ctx))
0166 return 0;
0167
0168 if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
0169 return 0;
0170 else if (ctx->type == SOCK_STREAM)
0171 sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4),
0172 BPF_F_CURRENT_NETNS, 0);
0173 else
0174 sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4),
0175 BPF_F_CURRENT_NETNS, 0);
0176
0177 if (!sk)
0178 return 0;
0179
0180 if (sk->src_ip4 != tuple.ipv4.daddr ||
0181 sk->src_port != DST_REWRITE_PORT4) {
0182 bpf_sk_release(sk);
0183 return 0;
0184 }
0185
0186 bpf_sk_release(sk);
0187
0188
0189 if (ctx->type == SOCK_STREAM && set_cc(ctx))
0190 return 0;
0191
0192
0193 ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
0194 ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
0195
0196 return do_bind(ctx) ? 1 : 0;
0197 }
0198
0199 char _license[] SEC("license") = "GPL";