// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * AF_XDP user-space access library.
 *
 * Copyright(c) 2018 - 2019 Intel Corporation.
 *
 * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
 */

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <asm/barrier.h>
#include <linux/compiler.h>
#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_xdp.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <linux/if_link.h>

#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "xsk.h"

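/* Recent kernel headers define these; provide fallbacks for older ones. */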
#ifndef SOL_XDP
#define SOL_XDP 283
#endif

#ifndef AF_XDP
#define AF_XDP 44
#endif

#ifndef PF_XDP
#define PF_XDP AF_XDP
#endif

#define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)

enum xsk_prog {
	XSK_PROG_FALLBACK,
	XSK_PROG_REDIRECT_FLAGS,
};

struct xsk_umem {
	struct xsk_ring_prod *fill_save;
	struct xsk_ring_cons *comp_save;
	char *umem_area;
	struct xsk_umem_config config;
	int fd;
	int refcount;
	struct list_head ctx_list;
	bool rx_ring_setup_done;
	bool tx_ring_setup_done;
};

struct xsk_ctx {
	struct xsk_ring_prod *fill;
	struct xsk_ring_cons *comp;
	__u32 queue_id;
	struct xsk_umem *umem;
	int refcount;
	int ifindex;
	struct list_head list;
	int prog_fd;
	int link_fd;
	int xsks_map_fd;
	char ifname[IFNAMSIZ];
	bool has_bpf_link;
};

struct xsk_socket {
	struct xsk_ring_cons *rx;
	struct xsk_ring_prod *tx;
	__u64 outstanding_tx;
	struct xsk_ctx *ctx;
	struct xsk_socket_config config;
	int fd;
};

struct xsk_nl_info {
	bool xdp_prog_attached;
	int ifindex;
	int fd;
};

/* Up until and including Linux 5.3 */
struct xdp_ring_offset_v1 {
	__u64 producer;
	__u64 consumer;
	__u64 desc;
};

/* Up until and including Linux 5.3 */
struct xdp_mmap_offsets_v1 {
	struct xdp_ring_offset_v1 rx;
	struct xdp_ring_offset_v1 tx;
	struct xdp_ring_offset_v1 fr;
	struct xdp_ring_offset_v1 cr;
};

int xsk_umem__fd(const struct xsk_umem *umem)
{
	return umem ? umem->fd : -EINVAL;
}

int xsk_socket__fd(const struct xsk_socket *xsk)
{
	return xsk ? xsk->fd : -EINVAL;
}

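/* UMEM areas must start on a page boundary; used to validate caller buffers. */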
static bool xsk_page_aligned(void *buffer)
{
	unsigned long addr = (unsigned long)buffer;

	return !(addr & (getpagesize() - 1));
}

static void xsk_set_umem_config(struct xsk_umem_config *cfg,
				const struct xsk_umem_config *usr_cfg)
{
	if (!usr_cfg) {
		cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
		cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
		cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
		cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
		cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
		return;
	}

	cfg->fill_size = usr_cfg->fill_size;
	cfg->comp_size = usr_cfg->comp_size;
	cfg->frame_size = usr_cfg->frame_size;
	cfg->frame_headroom = usr_cfg->frame_headroom;
	cfg->flags = usr_cfg->flags;
}

static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
				     const struct xsk_socket_config *usr_cfg)
{
	if (!usr_cfg) {
		cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
		cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
		cfg->libbpf_flags = 0;
		cfg->xdp_flags = 0;
		cfg->bind_flags = 0;
		return 0;
	}

	if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
		return -EINVAL;

	cfg->rx_size = usr_cfg->rx_size;
	cfg->tx_size = usr_cfg->tx_size;
	cfg->libbpf_flags = usr_cfg->libbpf_flags;
	cfg->xdp_flags = usr_cfg->xdp_flags;
	cfg->bind_flags = usr_cfg->bind_flags;

	return 0;
}

static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
{
	struct xdp_mmap_offsets_v1 off_v1;

	/* getsockopt on a kernel <= 5.3 has no flags fields.
	 * Copy over the offsets to the correct places in the >= 5.4 format
	 * and put the flags where they would be expected to be.
	 */
	memcpy(&off_v1, off, sizeof(off_v1));

	off->rx.producer = off_v1.rx.producer;
	off->rx.consumer = off_v1.rx.consumer;
	off->rx.desc = off_v1.rx.desc;
	off->rx.flags = off_v1.rx.consumer + sizeof(__u32);

	off->tx.producer = off_v1.tx.producer;
	off->tx.consumer = off_v1.tx.consumer;
	off->tx.desc = off_v1.tx.desc;
	off->tx.flags = off_v1.tx.consumer + sizeof(__u32);

	off->fr.producer = off_v1.fr.producer;
	off->fr.consumer = off_v1.fr.consumer;
	off->fr.desc = off_v1.fr.desc;
	off->fr.flags = off_v1.fr.consumer + sizeof(__u32);

	off->cr.producer = off_v1.cr.producer;
	off->cr.consumer = off_v1.cr.consumer;
	off->cr.desc = off_v1.cr.desc;
	off->cr.flags = off_v1.cr.consumer + sizeof(__u32);
}

/* Read the ring mmap offsets, accepting both the current layout and the
 * pre-5.4 layout without the flags fields.
 */
static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
{
	socklen_t optlen;
	int err;

	optlen = sizeof(*off);
	err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
	if (err)
		return err;

	if (optlen == sizeof(*off))
		return 0;

	if (optlen == sizeof(struct xdp_mmap_offsets_v1)) {
		xsk_mmap_offsets_v1(off);
		return 0;
	}

	return -EINVAL;
}

/* Size the fill and completion rings and mmap them off the socket fd. */
static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
				 struct xsk_ring_prod *fill,
				 struct xsk_ring_cons *comp)
{
	struct xdp_mmap_offsets off;
	void *map;
	int err;

	err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
			 &umem->config.fill_size,
			 sizeof(umem->config.fill_size));
	if (err)
		return -errno;

	err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
			 &umem->config.comp_size,
			 sizeof(umem->config.comp_size));
	if (err)
		return -errno;

	err = xsk_get_mmap_offsets(fd, &off);
	if (err)
		return -errno;

	map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
		   XDP_UMEM_PGOFF_FILL_RING);
	if (map == MAP_FAILED)
		return -errno;

	fill->mask = umem->config.fill_size - 1;
	fill->size = umem->config.fill_size;
	fill->producer = map + off.fr.producer;
	fill->consumer = map + off.fr.consumer;
	fill->flags = map + off.fr.flags;
	fill->ring = map + off.fr.desc;
	fill->cached_cons = umem->config.fill_size;

	map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
		   XDP_UMEM_PGOFF_COMPLETION_RING);
	if (map == MAP_FAILED) {
		err = -errno;
		goto out_mmap;
	}

	comp->mask = umem->config.comp_size - 1;
	comp->size = umem->config.comp_size;
	comp->producer = map + off.cr.producer;
	comp->consumer = map + off.cr.consumer;
	comp->flags = map + off.cr.flags;
	comp->ring = map + off.cr.desc;

	return 0;

out_mmap:
	/* map holds MAP_FAILED here; unmap the fill ring mapped above. */
	munmap(fill->ring - off.fr.desc,
	       off.fr.desc + umem->config.fill_size * sizeof(__u64));
	return err;
}

int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area,
		     __u64 size, struct xsk_ring_prod *fill,
		     struct xsk_ring_cons *comp,
		     const struct xsk_umem_config *usr_config)
{
	struct xdp_umem_reg mr;
	struct xsk_umem *umem;
	int err;

	if (!umem_area || !umem_ptr || !fill || !comp)
		return -EFAULT;
	if (!size && !xsk_page_aligned(umem_area))
		return -EINVAL;

	umem = calloc(1, sizeof(*umem));
	if (!umem)
		return -ENOMEM;

	umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
	if (umem->fd < 0) {
		err = -errno;
		goto out_umem_alloc;
	}

	umem->umem_area = umem_area;
	INIT_LIST_HEAD(&umem->ctx_list);
	xsk_set_umem_config(&umem->config, usr_config);

	memset(&mr, 0, sizeof(mr));
	mr.addr = (uintptr_t)umem_area;
	mr.len = size;
	mr.chunk_size = umem->config.frame_size;
	mr.headroom = umem->config.frame_headroom;
	mr.flags = umem->config.flags;

	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
	if (err) {
		err = -errno;
		goto out_socket;
	}

	err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
	if (err)
		goto out_socket;

	umem->fill_save = fill;
	umem->comp_save = comp;
	*umem_ptr = umem;
	return 0;

out_socket:
	close(umem->fd);
out_umem_alloc:
	free(umem);
	return err;
}
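
/* Usage sketch (illustrative only, not part of this library): a typical
 * caller allocates a page-aligned buffer and registers it as a UMEM. Error
 * handling is elided and the sizes are arbitrary example values.
 *
 *	struct xsk_ring_prod fill;
 *	struct xsk_ring_cons comp;
 *	struct xsk_umem *umem;
 *	__u64 size = XSK_UMEM__DEFAULT_FRAME_SIZE * 4096;
 *	void *bufs = NULL;
 *
 *	posix_memalign(&bufs, getpagesize(), size);
 *	xsk_umem__create(&umem, bufs, size, &fill, &comp, NULL);
 *
 * The fill and completion rings set up here are saved in fill_save/comp_save
 * and handed over to the first socket created on this umem.
 */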

struct xsk_umem_config_v1 {
	__u32 fill_size;
	__u32 comp_size;
	__u32 frame_size;
	__u32 frame_headroom;
};

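/* Probe which redirect program the kernel supports: load a tiny XDP program
 * that calls bpf_redirect_map() with a default action (XDP_PASS) in the
 * flags argument and test-run it on a one-byte packet. Kernels from 5.3
 * onwards return that action when the map lookup fails, so a test run that
 * returns XDP_PASS means the short prog_redirect_flags program suffices;
 * otherwise the fallback program must be used.
 */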
static enum xsk_prog get_xsk_prog(void)
{
	enum xsk_prog detected = XSK_PROG_FALLBACK;
	char data_in = 0, data_out;
	struct bpf_insn insns[] = {
		BPF_LD_MAP_FD(BPF_REG_1, 0),
		BPF_MOV64_IMM(BPF_REG_2, 0),
		BPF_MOV64_IMM(BPF_REG_3, XDP_PASS),
		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
		BPF_EXIT_INSN(),
	};
	LIBBPF_OPTS(bpf_test_run_opts, opts,
		    .data_in = &data_in,
		    .data_size_in = 1,
		    .data_out = &data_out,
	);

	int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns);

	map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL);
	if (map_fd < 0)
		return detected;

	insns[0].imm = map_fd;

	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
	if (prog_fd < 0) {
		close(map_fd);
		return detected;
	}

	ret = bpf_prog_test_run_opts(prog_fd, &opts);
	if (!ret && opts.retval == XDP_PASS)
		detected = XSK_PROG_REDIRECT_FLAGS;
	close(prog_fd);
	close(map_fd);
	return detected;
}

static int xsk_load_xdp_prog(struct xsk_socket *xsk)
{
	static const int log_buf_size = 16 * 1024;
	struct xsk_ctx *ctx = xsk->ctx;
	char log_buf[log_buf_size];
	int prog_fd;

	/* This is the fallback C-program:
	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
	 * {
	 *     int ret, index = ctx->rx_queue_index;
	 *
	 *     // A set entry here means that the corresponding queue_id
	 *     // has an active AF_XDP socket bound to it.
	 *     ret = bpf_redirect_map(&xsks_map, index, XDP_PASS);
	 *     if (ret > 0)
	 *         return ret;
	 *
	 *     // Fallback for pre-5.3 kernels, not supporting default
	 *     // action in the flags parameter.
	 *     if (bpf_map_lookup_elem(&xsks_map, &index))
	 *         return bpf_redirect_map(&xsks_map, index, 0);
	 *     return XDP_PASS;
	 * }
	 */
	struct bpf_insn prog[] = {
		/* r2 = *(u32 *)(r1 + 16) */
		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
		/* *(u32 *)(r10 - 4) = r2 */
		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
		/* r1 = xsks_map */
		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
		/* r3 = XDP_PASS */
		BPF_MOV64_IMM(BPF_REG_3, 2),
		/* call bpf_redirect_map */
		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
		/* if w0 > 0 goto pc+13 */
		BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13),
		/* r2 = r10 */
		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
		/* r2 += -4 */
		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
		/* r1 = xsks_map */
		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
		/* call bpf_map_lookup_elem */
		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
		/* r1 = r0 */
		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
		/* r0 = XDP_PASS */
		BPF_MOV64_IMM(BPF_REG_0, 2),
		/* if r1 == 0 goto pc+5 */
		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
		/* r2 = *(u32 *)(r10 - 4) */
		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
		/* r1 = xsks_map */
		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
		/* r3 = 0 */
		BPF_MOV64_IMM(BPF_REG_3, 0),
		/* call bpf_redirect_map */
		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
		/* The jumps are to this instruction */
		BPF_EXIT_INSN(),
	};

	/* This is the post-5.3 kernel C-program:
	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
	 * {
	 *     return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS);
	 * }
	 */
	struct bpf_insn prog_redirect_flags[] = {
		/* r2 = *(u32 *)(r1 + 16) */
		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
		/* r1 = xsks_map */
		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
		/* r3 = XDP_PASS */
		BPF_MOV64_IMM(BPF_REG_3, 2),
		/* call bpf_redirect_map */
		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
		BPF_EXIT_INSN(),
	};
	size_t insns_cnt[] = {ARRAY_SIZE(prog),
			      ARRAY_SIZE(prog_redirect_flags),
	};
	struct bpf_insn *progs[] = {prog, prog_redirect_flags};
	enum xsk_prog option = get_xsk_prog();
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		    .log_buf = log_buf,
		    .log_size = log_buf_size,
	);

	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause",
				progs[option], insns_cnt[option], &opts);
	if (prog_fd < 0) {
		pr_warn("BPF log buffer:\n%s", log_buf);
		return prog_fd;
	}

	ctx->prog_fd = prog_fd;
	return 0;
}

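/* Attach the XDP program to the interface with a bpf_link, which keeps the
 * attachment alive only as long as the link fd (or a pin of it) exists.
 */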
static int xsk_create_bpf_link(struct xsk_socket *xsk)
{
	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
	struct xsk_ctx *ctx = xsk->ctx;
	__u32 prog_id = 0;
	int link_fd;
	int err;

	err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id);
	if (err) {
		pr_warn("getting XDP prog id failed\n");
		return err;
	}

	/* If a netlink-attached XDP prog is already loaded on the interface,
	 * bail out and ask the user to remove it first.
	 */
	if (prog_id) {
		pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n");
		return -EINVAL;
	}

	opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE);

	link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts);
	if (link_fd < 0) {
		pr_warn("bpf_link_create failed: %s\n", strerror(errno));
		return link_fd;
	}

	ctx->link_fd = link_fd;
	return 0;
}

/* Copy up to sz - 1 bytes from the zero-terminated string src and ensure
 * that dst is zero-terminated no matter what (unless sz == 0, in which case
 * the call is a no-op). Conceptually close to FreeBSD's strlcpy(), but it
 * differs in what is returned. Used here instead of strncpy() so interface
 * names always land in a properly terminated buffer.
 */
static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz)
{
	size_t i;

	if (sz == 0)
		return;

	sz--;
	for (i = 0; i < sz && src[i]; i++)
		dst[i] = src[i];
	dst[i] = '\0';
}

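/* Query the device's channel counts via ethtool; the result is used to size
 * the xsks_map so that one socket can be installed per queue.
 */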
static int xsk_get_max_queues(struct xsk_socket *xsk)
{
	struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
	struct xsk_ctx *ctx = xsk->ctx;
	struct ifreq ifr = {};
	int fd, err, ret;

	fd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (fd < 0)
		return -errno;

	ifr.ifr_data = (void *)&channels;
	libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ);
	err = ioctl(fd, SIOCETHTOOL, &ifr);
	if (err && errno != EOPNOTSUPP) {
		ret = -errno;
		goto out;
	}

	if (err) {
		/* If the device says it has no channels, then all traffic
		 * is sent to a single stream, so max queues = 1.
		 */
		ret = 1;
	} else {
		/* Take the max of rx, tx and combined. Drivers return
		 * the number of channels in different ways.
		 */
		ret = max(channels.max_rx, channels.max_tx);
		ret = max(ret, (int)channels.max_combined);
	}

out:
	close(fd);
	return ret;
}

static int xsk_create_bpf_maps(struct xsk_socket *xsk)
{
	struct xsk_ctx *ctx = xsk->ctx;
	int max_queues;
	int fd;

	max_queues = xsk_get_max_queues(xsk);
	if (max_queues < 0)
		return max_queues;

	fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map",
			    sizeof(int), sizeof(int), max_queues, NULL);
	if (fd < 0)
		return fd;

	ctx->xsks_map_fd = fd;

	return 0;
}

static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
{
	struct xsk_ctx *ctx = xsk->ctx;

	bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id);
	close(ctx->xsks_map_fd);
}

/* Find the xsks_map among the maps referenced by an already-loaded XDP
 * program and keep an fd to it in ctx->xsks_map_fd.
 */
static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
{
	__u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
	__u32 map_len = sizeof(struct bpf_map_info);
	struct bpf_prog_info prog_info = {};
	struct xsk_ctx *ctx = xsk->ctx;
	struct bpf_map_info map_info;
	int fd, err;

	err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
	if (err)
		return err;

	num_maps = prog_info.nr_map_ids;

	map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids));
	if (!map_ids)
		return -ENOMEM;

	memset(&prog_info, 0, prog_len);
	prog_info.nr_map_ids = num_maps;
	prog_info.map_ids = (__u64)(unsigned long)map_ids;

	err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
	if (err)
		goto out_map_ids;

	ctx->xsks_map_fd = -1;

	for (i = 0; i < prog_info.nr_map_ids; i++) {
		fd = bpf_map_get_fd_by_id(map_ids[i]);
		if (fd < 0)
			continue;

		memset(&map_info, 0, map_len);
		err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
		if (err) {
			close(fd);
			continue;
		}

		if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) {
			ctx->xsks_map_fd = fd;
			break;
		}

		close(fd);
	}

	if (ctx->xsks_map_fd == -1)
		err = -ENOENT;

out_map_ids:
	free(map_ids);
	return err;
}

static int xsk_set_bpf_maps(struct xsk_socket *xsk)
{
	struct xsk_ctx *ctx = xsk->ctx;

	return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id,
				   &xsk->fd, 0);
}

/* Iterate over all BPF links in the system looking for an XDP link attached
 * to ifindex; on a match, *link_fd (and optionally *prog_id) is filled in.
 */
static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd)
{
	struct bpf_link_info link_info;
	__u32 link_len;
	__u32 id = 0;
	int err;
	int fd;

	while (true) {
		err = bpf_link_get_next_id(id, &id);
		if (err) {
			if (errno == ENOENT) {
				err = 0;
				break;
			}
			pr_warn("can't get next link: %s\n", strerror(errno));
			break;
		}

		fd = bpf_link_get_fd_by_id(id);
		if (fd < 0) {
			if (errno == ENOENT)
				continue;
			pr_warn("can't get link by id (%u): %s\n", id, strerror(errno));
			err = -errno;
			break;
		}

		link_len = sizeof(struct bpf_link_info);
		memset(&link_info, 0, link_len);
		err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len);
		if (err) {
			pr_warn("can't get link info: %s\n", strerror(errno));
			close(fd);
			break;
		}
		if (link_info.type == BPF_LINK_TYPE_XDP) {
			if (link_info.xdp.ifindex == ifindex) {
				*link_fd = fd;
				if (prog_id)
					*prog_id = link_info.prog_id;
				break;
			}
		}
		close(fd);
	}

	return err;
}

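/* Detect bpf_link support for XDP by creating a trivial link on loopback. */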
static bool xsk_probe_bpf_link(void)
{
	LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE);
	struct bpf_insn insns[2] = {
		BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
		BPF_EXIT_INSN()
	};
	int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns);
	int ifindex_lo = 1;
	bool ret = false;
	int err;

	err = xsk_link_lookup(ifindex_lo, NULL, &link_fd);
	if (err)
		return ret;

	if (link_fd >= 0)
		return true;

	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
	if (prog_fd < 0)
		return ret;

	link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts);
	close(prog_fd);

	if (link_fd >= 0) {
		ret = true;
		close(link_fd);
	}

	return ret;
}

static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
{
	char ifname[IFNAMSIZ];
	struct xsk_ctx *ctx;
	char *interface;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx)
		return -ENOMEM;

	interface = if_indextoname(ifindex, &ifname[0]);
	if (!interface) {
		free(ctx);
		return -errno;
	}

	ctx->ifindex = ifindex;
	libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);

	xsk->ctx = ctx;
	xsk->ctx->has_bpf_link = xsk_probe_bpf_link();

	return 0;
}

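/* First socket on this interface/queue: create the xsks_map, load the
 * redirect program and attach it, then insert our fd into the map.
 */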
static int xsk_init_xdp_res(struct xsk_socket *xsk,
			    int *xsks_map_fd)
{
	struct xsk_ctx *ctx = xsk->ctx;
	int err;

	err = xsk_create_bpf_maps(xsk);
	if (err)
		return err;

	err = xsk_load_xdp_prog(xsk);
	if (err)
		goto err_load_xdp_prog;

	if (ctx->has_bpf_link)
		err = xsk_create_bpf_link(xsk);
	else
		err = bpf_xdp_attach(xsk->ctx->ifindex, ctx->prog_fd,
				     xsk->config.xdp_flags, NULL);

	if (err)
		goto err_attach_xdp_prog;

	if (!xsk->rx)
		return err;

	err = xsk_set_bpf_maps(xsk);
	if (err)
		goto err_set_bpf_maps;

	return err;

err_set_bpf_maps:
	if (ctx->has_bpf_link)
		close(ctx->link_fd);
	else
		bpf_xdp_detach(ctx->ifindex, 0, NULL);
err_attach_xdp_prog:
	close(ctx->prog_fd);
err_load_xdp_prog:
	xsk_delete_bpf_maps(xsk);
	return err;
}

/* An XDP program is already attached: grab an fd to it and to its xsks_map
 * so that this socket can be added to the map.
 */
static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id)
{
	struct xsk_ctx *ctx = xsk->ctx;
	int err;

	ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
	if (ctx->prog_fd < 0) {
		err = -errno;
		goto err_prog_fd;
	}
	err = xsk_lookup_bpf_maps(xsk);
	if (err)
		goto err_lookup_maps;

	if (!xsk->rx)
		return err;

	err = xsk_set_bpf_maps(xsk);
	if (err)
		goto err_set_maps;

	return err;

err_set_maps:
	close(ctx->xsks_map_fd);
err_lookup_maps:
	close(ctx->prog_fd);
err_prog_fd:
	if (ctx->has_bpf_link)
		close(ctx->link_fd);
	return err;
}

/* Attach a fresh XDP program and map, or reuse whatever is already attached
 * to the interface.
 */
static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd)
{
	struct xsk_socket *xsk = _xdp;
	struct xsk_ctx *ctx = xsk->ctx;
	__u32 prog_id = 0;
	int err;

	if (ctx->has_bpf_link)
		err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd);
	else
		err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id);

	if (err)
		return err;

	err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) :
			 xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id);

	if (!err && xsks_map_fd)
		*xsks_map_fd = ctx->xsks_map_fd;

	return err;
}

int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd)
{
	return __xsk_setup_xdp_prog(xsk, xsks_map_fd);
}

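/* Look up an existing context for this interface/queue pair, taking a
 * reference if one is found.
 */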
static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
				   __u32 queue_id)
{
	struct xsk_ctx *ctx;

	if (list_empty(&umem->ctx_list))
		return NULL;

	list_for_each_entry(ctx, &umem->ctx_list, list) {
		if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
			ctx->refcount++;
			return ctx;
		}
	}

	return NULL;
}

/* Drop a reference to the context; the last reference unmaps the fill and
 * completion rings (if requested) and frees the context.
 */
static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
{
	struct xsk_umem *umem = ctx->umem;
	struct xdp_mmap_offsets off;
	int err;

	if (--ctx->refcount)
		return;

	if (!unmap)
		goto out_free;

	err = xsk_get_mmap_offsets(umem->fd, &off);
	if (err)
		goto out_free;

	munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size *
	       sizeof(__u64));
	munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size *
	       sizeof(__u64));

out_free:
	list_del(&ctx->list);
	free(ctx);
}

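/* Create a new interface/queue context. While the rings saved at umem
 * creation time are still available they are handed over to the caller's
 * structs; once consumed, fresh rings are created on the socket fd.
 */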
static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
				      struct xsk_umem *umem, int ifindex,
				      const char *ifname, __u32 queue_id,
				      struct xsk_ring_prod *fill,
				      struct xsk_ring_cons *comp)
{
	struct xsk_ctx *ctx;
	int err;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx)
		return NULL;

	if (!umem->fill_save) {
		err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
		if (err) {
			free(ctx);
			return NULL;
		}
	} else if (umem->fill_save != fill || umem->comp_save != comp) {
		/* Copy over rings to new structs. */
		memcpy(fill, umem->fill_save, sizeof(*fill));
		memcpy(comp, umem->comp_save, sizeof(*comp));
	}

	ctx->ifindex = ifindex;
	ctx->refcount = 1;
	ctx->umem = umem;
	ctx->queue_id = queue_id;
	libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);

	ctx->fill = fill;
	ctx->comp = comp;
	list_add(&ctx->list, &umem->ctx_list);
	ctx->has_bpf_link = xsk_probe_bpf_link();
	return ctx;
}

static void xsk_destroy_xsk_struct(struct xsk_socket *xsk)
{
	free(xsk->ctx);
	free(xsk);
}

int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd)
{
	xsk->ctx->xsks_map_fd = fd;
	return xsk_set_bpf_maps(xsk);
}

int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd)
{
	struct xsk_socket *xsk;
	int res;

	xsk = calloc(1, sizeof(*xsk));
	if (!xsk)
		return -ENOMEM;

	res = xsk_create_xsk_struct(ifindex, xsk);
	if (res) {
		free(xsk);
		return -EINVAL;
	}

	res = __xsk_setup_xdp_prog(xsk, xsks_map_fd);

	xsk_destroy_xsk_struct(xsk);

	return res;
}

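/* Create an AF_XDP socket, optionally sharing an already-bound umem. The
 * first socket reuses the umem's own fd; subsequent ones get a socket of
 * their own and bind it with XDP_SHARED_UMEM.
 */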
int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
			      const char *ifname,
			      __u32 queue_id, struct xsk_umem *umem,
			      struct xsk_ring_cons *rx,
			      struct xsk_ring_prod *tx,
			      struct xsk_ring_prod *fill,
			      struct xsk_ring_cons *comp,
			      const struct xsk_socket_config *usr_config)
{
	bool unmap, rx_setup_done = false, tx_setup_done = false;
	void *rx_map = NULL, *tx_map = NULL;
	struct sockaddr_xdp sxdp = {};
	struct xdp_mmap_offsets off;
	struct xsk_socket *xsk;
	struct xsk_ctx *ctx;
	int err, ifindex;

	if (!umem || !xsk_ptr || !(rx || tx))
		return -EFAULT;

	unmap = umem->fill_save != fill;

	xsk = calloc(1, sizeof(*xsk));
	if (!xsk)
		return -ENOMEM;

	err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
	if (err)
		goto out_xsk_alloc;

	xsk->outstanding_tx = 0;
	ifindex = if_nametoindex(ifname);
	if (!ifindex) {
		err = -errno;
		goto out_xsk_alloc;
	}

	if (umem->refcount++ > 0) {
		xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
		if (xsk->fd < 0) {
			err = -errno;
			goto out_xsk_alloc;
		}
	} else {
		xsk->fd = umem->fd;
		rx_setup_done = umem->rx_ring_setup_done;
		tx_setup_done = umem->tx_ring_setup_done;
	}

	ctx = xsk_get_ctx(umem, ifindex, queue_id);
	if (!ctx) {
		if (!fill || !comp) {
			err = -EFAULT;
			goto out_socket;
		}

		ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id,
				     fill, comp);
		if (!ctx) {
			err = -ENOMEM;
			goto out_socket;
		}
	}
	xsk->ctx = ctx;

	if (rx && !rx_setup_done) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
				 &xsk->config.rx_size,
				 sizeof(xsk->config.rx_size));
		if (err) {
			err = -errno;
			goto out_put_ctx;
		}
		if (xsk->fd == umem->fd)
			umem->rx_ring_setup_done = true;
	}
	if (tx && !tx_setup_done) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
				 &xsk->config.tx_size,
				 sizeof(xsk->config.tx_size));
		if (err) {
			err = -errno;
			goto out_put_ctx;
		}
		if (xsk->fd == umem->fd)
			umem->tx_ring_setup_done = true;
	}

	err = xsk_get_mmap_offsets(xsk->fd, &off);
	if (err) {
		err = -errno;
		goto out_put_ctx;
	}

	if (rx) {
		rx_map = mmap(NULL, off.rx.desc +
			      xsk->config.rx_size * sizeof(struct xdp_desc),
			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			      xsk->fd, XDP_PGOFF_RX_RING);
		if (rx_map == MAP_FAILED) {
			err = -errno;
			goto out_put_ctx;
		}

		rx->mask = xsk->config.rx_size - 1;
		rx->size = xsk->config.rx_size;
		rx->producer = rx_map + off.rx.producer;
		rx->consumer = rx_map + off.rx.consumer;
		rx->flags = rx_map + off.rx.flags;
		rx->ring = rx_map + off.rx.desc;
		rx->cached_prod = *rx->producer;
		rx->cached_cons = *rx->consumer;
	}
	xsk->rx = rx;

	if (tx) {
		tx_map = mmap(NULL, off.tx.desc +
			      xsk->config.tx_size * sizeof(struct xdp_desc),
			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			      xsk->fd, XDP_PGOFF_TX_RING);
		if (tx_map == MAP_FAILED) {
			err = -errno;
			goto out_mmap_rx;
		}

		tx->mask = xsk->config.tx_size - 1;
		tx->size = xsk->config.tx_size;
		tx->producer = tx_map + off.tx.producer;
		tx->consumer = tx_map + off.tx.consumer;
		tx->flags = tx_map + off.tx.flags;
		tx->ring = tx_map + off.tx.desc;
		tx->cached_prod = *tx->producer;
		/* cached_cons is kept tx_size ahead of the real consumer
		 * pointer so free-entry computations need no extra addition.
		 */
		tx->cached_cons = *tx->consumer + xsk->config.tx_size;
	}
	xsk->tx = tx;

	sxdp.sxdp_family = PF_XDP;
	sxdp.sxdp_ifindex = ctx->ifindex;
	sxdp.sxdp_queue_id = ctx->queue_id;
	if (umem->refcount > 1) {
		sxdp.sxdp_flags |= XDP_SHARED_UMEM;
		sxdp.sxdp_shared_umem_fd = umem->fd;
	} else {
		sxdp.sxdp_flags = xsk->config.bind_flags;
	}

	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
	if (err) {
		err = -errno;
		goto out_mmap_tx;
	}

	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
		err = __xsk_setup_xdp_prog(xsk, NULL);
		if (err)
			goto out_mmap_tx;
	}

	*xsk_ptr = xsk;
	umem->fill_save = NULL;
	umem->comp_save = NULL;
	return 0;

out_mmap_tx:
	if (tx)
		munmap(tx_map, off.tx.desc +
		       xsk->config.tx_size * sizeof(struct xdp_desc));
out_mmap_rx:
	if (rx)
		munmap(rx_map, off.rx.desc +
		       xsk->config.rx_size * sizeof(struct xdp_desc));
out_put_ctx:
	xsk_put_ctx(ctx, unmap);
out_socket:
	if (--umem->refcount)
		close(xsk->fd);
out_xsk_alloc:
	free(xsk);
	return err;
}

int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
		       __u32 queue_id, struct xsk_umem *umem,
		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
		       const struct xsk_socket_config *usr_config)
{
	if (!umem)
		return -EFAULT;

	return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem,
					 rx, tx, umem->fill_save,
					 umem->comp_save, usr_config);
}
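
/* Usage sketch (illustrative only, not part of this library): an RX/TX
 * socket on queue 0 of "eth0", where umem comes from a prior successful
 * xsk_umem__create() call. Names are examples; error handling is elided.
 *
 *	struct xsk_ring_cons rx;
 *	struct xsk_ring_prod tx;
 *	struct xsk_socket *xsk;
 *
 *	xsk_socket__create(&xsk, "eth0", 0, umem, &rx, &tx, NULL);
 *
 * Passing NULL for usr_config selects the defaults filled in by
 * xsk_set_xdp_socket_config(); the call also loads and attaches the redirect
 * program unless XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD is set.
 */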

int xsk_umem__delete(struct xsk_umem *umem)
{
	struct xdp_mmap_offsets off;
	int err;

	if (!umem)
		return 0;

	if (umem->refcount)
		return -EBUSY;

	err = xsk_get_mmap_offsets(umem->fd, &off);
	if (!err && umem->fill_save && umem->comp_save) {
		munmap(umem->fill_save->ring - off.fr.desc,
		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
		munmap(umem->comp_save->ring - off.cr.desc,
		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
	}

	close(umem->fd);
	free(umem);

	return 0;
}

void xsk_socket__delete(struct xsk_socket *xsk)
{
	size_t desc_sz = sizeof(struct xdp_desc);
	struct xdp_mmap_offsets off;
	struct xsk_umem *umem;
	struct xsk_ctx *ctx;
	int err;

	if (!xsk)
		return;

	ctx = xsk->ctx;
	umem = ctx->umem;

	/* Tear down the per-queue BPF resources while we still hold the last
	 * reference; xsk_put_ctx() below may free ctx.
	 */
	if (ctx->refcount == 1) {
		xsk_delete_bpf_maps(xsk);
		close(ctx->prog_fd);
		if (ctx->has_bpf_link)
			close(ctx->link_fd);
	}

	xsk_put_ctx(ctx, true);

	err = xsk_get_mmap_offsets(xsk->fd, &off);
	if (!err) {
		if (xsk->rx) {
			munmap(xsk->rx->ring - off.rx.desc,
			       off.rx.desc + xsk->config.rx_size * desc_sz);
		}
		if (xsk->tx) {
			munmap(xsk->tx->ring - off.tx.desc,
			       off.tx.desc + xsk->config.tx_size * desc_sz);
		}
	}

	umem->refcount--;
	/* Do not close an fd that also has an associated umem connected
	 * to it.
	 */
	if (xsk->fd != umem->fd)
		close(xsk->fd);
	free(xsk);
}