// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <asm/barrier.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <sys/epoll.h>
#include <sys/mman.h>
#include <argp.h>
#include <stdlib.h>
#include "bench.h"
#include "ringbuf_bench.skel.h"
#include "perfbuf_bench.skel.h"

static struct {
    bool back2back;
    int batch_cnt;
    bool sampled;
    int sample_rate;
    int ringbuf_sz; /* per-ringbuf, in bytes */
    bool ringbuf_use_output; /* use slower output API */
    int perfbuf_sz; /* per-CPU size, in pages */
} args = {
    .back2back = false,
    .batch_cnt = 500,
    .sampled = false,
    .sample_rate = 500,
    .ringbuf_sz = 512 * 1024,
    .ringbuf_use_output = false,
    .perfbuf_sz = 128,
};

enum {
    ARG_RB_BACK2BACK = 2000,
    ARG_RB_USE_OUTPUT = 2001,
    ARG_RB_BATCH_CNT = 2002,
    ARG_RB_SAMPLED = 2003,
    ARG_RB_SAMPLE_RATE = 2004,
};

static const struct argp_option opts[] = {
    { "rb-b2b", ARG_RB_BACK2BACK, NULL, 0, "Back-to-back mode"},
    { "rb-use-output", ARG_RB_USE_OUTPUT, NULL, 0, "Use bpf_ringbuf_output() instead of bpf_ringbuf_reserve()"},
    { "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
    { "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
    { "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
    {},
};
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
    switch (key) {
    case ARG_RB_BACK2BACK:
        args.back2back = true;
        break;
    case ARG_RB_USE_OUTPUT:
        args.ringbuf_use_output = true;
        break;
    case ARG_RB_BATCH_CNT:
        args.batch_cnt = strtol(arg, NULL, 10);
        if (args.batch_cnt < 0) {
            fprintf(stderr, "Invalid batch count.\n");
            argp_usage(state);
        }
        break;
    case ARG_RB_SAMPLED:
        args.sampled = true;
        break;
    case ARG_RB_SAMPLE_RATE:
        args.sample_rate = strtol(arg, NULL, 10);
        if (args.sample_rate < 0) {
            fprintf(stderr, "Invalid sample rate.\n");
            argp_usage(state);
        }
        break;
    default:
        return ARGP_ERR_UNKNOWN;
    }
    return 0;
}

/* exported into benchmark runner */
const struct argp bench_ringbufs_argp = {
    .options = opts,
    .parser = parse_arg,
};

/* RINGBUF-LIBBPF benchmark */

static struct counter buf_hits;

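/*
 * Producer side: batches are triggered by making a getpgid() syscall; the
 * BPF programs attached below hook that syscall and submit batch_cnt
 * records per invocation.
 */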
static inline void bufs_trigger_batch(void)
{
    (void)syscall(__NR_getpgid);
}

static void bufs_validate(void)
{
    if (env.consumer_cnt != 1) {
        fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n");
        exit(1);
    }

    if (args.back2back && env.producer_cnt > 1) {
        fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n");
        exit(1);
    }
}

static void *bufs_sample_producer(void *input)
{
    if (args.back2back) {
        /* initial batch to get everything started */
        bufs_trigger_batch();
        return NULL;
    }

    while (true)
        bufs_trigger_batch();
    return NULL;
}

static struct ringbuf_libbpf_ctx {
    struct ringbuf_bench *skel;
    struct ring_buffer *ringbuf;
} ringbuf_libbpf_ctx;

static void ringbuf_libbpf_measure(struct bench_res *res)
{
    struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;

    res->hits = atomic_swap(&buf_hits.value, 0);
    res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static struct ringbuf_bench *ringbuf_setup_skeleton(void)
{
    struct ringbuf_bench *skel;

    setup_libbpf();

    skel = ringbuf_bench__open();
    if (!skel) {
        fprintf(stderr, "failed to open skeleton\n");
        exit(1);
    }

    skel->rodata->batch_cnt = args.batch_cnt;
    skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;

    if (args.sampled)
        /* record data + header take 16 bytes */
        skel->rodata->wakeup_data_size = args.sample_rate * 16;

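    /*
     * max_entries of a BPF_MAP_TYPE_RINGBUF map is its size in bytes; the
     * kernel requires it to be a power-of-2 multiple of the page size,
     * which the 512KB default satisfies.
     */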
    bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz);

    if (ringbuf_bench__load(skel)) {
        fprintf(stderr, "failed to load skeleton\n");
        exit(1);
    }

    return skel;
}

static int buf_process_sample(void *ctx, void *data, size_t len)
{
    atomic_inc(&buf_hits.value);
    return 0;
}

static void ringbuf_libbpf_setup(void)
{
    struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
    struct bpf_link *link;

    ctx->skel = ringbuf_setup_skeleton();
    ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
                    buf_process_sample, NULL, NULL);
    if (!ctx->ringbuf) {
        fprintf(stderr, "failed to create ringbuf\n");
        exit(1);
    }

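    /* the link is never detached; the benchmark runs until the process exits */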
    link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
    if (!link) {
        fprintf(stderr, "failed to attach program!\n");
        exit(1);
    }
}

static void *ringbuf_libbpf_consumer(void *input)
{
    struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;

    while (ring_buffer__poll(ctx->ringbuf, -1) >= 0) {
        if (args.back2back)
            bufs_trigger_batch();
    }
    fprintf(stderr, "ringbuf polling failed!\n");
    return NULL;
}

/* RINGBUF-CUSTOM benchmark */
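/*
 * Instead of going through libbpf's ring_buffer API, this benchmark mmap()s
 * the ring buffer's consumer/producer pages and data area directly and walks
 * records by hand, measuring ring buffer overhead without the callback
 * machinery.
 */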
struct ringbuf_custom {
    __u64 *consumer_pos;
    __u64 *producer_pos;
    __u64 mask;
    void *data;
    int map_fd;
};

static struct ringbuf_custom_ctx {
    struct ringbuf_bench *skel;
    struct ringbuf_custom ringbuf;
    int epoll_fd;
    struct epoll_event event;
} ringbuf_custom_ctx;

static void ringbuf_custom_measure(struct bench_res *res)
{
    struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;

    res->hits = atomic_swap(&buf_hits.value, 0);
    res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static void ringbuf_custom_setup(void)
{
    struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
    const size_t page_size = getpagesize();
    struct bpf_link *link;
    struct ringbuf_custom *r;
    void *tmp;
    int err;

    ctx->skel = ringbuf_setup_skeleton();

    ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
    if (ctx->epoll_fd < 0) {
        fprintf(stderr, "failed to create epoll fd: %d\n", -errno);
        exit(1);
    }

    r = &ctx->ringbuf;
    r->map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
    r->mask = args.ringbuf_sz - 1;

    /* Map writable consumer page */
    tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
           r->map_fd, 0);
    if (tmp == MAP_FAILED) {
        fprintf(stderr, "failed to mmap consumer page: %d\n", -errno);
        exit(1);
    }
    r->consumer_pos = tmp;

    /* Map read-only producer page and data pages. */
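    /*
     * The kernel maps the data area twice back-to-back, so a record that
     * wraps past the end of the ring is still visible as one contiguous
     * chunk; hence the 2 * ringbuf_sz below.
     */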
    tmp = mmap(NULL, page_size + 2 * args.ringbuf_sz, PROT_READ, MAP_SHARED,
           r->map_fd, page_size);
    if (tmp == MAP_FAILED) {
        fprintf(stderr, "failed to mmap data pages: %d\n", -errno);
        exit(1);
    }
    r->producer_pos = tmp;
    r->data = tmp + page_size;

    ctx->event.events = EPOLLIN;
    err = epoll_ctl(ctx->epoll_fd, EPOLL_CTL_ADD, r->map_fd, &ctx->event);
    if (err < 0) {
        fprintf(stderr, "failed to epoll add ringbuf: %d\n", -errno);
        exit(1);
    }

    link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
    if (!link) {
        fprintf(stderr, "failed to attach program\n");
        exit(1);
    }
}

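/*
 * Each ring buffer record is prefixed by an 8-byte header; the first 32-bit
 * word holds the record length in its low 30 bits, with the top two bits
 * flagging a record that is still being written (BUSY) or was discarded by
 * the producer (DISCARD).
 */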
#define RINGBUF_BUSY_BIT (1 << 31)
#define RINGBUF_DISCARD_BIT (1 << 30)
#define RINGBUF_META_LEN 8

static inline int roundup_len(__u32 len)
{
    /* clear out top 2 bits */
    len <<= 2;
    len >>= 2;
    /* add length prefix */
    len += RINGBUF_META_LEN;
    /* round up to 8 byte alignment */
    return (len + 7) / 8 * 8;
}

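/*
 * Consumer loop over the mapped ring: producer_pos and each record's length
 * word are read with smp_load_acquire() to pair with the kernel's releasing
 * stores, and consumer_pos is published with smp_store_release() so the
 * kernel knows the space can be reused.
 */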
static void ringbuf_custom_process_ring(struct ringbuf_custom *r)
{
    unsigned long cons_pos, prod_pos;
    int *len_ptr, len;
    bool got_new_data;

    cons_pos = smp_load_acquire(r->consumer_pos);
    while (true) {
        got_new_data = false;
        prod_pos = smp_load_acquire(r->producer_pos);
        while (cons_pos < prod_pos) {
            len_ptr = r->data + (cons_pos & r->mask);
            len = smp_load_acquire(len_ptr);

            /* sample not committed yet, bail out for now */
            if (len & RINGBUF_BUSY_BIT)
                return;

            got_new_data = true;
            cons_pos += roundup_len(len);

            atomic_inc(&buf_hits.value);
        }
        if (got_new_data)
            smp_store_release(r->consumer_pos, cons_pos);
        else
            break;
    }
}

static void *ringbuf_custom_consumer(void *input)
{
    struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
    int cnt;

    do {
        if (args.back2back)
            bufs_trigger_batch();
        cnt = epoll_wait(ctx->epoll_fd, &ctx->event, 1, -1);
        if (cnt > 0)
            ringbuf_custom_process_ring(&ctx->ringbuf);
    } while (cnt >= 0);
    fprintf(stderr, "ringbuf polling failed!\n");
    return NULL;
}

/* PERFBUF-LIBBPF benchmark */
static struct perfbuf_libbpf_ctx {
    struct perfbuf_bench *skel;
    struct perf_buffer *perfbuf;
} perfbuf_libbpf_ctx;

static void perfbuf_measure(struct bench_res *res)
{
    struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;

    res->hits = atomic_swap(&buf_hits.value, 0);
    res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static struct perfbuf_bench *perfbuf_setup_skeleton(void)
{
    struct perfbuf_bench *skel;

    setup_libbpf();

    skel = perfbuf_bench__open();
    if (!skel) {
        fprintf(stderr, "failed to open skeleton\n");
        exit(1);
    }

    skel->rodata->batch_cnt = args.batch_cnt;

    if (perfbuf_bench__load(skel)) {
        fprintf(stderr, "failed to load skeleton\n");
        exit(1);
    }

    return skel;
}

static enum bpf_perf_event_ret
perfbuf_process_sample_raw(void *input_ctx, int cpu,
               struct perf_event_header *e)
{
    switch (e->type) {
    case PERF_RECORD_SAMPLE:
        atomic_inc(&buf_hits.value);
        break;
    case PERF_RECORD_LOST:
        break;
    default:
        return LIBBPF_PERF_EVENT_ERROR;
    }
    return LIBBPF_PERF_EVENT_CONT;
}

static void perfbuf_libbpf_setup(void)
{
    struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
    struct perf_event_attr attr;
    struct bpf_link *link;

    ctx->skel = perfbuf_setup_skeleton();

    memset(&attr, 0, sizeof(attr));
    attr.config = PERF_COUNT_SW_BPF_OUTPUT;
    attr.type = PERF_TYPE_SOFTWARE;
    attr.sample_type = PERF_SAMPLE_RAW;
    /* notify only every Nth sample */
    if (args.sampled) {
        attr.sample_period = args.sample_rate;
        attr.wakeup_events = args.sample_rate;
    } else {
        attr.sample_period = 1;
        attr.wakeup_events = 1;
    }

    if (args.sample_rate > args.batch_cnt) {
        fprintf(stderr, "sample rate %d is too high for given batch count %d\n",
            args.sample_rate, args.batch_cnt);
        exit(1);
    }

    ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
                        args.perfbuf_sz, &attr,
                        perfbuf_process_sample_raw, NULL, NULL);
    if (!ctx->perfbuf) {
        fprintf(stderr, "failed to create perfbuf\n");
        exit(1);
    }

    link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
    if (!link) {
        fprintf(stderr, "failed to attach program\n");
        exit(1);
    }
}

static void *perfbuf_libbpf_consumer(void *input)
{
    struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;

    while (perf_buffer__poll(ctx->perfbuf, -1) >= 0) {
        if (args.back2back)
            bufs_trigger_batch();
    }
    fprintf(stderr, "perfbuf polling failed!\n");
    return NULL;
}

/* PERFBUF-CUSTOM benchmark */

/* copies of internal libbpf definitions */
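/*
 * NOTE: these struct layouts mirror libbpf's private perf_buffer
 * implementation and have to stay in sync with the libbpf version this
 * benchmark is linked against.
 */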
struct perf_cpu_buf {
    struct perf_buffer *pb;
    void *base; /* mmap()'ed memory */
    void *buf; /* for reconstructing segmented data */
    size_t buf_size;
    int fd;
    int cpu;
    int map_key;
};

struct perf_buffer {
    perf_buffer_event_fn event_cb;
    perf_buffer_sample_fn sample_cb;
    perf_buffer_lost_fn lost_cb;
    void *ctx; /* passed into callbacks */

    size_t page_size;
    size_t mmap_size;
    struct perf_cpu_buf **cpu_bufs;
    struct epoll_event *events;
    int cpu_cnt; /* number of allocated CPU buffers */
    int epoll_fd; /* perf event FD */
    int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
};

static void *perfbuf_custom_consumer(void *input)
{
    struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
    struct perf_buffer *pb = ctx->perfbuf;
    struct perf_cpu_buf *cpu_buf;
    struct perf_event_mmap_page *header;
    size_t mmap_mask = pb->mmap_size - 1;
    struct perf_event_header *ehdr;
    __u64 data_head, data_tail;
    size_t ehdr_size;
    void *base;
    int i, cnt;

    while (true) {
        if (args.back2back)
            bufs_trigger_batch();
        cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, -1);
        if (cnt <= 0) {
            fprintf(stderr, "perf epoll failed: %d\n", -errno);
            exit(1);
        }

        for (i = 0; i < cnt; ++i) {
            cpu_buf = pb->events[i].data.ptr;
            header = cpu_buf->base;
            base = ((void *)header) + pb->page_size;

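            /*
             * ring_buffer_read_head() is an acquire load of data_head and
             * ring_buffer_write_tail() is a release store of data_tail,
             * pairing with the kernel's perf ring buffer updates.
             */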
            data_head = ring_buffer_read_head(header);
            data_tail = header->data_tail;
            while (data_head != data_tail) {
                ehdr = base + (data_tail & mmap_mask);
                ehdr_size = ehdr->size;

                if (ehdr->type == PERF_RECORD_SAMPLE)
                    atomic_inc(&buf_hits.value);

                data_tail += ehdr_size;
            }
            ring_buffer_write_tail(header, data_tail);
        }
    }
    return NULL;
}

const struct bench bench_rb_libbpf = {
    .name = "rb-libbpf",
    .validate = bufs_validate,
    .setup = ringbuf_libbpf_setup,
    .producer_thread = bufs_sample_producer,
    .consumer_thread = ringbuf_libbpf_consumer,
    .measure = ringbuf_libbpf_measure,
    .report_progress = hits_drops_report_progress,
    .report_final = hits_drops_report_final,
};

const struct bench bench_rb_custom = {
    .name = "rb-custom",
    .validate = bufs_validate,
    .setup = ringbuf_custom_setup,
    .producer_thread = bufs_sample_producer,
    .consumer_thread = ringbuf_custom_consumer,
    .measure = ringbuf_custom_measure,
    .report_progress = hits_drops_report_progress,
    .report_final = hits_drops_report_final,
};

const struct bench bench_pb_libbpf = {
    .name = "pb-libbpf",
    .validate = bufs_validate,
    .setup = perfbuf_libbpf_setup,
    .producer_thread = bufs_sample_producer,
    .consumer_thread = perfbuf_libbpf_consumer,
    .measure = perfbuf_measure,
    .report_progress = hits_drops_report_progress,
    .report_final = hits_drops_report_final,
};

const struct bench bench_pb_custom = {
    .name = "pb-custom",
    .validate = bufs_validate,
    .setup = perfbuf_libbpf_setup,
    .producer_thread = bufs_sample_producer,
    .consumer_thread = perfbuf_custom_consumer,
    .measure = perfbuf_measure,
    .report_progress = hits_drops_report_progress,
    .report_final = hits_drops_report_final,
};