#ifdef HAVE_EVENTFD_SUPPORT
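/*
 * Benchmark concurrent epoll_wait(2) throughput.
 *
 * Each worker thread waits in epoll_wait(2) on a set of eventfd(2)
 * descriptors, using either a single shared epoll instance (the
 * default) or a private per-thread instance (-m). A separate writer
 * thread wakes the waiters by writing to the eventfds, either in
 * order or randomly (-R). Workers count completed wait+read cycles,
 * and the results are reported as operations per second, per thread
 * and averaged.
 */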
#include <string.h>
#include <pthread.h>
#include <unistd.h>

#include <errno.h>
#include <inttypes.h>
#include <signal.h>
#include <stdlib.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <sys/types.h>
#include <perf/cpumap.h>

#include "../util/stat.h"
#include <subcmd/parse-options.h>
#include "bench.h"

#include <err.h>

#define printinfo(fmt, arg...) \
        do { if (__verbose) { printf(fmt, ## arg); fflush(stdout); } } while (0)

static unsigned int nthreads = 0;
static unsigned int nsecs = 8;
static bool wdone, done, __verbose, randomize, nonblocking;

/* maximum depth allowed for a nested epoll hierarchy (-N) */
#define EPOLL_MAXNESTS 4

static int epollfd;
static int *epollfdp;
static bool noaffinity;
static unsigned int nested = 0;
static bool et;
static bool oneshot;
static bool multiq;

/* number of file descriptors monitored by each worker thread */
static unsigned int nfds = 64;

static pthread_mutex_t thread_lock;
static unsigned int threads_starting;
static struct stats throughput_stats;
static pthread_cond_t thread_parent, thread_worker;

struct worker {
        int tid;
        int epollfd;
        pthread_t thread;
        unsigned long ops;
        int *fdmap;
};

static const struct option options[] = {
        /* general benchmark options */
        OPT_UINTEGER('t', "threads", &nthreads, "Specify the number of threads"),
        OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
        OPT_UINTEGER('f', "nfds", &nfds, "Specify the number of file descriptors to monitor for each thread"),
        OPT_BOOLEAN('n', "noaffinity", &noaffinity, "Disable CPU affinity"),
        OPT_BOOLEAN('R', "randomize", &randomize, "Enable random write behaviour (default is linear)"),
        OPT_BOOLEAN('v', "verbose", &__verbose, "Verbose mode"),

        /* epoll-specific options */
        OPT_BOOLEAN('m', "multiq", &multiq, "Use multiple epoll instances (one per thread)"),
        OPT_BOOLEAN('B', "nonblocking", &nonblocking, "Nonblocking epoll_wait(2) behaviour"),
        OPT_UINTEGER('N', "nested", &nested, "Nesting level of the epoll hierarchy (default is 0, no nesting)"),
        OPT_BOOLEAN('S', "oneshot", &oneshot, "Use EPOLLONESHOT semantics"),
        OPT_BOOLEAN('E', "edge", &et, "Use the edge-triggered interface (default is LT)"),

        OPT_END()
};

static const char * const bench_epoll_wait_usage[] = {
        "perf bench epoll wait <options>",
        NULL
};
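
/*
 * Arrange the n elements of the array in random order (an in-place
 * Fisher-Yates style shuffle). Only effective when n is much smaller
 * than RAND_MAX, since the index is derived from rand().
 */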
static void shuffle(void *array, size_t n, size_t size)
{
        char *carray = array;
        void *aux;
        size_t i;

        if (n <= 1)
                return;

        aux = calloc(1, size);
        if (!aux)
                err(EXIT_FAILURE, "calloc");

        for (i = 0; i < n - 1; i++) {
                size_t j = i + rand() / (RAND_MAX / (n - i) + 1);
                j *= size;

                memcpy(aux, &carray[j], size);
                memcpy(&carray[j], &carray[i * size], size);
                memcpy(&carray[i * size], aux, size);
        }

        free(aux);
}

static void *workerfn(void *arg)
{
        int fd, ret, r;
        struct worker *w = (struct worker *) arg;
        unsigned long ops = w->ops;
        struct epoll_event ev;
        uint64_t val;
        int to = nonblocking ? 0 : -1;
        int efd = multiq ? w->epollfd : epollfd;

        /* signal the parent that we are ready, then wait for the start broadcast */
        pthread_mutex_lock(&thread_lock);
        threads_starting--;
        if (!threads_starting)
                pthread_cond_signal(&thread_parent);
        pthread_cond_wait(&thread_worker, &thread_lock);
        pthread_mutex_unlock(&thread_lock);

        do {
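                /*
                 * Wait for an IN event on one of this thread's eventfds.
                 * With -B the timeout is zero (nonblocking); otherwise the
                 * call blocks until the writer thread makes a descriptor
                 * readable.
                 */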
                do {
                        ret = epoll_wait(efd, &ev, 1, to);
                } while (ret < 0 && errno == EINTR);
                if (ret < 0)
                        err(EXIT_FAILURE, "epoll_wait");

                fd = ev.data.fd;

                /* consume the eventfd count; EAGAIN means another worker drained it first */
                do {
                        r = read(fd, &val, sizeof(val));
                } while (!done && (r < 0 && errno == EAGAIN));

                if (et) {
                        ev.events = EPOLLIN | EPOLLET;
                        ret = epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev);
                }

                if (oneshot) {
                        /* rearm the descriptor; EPOLLONESHOT disabled it on delivery */
                        ev.events |= EPOLLIN | EPOLLONESHOT;
                        ret = epoll_ctl(efd, EPOLL_CTL_MOD, fd, &ev);
                }

                ops++;
        } while (!done);

        if (multiq)
                close(w->epollfd);

        w->ops = ops;
        return NULL;
}
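
/*
 * Create 'nested' chained epoll instances and attach the outermost one
 * to the worker's (or the shared) epoll set. Nothing is ever added to
 * the innermost instance, so the nested sets themselves never report
 * events.
 */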
static void nest_epollfd(struct worker *w)
{
        unsigned int i;
        struct epoll_event ev;
        int efd = multiq ? w->epollfd : epollfd;

        if (nested > EPOLL_MAXNESTS)
                nested = EPOLL_MAXNESTS;

        epollfdp = calloc(nested, sizeof(*epollfdp));
        if (!epollfdp)
                err(EXIT_FAILURE, "calloc");

        for (i = 0; i < nested; i++) {
                epollfdp[i] = epoll_create(1);
                if (epollfdp[i] < 0)
                        err(EXIT_FAILURE, "epoll_create");
        }

        /* the nested instances never become ready, so mask and data are arbitrary */
        ev.events = EPOLLHUP;
        ev.data.u64 = i;

        for (i = nested - 1; i; i--) {
                if (epoll_ctl(epollfdp[i - 1], EPOLL_CTL_ADD,
                              epollfdp[i], &ev) < 0)
                        err(EXIT_FAILURE, "epoll_ctl");
        }

        if (epoll_ctl(efd, EPOLL_CTL_ADD, *epollfdp, &ev) < 0)
                err(EXIT_FAILURE, "epoll_ctl");
}

static void toggle_done(int sig __maybe_unused,
                        siginfo_t *info __maybe_unused,
                        void *uc __maybe_unused)
{
        /* tell all threads the run is over and record the elapsed time */
        done = true;
        gettimeofday(&bench__end, NULL);
        timersub(&bench__end, &bench__start, &bench__runtime);
}

static void print_summary(void)
{
        unsigned long avg = avg_stats(&throughput_stats);
        double stddev = stddev_stats(&throughput_stats);

        printf("\nAveraged %lu operations/sec (+- %.2f%%), total secs = %d\n",
               avg, rel_stddev_stats(stddev, avg),
               (int)bench__runtime.tv_sec);
}

static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
{
        pthread_attr_t thread_attr, *attrp = NULL;
        cpu_set_t *cpuset;
        unsigned int i, j;
        int ret = 0, events = EPOLLIN;
        int nrcpus;
        size_t size;

        if (oneshot)
                events |= EPOLLONESHOT;
        if (et)
                events |= EPOLLET;

        printinfo("starting worker/consumer %sthreads%s\n",
                  noaffinity ? "" : "CPU affinity ",
                  nonblocking ? " (nonblocking)" : "");
        if (!noaffinity)
                pthread_attr_init(&thread_attr);

        nrcpus = perf_cpu_map__nr(cpu);
        cpuset = CPU_ALLOC(nrcpus);
        BUG_ON(!cpuset);
        size = CPU_ALLOC_SIZE(nrcpus);

        for (i = 0; i < nthreads; i++) {
                struct worker *w = &worker[i];

                if (multiq) {
                        w->epollfd = epoll_create(1);
                        if (w->epollfd < 0)
                                err(EXIT_FAILURE, "epoll_create");

                        if (nested)
                                nest_epollfd(w);
                }

                w->tid = i;
                w->fdmap = calloc(nfds, sizeof(int));
                if (!w->fdmap) {
                        CPU_FREE(cpuset);
                        return 1;
                }

                /* create nfds eventfds for this worker and register each one */
                for (j = 0; j < nfds; j++) {
                        int efd = multiq ? w->epollfd : epollfd;
                        struct epoll_event ev;

                        w->fdmap[j] = eventfd(0, EFD_NONBLOCK);
                        if (w->fdmap[j] < 0)
                                err(EXIT_FAILURE, "eventfd");

                        ev.data.fd = w->fdmap[j];
                        ev.events = events;

                        ret = epoll_ctl(efd, EPOLL_CTL_ADD,
                                        w->fdmap[j], &ev);
                        if (ret < 0)
                                err(EXIT_FAILURE, "epoll_ctl");
                }

                if (!noaffinity) {
                        CPU_ZERO_S(size, cpuset);
                        CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu,
                                  size, cpuset);

                        ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
                        if (ret) {
                                CPU_FREE(cpuset);
                                err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
                        }

                        attrp = &thread_attr;
                }

                ret = pthread_create(&w->thread, attrp, workerfn,
                                     (void *)(struct worker *) w);
                if (ret) {
                        CPU_FREE(cpuset);
                        err(EXIT_FAILURE, "pthread_create");
                }
        }

        CPU_FREE(cpuset);
        if (!noaffinity)
                pthread_attr_destroy(&thread_attr);

        return ret;
}
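
/*
 * Writer thread: repeatedly write to every worker's eventfds (in order,
 * or shuffled with -R) to generate the events the workers wait on, until
 * the main thread sets 'wdone'.
 */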
static void *writerfn(void *p)
{
        struct worker *worker = p;
        size_t i, j, iter;
        const uint64_t val = 1;
        ssize_t sz;
        struct timespec ts = { .tv_sec = 0,
                               .tv_nsec = 500 };

        printinfo("starting writer-thread: doing %s writes ...\n",
                  randomize ? "random" : "linear");

        for (iter = 0; !wdone; iter++) {
                if (randomize) {
                        shuffle((void *)worker, nthreads, sizeof(*worker));
                }

                for (i = 0; i < nthreads; i++) {
                        struct worker *w = &worker[i];

                        if (randomize) {
                                shuffle((void *)w->fdmap, nfds, sizeof(int));
                        }

                        for (j = 0; j < nfds; j++) {
                                do {
                                        sz = write(w->fdmap[j], &val, sizeof(val));
                                } while (!wdone && (sz < 0 && errno == EAGAIN));
                        }
                }

                /* brief (500ns) pause between write rounds */
                nanosleep(&ts, NULL);
        }

        printinfo("exiting writer-thread (total full-loops: %zu)\n", iter);
        return NULL;
}

static int cmpworker(const void *p1, const void *p2)
{
        struct worker *w1 = (struct worker *) p1;
        struct worker *w2 = (struct worker *) p2;

        /* restore ascending tid order; qsort needs a proper three-way result */
        if (w1->tid > w2->tid)
                return 1;
        if (w1->tid < w2->tid)
                return -1;
        return 0;
}

int bench_epoll_wait(int argc, const char **argv)
{
        int ret = 0;
        struct sigaction act;
        unsigned int i;
        struct worker *worker = NULL;
        struct perf_cpu_map *cpu;
        pthread_t wthread;
        struct rlimit rl, prevrl;

        argc = parse_options(argc, argv, options, bench_epoll_wait_usage, 0);
        if (argc) {
                usage_with_options(bench_epoll_wait_usage, options);
                exit(EXIT_FAILURE);
        }

        memset(&act, 0, sizeof(act));
        sigfillset(&act.sa_mask);
        act.sa_sigaction = toggle_done;
        sigaction(SIGINT, &act, NULL);

        cpu = perf_cpu_map__new(NULL);
        if (!cpu)
                goto errmem;

        /* single-queue model: one shared epoll instance for all workers */
        if (!multiq) {
                epollfd = epoll_create(1);
                if (epollfd < 0)
                        err(EXIT_FAILURE, "epoll_create");

                /* attach the nested epoll chain, if requested */
                if (nested)
                        nest_epollfd(NULL);
        }

        printinfo("Using %s queue model\n", multiq ? "multi" : "single");
        printinfo("Nesting level(s): %d\n", nested);

        /* default to one worker per CPU, leaving one CPU for the writer thread */
        if (!nthreads)
                nthreads = perf_cpu_map__nr(cpu) - 1;

        worker = calloc(nthreads, sizeof(*worker));
        if (!worker)
                goto errmem;

        /* raise the fd limit to fit all of the eventfds and epoll instances */
        if (getrlimit(RLIMIT_NOFILE, &prevrl))
                err(EXIT_FAILURE, "getrlimit");
        rl.rlim_cur = rl.rlim_max = nfds * nthreads * 2 + 50;
        printinfo("Setting RLIMIT_NOFILE rlimit from %" PRIu64 " to: %" PRIu64 "\n",
                  (uint64_t)prevrl.rlim_max, (uint64_t)rl.rlim_max);
        if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
                err(EXIT_FAILURE, "setrlimit");

        printf("Run summary [PID %d]: %d threads monitoring%s on "
               "%d file-descriptors for %d secs.\n\n",
               getpid(), nthreads, oneshot ? " (EPOLLONESHOT semantics)" : "", nfds, nsecs);

        init_stats(&throughput_stats);
        pthread_mutex_init(&thread_lock, NULL);
        pthread_cond_init(&thread_parent, NULL);
        pthread_cond_init(&thread_worker, NULL);

        threads_starting = nthreads;

        gettimeofday(&bench__start, NULL);

        do_threads(worker, cpu);

        /* wait until every worker is ready, then release them all at once */
        pthread_mutex_lock(&thread_lock);
        while (threads_starting)
                pthread_cond_wait(&thread_parent, &thread_lock);
        pthread_cond_broadcast(&thread_worker);
        pthread_mutex_unlock(&thread_lock);
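
        /*
         * The workers are now running their wait loops; start the writer
         * thread that generates the events they consume.
         */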
        ret = pthread_create(&wthread, NULL, writerfn,
                             (void *)(struct worker *) worker);
        if (ret)
                err(EXIT_FAILURE, "pthread_create");

        sleep(nsecs);
        toggle_done(0, NULL, NULL);
        printinfo("main thread: toggling done\n");

        /* give the workers one more wakeup round before stopping the writer */
        sleep(1);
        wdone = true;
        ret = pthread_join(wthread, NULL);
        if (ret)
                err(EXIT_FAILURE, "pthread_join");

        /* cleanup and report the results */
        pthread_cond_destroy(&thread_parent);
        pthread_cond_destroy(&thread_worker);
        pthread_mutex_destroy(&thread_lock);

        /* the writer may have shuffled the worker array; restore tid order */
        if (randomize)
                qsort(worker, nthreads, sizeof(struct worker), cmpworker);

        for (i = 0; i < nthreads; i++) {
                unsigned long t = bench__runtime.tv_sec > 0 ?
                        worker[i].ops / bench__runtime.tv_sec : 0;

                update_stats(&throughput_stats, t);

                if (nfds == 1)
                        printf("[thread %2d] fdmap: %p [ %04lu ops/sec ]\n",
                               worker[i].tid, &worker[i].fdmap[0], t);
                else
                        printf("[thread %2d] fdmap: %p ... %p [ %04lu ops/sec ]\n",
                               worker[i].tid, &worker[i].fdmap[0],
                               &worker[i].fdmap[nfds - 1], t);
        }

        print_summary();

        close(epollfd);
        return ret;
errmem:
        err(EXIT_FAILURE, "calloc");
}
#endif /* HAVE_EVENTFD_SUPPORT */