0001
0002
0003
0004
0005
0006
0007
0008 #ifdef HAVE_EVENTFD_SUPPORT
0009
0010 #include <string.h>
0011 #include <pthread.h>
0012
0013 #include <errno.h>
0014 #include <inttypes.h>
0015 #include <signal.h>
0016 #include <stdlib.h>
0017 #include <unistd.h>
0018 #include <linux/compiler.h>
0019 #include <linux/kernel.h>
0020 #include <sys/time.h>
0021 #include <sys/resource.h>
0022 #include <sys/epoll.h>
0023 #include <sys/eventfd.h>
0024 #include <perf/cpumap.h>
0025
0026 #include "../util/stat.h"
0027 #include <subcmd/parse-options.h>
0028 #include "bench.h"
0029
0030 #include <err.h>
0031
/* printf() wrapper that only emits output when verbose mode is enabled. */
#define printinfo(fmt, ...) \
	do { if (__verbose) printf(fmt, ##__VA_ARGS__); } while (0)

/* Number of worker threads; 0 means "pick a default" in bench_epoll_ctl(). */
static unsigned int nthreads = 0;
/* How long the benchmark runs, in seconds. */
static unsigned int nsecs = 8;
/* Set by toggle_done(); workers leave their loop once it is true. */
static bool done;
/* Gates printinfo() output. */
static bool __verbose;
/* When set, workers issue random operations on random fds. */
static bool randomize;
0038
0039
0040
0041
0042
0043
/* Upper bound applied to the requested nesting level in nest_epollfd(). */
#define EPOLL_MAXNESTS 4

/*
 * The epoll_ctl operations exercised by the benchmark. The values index
 * the per-operation counter and statistics arrays; EPOLL_NR_OPS must stay
 * last because it is used as the array length.
 */
enum {
	OP_EPOLL_ADD = 0,
	OP_EPOLL_MOD = 1,
	OP_EPOLL_DEL = 2,
	EPOLL_NR_OPS,
};
0052
0053 static int epollfd;
0054 static int *epollfdp;
0055 static bool noaffinity;
0056 static unsigned int nested = 0;
0057
0058
0059 static unsigned int nfds = 64;
0060
0061 static pthread_mutex_t thread_lock;
0062 static unsigned int threads_starting;
0063 static struct stats all_stats[EPOLL_NR_OPS];
0064 static pthread_cond_t thread_parent, thread_worker;
0065
0066 struct worker {
0067 int tid;
0068 pthread_t thread;
0069 unsigned long ops[EPOLL_NR_OPS];
0070 int *fdmap;
0071 };
0072
0073 static const struct option options[] = {
0074 OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
0075 OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
0076 OPT_UINTEGER('f', "nfds", &nfds, "Specify amount of file descriptors to monitor for each thread"),
0077 OPT_BOOLEAN( 'n', "noaffinity", &noaffinity, "Disables CPU affinity"),
0078 OPT_UINTEGER( 'N', "nested", &nested, "Nesting level epoll hierarchy (default is 0, no nesting)"),
0079 OPT_BOOLEAN( 'R', "randomize", &randomize, "Perform random operations on random fds"),
0080 OPT_BOOLEAN( 'v', "verbose", &__verbose, "Verbose mode"),
0081 OPT_END()
0082 };
0083
0084 static const char * const bench_epoll_ctl_usage[] = {
0085 "perf bench epoll ctl <options>",
0086 NULL
0087 };
0088
0089 static void toggle_done(int sig __maybe_unused,
0090 siginfo_t *info __maybe_unused,
0091 void *uc __maybe_unused)
0092 {
0093
0094 done = true;
0095 gettimeofday(&bench__end, NULL);
0096 timersub(&bench__end, &bench__start, &bench__runtime);
0097 }
0098
0099 static void nest_epollfd(void)
0100 {
0101 unsigned int i;
0102 struct epoll_event ev;
0103
0104 if (nested > EPOLL_MAXNESTS)
0105 nested = EPOLL_MAXNESTS;
0106 printinfo("Nesting level(s): %d\n", nested);
0107
0108 epollfdp = calloc(nested, sizeof(int));
0109 if (!epollfdp)
0110 err(EXIT_FAILURE, "calloc");
0111
0112 for (i = 0; i < nested; i++) {
0113 epollfdp[i] = epoll_create(1);
0114 if (epollfd < 0)
0115 err(EXIT_FAILURE, "epoll_create");
0116 }
0117
0118 ev.events = EPOLLHUP;
0119 ev.data.u64 = i;
0120
0121 for (i = nested - 1; i; i--) {
0122 if (epoll_ctl(epollfdp[i - 1], EPOLL_CTL_ADD,
0123 epollfdp[i], &ev) < 0)
0124 err(EXIT_FAILURE, "epoll_ctl");
0125 }
0126
0127 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, *epollfdp, &ev) < 0)
0128 err(EXIT_FAILURE, "epoll_ctl");
0129 }
0130
0131 static inline void do_epoll_op(struct worker *w, int op, int fd)
0132 {
0133 int error;
0134 struct epoll_event ev;
0135
0136 ev.events = EPOLLIN;
0137 ev.data.u64 = fd;
0138
0139 switch (op) {
0140 case OP_EPOLL_ADD:
0141 error = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev);
0142 break;
0143 case OP_EPOLL_MOD:
0144 ev.events = EPOLLOUT;
0145 error = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &ev);
0146 break;
0147 case OP_EPOLL_DEL:
0148 error = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
0149 break;
0150 default:
0151 error = 1;
0152 break;
0153 }
0154
0155 if (!error)
0156 w->ops[op]++;
0157 }
0158
0159 static inline void do_random_epoll_op(struct worker *w)
0160 {
0161 unsigned long rnd1 = random(), rnd2 = random();
0162 int op, fd;
0163
0164 fd = w->fdmap[rnd1 % nfds];
0165 op = rnd2 % EPOLL_NR_OPS;
0166
0167 do_epoll_op(w, op, fd);
0168 }
0169
0170 static void *workerfn(void *arg)
0171 {
0172 unsigned int i;
0173 struct worker *w = (struct worker *) arg;
0174 struct timespec ts = { .tv_sec = 0,
0175 .tv_nsec = 250 };
0176
0177 pthread_mutex_lock(&thread_lock);
0178 threads_starting--;
0179 if (!threads_starting)
0180 pthread_cond_signal(&thread_parent);
0181 pthread_cond_wait(&thread_worker, &thread_lock);
0182 pthread_mutex_unlock(&thread_lock);
0183
0184
0185 do {
0186
0187 if (randomize) {
0188 do_random_epoll_op(w);
0189 } else {
0190 for (i = 0; i < nfds; i++) {
0191 do_epoll_op(w, OP_EPOLL_ADD, w->fdmap[i]);
0192 do_epoll_op(w, OP_EPOLL_MOD, w->fdmap[i]);
0193 do_epoll_op(w, OP_EPOLL_DEL, w->fdmap[i]);
0194 }
0195 }
0196
0197 nanosleep(&ts, NULL);
0198 } while (!done);
0199
0200 return NULL;
0201 }
0202
0203 static void init_fdmaps(struct worker *w, int pct)
0204 {
0205 unsigned int i;
0206 int inc;
0207 struct epoll_event ev;
0208
0209 if (!pct)
0210 return;
0211
0212 inc = 100/pct;
0213 for (i = 0; i < nfds; i+=inc) {
0214 ev.data.fd = w->fdmap[i];
0215 ev.events = EPOLLIN;
0216
0217 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, w->fdmap[i], &ev) < 0)
0218 err(EXIT_FAILURE, "epoll_ct");
0219 }
0220 }
0221
0222 static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
0223 {
0224 pthread_attr_t thread_attr, *attrp = NULL;
0225 cpu_set_t *cpuset;
0226 unsigned int i, j;
0227 int ret = 0;
0228 int nrcpus;
0229 size_t size;
0230
0231 if (!noaffinity)
0232 pthread_attr_init(&thread_attr);
0233
0234 nrcpus = perf_cpu_map__nr(cpu);
0235 cpuset = CPU_ALLOC(nrcpus);
0236 BUG_ON(!cpuset);
0237 size = CPU_ALLOC_SIZE(nrcpus);
0238
0239 for (i = 0; i < nthreads; i++) {
0240 struct worker *w = &worker[i];
0241
0242 w->tid = i;
0243 w->fdmap = calloc(nfds, sizeof(int));
0244 if (!w->fdmap)
0245 return 1;
0246
0247 for (j = 0; j < nfds; j++) {
0248 w->fdmap[j] = eventfd(0, EFD_NONBLOCK);
0249 if (w->fdmap[j] < 0)
0250 err(EXIT_FAILURE, "eventfd");
0251 }
0252
0253
0254
0255
0256
0257
0258 if (randomize)
0259 init_fdmaps(w, 50);
0260
0261 if (!noaffinity) {
0262 CPU_ZERO_S(size, cpuset);
0263 CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu,
0264 size, cpuset);
0265
0266 ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
0267 if (ret) {
0268 CPU_FREE(cpuset);
0269 err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
0270 }
0271
0272 attrp = &thread_attr;
0273 }
0274
0275 ret = pthread_create(&w->thread, attrp, workerfn,
0276 (void *)(struct worker *) w);
0277 if (ret) {
0278 CPU_FREE(cpuset);
0279 err(EXIT_FAILURE, "pthread_create");
0280 }
0281 }
0282
0283 CPU_FREE(cpuset);
0284 if (!noaffinity)
0285 pthread_attr_destroy(&thread_attr);
0286
0287 return ret;
0288 }
0289
0290 static void print_summary(void)
0291 {
0292 int i;
0293 unsigned long avg[EPOLL_NR_OPS];
0294 double stddev[EPOLL_NR_OPS];
0295
0296 for (i = 0; i < EPOLL_NR_OPS; i++) {
0297 avg[i] = avg_stats(&all_stats[i]);
0298 stddev[i] = stddev_stats(&all_stats[i]);
0299 }
0300
0301 printf("\nAveraged %ld ADD operations (+- %.2f%%)\n",
0302 avg[OP_EPOLL_ADD], rel_stddev_stats(stddev[OP_EPOLL_ADD],
0303 avg[OP_EPOLL_ADD]));
0304 printf("Averaged %ld MOD operations (+- %.2f%%)\n",
0305 avg[OP_EPOLL_MOD], rel_stddev_stats(stddev[OP_EPOLL_MOD],
0306 avg[OP_EPOLL_MOD]));
0307 printf("Averaged %ld DEL operations (+- %.2f%%)\n",
0308 avg[OP_EPOLL_DEL], rel_stddev_stats(stddev[OP_EPOLL_DEL],
0309 avg[OP_EPOLL_DEL]));
0310 }
0311
0312 int bench_epoll_ctl(int argc, const char **argv)
0313 {
0314 int j, ret = 0;
0315 struct sigaction act;
0316 struct worker *worker = NULL;
0317 struct perf_cpu_map *cpu;
0318 struct rlimit rl, prevrl;
0319 unsigned int i;
0320
0321 argc = parse_options(argc, argv, options, bench_epoll_ctl_usage, 0);
0322 if (argc) {
0323 usage_with_options(bench_epoll_ctl_usage, options);
0324 exit(EXIT_FAILURE);
0325 }
0326
0327 memset(&act, 0, sizeof(act));
0328 sigfillset(&act.sa_mask);
0329 act.sa_sigaction = toggle_done;
0330 sigaction(SIGINT, &act, NULL);
0331
0332 cpu = perf_cpu_map__new(NULL);
0333 if (!cpu)
0334 goto errmem;
0335
0336
0337 epollfd = epoll_create(1);
0338 if (epollfd < 0)
0339 err(EXIT_FAILURE, "epoll_create");
0340
0341
0342
0343
0344 if (nested)
0345 nest_epollfd();
0346
0347
0348 if (!nthreads)
0349 nthreads = perf_cpu_map__nr(cpu);
0350
0351 worker = calloc(nthreads, sizeof(*worker));
0352 if (!worker)
0353 goto errmem;
0354
0355 if (getrlimit(RLIMIT_NOFILE, &prevrl))
0356 err(EXIT_FAILURE, "getrlimit");
0357 rl.rlim_cur = rl.rlim_max = nfds * nthreads * 2 + 50;
0358 printinfo("Setting RLIMIT_NOFILE rlimit from %" PRIu64 " to: %" PRIu64 "\n",
0359 (uint64_t)prevrl.rlim_max, (uint64_t)rl.rlim_max);
0360 if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
0361 err(EXIT_FAILURE, "setrlimit");
0362
0363 printf("Run summary [PID %d]: %d threads doing epoll_ctl ops "
0364 "%d file-descriptors for %d secs.\n\n",
0365 getpid(), nthreads, nfds, nsecs);
0366
0367 for (i = 0; i < EPOLL_NR_OPS; i++)
0368 init_stats(&all_stats[i]);
0369
0370 pthread_mutex_init(&thread_lock, NULL);
0371 pthread_cond_init(&thread_parent, NULL);
0372 pthread_cond_init(&thread_worker, NULL);
0373
0374 threads_starting = nthreads;
0375
0376 gettimeofday(&bench__start, NULL);
0377
0378 do_threads(worker, cpu);
0379
0380 pthread_mutex_lock(&thread_lock);
0381 while (threads_starting)
0382 pthread_cond_wait(&thread_parent, &thread_lock);
0383 pthread_cond_broadcast(&thread_worker);
0384 pthread_mutex_unlock(&thread_lock);
0385
0386 sleep(nsecs);
0387 toggle_done(0, NULL, NULL);
0388 printinfo("main thread: toggling done\n");
0389
0390 for (i = 0; i < nthreads; i++) {
0391 ret = pthread_join(worker[i].thread, NULL);
0392 if (ret)
0393 err(EXIT_FAILURE, "pthread_join");
0394 }
0395
0396
0397 pthread_cond_destroy(&thread_parent);
0398 pthread_cond_destroy(&thread_worker);
0399 pthread_mutex_destroy(&thread_lock);
0400
0401 for (i = 0; i < nthreads; i++) {
0402 unsigned long t[EPOLL_NR_OPS];
0403
0404 for (j = 0; j < EPOLL_NR_OPS; j++) {
0405 t[j] = worker[i].ops[j];
0406 update_stats(&all_stats[j], t[j]);
0407 }
0408
0409 if (nfds == 1)
0410 printf("[thread %2d] fdmap: %p [ add: %04ld; mod: %04ld; del: %04lds ops ]\n",
0411 worker[i].tid, &worker[i].fdmap[0],
0412 t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]);
0413 else
0414 printf("[thread %2d] fdmap: %p ... %p [ add: %04ld ops; mod: %04ld ops; del: %04ld ops ]\n",
0415 worker[i].tid, &worker[i].fdmap[0],
0416 &worker[i].fdmap[nfds-1],
0417 t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]);
0418 }
0419
0420 print_summary();
0421
0422 close(epollfd);
0423 return ret;
0424 errmem:
0425 err(EXIT_FAILURE, "calloc");
0426 }
0427 #endif