Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Copyright (C) 2013  Davidlohr Bueso <davidlohr@hp.com>
0004  *
0005  * futex-requeue: Block a bunch of threads on futex1 and requeue them
0006  *                on futex2, N at a time.
0007  *
0008  * This program is particularly useful to measure the latency of nthread
0009  * requeues without waking up any tasks (in the non-pi case) -- thus
0010  * mimicking a regular futex_wait.
0011  */
0012 
0013 /* For the CLR_() macros */
0014 #include <string.h>
0015 #include <pthread.h>
0016 
0017 #include <signal.h>
0018 #include "../util/stat.h"
0019 #include <subcmd/parse-options.h>
0020 #include <linux/compiler.h>
0021 #include <linux/kernel.h>
0022 #include <linux/time64.h>
0023 #include <errno.h>
0024 #include <perf/cpumap.h>
0025 #include "bench.h"
0026 #include "futex.h"
0027 
0028 #include <err.h>
0029 #include <stdlib.h>
0030 #include <sys/time.h>
0031 #include <sys/mman.h>
0032 
0033 static u_int32_t futex1 = 0, futex2 = 0;
0034 
0035 static pthread_t *worker;
0036 static bool done = false;
0037 static pthread_mutex_t thread_lock;
0038 static pthread_cond_t thread_parent, thread_worker;
0039 static struct stats requeuetime_stats, requeued_stats;
0040 static unsigned int threads_starting;
0041 static int futex_flag = 0;
0042 
0043 static struct bench_futex_parameters params = {
0044     /*
0045      * How many tasks to requeue at a time.
0046      * Default to 1 in order to make the kernel work more.
0047      */
0048     .nrequeue = 1,
0049 };
0050 
0051 static const struct option options[] = {
0052     OPT_UINTEGER('t', "threads",  &params.nthreads, "Specify amount of threads"),
0053     OPT_UINTEGER('q', "nrequeue", &params.nrequeue, "Specify amount of threads to requeue at once"),
0054     OPT_BOOLEAN( 's', "silent",   &params.silent, "Silent mode: do not display data/details"),
0055     OPT_BOOLEAN( 'S', "shared",   &params.fshared, "Use shared futexes instead of private ones"),
0056     OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
0057     OPT_BOOLEAN( 'B', "broadcast", &params.broadcast, "Requeue all threads at once"),
0058     OPT_BOOLEAN( 'p', "pi", &params.pi, "Use PI-aware variants of FUTEX_CMP_REQUEUE"),
0059 
0060     OPT_END()
0061 };
0062 
/* NULL-terminated usage lines handed to the option parser. */
static const char * const bench_futex_requeue_usage[] = {
	"perf bench futex requeue <options>", NULL
};
0067 
0068 static void print_summary(void)
0069 {
0070     double requeuetime_avg = avg_stats(&requeuetime_stats);
0071     double requeuetime_stddev = stddev_stats(&requeuetime_stats);
0072     unsigned int requeued_avg = avg_stats(&requeued_stats);
0073 
0074     printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n",
0075            requeued_avg,
0076            params.nthreads,
0077            requeuetime_avg / USEC_PER_MSEC,
0078            rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
0079 }
0080 
0081 static void *workerfn(void *arg __maybe_unused)
0082 {
0083     int ret;
0084 
0085     pthread_mutex_lock(&thread_lock);
0086     threads_starting--;
0087     if (!threads_starting)
0088         pthread_cond_signal(&thread_parent);
0089     pthread_cond_wait(&thread_worker, &thread_lock);
0090     pthread_mutex_unlock(&thread_lock);
0091 
0092     while (1) {
0093         if (!params.pi) {
0094             ret = futex_wait(&futex1, 0, NULL, futex_flag);
0095             if (!ret)
0096                 break;
0097 
0098             if (ret && errno != EAGAIN) {
0099                 if (!params.silent)
0100                     warnx("futex_wait");
0101                 break;
0102             }
0103         } else {
0104             ret = futex_wait_requeue_pi(&futex1, 0, &futex2,
0105                             NULL, futex_flag);
0106             if (!ret) {
0107                 /* got the lock at futex2 */
0108                 futex_unlock_pi(&futex2, futex_flag);
0109                 break;
0110             }
0111 
0112             if (ret && errno != EAGAIN) {
0113                 if (!params.silent)
0114                     warnx("futex_wait_requeue_pi");
0115                 break;
0116             }
0117         }
0118     }
0119 
0120     return NULL;
0121 }
0122 
0123 static void block_threads(pthread_t *w,
0124               pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
0125 {
0126     cpu_set_t *cpuset;
0127     unsigned int i;
0128     int nrcpus = perf_cpu_map__nr(cpu);
0129     size_t size;
0130 
0131     threads_starting = params.nthreads;
0132 
0133     cpuset = CPU_ALLOC(nrcpus);
0134     BUG_ON(!cpuset);
0135     size = CPU_ALLOC_SIZE(nrcpus);
0136 
0137     /* create and block all threads */
0138     for (i = 0; i < params.nthreads; i++) {
0139         CPU_ZERO_S(size, cpuset);
0140         CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
0141 
0142         if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
0143             CPU_FREE(cpuset);
0144             err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
0145         }
0146 
0147         if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) {
0148             CPU_FREE(cpuset);
0149             err(EXIT_FAILURE, "pthread_create");
0150         }
0151     }
0152     CPU_FREE(cpuset);
0153 }
0154 
0155 static void toggle_done(int sig __maybe_unused,
0156             siginfo_t *info __maybe_unused,
0157             void *uc __maybe_unused)
0158 {
0159     done = true;
0160 }
0161 
0162 int bench_futex_requeue(int argc, const char **argv)
0163 {
0164     int ret = 0;
0165     unsigned int i, j;
0166     struct sigaction act;
0167     pthread_attr_t thread_attr;
0168     struct perf_cpu_map *cpu;
0169 
0170     argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);
0171     if (argc)
0172         goto err;
0173 
0174     cpu = perf_cpu_map__new(NULL);
0175     if (!cpu)
0176         err(EXIT_FAILURE, "cpu_map__new");
0177 
0178     memset(&act, 0, sizeof(act));
0179     sigfillset(&act.sa_mask);
0180     act.sa_sigaction = toggle_done;
0181     sigaction(SIGINT, &act, NULL);
0182 
0183     if (params.mlockall) {
0184         if (mlockall(MCL_CURRENT | MCL_FUTURE))
0185             err(EXIT_FAILURE, "mlockall");
0186     }
0187 
0188     if (!params.nthreads)
0189         params.nthreads = perf_cpu_map__nr(cpu);
0190 
0191     worker = calloc(params.nthreads, sizeof(*worker));
0192     if (!worker)
0193         err(EXIT_FAILURE, "calloc");
0194 
0195     if (!params.fshared)
0196         futex_flag = FUTEX_PRIVATE_FLAG;
0197 
0198     if (params.nrequeue > params.nthreads)
0199         params.nrequeue = params.nthreads;
0200 
0201     if (params.broadcast)
0202         params.nrequeue = params.nthreads;
0203 
0204     printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), "
0205            "%d at a time.\n\n",  getpid(), params.nthreads,
0206            params.fshared ? "shared":"private", &futex1,
0207            params.pi ? "PI ": "", &futex2, params.nrequeue);
0208 
0209     init_stats(&requeued_stats);
0210     init_stats(&requeuetime_stats);
0211     pthread_attr_init(&thread_attr);
0212     pthread_mutex_init(&thread_lock, NULL);
0213     pthread_cond_init(&thread_parent, NULL);
0214     pthread_cond_init(&thread_worker, NULL);
0215 
0216     for (j = 0; j < bench_repeat && !done; j++) {
0217         unsigned int nrequeued = 0, wakeups = 0;
0218         struct timeval start, end, runtime;
0219 
0220         /* create, launch & block all threads */
0221         block_threads(worker, thread_attr, cpu);
0222 
0223         /* make sure all threads are already blocked */
0224         pthread_mutex_lock(&thread_lock);
0225         while (threads_starting)
0226             pthread_cond_wait(&thread_parent, &thread_lock);
0227         pthread_cond_broadcast(&thread_worker);
0228         pthread_mutex_unlock(&thread_lock);
0229 
0230         usleep(100000);
0231 
0232         /* Ok, all threads are patiently blocked, start requeueing */
0233         gettimeofday(&start, NULL);
0234         while (nrequeued < params.nthreads) {
0235             int r;
0236 
0237             /*
0238              * For the regular non-pi case, do not wakeup any tasks
0239              * blocked on futex1, allowing us to really measure
0240              * futex_wait functionality. For the PI case the first
0241              * waiter is always awoken.
0242              */
0243             if (!params.pi) {
0244                 r = futex_cmp_requeue(&futex1, 0, &futex2, 0,
0245                               params.nrequeue,
0246                               futex_flag);
0247             } else {
0248                 r = futex_cmp_requeue_pi(&futex1, 0, &futex2,
0249                              params.nrequeue,
0250                              futex_flag);
0251                 wakeups++; /* assume no error */
0252             }
0253 
0254             if (r < 0)
0255                 err(EXIT_FAILURE, "couldn't requeue from %p to %p",
0256                     &futex1, &futex2);
0257 
0258             nrequeued += r;
0259         }
0260 
0261         gettimeofday(&end, NULL);
0262         timersub(&end, &start, &runtime);
0263 
0264         update_stats(&requeued_stats, nrequeued);
0265         update_stats(&requeuetime_stats, runtime.tv_usec);
0266 
0267         if (!params.silent) {
0268             if (!params.pi)
0269                 printf("[Run %d]: Requeued %d of %d threads in "
0270                        "%.4f ms\n", j + 1, nrequeued,
0271                        params.nthreads,
0272                        runtime.tv_usec / (double)USEC_PER_MSEC);
0273             else {
0274                 nrequeued -= wakeups;
0275                 printf("[Run %d]: Awoke and Requeued (%d+%d) of "
0276                        "%d threads in %.4f ms\n",
0277                        j + 1, wakeups, nrequeued,
0278                        params.nthreads,
0279                        runtime.tv_usec / (double)USEC_PER_MSEC);
0280             }
0281 
0282         }
0283 
0284         if (!params.pi) {
0285             /* everybody should be blocked on futex2, wake'em up */
0286             nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
0287             if (params.nthreads != nrequeued)
0288                 warnx("couldn't wakeup all tasks (%d/%d)",
0289                       nrequeued, params.nthreads);
0290         }
0291 
0292         for (i = 0; i < params.nthreads; i++) {
0293             ret = pthread_join(worker[i], NULL);
0294             if (ret)
0295                 err(EXIT_FAILURE, "pthread_join");
0296         }
0297     }
0298 
0299     /* cleanup & report results */
0300     pthread_cond_destroy(&thread_parent);
0301     pthread_cond_destroy(&thread_worker);
0302     pthread_mutex_destroy(&thread_lock);
0303     pthread_attr_destroy(&thread_attr);
0304 
0305     print_summary();
0306 
0307     free(worker);
0308     perf_cpu_map__put(cpu);
0309     return ret;
0310 err:
0311     usage_with_options(bench_futex_requeue_usage, options);
0312     exit(EXIT_FAILURE);
0313 }