/*
 * rseq (restartable sequences) parameterized selftest.
 * (Header restored: original lines were LXR web-viewer navigation residue.)
 */

0001 // SPDX-License-Identifier: LGPL-2.1
0002 #define _GNU_SOURCE
#include <assert.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>
0019 
/*
 * Return the caller's kernel thread id.  Older glibc provides no
 * gettid() wrapper, so issue the raw syscall directly.
 */
static inline pid_t rseq_gettid(void)
{
    long raw_tid = syscall(__NR_gettid);

    return (pid_t) raw_tid;
}
0024 
/* Number of delay-injection points available in the rseq fast paths. */
#define NR_INJECT   9
/* Per-injection-point delay-loop iteration counts (index 0 unused). */
static int loop_cnt[NR_INJECT + 1];

/*
 * Separate per-point counters given fixed assembler symbol names so the
 * inline-asm injection code (RSEQ_INJECT_ASM) can reference them by
 * symbol.  Presumably filled in from loop_cnt[] by the option parsing
 * outside this chunk — TODO confirm.  "used" keeps them from being
 * discarded as dead by the compiler.
 */
static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
0034 
/* Test options; presumably set by command-line parsing outside this chunk. */
static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
        opt_disable_rseq, opt_threads = 200,
        opt_disable_mod = 0, opt_test = 's', opt_mb = 0;

/* Default repetition count; smaller when the rseq fast path is compiled out. */
#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
#else
static long long opt_reps = 100;
#endif

/* Per-thread tally of signals delivered to this thread. */
static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;
0049 
0050 #ifndef BENCHMARK
0051 
0052 static __thread __attribute__((tls_model("initial-exec"), unused))
0053 unsigned int yield_mod_cnt, nr_abort;
0054 
0055 #define printf_verbose(fmt, ...)            \
0056     do {                        \
0057         if (verbose)                \
0058             printf(fmt, ## __VA_ARGS__);    \
0059     } while (0)
0060 
0061 #ifdef __i386__
0062 
0063 #define INJECT_ASM_REG  "eax"
0064 
0065 #define RSEQ_INJECT_CLOBBER \
0066     , INJECT_ASM_REG
0067 
0068 #define RSEQ_INJECT_ASM(n) \
0069     "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
0070     "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
0071     "jz 333f\n\t" \
0072     "222:\n\t" \
0073     "dec %%" INJECT_ASM_REG "\n\t" \
0074     "jnz 222b\n\t" \
0075     "333:\n\t"
0076 
0077 #elif defined(__x86_64__)
0078 
0079 #define INJECT_ASM_REG_P    "rax"
0080 #define INJECT_ASM_REG      "eax"
0081 
0082 #define RSEQ_INJECT_CLOBBER \
0083     , INJECT_ASM_REG_P \
0084     , INJECT_ASM_REG
0085 
0086 #define RSEQ_INJECT_ASM(n) \
0087     "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
0088     "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
0089     "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
0090     "jz 333f\n\t" \
0091     "222:\n\t" \
0092     "dec %%" INJECT_ASM_REG "\n\t" \
0093     "jnz 222b\n\t" \
0094     "333:\n\t"
0095 
0096 #elif defined(__s390__)
0097 
0098 #define RSEQ_INJECT_INPUT \
0099     , [loop_cnt_1]"m"(loop_cnt[1]) \
0100     , [loop_cnt_2]"m"(loop_cnt[2]) \
0101     , [loop_cnt_3]"m"(loop_cnt[3]) \
0102     , [loop_cnt_4]"m"(loop_cnt[4]) \
0103     , [loop_cnt_5]"m"(loop_cnt[5]) \
0104     , [loop_cnt_6]"m"(loop_cnt[6])
0105 
0106 #define INJECT_ASM_REG  "r12"
0107 
0108 #define RSEQ_INJECT_CLOBBER \
0109     , INJECT_ASM_REG
0110 
0111 #define RSEQ_INJECT_ASM(n) \
0112     "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
0113     "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
0114     "je 333f\n\t" \
0115     "222:\n\t" \
0116     "ahi %%" INJECT_ASM_REG ", -1\n\t" \
0117     "jnz 222b\n\t" \
0118     "333:\n\t"
0119 
0120 #elif defined(__ARMEL__)
0121 
0122 #define RSEQ_INJECT_INPUT \
0123     , [loop_cnt_1]"m"(loop_cnt[1]) \
0124     , [loop_cnt_2]"m"(loop_cnt[2]) \
0125     , [loop_cnt_3]"m"(loop_cnt[3]) \
0126     , [loop_cnt_4]"m"(loop_cnt[4]) \
0127     , [loop_cnt_5]"m"(loop_cnt[5]) \
0128     , [loop_cnt_6]"m"(loop_cnt[6])
0129 
0130 #define INJECT_ASM_REG  "r4"
0131 
0132 #define RSEQ_INJECT_CLOBBER \
0133     , INJECT_ASM_REG
0134 
0135 #define RSEQ_INJECT_ASM(n) \
0136     "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
0137     "cmp " INJECT_ASM_REG ", #0\n\t" \
0138     "beq 333f\n\t" \
0139     "222:\n\t" \
0140     "subs " INJECT_ASM_REG ", #1\n\t" \
0141     "bne 222b\n\t" \
0142     "333:\n\t"
0143 
0144 #elif defined(__AARCH64EL__)
0145 
0146 #define RSEQ_INJECT_INPUT \
0147     , [loop_cnt_1] "Qo" (loop_cnt[1]) \
0148     , [loop_cnt_2] "Qo" (loop_cnt[2]) \
0149     , [loop_cnt_3] "Qo" (loop_cnt[3]) \
0150     , [loop_cnt_4] "Qo" (loop_cnt[4]) \
0151     , [loop_cnt_5] "Qo" (loop_cnt[5]) \
0152     , [loop_cnt_6] "Qo" (loop_cnt[6])
0153 
0154 #define INJECT_ASM_REG  RSEQ_ASM_TMP_REG32
0155 
0156 #define RSEQ_INJECT_ASM(n) \
0157     "   ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n"   \
0158     "   cbz " INJECT_ASM_REG ", 333f\n"         \
0159     "222:\n"                            \
0160     "   sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n"   \
0161     "   cbnz    " INJECT_ASM_REG ", 222b\n"         \
0162     "333:\n"
0163 
0164 #elif defined(__PPC__)
0165 
0166 #define RSEQ_INJECT_INPUT \
0167     , [loop_cnt_1]"m"(loop_cnt[1]) \
0168     , [loop_cnt_2]"m"(loop_cnt[2]) \
0169     , [loop_cnt_3]"m"(loop_cnt[3]) \
0170     , [loop_cnt_4]"m"(loop_cnt[4]) \
0171     , [loop_cnt_5]"m"(loop_cnt[5]) \
0172     , [loop_cnt_6]"m"(loop_cnt[6])
0173 
0174 #define INJECT_ASM_REG  "r18"
0175 
0176 #define RSEQ_INJECT_CLOBBER \
0177     , INJECT_ASM_REG
0178 
0179 #define RSEQ_INJECT_ASM(n) \
0180     "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
0181     "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
0182     "beq 333f\n\t" \
0183     "222:\n\t" \
0184     "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
0185     "bne 222b\n\t" \
0186     "333:\n\t"
0187 
0188 #elif defined(__mips__)
0189 
0190 #define RSEQ_INJECT_INPUT \
0191     , [loop_cnt_1]"m"(loop_cnt[1]) \
0192     , [loop_cnt_2]"m"(loop_cnt[2]) \
0193     , [loop_cnt_3]"m"(loop_cnt[3]) \
0194     , [loop_cnt_4]"m"(loop_cnt[4]) \
0195     , [loop_cnt_5]"m"(loop_cnt[5]) \
0196     , [loop_cnt_6]"m"(loop_cnt[6])
0197 
0198 #define INJECT_ASM_REG  "$5"
0199 
0200 #define RSEQ_INJECT_CLOBBER \
0201     , INJECT_ASM_REG
0202 
0203 #define RSEQ_INJECT_ASM(n) \
0204     "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
0205     "beqz " INJECT_ASM_REG ", 333f\n\t" \
0206     "222:\n\t" \
0207     "addiu " INJECT_ASM_REG ", -1\n\t" \
0208     "bnez " INJECT_ASM_REG ", 222b\n\t" \
0209     "333:\n\t"
0210 #elif defined(__riscv)
0211 
0212 #define RSEQ_INJECT_INPUT \
0213     , [loop_cnt_1]"m"(loop_cnt[1]) \
0214     , [loop_cnt_2]"m"(loop_cnt[2]) \
0215     , [loop_cnt_3]"m"(loop_cnt[3]) \
0216     , [loop_cnt_4]"m"(loop_cnt[4]) \
0217     , [loop_cnt_5]"m"(loop_cnt[5]) \
0218     , [loop_cnt_6]"m"(loop_cnt[6])
0219 
0220 #define INJECT_ASM_REG  "t1"
0221 
0222 #define RSEQ_INJECT_CLOBBER \
0223     , INJECT_ASM_REG
0224 
0225 #define RSEQ_INJECT_ASM(n)                  \
0226     "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t"     \
0227     "beqz " INJECT_ASM_REG ", 333f\n\t"         \
0228     "222:\n\t"                      \
0229     "addi  " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t"   \
0230     "bnez " INJECT_ASM_REG ", 222b\n\t"         \
0231     "333:\n\t"
0232 
0233 
0234 #else
0235 #error unsupported target
0236 #endif
0237 
/* Hook invoked by rseq.h when a critical section aborts. */
#define RSEQ_INJECT_FAILED \
    nr_abort++;

/*
 * C-level injection at point n: spin loop_cnt[n] times; a value of -1
 * instead triggers (every opt_modulo iterations) the configured
 * perturbation: sleep, yield and/or self-signal.
 */
#define RSEQ_INJECT_C(n) \
{ \
    int loc_i, loc_nr_loops = loop_cnt[n]; \
    \
    for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
        rseq_barrier(); \
    } \
    if (loc_nr_loops == -1 && opt_modulo) { \
        if (yield_mod_cnt == opt_modulo - 1) { \
            if (opt_sleep > 0) \
                poll(NULL, 0, opt_sleep); \
            if (opt_yield) \
                sched_yield(); \
            if (opt_signal) \
                raise(SIGUSR1); \
            yield_mod_cnt = 0; \
        } else { \
            yield_mod_cnt++; \
        } \
    } \
}
0262 
0263 #else
0264 
0265 #define printf_verbose(fmt, ...)
0266 
0267 #endif /* BENCHMARK */
0268 
0269 #include "rseq.h"
0270 
0271 struct percpu_lock_entry {
0272     intptr_t v;
0273 } __attribute__((aligned(128)));
0274 
0275 struct percpu_lock {
0276     struct percpu_lock_entry c[CPU_SETSIZE];
0277 };
0278 
0279 struct test_data_entry {
0280     intptr_t count;
0281 } __attribute__((aligned(128)));
0282 
0283 struct spinlock_test_data {
0284     struct percpu_lock lock;
0285     struct test_data_entry c[CPU_SETSIZE];
0286 };
0287 
0288 struct spinlock_thread_test_data {
0289     struct spinlock_test_data *data;
0290     long long reps;
0291     int reg;
0292 };
0293 
0294 struct inc_test_data {
0295     struct test_data_entry c[CPU_SETSIZE];
0296 };
0297 
0298 struct inc_thread_test_data {
0299     struct inc_test_data *data;
0300     long long reps;
0301     int reg;
0302 };
0303 
0304 struct percpu_list_node {
0305     intptr_t data;
0306     struct percpu_list_node *next;
0307 };
0308 
0309 struct percpu_list_entry {
0310     struct percpu_list_node *head;
0311 } __attribute__((aligned(128)));
0312 
0313 struct percpu_list {
0314     struct percpu_list_entry c[CPU_SETSIZE];
0315 };
0316 
0317 #define BUFFER_ITEM_PER_CPU 100
0318 
0319 struct percpu_buffer_node {
0320     intptr_t data;
0321 };
0322 
0323 struct percpu_buffer_entry {
0324     intptr_t offset;
0325     intptr_t buflen;
0326     struct percpu_buffer_node **array;
0327 } __attribute__((aligned(128)));
0328 
0329 struct percpu_buffer {
0330     struct percpu_buffer_entry c[CPU_SETSIZE];
0331 };
0332 
0333 #define MEMCPY_BUFFER_ITEM_PER_CPU  100
0334 
0335 struct percpu_memcpy_buffer_node {
0336     intptr_t data1;
0337     uint64_t data2;
0338 };
0339 
0340 struct percpu_memcpy_buffer_entry {
0341     intptr_t offset;
0342     intptr_t buflen;
0343     struct percpu_memcpy_buffer_node *array;
0344 } __attribute__((aligned(128)));
0345 
0346 struct percpu_memcpy_buffer {
0347     struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
0348 };
0349 
/*
 * A simple percpu spinlock. Grabs lock on current cpu.
 * Returns the cpu index whose lock slot is now held; the caller must
 * pass the same index to rseq_percpu_unlock().
 */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
    int cpu;

    for (;;) {
        int ret;

        /* Lock the slot of whichever cpu we are currently running on. */
        cpu = rseq_cpu_start();
        /* Atomically: if (lock->c[cpu].v == 0) lock->c[cpu].v = 1. */
        ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
                     0, 1, cpu);
        if (rseq_likely(!ret))
            break;
        /* Retry if comparison fails or rseq aborts. */
    }
    /*
     * Acquire semantic when taking lock after control dependency.
     * Matches rseq_smp_store_release().
     */
    rseq_smp_acquire__after_ctrl_dep();
    return cpu;
}
0372 
/* Release the per-cpu lock slot previously taken by rseq_this_cpu_lock(). */
static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
    /* Sanity: we must actually hold this slot. */
    assert(lock->c[cpu].v == 1);
    /*
     * Release lock, with release semantic. Matches
     * rseq_smp_acquire__after_ctrl_dep().
     */
    rseq_smp_store_release(&lock->c[cpu].v, 0);
}
0382 
0383 void *test_percpu_spinlock_thread(void *arg)
0384 {
0385     struct spinlock_thread_test_data *thread_data = arg;
0386     struct spinlock_test_data *data = thread_data->data;
0387     long long i, reps;
0388 
0389     if (!opt_disable_rseq && thread_data->reg &&
0390         rseq_register_current_thread())
0391         abort();
0392     reps = thread_data->reps;
0393     for (i = 0; i < reps; i++) {
0394         int cpu = rseq_this_cpu_lock(&data->lock);
0395         data->c[cpu].count++;
0396         rseq_percpu_unlock(&data->lock, cpu);
0397 #ifndef BENCHMARK
0398         if (i != 0 && !(i % (reps / 10)))
0399             printf_verbose("tid %d: count %lld\n",
0400                        (int) rseq_gettid(), i);
0401 #endif
0402     }
0403     printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
0404                (int) rseq_gettid(), nr_abort, signals_delivered);
0405     if (!opt_disable_rseq && thread_data->reg &&
0406         rseq_unregister_current_thread())
0407         abort();
0408     return NULL;
0409 }
0410 
0411 /*
0412  * A simple test which implements a sharded counter using a per-cpu
0413  * lock.  Obviously real applications might prefer to simply use a
0414  * per-cpu increment; however, this is reasonable for a test and the
0415  * lock can be extended to synchronize more complicated operations.
0416  */
0417 void test_percpu_spinlock(void)
0418 {
0419     const int num_threads = opt_threads;
0420     int i, ret;
0421     uint64_t sum;
0422     pthread_t test_threads[num_threads];
0423     struct spinlock_test_data data;
0424     struct spinlock_thread_test_data thread_data[num_threads];
0425 
0426     memset(&data, 0, sizeof(data));
0427     for (i = 0; i < num_threads; i++) {
0428         thread_data[i].reps = opt_reps;
0429         if (opt_disable_mod <= 0 || (i % opt_disable_mod))
0430             thread_data[i].reg = 1;
0431         else
0432             thread_data[i].reg = 0;
0433         thread_data[i].data = &data;
0434         ret = pthread_create(&test_threads[i], NULL,
0435                      test_percpu_spinlock_thread,
0436                      &thread_data[i]);
0437         if (ret) {
0438             errno = ret;
0439             perror("pthread_create");
0440             abort();
0441         }
0442     }
0443 
0444     for (i = 0; i < num_threads; i++) {
0445         ret = pthread_join(test_threads[i], NULL);
0446         if (ret) {
0447             errno = ret;
0448             perror("pthread_join");
0449             abort();
0450         }
0451     }
0452 
0453     sum = 0;
0454     for (i = 0; i < CPU_SETSIZE; i++)
0455         sum += data.c[i].count;
0456 
0457     assert(sum == (uint64_t)opt_reps * num_threads);
0458 }
0459 
0460 void *test_percpu_inc_thread(void *arg)
0461 {
0462     struct inc_thread_test_data *thread_data = arg;
0463     struct inc_test_data *data = thread_data->data;
0464     long long i, reps;
0465 
0466     if (!opt_disable_rseq && thread_data->reg &&
0467         rseq_register_current_thread())
0468         abort();
0469     reps = thread_data->reps;
0470     for (i = 0; i < reps; i++) {
0471         int ret;
0472 
0473         do {
0474             int cpu;
0475 
0476             cpu = rseq_cpu_start();
0477             ret = rseq_addv(&data->c[cpu].count, 1, cpu);
0478         } while (rseq_unlikely(ret));
0479 #ifndef BENCHMARK
0480         if (i != 0 && !(i % (reps / 10)))
0481             printf_verbose("tid %d: count %lld\n",
0482                        (int) rseq_gettid(), i);
0483 #endif
0484     }
0485     printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
0486                (int) rseq_gettid(), nr_abort, signals_delivered);
0487     if (!opt_disable_rseq && thread_data->reg &&
0488         rseq_unregister_current_thread())
0489         abort();
0490     return NULL;
0491 }
0492 
0493 void test_percpu_inc(void)
0494 {
0495     const int num_threads = opt_threads;
0496     int i, ret;
0497     uint64_t sum;
0498     pthread_t test_threads[num_threads];
0499     struct inc_test_data data;
0500     struct inc_thread_test_data thread_data[num_threads];
0501 
0502     memset(&data, 0, sizeof(data));
0503     for (i = 0; i < num_threads; i++) {
0504         thread_data[i].reps = opt_reps;
0505         if (opt_disable_mod <= 0 || (i % opt_disable_mod))
0506             thread_data[i].reg = 1;
0507         else
0508             thread_data[i].reg = 0;
0509         thread_data[i].data = &data;
0510         ret = pthread_create(&test_threads[i], NULL,
0511                      test_percpu_inc_thread,
0512                      &thread_data[i]);
0513         if (ret) {
0514             errno = ret;
0515             perror("pthread_create");
0516             abort();
0517         }
0518     }
0519 
0520     for (i = 0; i < num_threads; i++) {
0521         ret = pthread_join(test_threads[i], NULL);
0522         if (ret) {
0523             errno = ret;
0524             perror("pthread_join");
0525             abort();
0526         }
0527     }
0528 
0529     sum = 0;
0530     for (i = 0; i < CPU_SETSIZE; i++)
0531         sum += data.c[i].count;
0532 
0533     assert(sum == (uint64_t)opt_reps * num_threads);
0534 }
0535 
/*
 * Push @node onto the current cpu's list shard.  Lock-free: link the
 * node, then publish it with a single rseq compare-and-store of the
 * head pointer; the whole attempt restarts on migration/preemption.
 * If @_cpu is non-NULL, reports the cpu actually used through it.
 */
void this_cpu_list_push(struct percpu_list *list,
            struct percpu_list_node *node,
            int *_cpu)
{
    int cpu;

    for (;;) {
        intptr_t *targetptr, newval, expect;
        int ret;

        cpu = rseq_cpu_start();
        /* Load list->c[cpu].head with single-copy atomicity. */
        expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
        newval = (intptr_t)node;
        targetptr = (intptr_t *)&list->c[cpu].head;
        /* Link before publishing: node->next must be set first. */
        node->next = (struct percpu_list_node *)expect;
        ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
        if (rseq_likely(!ret))
            break;
        /* Retry if comparison fails or rseq aborts. */
    }
    if (_cpu)
        *_cpu = cpu;
}
0560 
0561 /*
0562  * Unlike a traditional lock-less linked list; the availability of a
0563  * rseq primitive allows us to implement pop without concerns over
0564  * ABA-type races.
0565  */
/*
 * Pop the head node of the current cpu's list shard, or NULL if that
 * shard is empty.  Uses rseq_cmpnev_storeoffp_load(): verify head is
 * non-NULL, load head->next (via @offset) and store it as the new head,
 * all within one restartable sequence — which is what makes this safe
 * against ABA (see comment below).  Reports the cpu via @_cpu if set.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
                       int *_cpu)
{
    struct percpu_list_node *node = NULL;
    int cpu;

    for (;;) {
        struct percpu_list_node *head;
        intptr_t *targetptr, expectnot, *load;
        long offset;
        int ret;

        cpu = rseq_cpu_start();
        targetptr = (intptr_t *)&list->c[cpu].head;
        expectnot = (intptr_t)NULL;
        /* Dereference head at this offset to fetch the new head. */
        offset = offsetof(struct percpu_list_node, next);
        load = (intptr_t *)&head;
        ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
                           offset, load, cpu);
        if (rseq_likely(!ret)) {
            node = head;
            break;
        }
        /* Positive return: head was NULL, i.e. shard empty. */
        if (ret > 0)
            break;
        /* Retry if rseq aborts. */
    }
    if (_cpu)
        *_cpu = cpu;
    return node;
}
0597 
0598 /*
0599  * __percpu_list_pop is not safe against concurrent accesses. Should
0600  * only be used on lists that are not concurrently modified.
0601  */
0602 struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
0603 {
0604     struct percpu_list_node *node;
0605 
0606     node = list->c[cpu].head;
0607     if (!node)
0608         return NULL;
0609     list->c[cpu].head = node->next;
0610     return node;
0611 }
0612 
0613 void *test_percpu_list_thread(void *arg)
0614 {
0615     long long i, reps;
0616     struct percpu_list *list = (struct percpu_list *)arg;
0617 
0618     if (!opt_disable_rseq && rseq_register_current_thread())
0619         abort();
0620 
0621     reps = opt_reps;
0622     for (i = 0; i < reps; i++) {
0623         struct percpu_list_node *node;
0624 
0625         node = this_cpu_list_pop(list, NULL);
0626         if (opt_yield)
0627             sched_yield();  /* encourage shuffling */
0628         if (node)
0629             this_cpu_list_push(list, node, NULL);
0630     }
0631 
0632     printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
0633                (int) rseq_gettid(), nr_abort, signals_delivered);
0634     if (!opt_disable_rseq && rseq_unregister_current_thread())
0635         abort();
0636 
0637     return NULL;
0638 }
0639 
0640 /* Simultaneous modification to a per-cpu linked list from many threads.  */
0641 void test_percpu_list(void)
0642 {
0643     const int num_threads = opt_threads;
0644     int i, j, ret;
0645     uint64_t sum = 0, expected_sum = 0;
0646     struct percpu_list list;
0647     pthread_t test_threads[num_threads];
0648     cpu_set_t allowed_cpus;
0649 
0650     memset(&list, 0, sizeof(list));
0651 
0652     /* Generate list entries for every usable cpu. */
0653     sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
0654     for (i = 0; i < CPU_SETSIZE; i++) {
0655         if (!CPU_ISSET(i, &allowed_cpus))
0656             continue;
0657         for (j = 1; j <= 100; j++) {
0658             struct percpu_list_node *node;
0659 
0660             expected_sum += j;
0661 
0662             node = malloc(sizeof(*node));
0663             assert(node);
0664             node->data = j;
0665             node->next = list.c[i].head;
0666             list.c[i].head = node;
0667         }
0668     }
0669 
0670     for (i = 0; i < num_threads; i++) {
0671         ret = pthread_create(&test_threads[i], NULL,
0672                      test_percpu_list_thread, &list);
0673         if (ret) {
0674             errno = ret;
0675             perror("pthread_create");
0676             abort();
0677         }
0678     }
0679 
0680     for (i = 0; i < num_threads; i++) {
0681         ret = pthread_join(test_threads[i], NULL);
0682         if (ret) {
0683             errno = ret;
0684             perror("pthread_join");
0685             abort();
0686         }
0687     }
0688 
0689     for (i = 0; i < CPU_SETSIZE; i++) {
0690         struct percpu_list_node *node;
0691 
0692         if (!CPU_ISSET(i, &allowed_cpus))
0693             continue;
0694 
0695         while ((node = __percpu_list_pop(&list, i))) {
0696             sum += node->data;
0697             free(node);
0698         }
0699     }
0700 
0701     /*
0702      * All entries should now be accounted for (unless some external
0703      * actor is interfering with our allowed affinity while this
0704      * test is running).
0705      */
0706     assert(sum == expected_sum);
0707 }
0708 
/*
 * Push @node onto the current cpu's pointer ring.  Two-store rseq
 * sequence: speculatively store the node pointer into the array slot,
 * then commit by storing the incremented offset.  Returns false if the
 * shard is full.  opt_mb selects the release-ordered commit variant
 * (pairs with the membarrier-based consumer mode — TODO confirm against
 * the option parsing outside this chunk).  Reports the cpu via @_cpu.
 */
bool this_cpu_buffer_push(struct percpu_buffer *buffer,
              struct percpu_buffer_node *node,
              int *_cpu)
{
    bool result = false;
    int cpu;

    for (;;) {
        intptr_t *targetptr_spec, newval_spec;
        intptr_t *targetptr_final, newval_final;
        intptr_t offset;
        int ret;

        cpu = rseq_cpu_start();
        offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
        if (offset == buffer->c[cpu].buflen)
            break;
        newval_spec = (intptr_t)node;
        targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
        newval_final = offset + 1;
        targetptr_final = &buffer->c[cpu].offset;
        if (opt_mb)
            ret = rseq_cmpeqv_trystorev_storev_release(
                targetptr_final, offset, targetptr_spec,
                newval_spec, newval_final, cpu);
        else
            ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
                offset, targetptr_spec, newval_spec,
                newval_final, cpu);
        if (rseq_likely(!ret)) {
            result = true;
            break;
        }
        /* Retry if comparison fails or rseq aborts. */
    }
    if (_cpu)
        *_cpu = cpu;
    return result;
}
0748 
/*
 * Pop the top node pointer from the current cpu's ring, or NULL when
 * empty.  Double-compare rseq sequence: commit the decremented offset
 * only if both the offset and the array slot still hold the values we
 * read, preventing a racing push/pop from being overwritten.
 * Reports the cpu via @_cpu if non-NULL.
 */
struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
                           int *_cpu)
{
    struct percpu_buffer_node *head;
    int cpu;

    for (;;) {
        intptr_t *targetptr, newval;
        intptr_t offset;
        int ret;

        cpu = rseq_cpu_start();
        /* Load offset with single-copy atomicity. */
        offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
        if (offset == 0) {
            head = NULL;
            break;
        }
        head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
        newval = offset - 1;
        targetptr = (intptr_t *)&buffer->c[cpu].offset;
        ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
            (intptr_t *)&buffer->c[cpu].array[offset - 1],
            (intptr_t)head, newval, cpu);
        if (rseq_likely(!ret))
            break;
        /* Retry if comparison fails or rseq aborts. */
    }
    if (_cpu)
        *_cpu = cpu;
    return head;
}
0781 
0782 /*
0783  * __percpu_buffer_pop is not safe against concurrent accesses. Should
0784  * only be used on buffers that are not concurrently modified.
0785  */
0786 struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
0787                            int cpu)
0788 {
0789     struct percpu_buffer_node *head;
0790     intptr_t offset;
0791 
0792     offset = buffer->c[cpu].offset;
0793     if (offset == 0)
0794         return NULL;
0795     head = buffer->c[cpu].array[offset - 1];
0796     buffer->c[cpu].offset = offset - 1;
0797     return head;
0798 }
0799 
0800 void *test_percpu_buffer_thread(void *arg)
0801 {
0802     long long i, reps;
0803     struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
0804 
0805     if (!opt_disable_rseq && rseq_register_current_thread())
0806         abort();
0807 
0808     reps = opt_reps;
0809     for (i = 0; i < reps; i++) {
0810         struct percpu_buffer_node *node;
0811 
0812         node = this_cpu_buffer_pop(buffer, NULL);
0813         if (opt_yield)
0814             sched_yield();  /* encourage shuffling */
0815         if (node) {
0816             if (!this_cpu_buffer_push(buffer, node, NULL)) {
0817                 /* Should increase buffer size. */
0818                 abort();
0819             }
0820         }
0821     }
0822 
0823     printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
0824                (int) rseq_gettid(), nr_abort, signals_delivered);
0825     if (!opt_disable_rseq && rseq_unregister_current_thread())
0826         abort();
0827 
0828     return NULL;
0829 }
0830 
0831 /* Simultaneous modification to a per-cpu buffer from many threads.  */
0832 void test_percpu_buffer(void)
0833 {
0834     const int num_threads = opt_threads;
0835     int i, j, ret;
0836     uint64_t sum = 0, expected_sum = 0;
0837     struct percpu_buffer buffer;
0838     pthread_t test_threads[num_threads];
0839     cpu_set_t allowed_cpus;
0840 
0841     memset(&buffer, 0, sizeof(buffer));
0842 
0843     /* Generate list entries for every usable cpu. */
0844     sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
0845     for (i = 0; i < CPU_SETSIZE; i++) {
0846         if (!CPU_ISSET(i, &allowed_cpus))
0847             continue;
0848         /* Worse-case is every item in same CPU. */
0849         buffer.c[i].array =
0850             malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
0851                    BUFFER_ITEM_PER_CPU);
0852         assert(buffer.c[i].array);
0853         buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
0854         for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
0855             struct percpu_buffer_node *node;
0856 
0857             expected_sum += j;
0858 
0859             /*
0860              * We could theoretically put the word-sized
0861              * "data" directly in the buffer. However, we
0862              * want to model objects that would not fit
0863              * within a single word, so allocate an object
0864              * for each node.
0865              */
0866             node = malloc(sizeof(*node));
0867             assert(node);
0868             node->data = j;
0869             buffer.c[i].array[j - 1] = node;
0870             buffer.c[i].offset++;
0871         }
0872     }
0873 
0874     for (i = 0; i < num_threads; i++) {
0875         ret = pthread_create(&test_threads[i], NULL,
0876                      test_percpu_buffer_thread, &buffer);
0877         if (ret) {
0878             errno = ret;
0879             perror("pthread_create");
0880             abort();
0881         }
0882     }
0883 
0884     for (i = 0; i < num_threads; i++) {
0885         ret = pthread_join(test_threads[i], NULL);
0886         if (ret) {
0887             errno = ret;
0888             perror("pthread_join");
0889             abort();
0890         }
0891     }
0892 
0893     for (i = 0; i < CPU_SETSIZE; i++) {
0894         struct percpu_buffer_node *node;
0895 
0896         if (!CPU_ISSET(i, &allowed_cpus))
0897             continue;
0898 
0899         while ((node = __percpu_buffer_pop(&buffer, i))) {
0900             sum += node->data;
0901             free(node);
0902         }
0903         free(buffer.c[i].array);
0904     }
0905 
0906     /*
0907      * All entries should now be accounted for (unless some external
0908      * actor is interfering with our allowed affinity while this
0909      * test is running).
0910      */
0911     assert(sum == expected_sum);
0912 }
0913 
/*
 * Push @item (by value) into the current cpu's memcpy ring.  The rseq
 * sequence speculatively memcpy's the item into the array slot, then
 * commits by storing the incremented offset.  Returns false when the
 * shard is full.  opt_mb selects the release-ordered commit variant.
 * Reports the cpu via @_cpu if non-NULL.
 */
bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
                 struct percpu_memcpy_buffer_node item,
                 int *_cpu)
{
    bool result = false;
    int cpu;

    for (;;) {
        intptr_t *targetptr_final, newval_final, offset;
        char *destptr, *srcptr;
        size_t copylen;
        int ret;

        cpu = rseq_cpu_start();
        /* Load offset with single-copy atomicity. */
        offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
        if (offset == buffer->c[cpu].buflen)
            break;
        destptr = (char *)&buffer->c[cpu].array[offset];
        srcptr = (char *)&item;
        /* copylen must be <= 4kB. */
        copylen = sizeof(item);
        newval_final = offset + 1;
        targetptr_final = &buffer->c[cpu].offset;
        if (opt_mb)
            ret = rseq_cmpeqv_trymemcpy_storev_release(
                targetptr_final, offset,
                destptr, srcptr, copylen,
                newval_final, cpu);
        else
            ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
                offset, destptr, srcptr, copylen,
                newval_final, cpu);
        if (rseq_likely(!ret)) {
            result = true;
            break;
        }
        /* Retry if comparison fails or rseq aborts. */
    }
    if (_cpu)
        *_cpu = cpu;
    return result;
}
0957 
/*
 * Pop the top entry of the current cpu's memcpy ring into @item.
 * The rseq sequence memcpy's the slot out, then commits by storing the
 * decremented offset.  Returns false when the shard is empty.
 * Reports the cpu via @_cpu if non-NULL.
 */
bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
                struct percpu_memcpy_buffer_node *item,
                int *_cpu)
{
    bool result = false;
    int cpu;

    for (;;) {
        intptr_t *targetptr_final, newval_final, offset;
        char *destptr, *srcptr;
        size_t copylen;
        int ret;

        cpu = rseq_cpu_start();
        /* Load offset with single-copy atomicity. */
        offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
        if (offset == 0)
            break;
        destptr = (char *)item;
        srcptr = (char *)&buffer->c[cpu].array[offset - 1];
        /* copylen must be <= 4kB. */
        copylen = sizeof(*item);
        newval_final = offset - 1;
        targetptr_final = &buffer->c[cpu].offset;
        ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
            offset, destptr, srcptr, copylen,
            newval_final, cpu);
        if (rseq_likely(!ret)) {
            result = true;
            break;
        }
        /* Retry if comparison fails or rseq aborts. */
    }
    if (_cpu)
        *_cpu = cpu;
    return result;
}
0995 
0996 /*
0997  * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
0998  * only be used on buffers that are not concurrently modified.
0999  */
1000 bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1001                 struct percpu_memcpy_buffer_node *item,
1002                 int cpu)
1003 {
1004     intptr_t offset;
1005 
1006     offset = buffer->c[cpu].offset;
1007     if (offset == 0)
1008         return false;
1009     memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
1010     buffer->c[cpu].offset = offset - 1;
1011     return true;
1012 }
1013 
1014 void *test_percpu_memcpy_buffer_thread(void *arg)
1015 {
1016     long long i, reps;
1017     struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
1018 
1019     if (!opt_disable_rseq && rseq_register_current_thread())
1020         abort();
1021 
1022     reps = opt_reps;
1023     for (i = 0; i < reps; i++) {
1024         struct percpu_memcpy_buffer_node item;
1025         bool result;
1026 
1027         result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1028         if (opt_yield)
1029             sched_yield();  /* encourage shuffling */
1030         if (result) {
1031             if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1032                 /* Should increase buffer size. */
1033                 abort();
1034             }
1035         }
1036     }
1037 
1038     printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
1039                (int) rseq_gettid(), nr_abort, signals_delivered);
1040     if (!opt_disable_rseq && rseq_unregister_current_thread())
1041         abort();
1042 
1043     return NULL;
1044 }
1045 
1046 /* Simultaneous modification to a per-cpu buffer from many threads.  */
1047 void test_percpu_memcpy_buffer(void)
1048 {
1049     const int num_threads = opt_threads;
1050     int i, j, ret;
1051     uint64_t sum = 0, expected_sum = 0;
1052     struct percpu_memcpy_buffer buffer;
1053     pthread_t test_threads[num_threads];
1054     cpu_set_t allowed_cpus;
1055 
1056     memset(&buffer, 0, sizeof(buffer));
1057 
1058     /* Generate list entries for every usable cpu. */
1059     sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1060     for (i = 0; i < CPU_SETSIZE; i++) {
1061         if (!CPU_ISSET(i, &allowed_cpus))
1062             continue;
1063         /* Worse-case is every item in same CPU. */
1064         buffer.c[i].array =
1065             malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1066                    MEMCPY_BUFFER_ITEM_PER_CPU);
1067         assert(buffer.c[i].array);
1068         buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1069         for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1070             expected_sum += 2 * j + 1;
1071 
1072             /*
1073              * We could theoretically put the word-sized
1074              * "data" directly in the buffer. However, we
1075              * want to model objects that would not fit
1076              * within a single word, so allocate an object
1077              * for each node.
1078              */
1079             buffer.c[i].array[j - 1].data1 = j;
1080             buffer.c[i].array[j - 1].data2 = j + 1;
1081             buffer.c[i].offset++;
1082         }
1083     }
1084 
1085     for (i = 0; i < num_threads; i++) {
1086         ret = pthread_create(&test_threads[i], NULL,
1087                      test_percpu_memcpy_buffer_thread,
1088                      &buffer);
1089         if (ret) {
1090             errno = ret;
1091             perror("pthread_create");
1092             abort();
1093         }
1094     }
1095 
1096     for (i = 0; i < num_threads; i++) {
1097         ret = pthread_join(test_threads[i], NULL);
1098         if (ret) {
1099             errno = ret;
1100             perror("pthread_join");
1101             abort();
1102         }
1103     }
1104 
1105     for (i = 0; i < CPU_SETSIZE; i++) {
1106         struct percpu_memcpy_buffer_node item;
1107 
1108         if (!CPU_ISSET(i, &allowed_cpus))
1109             continue;
1110 
1111         while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1112             sum += item.data1;
1113             sum += item.data2;
1114         }
1115         free(buffer.c[i].array);
1116     }
1117 
1118     /*
1119      * All entries should now be accounted for (unless some external
1120      * actor is interfering with our allowed affinity while this
1121      * test is running).
1122      */
1123     assert(sum == expected_sum);
1124 }
1125 
/*
 * SIGUSR1 handler: count deliveries in the per-thread signals_delivered
 * counter (reported by the test threads in their verbose output).
 */
static void test_signal_interrupt_handler(int signo)
{
    signals_delivered++;
}
1130 
1131 static int set_signal_handler(void)
1132 {
1133     int ret = 0;
1134     struct sigaction sa;
1135     sigset_t sigset;
1136 
1137     ret = sigemptyset(&sigset);
1138     if (ret < 0) {
1139         perror("sigemptyset");
1140         return ret;
1141     }
1142 
1143     sa.sa_handler = test_signal_interrupt_handler;
1144     sa.sa_mask = sigset;
1145     sa.sa_flags = 0;
1146     ret = sigaction(SIGUSR1, &sa, NULL);
1147     if (ret < 0) {
1148         perror("sigaction");
1149         return ret;
1150     }
1151 
1152     printf_verbose("Signal handler set for SIGUSR1\n");
1153 
1154     return ret;
1155 }
1156 
1157 /* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
1158 #ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
/*
 * State shared between the membarrier manager thread and its workers.
 *
 * NOTE(review): both fields are accessed through atomic_load()/
 * atomic_store() in this file although they are not declared _Atomic;
 * this relies on the toolchain accepting the generic atomics on plain
 * objects — confirm against the supported compilers.
 */
struct test_membarrier_thread_args {
    int stop;           /* Set non-zero by test_membarrier() to stop the manager. */
    intptr_t percpu_list_ptr;   /* Address of the currently "active" percpu_list. */
};
1163 
1164 /* Worker threads modify data in their "active" percpu lists. */
1165 void *test_membarrier_worker_thread(void *arg)
1166 {
1167     struct test_membarrier_thread_args *args =
1168         (struct test_membarrier_thread_args *)arg;
1169     const int iters = opt_reps;
1170     int i;
1171 
1172     if (rseq_register_current_thread()) {
1173         fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1174             errno, strerror(errno));
1175         abort();
1176     }
1177 
1178     /* Wait for initialization. */
1179     while (!atomic_load(&args->percpu_list_ptr)) {}
1180 
1181     for (i = 0; i < iters; ++i) {
1182         int ret;
1183 
1184         do {
1185             int cpu = rseq_cpu_start();
1186 
1187             ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
1188                 sizeof(struct percpu_list_entry) * cpu, 1, cpu);
1189         } while (rseq_unlikely(ret));
1190     }
1191 
1192     if (rseq_unregister_current_thread()) {
1193         fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1194             errno, strerror(errno));
1195         abort();
1196     }
1197     return NULL;
1198 }
1199 
1200 void test_membarrier_init_percpu_list(struct percpu_list *list)
1201 {
1202     int i;
1203 
1204     memset(list, 0, sizeof(*list));
1205     for (i = 0; i < CPU_SETSIZE; i++) {
1206         struct percpu_list_node *node;
1207 
1208         node = malloc(sizeof(*node));
1209         assert(node);
1210         node->data = 0;
1211         node->next = NULL;
1212         list->c[i].head = node;
1213     }
1214 }
1215 
1216 void test_membarrier_free_percpu_list(struct percpu_list *list)
1217 {
1218     int i;
1219 
1220     for (i = 0; i < CPU_SETSIZE; i++)
1221         free(list->c[i].head);
1222 }
1223 
/*
 * Thin wrapper invoking membarrier(2) directly through syscall(2);
 * returns the raw syscall result (0/−1 with errno set).
 */
static int sys_membarrier(int cmd, int flags, int cpu_id)
{
    return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
1228 
1229 /*
1230  * The manager thread swaps per-cpu lists that worker threads see,
1231  * and validates that there are no unexpected modifications.
1232  */
1233 void *test_membarrier_manager_thread(void *arg)
1234 {
1235     struct test_membarrier_thread_args *args =
1236         (struct test_membarrier_thread_args *)arg;
1237     struct percpu_list list_a, list_b;
1238     intptr_t expect_a = 0, expect_b = 0;
1239     int cpu_a = 0, cpu_b = 0;
1240 
1241     if (rseq_register_current_thread()) {
1242         fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1243             errno, strerror(errno));
1244         abort();
1245     }
1246 
1247     /* Init lists. */
1248     test_membarrier_init_percpu_list(&list_a);
1249     test_membarrier_init_percpu_list(&list_b);
1250 
1251     atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
1252 
1253     while (!atomic_load(&args->stop)) {
1254         /* list_a is "active". */
1255         cpu_a = rand() % CPU_SETSIZE;
1256         /*
1257          * As list_b is "inactive", we should never see changes
1258          * to list_b.
1259          */
1260         if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
1261             fprintf(stderr, "Membarrier test failed\n");
1262             abort();
1263         }
1264 
1265         /* Make list_b "active". */
1266         atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
1267         if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
1268                     MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
1269                 errno != ENXIO /* missing CPU */) {
1270             perror("sys_membarrier");
1271             abort();
1272         }
1273         /*
1274          * Cpu A should now only modify list_b, so the values
1275          * in list_a should be stable.
1276          */
1277         expect_a = atomic_load(&list_a.c[cpu_a].head->data);
1278 
1279         cpu_b = rand() % CPU_SETSIZE;
1280         /*
1281          * As list_a is "inactive", we should never see changes
1282          * to list_a.
1283          */
1284         if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
1285             fprintf(stderr, "Membarrier test failed\n");
1286             abort();
1287         }
1288 
1289         /* Make list_a "active". */
1290         atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
1291         if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
1292                     MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
1293                 errno != ENXIO /* missing CPU*/) {
1294             perror("sys_membarrier");
1295             abort();
1296         }
1297         /* Remember a value from list_b. */
1298         expect_b = atomic_load(&list_b.c[cpu_b].head->data);
1299     }
1300 
1301     test_membarrier_free_percpu_list(&list_a);
1302     test_membarrier_free_percpu_list(&list_b);
1303 
1304     if (rseq_unregister_current_thread()) {
1305         fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1306             errno, strerror(errno));
1307         abort();
1308     }
1309     return NULL;
1310 }
1311 
1312 void test_membarrier(void)
1313 {
1314     const int num_threads = opt_threads;
1315     struct test_membarrier_thread_args thread_args;
1316     pthread_t worker_threads[num_threads];
1317     pthread_t manager_thread;
1318     int i, ret;
1319 
1320     if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1321         perror("sys_membarrier");
1322         abort();
1323     }
1324 
1325     thread_args.stop = 0;
1326     thread_args.percpu_list_ptr = 0;
1327     ret = pthread_create(&manager_thread, NULL,
1328             test_membarrier_manager_thread, &thread_args);
1329     if (ret) {
1330         errno = ret;
1331         perror("pthread_create");
1332         abort();
1333     }
1334 
1335     for (i = 0; i < num_threads; i++) {
1336         ret = pthread_create(&worker_threads[i], NULL,
1337                 test_membarrier_worker_thread, &thread_args);
1338         if (ret) {
1339             errno = ret;
1340             perror("pthread_create");
1341             abort();
1342         }
1343     }
1344 
1345 
1346     for (i = 0; i < num_threads; i++) {
1347         ret = pthread_join(worker_threads[i], NULL);
1348         if (ret) {
1349             errno = ret;
1350             perror("pthread_join");
1351             abort();
1352         }
1353     }
1354 
1355     atomic_store(&thread_args.stop, 1);
1356     ret = pthread_join(manager_thread, NULL);
1357     if (ret) {
1358         errno = ret;
1359         perror("pthread_join");
1360         abort();
1361     }
1362 }
1363 #else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
/* Fallback when the architecture provides no rseq_offset_deref_addv. */
void test_membarrier(void)
{
    fprintf(stderr,
        "rseq_offset_deref_addv is not implemented on this architecture. Skipping membarrier test.\n");
}
1369 #endif
1370 
/*
 * Print the command-line help text to stdout.  argc is accepted for
 * signature compatibility but not consulted.
 */
static void show_usage(int argc, char **argv)
{
    static const char *const option_lines[] = {
        "    [-1 loops] Number of loops for delay injection 1\n",
        "    [-2 loops] Number of loops for delay injection 2\n",
        "    [-3 loops] Number of loops for delay injection 3\n",
        "    [-4 loops] Number of loops for delay injection 4\n",
        "    [-5 loops] Number of loops for delay injection 5\n",
        "    [-6 loops] Number of loops for delay injection 6\n",
        "    [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n",
        "    [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n",
        "    [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n",
        "    [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n",
        "    [-y] Yield\n",
        "    [-k] Kill thread with signal\n",
        "    [-s S] S: =0: disabled (default), >0: sleep time (ms)\n",
        "    [-t N] Number of threads (default 200)\n",
        "    [-r N] Number of repetitions per thread (default 5000)\n",
        "    [-d] Disable rseq system call (no initialization)\n",
        "    [-D M] Disable rseq for each M threads\n",
        "    [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n",
        "    [-M] Push into buffer and memcpy buffer with memory barriers.\n",
        "    [-v] Verbose output.\n",
        "    [-h] Show this help.\n",
    };
    size_t k;

    printf("Usage : %s <OPTIONS>\n", argv[0]);
    printf("OPTIONS:\n");
    for (k = 0; k < sizeof(option_lines) / sizeof(option_lines[0]); k++)
        fputs(option_lines[k], stdout);
    printf("\n");
}
1399 
/*
 * Parse command-line options, install the SIGUSR1 handler, register
 * the main thread with rseq (unless -d was given), then dispatch to
 * the test selected with -T (default 's' = spinlock).
 *
 * Returns 0 on success (including -h), -1 on usage or setup error.
 */
int main(int argc, char **argv)
{
    int i;

    /*
     * Hand-rolled parser: every recognized flag begins with '-'; flags
     * that take a value read it from argv[i + 1] after verifying it
     * exists, then skip it with i++.
     */
    for (i = 1; i < argc; i++) {
        if (argv[i][0] != '-')
            continue;
        switch (argv[i][1]) {
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            if (argc < i + 2) {
                show_usage(argc, argv);
                goto error;
            }
            /* The digit itself indexes the loop_cnt[] slot. */
            loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
            i++;
            break;
        case 'm':
            if (argc < i + 2) {
                show_usage(argc, argv);
                goto error;
            }
            opt_modulo = atol(argv[i + 1]);
            if (opt_modulo < 0) {
                show_usage(argc, argv);
                goto error;
            }
            i++;
            break;
        case 's':
            if (argc < i + 2) {
                show_usage(argc, argv);
                goto error;
            }
            opt_sleep = atol(argv[i + 1]);
            if (opt_sleep < 0) {
                show_usage(argc, argv);
                goto error;
            }
            i++;
            break;
        case 'y':
            opt_yield = 1;
            break;
        case 'k':
            opt_signal = 1;
            break;
        case 'd':
            opt_disable_rseq = 1;
            break;
        case 'D':
            if (argc < i + 2) {
                show_usage(argc, argv);
                goto error;
            }
            opt_disable_mod = atol(argv[i + 1]);
            if (opt_disable_mod < 0) {
                show_usage(argc, argv);
                goto error;
            }
            i++;
            break;
        case 't':
            if (argc < i + 2) {
                show_usage(argc, argv);
                goto error;
            }
            opt_threads = atol(argv[i + 1]);
            if (opt_threads < 0) {
                show_usage(argc, argv);
                goto error;
            }
            i++;
            break;
        case 'r':
            if (argc < i + 2) {
                show_usage(argc, argv);
                goto error;
            }
            opt_reps = atoll(argv[i + 1]);
            if (opt_reps < 0) {
                show_usage(argc, argv);
                goto error;
            }
            i++;
            break;
        case 'h':
            show_usage(argc, argv);
            goto end;
        case 'T':
            if (argc < i + 2) {
                show_usage(argc, argv);
                goto error;
            }
            /* Only the first character of the argument selects the test. */
            opt_test = *argv[i + 1];
            switch (opt_test) {
            case 's':
            case 'l':
            case 'i':
            case 'b':
            case 'm':
            case 'r':
                break;
            default:
                show_usage(argc, argv);
                goto error;
            }
            i++;
            break;
        case 'v':
            verbose = 1;
            break;
        case 'M':
            opt_mb = 1;
            break;
        default:
            show_usage(argc, argv);
            goto error;
        }
    }

    /* Mirror the parsed loop counts into the asm-visible globals. */
    loop_cnt_1 = loop_cnt[1];
    loop_cnt_2 = loop_cnt[2];
    loop_cnt_3 = loop_cnt[3];
    loop_cnt_4 = loop_cnt[4];
    loop_cnt_5 = loop_cnt[5];
    loop_cnt_6 = loop_cnt[6];

    if (set_signal_handler())
        goto error;

    if (!opt_disable_rseq && rseq_register_current_thread())
        goto error;
    switch (opt_test) {
    case 's':
        printf_verbose("spinlock\n");
        test_percpu_spinlock();
        break;
    case 'l':
        printf_verbose("linked list\n");
        test_percpu_list();
        break;
    case 'b':
        printf_verbose("buffer\n");
        test_percpu_buffer();
        break;
    case 'm':
        printf_verbose("memcpy buffer\n");
        test_percpu_memcpy_buffer();
        break;
    case 'i':
        printf_verbose("counter increment\n");
        test_percpu_inc();
        break;
    case 'r':
        printf_verbose("membarrier\n");
        test_membarrier();
        break;
    }
    if (!opt_disable_rseq && rseq_unregister_current_thread())
        abort();
end:
    return 0;

error:
    return -1;
}