Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 
/*
 * Test module for stressing the vmalloc allocator and analyzing its
 * performance.
 * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
 */
0007 #include <linux/init.h>
0008 #include <linux/kernel.h>
0009 #include <linux/module.h>
0010 #include <linux/vmalloc.h>
0011 #include <linux/random.h>
0012 #include <linux/kthread.h>
0013 #include <linux/moduleparam.h>
0014 #include <linux/completion.h>
0015 #include <linux/delay.h>
0016 #include <linux/rwsem.h>
0017 #include <linux/mm.h>
0018 #include <linux/rcupdate.h>
0019 #include <linux/slab.h>
0020 
/*
 * Declare a read-only (0444) module parameter in one step: define the
 * static backing variable with its initial value, register it through
 * module_param() and attach a description to it.
 *
 * The expansion deliberately ends without a semicolon; the invocation
 * supplies it. The last line must not end in a line continuation,
 * otherwise whatever follows the macro definition is spliced into it.
 */
#define __param(type, name, init, msg)      \
    static type name = init;                \
    module_param(name, type, 0444);         \
    MODULE_PARM_DESC(name, msg)
0025 
/* Number of worker kthreads; clamped to [1, USHRT_MAX] before use. */
__param(int, nr_threads, 0,
    "Number of workers to perform tests(min: 1 max: USHRT_MAX)");

/* When false, every worker shuffles the order in which it runs the tests. */
__param(bool, sequential_test_order, false,
    "Use sequential stress tests order");

/* How many times each selected test is run; non-positive values become 1. */
__param(int, test_repeat_count, 1,
    "Set test repeat counter");

/* Iteration count used inside the tests; non-positive values become 1. */
__param(int, test_loop_count, 1000000,
    "Set test loop counter");

/* Allocation size, in pages, for fix_size_alloc_test; values <= 0 mean 1. */
__param(int, nr_pages, 0,
    "Set number of pages for fix_size_alloc_test(default: 1)");

/* Bit N of the mask enables entry N of test_case_array. */
__param(int, run_test_mask, INT_MAX,
    "Set tests specified in the mask.\n\n"
        "\t\tid: 1,    name: fix_size_alloc_test\n"
        "\t\tid: 2,    name: full_fit_alloc_test\n"
        "\t\tid: 4,    name: long_busy_list_alloc_test\n"
        "\t\tid: 8,    name: random_size_alloc_test\n"
        "\t\tid: 16,   name: fix_align_alloc_test\n"
        "\t\tid: 32,   name: random_size_align_alloc_test\n"
        "\t\tid: 64,   name: align_shift_alloc_test\n"
        "\t\tid: 128,  name: pcpu_alloc_test\n"
        "\t\tid: 256,  name: kvfree_rcu_1_arg_vmalloc_test\n"
        "\t\tid: 512,  name: kvfree_rcu_2_arg_vmalloc_test\n"
        /* Add a new test case description here. */
);
0055 
/*
 * Read-write semaphore that synchronizes the setup phase with the
 * workers: the main thread holds it for writing while it spawns the
 * kthreads, and every worker takes it for reading before it starts
 * running its tests.
 */
static DECLARE_RWSEM(prepare_for_test_rwsem);

/*
 * Completion tracking for worker threads. test_n_undone counts the
 * workers that have not reported yet; the worker that drops it to zero
 * signals test_all_done_comp.
 */
static DECLARE_COMPLETION(test_all_done_comp);
static atomic_t test_n_undone = ATOMIC_INIT(0);
0067 
0068 static inline void
0069 test_report_one_done(void)
0070 {
0071     if (atomic_dec_and_test(&test_n_undone))
0072         complete(&test_all_done_comp);
0073 }
0074 
0075 static int random_size_align_alloc_test(void)
0076 {
0077     unsigned long size, align;
0078     unsigned int rnd;
0079     void *ptr;
0080     int i;
0081 
0082     for (i = 0; i < test_loop_count; i++) {
0083         rnd = prandom_u32();
0084 
0085         /*
0086          * Maximum 1024 pages, if PAGE_SIZE is 4096.
0087          */
0088         align = 1 << (rnd % 23);
0089 
0090         /*
0091          * Maximum 10 pages.
0092          */
0093         size = ((rnd % 10) + 1) * PAGE_SIZE;
0094 
0095         ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0,
0096                 __builtin_return_address(0));
0097         if (!ptr)
0098             return -1;
0099 
0100         vfree(ptr);
0101     }
0102 
0103     return 0;
0104 }
0105 
0106 /*
0107  * This test case is supposed to be failed.
0108  */
0109 static int align_shift_alloc_test(void)
0110 {
0111     unsigned long align;
0112     void *ptr;
0113     int i;
0114 
0115     for (i = 0; i < BITS_PER_LONG; i++) {
0116         align = ((unsigned long) 1) << i;
0117 
0118         ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0,
0119                 __builtin_return_address(0));
0120         if (!ptr)
0121             return -1;
0122 
0123         vfree(ptr);
0124     }
0125 
0126     return 0;
0127 }
0128 
0129 static int fix_align_alloc_test(void)
0130 {
0131     void *ptr;
0132     int i;
0133 
0134     for (i = 0; i < test_loop_count; i++) {
0135         ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1,
0136                 GFP_KERNEL | __GFP_ZERO, 0,
0137                 __builtin_return_address(0));
0138         if (!ptr)
0139             return -1;
0140 
0141         vfree(ptr);
0142     }
0143 
0144     return 0;
0145 }
0146 
0147 static int random_size_alloc_test(void)
0148 {
0149     unsigned int n;
0150     void *p;
0151     int i;
0152 
0153     for (i = 0; i < test_loop_count; i++) {
0154         n = prandom_u32();
0155         n = (n % 100) + 1;
0156 
0157         p = vmalloc(n * PAGE_SIZE);
0158 
0159         if (!p)
0160             return -1;
0161 
0162         *((__u8 *)p) = 1;
0163         vfree(p);
0164     }
0165 
0166     return 0;
0167 }
0168 
0169 static int long_busy_list_alloc_test(void)
0170 {
0171     void *ptr_1, *ptr_2;
0172     void **ptr;
0173     int rv = -1;
0174     int i;
0175 
0176     ptr = vmalloc(sizeof(void *) * 15000);
0177     if (!ptr)
0178         return rv;
0179 
0180     for (i = 0; i < 15000; i++)
0181         ptr[i] = vmalloc(1 * PAGE_SIZE);
0182 
0183     for (i = 0; i < test_loop_count; i++) {
0184         ptr_1 = vmalloc(100 * PAGE_SIZE);
0185         if (!ptr_1)
0186             goto leave;
0187 
0188         ptr_2 = vmalloc(1 * PAGE_SIZE);
0189         if (!ptr_2) {
0190             vfree(ptr_1);
0191             goto leave;
0192         }
0193 
0194         *((__u8 *)ptr_1) = 0;
0195         *((__u8 *)ptr_2) = 1;
0196 
0197         vfree(ptr_1);
0198         vfree(ptr_2);
0199     }
0200 
0201     /*  Success */
0202     rv = 0;
0203 
0204 leave:
0205     for (i = 0; i < 15000; i++)
0206         vfree(ptr[i]);
0207 
0208     vfree(ptr);
0209     return rv;
0210 }
0211 
0212 static int full_fit_alloc_test(void)
0213 {
0214     void **ptr, **junk_ptr, *tmp;
0215     int junk_length;
0216     int rv = -1;
0217     int i;
0218 
0219     junk_length = fls(num_online_cpus());
0220     junk_length *= (32 * 1024 * 1024 / PAGE_SIZE);
0221 
0222     ptr = vmalloc(sizeof(void *) * junk_length);
0223     if (!ptr)
0224         return rv;
0225 
0226     junk_ptr = vmalloc(sizeof(void *) * junk_length);
0227     if (!junk_ptr) {
0228         vfree(ptr);
0229         return rv;
0230     }
0231 
0232     for (i = 0; i < junk_length; i++) {
0233         ptr[i] = vmalloc(1 * PAGE_SIZE);
0234         junk_ptr[i] = vmalloc(1 * PAGE_SIZE);
0235     }
0236 
0237     for (i = 0; i < junk_length; i++)
0238         vfree(junk_ptr[i]);
0239 
0240     for (i = 0; i < test_loop_count; i++) {
0241         tmp = vmalloc(1 * PAGE_SIZE);
0242 
0243         if (!tmp)
0244             goto error;
0245 
0246         *((__u8 *)tmp) = 1;
0247         vfree(tmp);
0248     }
0249 
0250     /* Success */
0251     rv = 0;
0252 
0253 error:
0254     for (i = 0; i < junk_length; i++)
0255         vfree(ptr[i]);
0256 
0257     vfree(ptr);
0258     vfree(junk_ptr);
0259 
0260     return rv;
0261 }
0262 
0263 static int fix_size_alloc_test(void)
0264 {
0265     void *ptr;
0266     int i;
0267 
0268     for (i = 0; i < test_loop_count; i++) {
0269         ptr = vmalloc((nr_pages > 0 ? nr_pages:1) * PAGE_SIZE);
0270 
0271         if (!ptr)
0272             return -1;
0273 
0274         *((__u8 *)ptr) = 0;
0275 
0276         vfree(ptr);
0277     }
0278 
0279     return 0;
0280 }
0281 
0282 static int
0283 pcpu_alloc_test(void)
0284 {
0285     int rv = 0;
0286 #ifndef CONFIG_NEED_PER_CPU_KM
0287     void __percpu **pcpu;
0288     size_t size, align;
0289     int i;
0290 
0291     pcpu = vmalloc(sizeof(void __percpu *) * 35000);
0292     if (!pcpu)
0293         return -1;
0294 
0295     for (i = 0; i < 35000; i++) {
0296         unsigned int r;
0297 
0298         r = prandom_u32();
0299         size = (r % (PAGE_SIZE / 4)) + 1;
0300 
0301         /*
0302          * Maximum PAGE_SIZE
0303          */
0304         r = prandom_u32();
0305         align = 1 << ((r % 11) + 1);
0306 
0307         pcpu[i] = __alloc_percpu(size, align);
0308         if (!pcpu[i])
0309             rv = -1;
0310     }
0311 
0312     for (i = 0; i < 35000; i++)
0313         free_percpu(pcpu[i]);
0314 
0315     vfree(pcpu);
0316 #endif
0317     return rv;
0318 }
0319 
/*
 * Object layout used by the kvfree_rcu tests: the rcu member is the
 * field named in the two-argument kvfree_rcu() call, and array gives
 * the tests a payload byte to write to.
 */
struct test_kvfree_rcu {
    struct rcu_head rcu;
    unsigned char array[20];
};
0324 
0325 static int
0326 kvfree_rcu_1_arg_vmalloc_test(void)
0327 {
0328     struct test_kvfree_rcu *p;
0329     int i;
0330 
0331     for (i = 0; i < test_loop_count; i++) {
0332         p = vmalloc(1 * PAGE_SIZE);
0333         if (!p)
0334             return -1;
0335 
0336         p->array[0] = 'a';
0337         kvfree_rcu(p);
0338     }
0339 
0340     return 0;
0341 }
0342 
0343 static int
0344 kvfree_rcu_2_arg_vmalloc_test(void)
0345 {
0346     struct test_kvfree_rcu *p;
0347     int i;
0348 
0349     for (i = 0; i < test_loop_count; i++) {
0350         p = vmalloc(1 * PAGE_SIZE);
0351         if (!p)
0352             return -1;
0353 
0354         p->array[0] = 'a';
0355         kvfree_rcu(p, rcu);
0356     }
0357 
0358     return 0;
0359 }
0360 
/* Describes one test case: the name used in reports and its entry point. */
struct test_case_desc {
    const char *test_name;
    /* Returns 0 when the test passed, non-zero when it failed. */
    int (*test_func)(void);
};
0365 
0366 static struct test_case_desc test_case_array[] = {
0367     { "fix_size_alloc_test", fix_size_alloc_test },
0368     { "full_fit_alloc_test", full_fit_alloc_test },
0369     { "long_busy_list_alloc_test", long_busy_list_alloc_test },
0370     { "random_size_alloc_test", random_size_alloc_test },
0371     { "fix_align_alloc_test", fix_align_alloc_test },
0372     { "random_size_align_alloc_test", random_size_align_alloc_test },
0373     { "align_shift_alloc_test", align_shift_alloc_test },
0374     { "pcpu_alloc_test", pcpu_alloc_test },
0375     { "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test },
0376     { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test },
0377     /* Add a new test case here. */
0378 };
0379 
/* Per-worker statistics for a single test case. */
struct test_case_data {
    /* Number of repetitions whose test function returned non-zero. */
    int test_failed;
    /* Number of repetitions whose test function returned zero. */
    int test_passed;
    /* Elapsed microseconds for all repetitions divided by the repeat count. */
    u64 time;
};
0385 
/*
 * Per-worker state. tdriver points to an array with one element per
 * worker, allocated in init_test_configurtion().
 */
static struct test_driver {
    /* Result of kthread_run(); may be an ERR_PTR if the worker failed to start. */
    struct task_struct *task;
    /* Statistics, indexed like test_case_array. */
    struct test_case_data data[ARRAY_SIZE(test_case_array)];

    /* get_cycles() values sampled right before and after the test run. */
    unsigned long start;
    unsigned long stop;
} *tdriver;
0393 
/*
 * Shuffle the first @n elements of @arr in place using the
 * Fisher-Yates algorithm.
 *
 * @arr: array to permute
 * @n:   number of elements; nothing is done for n <= 1
 *
 * The swap partner for position i is drawn from [0, i] inclusive, so
 * an element may stay where it is. Drawing from [0, i - 1] instead
 * would only ever produce cyclic permutations, i.e. no element could
 * keep its position and the original order could never come out.
 */
static void shuffle_array(int *arr, int n)
{
    int i, j;

    for (i = n - 1; i > 0; i--)  {
        /* Cut the range to [0, i]. */
        j = prandom_u32() % (i + 1);

        /* Swap the entries; a no-op when j == i. */
        swap(arr[i], arr[j]);
    }
}
0409 
0410 static int test_func(void *private)
0411 {
0412     struct test_driver *t = private;
0413     int random_array[ARRAY_SIZE(test_case_array)];
0414     int index, i, j;
0415     ktime_t kt;
0416     u64 delta;
0417 
0418     for (i = 0; i < ARRAY_SIZE(test_case_array); i++)
0419         random_array[i] = i;
0420 
0421     if (!sequential_test_order)
0422         shuffle_array(random_array, ARRAY_SIZE(test_case_array));
0423 
0424     /*
0425      * Block until initialization is done.
0426      */
0427     down_read(&prepare_for_test_rwsem);
0428 
0429     t->start = get_cycles();
0430     for (i = 0; i < ARRAY_SIZE(test_case_array); i++) {
0431         index = random_array[i];
0432 
0433         /*
0434          * Skip tests if run_test_mask has been specified.
0435          */
0436         if (!((run_test_mask & (1 << index)) >> index))
0437             continue;
0438 
0439         kt = ktime_get();
0440         for (j = 0; j < test_repeat_count; j++) {
0441             if (!test_case_array[index].test_func())
0442                 t->data[index].test_passed++;
0443             else
0444                 t->data[index].test_failed++;
0445         }
0446 
0447         /*
0448          * Take an average time that test took.
0449          */
0450         delta = (u64) ktime_us_delta(ktime_get(), kt);
0451         do_div(delta, (u32) test_repeat_count);
0452 
0453         t->data[index].time = delta;
0454     }
0455     t->stop = get_cycles();
0456 
0457     up_read(&prepare_for_test_rwsem);
0458     test_report_one_done();
0459 
0460     /*
0461      * Wait for the kthread_stop() call.
0462      */
0463     while (!kthread_should_stop())
0464         msleep(10);
0465 
0466     return 0;
0467 }
0468 
0469 static int
0470 init_test_configurtion(void)
0471 {
0472     /*
0473      * A maximum number of workers is defined as hard-coded
0474      * value and set to USHRT_MAX. We add such gap just in
0475      * case and for potential heavy stressing.
0476      */
0477     nr_threads = clamp(nr_threads, 1, (int) USHRT_MAX);
0478 
0479     /* Allocate the space for test instances. */
0480     tdriver = kvcalloc(nr_threads, sizeof(*tdriver), GFP_KERNEL);
0481     if (tdriver == NULL)
0482         return -1;
0483 
0484     if (test_repeat_count <= 0)
0485         test_repeat_count = 1;
0486 
0487     if (test_loop_count <= 0)
0488         test_loop_count = 1;
0489 
0490     return 0;
0491 }
0492 
0493 static void do_concurrent_test(void)
0494 {
0495     int i, ret;
0496 
0497     /*
0498      * Set some basic configurations plus sanity check.
0499      */
0500     ret = init_test_configurtion();
0501     if (ret < 0)
0502         return;
0503 
0504     /*
0505      * Put on hold all workers.
0506      */
0507     down_write(&prepare_for_test_rwsem);
0508 
0509     for (i = 0; i < nr_threads; i++) {
0510         struct test_driver *t = &tdriver[i];
0511 
0512         t->task = kthread_run(test_func, t, "vmalloc_test/%d", i);
0513 
0514         if (!IS_ERR(t->task))
0515             /* Success. */
0516             atomic_inc(&test_n_undone);
0517         else
0518             pr_err("Failed to start %d kthread\n", i);
0519     }
0520 
0521     /*
0522      * Now let the workers do their job.
0523      */
0524     up_write(&prepare_for_test_rwsem);
0525 
0526     /*
0527      * Sleep quiet until all workers are done with 1 second
0528      * interval. Since the test can take a lot of time we
0529      * can run into a stack trace of the hung task. That is
0530      * why we go with completion_timeout and HZ value.
0531      */
0532     do {
0533         ret = wait_for_completion_timeout(&test_all_done_comp, HZ);
0534     } while (!ret);
0535 
0536     for (i = 0; i < nr_threads; i++) {
0537         struct test_driver *t = &tdriver[i];
0538         int j;
0539 
0540         if (!IS_ERR(t->task))
0541             kthread_stop(t->task);
0542 
0543         for (j = 0; j < ARRAY_SIZE(test_case_array); j++) {
0544             if (!((run_test_mask & (1 << j)) >> j))
0545                 continue;
0546 
0547             pr_info(
0548                 "Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n",
0549                 test_case_array[j].test_name,
0550                 t->data[j].test_passed,
0551                 t->data[j].test_failed,
0552                 test_repeat_count, test_loop_count,
0553                 t->data[j].time);
0554         }
0555 
0556         pr_info("All test took worker%d=%lu cycles\n",
0557             i, t->stop - t->start);
0558     }
0559 
0560     kvfree(tdriver);
0561 }
0562 
0563 static int vmalloc_test_init(void)
0564 {
0565     do_concurrent_test();
0566     return -EAGAIN; /* Fail will directly unload the module */
0567 }
0568 
/*
 * Module exit hook. Intentionally empty: vmalloc_test_init() releases
 * everything it set up and always returns an error.
 */
static void vmalloc_test_exit(void)
{
}
0572 
/* Register the module entry and exit points. */
module_init(vmalloc_test_init)
module_exit(vmalloc_test_exit)

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Uladzislau Rezki");
MODULE_DESCRIPTION("vmalloc test module");