Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * mem-memcpy.c
0004  *
0005  * Simple memcpy() and memset() benchmarks
0006  *
0007  * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
0008  */
0009 
0010 #include "debug.h"
0011 #include "../perf-sys.h"
0012 #include <subcmd/parse-options.h>
0013 #include "../util/header.h"
0014 #include "../util/cloexec.h"
0015 #include "../util/string2.h"
0016 #include "bench.h"
0017 #include "mem-memcpy-arch.h"
0018 #include "mem-memset-arch.h"
0019 
0020 #include <stdio.h>
0021 #include <stdlib.h>
0022 #include <string.h>
0023 #include <unistd.h>
0024 #include <sys/time.h>
0025 #include <errno.h>
0026 #include <linux/time64.h>
0027 #include <linux/zalloc.h>
0028 
/* Unit multiplier used when scaling throughput to KB, MB and GB. */
#define K 1024

/* Values filled in from the command line (see options[]). */
static const char *size_str = "1MB";        /* -s / --size */
static const char *function_str = "all";    /* -f / --function */
static int nr_loops = 1;                    /* -l / --nr_loops */
static bool use_cycles;                     /* -c / --cycles */

/* Descriptor of the cycles event; assigned by init_cycles(). */
static int cycles_fd;
0036 
0037 static const struct option options[] = {
0038     OPT_STRING('s', "size", &size_str, "1MB",
0039             "Specify the size of the memory buffers. "
0040             "Available units: B, KB, MB, GB and TB (case insensitive)"),
0041 
0042     OPT_STRING('f', "function", &function_str, "all",
0043             "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
0044 
0045     OPT_INTEGER('l', "nr_loops", &nr_loops,
0046             "Specify the number of loops to run. (default: 1)"),
0047 
0048     OPT_BOOLEAN('c', "cycles", &use_cycles,
0049             "Use a cycles event instead of gettimeofday() to measure performance"),
0050 
0051     OPT_END()
0052 };
0053 
/* Signature of a memcpy()-compatible routine. */
typedef void *(*memcpy_t)(void *, const void *, size_t);

/* Signature of a memset()-compatible routine. */
typedef void *(*memset_t)(void *, int, size_t);

/*
 * One benchmarkable routine.
 *
 * Tables of these are terminated by an entry whose name is NULL. Only one
 * member of 'fn' is meaningful for a given table: 'memcpy' for the memcpy
 * table, 'memset' for the memset table.
 */
struct function {
    const char *name;   /* identifier matched against --function */
    const char *desc;   /* human-readable description */
    union {
        memcpy_t memcpy;
        memset_t memset;
    } fn;
};
0065 
0066 static struct perf_event_attr cycle_attr = {
0067     .type       = PERF_TYPE_HARDWARE,
0068     .config     = PERF_COUNT_HW_CPU_CYCLES
0069 };
0070 
0071 static int init_cycles(void)
0072 {
0073     cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
0074 
0075     if (cycles_fd < 0 && errno == ENOSYS) {
0076         pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
0077         return -1;
0078     }
0079 
0080     return cycles_fd;
0081 }
0082 
0083 static u64 get_cycles(void)
0084 {
0085     int ret;
0086     u64 clk;
0087 
0088     ret = read(cycles_fd, &clk, sizeof(u64));
0089     BUG_ON(ret != sizeof(u64));
0090 
0091     return clk;
0092 }
0093 
0094 static double timeval2double(struct timeval *ts)
0095 {
0096     return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
0097 }
0098 
/*
 * Print a throughput figure given in bytes per second, scaled to
 * bytes/sec, KB/sec, MB/sec or GB/sec depending on its magnitude
 * (thresholds are successive powers of K).
 *
 * The argument is converted to double and evaluated exactly once, so
 * expressions with side effects are safe to pass.
 */
#define print_bps(x) do {                                               \
        const double print_bps_val_ = (x);                              \
                                                                        \
        if (print_bps_val_ < K)                                         \
            printf(" %14lf bytes/sec\n", print_bps_val_);               \
        else if (print_bps_val_ < K * K)                                \
            printf(" %14lf KB/sec\n", print_bps_val_ / K);              \
        else if (print_bps_val_ < K * K * K)                            \
            printf(" %14lf MB/sec\n", print_bps_val_ / K / K);          \
        else                                                            \
            printf(" %14lf GB/sec\n", print_bps_val_ / K / K / K);      \
    } while (0)
0109 
0110 struct bench_mem_info {
0111     const struct function *functions;
0112     u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
0113     double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
0114     const char *const *usage;
0115     bool alloc_src;
0116 };
0117 
0118 static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
0119 {
0120     const struct function *r = &info->functions[r_idx];
0121     double result_bps = 0.0;
0122     u64 result_cycles = 0;
0123     void *src = NULL, *dst = zalloc(size);
0124 
0125     printf("# function '%s' (%s)\n", r->name, r->desc);
0126 
0127     if (dst == NULL)
0128         goto out_alloc_failed;
0129 
0130     if (info->alloc_src) {
0131         src = zalloc(size);
0132         if (src == NULL)
0133             goto out_alloc_failed;
0134     }
0135 
0136     if (bench_format == BENCH_FORMAT_DEFAULT)
0137         printf("# Copying %s bytes ...\n\n", size_str);
0138 
0139     if (use_cycles) {
0140         result_cycles = info->do_cycles(r, size, src, dst);
0141     } else {
0142         result_bps = info->do_gettimeofday(r, size, src, dst);
0143     }
0144 
0145     switch (bench_format) {
0146     case BENCH_FORMAT_DEFAULT:
0147         if (use_cycles) {
0148             printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
0149         } else {
0150             print_bps(result_bps);
0151         }
0152         break;
0153 
0154     case BENCH_FORMAT_SIMPLE:
0155         if (use_cycles) {
0156             printf("%lf\n", (double)result_cycles/size_total);
0157         } else {
0158             printf("%lf\n", result_bps);
0159         }
0160         break;
0161 
0162     default:
0163         BUG_ON(1);
0164         break;
0165     }
0166 
0167 out_free:
0168     free(src);
0169     free(dst);
0170     return;
0171 out_alloc_failed:
0172     printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
0173     goto out_free;
0174 }
0175 
0176 static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
0177 {
0178     int i;
0179     size_t size;
0180     double size_total;
0181 
0182     argc = parse_options(argc, argv, options, info->usage, 0);
0183 
0184     if (use_cycles) {
0185         i = init_cycles();
0186         if (i < 0) {
0187             fprintf(stderr, "Failed to open cycles counter\n");
0188             return i;
0189         }
0190     }
0191 
0192     size = (size_t)perf_atoll((char *)size_str);
0193     size_total = (double)size * nr_loops;
0194 
0195     if ((s64)size <= 0) {
0196         fprintf(stderr, "Invalid size:%s\n", size_str);
0197         return 1;
0198     }
0199 
0200     if (!strncmp(function_str, "all", 3)) {
0201         for (i = 0; info->functions[i].name; i++)
0202             __bench_mem_function(info, i, size, size_total);
0203         return 0;
0204     }
0205 
0206     for (i = 0; info->functions[i].name; i++) {
0207         if (!strcmp(info->functions[i].name, function_str))
0208             break;
0209     }
0210     if (!info->functions[i].name) {
0211         if (strcmp(function_str, "help") && strcmp(function_str, "h"))
0212             printf("Unknown function: %s\n", function_str);
0213         printf("Available functions:\n");
0214         for (i = 0; info->functions[i].name; i++) {
0215             printf("\t%s ... %s\n",
0216                    info->functions[i].name, info->functions[i].desc);
0217         }
0218         return 1;
0219     }
0220 
0221     __bench_mem_function(info, i, size, size_total);
0222 
0223     return 0;
0224 }
0225 
0226 static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
0227 {
0228     /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
0229     memset(src, 0, size);
0230 
0231     /*
0232      * We prefault the freshly allocated memory range here,
0233      * to not measure page fault overhead:
0234      */
0235     fn(dst, src, size);
0236 }
0237 
0238 static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
0239 {
0240     u64 cycle_start = 0ULL, cycle_end = 0ULL;
0241     memcpy_t fn = r->fn.memcpy;
0242     int i;
0243 
0244     memcpy_prefault(fn, size, src, dst);
0245 
0246     cycle_start = get_cycles();
0247     for (i = 0; i < nr_loops; ++i)
0248         fn(dst, src, size);
0249     cycle_end = get_cycles();
0250 
0251     return cycle_end - cycle_start;
0252 }
0253 
0254 static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
0255 {
0256     struct timeval tv_start, tv_end, tv_diff;
0257     memcpy_t fn = r->fn.memcpy;
0258     int i;
0259 
0260     memcpy_prefault(fn, size, src, dst);
0261 
0262     BUG_ON(gettimeofday(&tv_start, NULL));
0263     for (i = 0; i < nr_loops; ++i)
0264         fn(dst, src, size);
0265     BUG_ON(gettimeofday(&tv_end, NULL));
0266 
0267     timersub(&tv_end, &tv_start, &tv_diff);
0268 
0269     return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
0270 }
0271 
0272 struct function memcpy_functions[] = {
0273     { .name     = "default",
0274       .desc     = "Default memcpy() provided by glibc",
0275       .fn.memcpy    = memcpy },
0276 
0277 #ifdef HAVE_ARCH_X86_64_SUPPORT
0278 # define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
0279 # include "mem-memcpy-x86-64-asm-def.h"
0280 # undef MEMCPY_FN
0281 #endif
0282 
0283     { .name = NULL, }
0284 };
0285 
/* NULL-terminated usage strings for the memcpy benchmark. */
static const char * const bench_mem_memcpy_usage[] = {
    "perf bench mem memcpy <options>",
    NULL,
};
0290 
0291 int bench_mem_memcpy(int argc, const char **argv)
0292 {
0293     struct bench_mem_info info = {
0294         .functions      = memcpy_functions,
0295         .do_cycles      = do_memcpy_cycles,
0296         .do_gettimeofday    = do_memcpy_gettimeofday,
0297         .usage          = bench_mem_memcpy_usage,
0298         .alloc_src              = true,
0299     };
0300 
0301     return bench_mem_common(argc, argv, &info);
0302 }
0303 
0304 static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
0305 {
0306     u64 cycle_start = 0ULL, cycle_end = 0ULL;
0307     memset_t fn = r->fn.memset;
0308     int i;
0309 
0310     /*
0311      * We prefault the freshly allocated memory range here,
0312      * to not measure page fault overhead:
0313      */
0314     fn(dst, -1, size);
0315 
0316     cycle_start = get_cycles();
0317     for (i = 0; i < nr_loops; ++i)
0318         fn(dst, i, size);
0319     cycle_end = get_cycles();
0320 
0321     return cycle_end - cycle_start;
0322 }
0323 
0324 static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
0325 {
0326     struct timeval tv_start, tv_end, tv_diff;
0327     memset_t fn = r->fn.memset;
0328     int i;
0329 
0330     /*
0331      * We prefault the freshly allocated memory range here,
0332      * to not measure page fault overhead:
0333      */
0334     fn(dst, -1, size);
0335 
0336     BUG_ON(gettimeofday(&tv_start, NULL));
0337     for (i = 0; i < nr_loops; ++i)
0338         fn(dst, i, size);
0339     BUG_ON(gettimeofday(&tv_end, NULL));
0340 
0341     timersub(&tv_end, &tv_start, &tv_diff);
0342 
0343     return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
0344 }
0345 
/* NULL-terminated usage strings for the memset benchmark. */
static const char * const bench_mem_memset_usage[] = {
    "perf bench mem memset <options>",
    NULL,
};
0350 
0351 static const struct function memset_functions[] = {
0352     { .name     = "default",
0353       .desc     = "Default memset() provided by glibc",
0354       .fn.memset    = memset },
0355 
0356 #ifdef HAVE_ARCH_X86_64_SUPPORT
0357 # define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
0358 # include "mem-memset-x86-64-asm-def.h"
0359 # undef MEMSET_FN
0360 #endif
0361 
0362     { .name = NULL, }
0363 };
0364 
0365 int bench_mem_memset(int argc, const char **argv)
0366 {
0367     struct bench_mem_info info = {
0368         .functions      = memset_functions,
0369         .do_cycles      = do_memset_cycles,
0370         .do_gettimeofday    = do_memset_gettimeofday,
0371         .usage          = bench_mem_memset_usage,
0372     };
0373 
0374     return bench_mem_common(argc, argv, &info);
0375 }