Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * KVM demand paging test
0004  * Adapted from dirty_log_test.c
0005  *
0006  * Copyright (C) 2018, Red Hat, Inc.
0007  * Copyright (C) 2019, Google, Inc.
0008  */
0009 
0010 #define _GNU_SOURCE /* for pipe2 */
0011 
0012 #include <inttypes.h>
0013 #include <stdio.h>
0014 #include <stdlib.h>
0015 #include <time.h>
0016 #include <poll.h>
0017 #include <pthread.h>
0018 #include <linux/userfaultfd.h>
0019 #include <sys/syscall.h>
0020 
0021 #include "kvm_util.h"
0022 #include "test_util.h"
0023 #include "perf_test_util.h"
0024 #include "guest_modes.h"
0025 
0026 #ifdef __NR_userfaultfd
0027 
/*
 * Per-page trace output.  Expands to printf() only when the test is built
 * with PRINT_PER_PAGE_UPDATES defined; otherwise the arguments are handed
 * to _no_printf() (defined outside this file).
 */
#if defined(PRINT_PER_PAGE_UPDATES)
#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
#else
#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
#endif
0033 
/*
 * Per-vCPU / per-handler-thread trace output.  Expands to printf() only
 * when the test is built with PRINT_PER_VCPU_UPDATES defined; otherwise
 * the arguments are handed to _no_printf() (defined outside this file).
 */
#if defined(PRINT_PER_VCPU_UPDATES)
#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
#else
#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
#endif
0039 
0040 static int nr_vcpus = 1;
0041 static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
0042 static size_t demand_paging_size;
0043 static char *guest_data_prototype;
0044 
0045 static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
0046 {
0047     struct kvm_vcpu *vcpu = vcpu_args->vcpu;
0048     int vcpu_idx = vcpu_args->vcpu_idx;
0049     struct kvm_run *run = vcpu->run;
0050     struct timespec start;
0051     struct timespec ts_diff;
0052     int ret;
0053 
0054     clock_gettime(CLOCK_MONOTONIC, &start);
0055 
0056     /* Let the guest access its memory */
0057     ret = _vcpu_run(vcpu);
0058     TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
0059     if (get_ucall(vcpu, NULL) != UCALL_SYNC) {
0060         TEST_ASSERT(false,
0061                 "Invalid guest sync status: exit_reason=%s\n",
0062                 exit_reason_str(run->exit_reason));
0063     }
0064 
0065     ts_diff = timespec_elapsed(start);
0066     PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_idx,
0067                ts_diff.tv_sec, ts_diff.tv_nsec);
0068 }
0069 
0070 static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
0071 {
0072     pid_t tid = syscall(__NR_gettid);
0073     struct timespec start;
0074     struct timespec ts_diff;
0075     int r;
0076 
0077     clock_gettime(CLOCK_MONOTONIC, &start);
0078 
0079     if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) {
0080         struct uffdio_copy copy;
0081 
0082         copy.src = (uint64_t)guest_data_prototype;
0083         copy.dst = addr;
0084         copy.len = demand_paging_size;
0085         copy.mode = 0;
0086 
0087         r = ioctl(uffd, UFFDIO_COPY, &copy);
0088         if (r == -1) {
0089             pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n",
0090                 addr, tid, errno);
0091             return r;
0092         }
0093     } else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
0094         struct uffdio_continue cont = {0};
0095 
0096         cont.range.start = addr;
0097         cont.range.len = demand_paging_size;
0098 
0099         r = ioctl(uffd, UFFDIO_CONTINUE, &cont);
0100         if (r == -1) {
0101             pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n",
0102                 addr, tid, errno);
0103             return r;
0104         }
0105     } else {
0106         TEST_FAIL("Invalid uffd mode %d", uffd_mode);
0107     }
0108 
0109     ts_diff = timespec_elapsed(start);
0110 
0111     PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid,
0112                timespec_to_ns(ts_diff));
0113     PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
0114                demand_paging_size, addr, tid);
0115 
0116     return 0;
0117 }
0118 
/*
 * Loop-exit flag tested by uffd_handler_thread_fn().  No code visible in
 * this file ever sets it; the handler threads are asked to stop through
 * their pipe instead.
 */
bool quit_uffd_thread;
0120 
0121 struct uffd_handler_args {
0122     int uffd_mode;
0123     int uffd;
0124     int pipefd;
0125     useconds_t delay;
0126 };
0127 
0128 static void *uffd_handler_thread_fn(void *arg)
0129 {
0130     struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg;
0131     int uffd = uffd_args->uffd;
0132     int pipefd = uffd_args->pipefd;
0133     useconds_t delay = uffd_args->delay;
0134     int64_t pages = 0;
0135     struct timespec start;
0136     struct timespec ts_diff;
0137 
0138     clock_gettime(CLOCK_MONOTONIC, &start);
0139     while (!quit_uffd_thread) {
0140         struct uffd_msg msg;
0141         struct pollfd pollfd[2];
0142         char tmp_chr;
0143         int r;
0144         uint64_t addr;
0145 
0146         pollfd[0].fd = uffd;
0147         pollfd[0].events = POLLIN;
0148         pollfd[1].fd = pipefd;
0149         pollfd[1].events = POLLIN;
0150 
0151         r = poll(pollfd, 2, -1);
0152         switch (r) {
0153         case -1:
0154             pr_info("poll err");
0155             continue;
0156         case 0:
0157             continue;
0158         case 1:
0159             break;
0160         default:
0161             pr_info("Polling uffd returned %d", r);
0162             return NULL;
0163         }
0164 
0165         if (pollfd[0].revents & POLLERR) {
0166             pr_info("uffd revents has POLLERR");
0167             return NULL;
0168         }
0169 
0170         if (pollfd[1].revents & POLLIN) {
0171             r = read(pollfd[1].fd, &tmp_chr, 1);
0172             TEST_ASSERT(r == 1,
0173                     "Error reading pipefd in UFFD thread\n");
0174             return NULL;
0175         }
0176 
0177         if (!(pollfd[0].revents & POLLIN))
0178             continue;
0179 
0180         r = read(uffd, &msg, sizeof(msg));
0181         if (r == -1) {
0182             if (errno == EAGAIN)
0183                 continue;
0184             pr_info("Read of uffd got errno %d\n", errno);
0185             return NULL;
0186         }
0187 
0188         if (r != sizeof(msg)) {
0189             pr_info("Read on uffd returned unexpected size: %d bytes", r);
0190             return NULL;
0191         }
0192 
0193         if (!(msg.event & UFFD_EVENT_PAGEFAULT))
0194             continue;
0195 
0196         if (delay)
0197             usleep(delay);
0198         addr =  msg.arg.pagefault.address;
0199         r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr);
0200         if (r < 0)
0201             return NULL;
0202         pages++;
0203     }
0204 
0205     ts_diff = timespec_elapsed(start);
0206     PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
0207                pages, ts_diff.tv_sec, ts_diff.tv_nsec,
0208                pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
0209 
0210     return NULL;
0211 }
0212 
0213 static void setup_demand_paging(struct kvm_vm *vm,
0214                 pthread_t *uffd_handler_thread, int pipefd,
0215                 int uffd_mode, useconds_t uffd_delay,
0216                 struct uffd_handler_args *uffd_args,
0217                 void *hva, void *alias, uint64_t len)
0218 {
0219     bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
0220     int uffd;
0221     struct uffdio_api uffdio_api;
0222     struct uffdio_register uffdio_register;
0223     uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
0224     int ret;
0225 
0226     PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
0227                is_minor ? "MINOR" : "MISSING",
0228                is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
0229 
0230     /* In order to get minor faults, prefault via the alias. */
0231     if (is_minor) {
0232         size_t p;
0233 
0234         expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
0235 
0236         TEST_ASSERT(alias != NULL, "Alias required for minor faults");
0237         for (p = 0; p < (len / demand_paging_size); ++p) {
0238             memcpy(alias + (p * demand_paging_size),
0239                    guest_data_prototype, demand_paging_size);
0240         }
0241     }
0242 
0243     uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
0244     TEST_ASSERT(uffd >= 0, __KVM_SYSCALL_ERROR("userfaultfd()", uffd));
0245 
0246     uffdio_api.api = UFFD_API;
0247     uffdio_api.features = 0;
0248     ret = ioctl(uffd, UFFDIO_API, &uffdio_api);
0249     TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_API", ret));
0250 
0251     uffdio_register.range.start = (uint64_t)hva;
0252     uffdio_register.range.len = len;
0253     uffdio_register.mode = uffd_mode;
0254     ret = ioctl(uffd, UFFDIO_REGISTER, &uffdio_register);
0255     TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_REGISTER", ret));
0256     TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
0257             expected_ioctls, "missing userfaultfd ioctls");
0258 
0259     uffd_args->uffd_mode = uffd_mode;
0260     uffd_args->uffd = uffd;
0261     uffd_args->pipefd = pipefd;
0262     uffd_args->delay = uffd_delay;
0263     pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn,
0264                uffd_args);
0265 
0266     PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
0267                hva, hva + len);
0268 }
0269 
0270 struct test_params {
0271     int uffd_mode;
0272     useconds_t uffd_delay;
0273     enum vm_mem_backing_src_type src_type;
0274     bool partition_vcpu_memory_access;
0275 };
0276 
0277 static void run_test(enum vm_guest_mode mode, void *arg)
0278 {
0279     struct test_params *p = arg;
0280     pthread_t *uffd_handler_threads = NULL;
0281     struct uffd_handler_args *uffd_args = NULL;
0282     struct timespec start;
0283     struct timespec ts_diff;
0284     int *pipefds = NULL;
0285     struct kvm_vm *vm;
0286     int r, i;
0287 
0288     vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
0289                  p->src_type, p->partition_vcpu_memory_access);
0290 
0291     demand_paging_size = get_backing_src_pagesz(p->src_type);
0292 
0293     guest_data_prototype = malloc(demand_paging_size);
0294     TEST_ASSERT(guest_data_prototype,
0295             "Failed to allocate buffer for guest data pattern");
0296     memset(guest_data_prototype, 0xAB, demand_paging_size);
0297 
0298     if (p->uffd_mode) {
0299         uffd_handler_threads =
0300             malloc(nr_vcpus * sizeof(*uffd_handler_threads));
0301         TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
0302 
0303         uffd_args = malloc(nr_vcpus * sizeof(*uffd_args));
0304         TEST_ASSERT(uffd_args, "Memory allocation failed");
0305 
0306         pipefds = malloc(sizeof(int) * nr_vcpus * 2);
0307         TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
0308 
0309         for (i = 0; i < nr_vcpus; i++) {
0310             struct perf_test_vcpu_args *vcpu_args;
0311             void *vcpu_hva;
0312             void *vcpu_alias;
0313 
0314             vcpu_args = &perf_test_args.vcpu_args[i];
0315 
0316             /* Cache the host addresses of the region */
0317             vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
0318             vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa);
0319 
0320             /*
0321              * Set up user fault fd to handle demand paging
0322              * requests.
0323              */
0324             r = pipe2(&pipefds[i * 2],
0325                   O_CLOEXEC | O_NONBLOCK);
0326             TEST_ASSERT(!r, "Failed to set up pipefd");
0327 
0328             setup_demand_paging(vm, &uffd_handler_threads[i],
0329                         pipefds[i * 2], p->uffd_mode,
0330                         p->uffd_delay, &uffd_args[i],
0331                         vcpu_hva, vcpu_alias,
0332                         vcpu_args->pages * perf_test_args.guest_page_size);
0333         }
0334     }
0335 
0336     pr_info("Finished creating vCPUs and starting uffd threads\n");
0337 
0338     clock_gettime(CLOCK_MONOTONIC, &start);
0339     perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
0340     pr_info("Started all vCPUs\n");
0341 
0342     perf_test_join_vcpu_threads(nr_vcpus);
0343     ts_diff = timespec_elapsed(start);
0344     pr_info("All vCPU threads joined\n");
0345 
0346     if (p->uffd_mode) {
0347         char c;
0348 
0349         /* Tell the user fault fd handler threads to quit */
0350         for (i = 0; i < nr_vcpus; i++) {
0351             r = write(pipefds[i * 2 + 1], &c, 1);
0352             TEST_ASSERT(r == 1, "Unable to write to pipefd");
0353 
0354             pthread_join(uffd_handler_threads[i], NULL);
0355         }
0356     }
0357 
0358     pr_info("Total guest execution time: %ld.%.9lds\n",
0359         ts_diff.tv_sec, ts_diff.tv_nsec);
0360     pr_info("Overall demand paging rate: %f pgs/sec\n",
0361         perf_test_args.vcpu_args[0].pages * nr_vcpus /
0362         ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
0363 
0364     perf_test_destroy_vm(vm);
0365 
0366     free(guest_data_prototype);
0367     if (p->uffd_mode) {
0368         free(uffd_handler_threads);
0369         free(uffd_args);
0370         free(pipefds);
0371     }
0372 }
0373 
/*
 * Print the usage text for the program invoked as @name, then terminate
 * the process with exit status 0.  Does not return.
 */
static void help(char *name)
{
    putchar('\n');
    printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
           "          [-b memory] [-s type] [-v vcpus] [-o]\n", name);
    guest_modes_help();
    fputs(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
          "     UFFD registration mode: 'MISSING' or 'MINOR'.\n", stdout);
    fputs(" -d: add a delay in usec to the User Fault\n"
          "     FD handler to simulate demand paging\n"
          "     overheads. Ignored without -u.\n", stdout);
    fputs(" -b: specify the size of the memory region which should be\n"
          "     demand paged by each vCPU. e.g. 10M or 3G.\n"
          "     Default: 1G\n", stdout);
    backing_src_help("-s");
    fputs(" -v: specify the number of vCPUs to run.\n", stdout);
    fputs(" -o: Overlap guest memory accesses instead of partitioning\n"
          "     them into a separate region of memory for each vCPU.\n", stdout);
    putchar('\n');
    exit(0);
}
0395 
0396 int main(int argc, char *argv[])
0397 {
0398     int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
0399     struct test_params p = {
0400         .src_type = DEFAULT_VM_MEM_SRC,
0401         .partition_vcpu_memory_access = true,
0402     };
0403     int opt;
0404 
0405     guest_modes_append_default();
0406 
0407     while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) {
0408         switch (opt) {
0409         case 'm':
0410             guest_modes_cmdline(optarg);
0411             break;
0412         case 'u':
0413             if (!strcmp("MISSING", optarg))
0414                 p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
0415             else if (!strcmp("MINOR", optarg))
0416                 p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR;
0417             TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'.");
0418             break;
0419         case 'd':
0420             p.uffd_delay = strtoul(optarg, NULL, 0);
0421             TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported.");
0422             break;
0423         case 'b':
0424             guest_percpu_mem_size = parse_size(optarg);
0425             break;
0426         case 's':
0427             p.src_type = parse_backing_src_type(optarg);
0428             break;
0429         case 'v':
0430             nr_vcpus = atoi(optarg);
0431             TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
0432                     "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
0433             break;
0434         case 'o':
0435             p.partition_vcpu_memory_access = false;
0436             break;
0437         case 'h':
0438         default:
0439             help(argv[0]);
0440             break;
0441         }
0442     }
0443 
0444     if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
0445         !backing_src_is_shared(p.src_type)) {
0446         TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
0447     }
0448 
0449     for_each_guest_mode(run_test, &p);
0450 
0451     return 0;
0452 }
0453 
0454 #else /* __NR_userfaultfd */
0455 
0456 #warning "missing __NR_userfaultfd definition"
0457 
0458 int main(void)
0459 {
0460     print_skip("__NR_userfaultfd must be present for userfaultfd test");
0461     return KSFT_SKIP;
0462 }
0463 
0464 #endif /* __NR_userfaultfd */