0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 #define _GNU_SOURCE
0011
0012 #include <inttypes.h>
0013 #include <stdio.h>
0014 #include <stdlib.h>
0015 #include <time.h>
0016 #include <poll.h>
0017 #include <pthread.h>
0018 #include <linux/userfaultfd.h>
0019 #include <sys/syscall.h>
0020
0021 #include "kvm_util.h"
0022 #include "test_util.h"
0023 #include "perf_test_util.h"
0024 #include "guest_modes.h"
0025
0026 #ifdef __NR_userfaultfd
0027
0028 #ifdef PRINT_PER_PAGE_UPDATES
0029 #define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
0030 #else
0031 #define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
0032 #endif
0033
0034 #ifdef PRINT_PER_VCPU_UPDATES
0035 #define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
0036 #else
0037 #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
0038 #endif
0039
0040 static int nr_vcpus = 1;
0041 static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
0042 static size_t demand_paging_size;
0043 static char *guest_data_prototype;
0044
0045 static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
0046 {
0047 struct kvm_vcpu *vcpu = vcpu_args->vcpu;
0048 int vcpu_idx = vcpu_args->vcpu_idx;
0049 struct kvm_run *run = vcpu->run;
0050 struct timespec start;
0051 struct timespec ts_diff;
0052 int ret;
0053
0054 clock_gettime(CLOCK_MONOTONIC, &start);
0055
0056
0057 ret = _vcpu_run(vcpu);
0058 TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
0059 if (get_ucall(vcpu, NULL) != UCALL_SYNC) {
0060 TEST_ASSERT(false,
0061 "Invalid guest sync status: exit_reason=%s\n",
0062 exit_reason_str(run->exit_reason));
0063 }
0064
0065 ts_diff = timespec_elapsed(start);
0066 PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_idx,
0067 ts_diff.tv_sec, ts_diff.tv_nsec);
0068 }
0069
0070 static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
0071 {
0072 pid_t tid = syscall(__NR_gettid);
0073 struct timespec start;
0074 struct timespec ts_diff;
0075 int r;
0076
0077 clock_gettime(CLOCK_MONOTONIC, &start);
0078
0079 if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) {
0080 struct uffdio_copy copy;
0081
0082 copy.src = (uint64_t)guest_data_prototype;
0083 copy.dst = addr;
0084 copy.len = demand_paging_size;
0085 copy.mode = 0;
0086
0087 r = ioctl(uffd, UFFDIO_COPY, ©);
0088 if (r == -1) {
0089 pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n",
0090 addr, tid, errno);
0091 return r;
0092 }
0093 } else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
0094 struct uffdio_continue cont = {0};
0095
0096 cont.range.start = addr;
0097 cont.range.len = demand_paging_size;
0098
0099 r = ioctl(uffd, UFFDIO_CONTINUE, &cont);
0100 if (r == -1) {
0101 pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n",
0102 addr, tid, errno);
0103 return r;
0104 }
0105 } else {
0106 TEST_FAIL("Invalid uffd mode %d", uffd_mode);
0107 }
0108
0109 ts_diff = timespec_elapsed(start);
0110
0111 PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid,
0112 timespec_to_ns(ts_diff));
0113 PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
0114 demand_paging_size, addr, tid);
0115
0116 return 0;
0117 }
0118
0119 bool quit_uffd_thread;
0120
0121 struct uffd_handler_args {
0122 int uffd_mode;
0123 int uffd;
0124 int pipefd;
0125 useconds_t delay;
0126 };
0127
0128 static void *uffd_handler_thread_fn(void *arg)
0129 {
0130 struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg;
0131 int uffd = uffd_args->uffd;
0132 int pipefd = uffd_args->pipefd;
0133 useconds_t delay = uffd_args->delay;
0134 int64_t pages = 0;
0135 struct timespec start;
0136 struct timespec ts_diff;
0137
0138 clock_gettime(CLOCK_MONOTONIC, &start);
0139 while (!quit_uffd_thread) {
0140 struct uffd_msg msg;
0141 struct pollfd pollfd[2];
0142 char tmp_chr;
0143 int r;
0144 uint64_t addr;
0145
0146 pollfd[0].fd = uffd;
0147 pollfd[0].events = POLLIN;
0148 pollfd[1].fd = pipefd;
0149 pollfd[1].events = POLLIN;
0150
0151 r = poll(pollfd, 2, -1);
0152 switch (r) {
0153 case -1:
0154 pr_info("poll err");
0155 continue;
0156 case 0:
0157 continue;
0158 case 1:
0159 break;
0160 default:
0161 pr_info("Polling uffd returned %d", r);
0162 return NULL;
0163 }
0164
0165 if (pollfd[0].revents & POLLERR) {
0166 pr_info("uffd revents has POLLERR");
0167 return NULL;
0168 }
0169
0170 if (pollfd[1].revents & POLLIN) {
0171 r = read(pollfd[1].fd, &tmp_chr, 1);
0172 TEST_ASSERT(r == 1,
0173 "Error reading pipefd in UFFD thread\n");
0174 return NULL;
0175 }
0176
0177 if (!(pollfd[0].revents & POLLIN))
0178 continue;
0179
0180 r = read(uffd, &msg, sizeof(msg));
0181 if (r == -1) {
0182 if (errno == EAGAIN)
0183 continue;
0184 pr_info("Read of uffd got errno %d\n", errno);
0185 return NULL;
0186 }
0187
0188 if (r != sizeof(msg)) {
0189 pr_info("Read on uffd returned unexpected size: %d bytes", r);
0190 return NULL;
0191 }
0192
0193 if (!(msg.event & UFFD_EVENT_PAGEFAULT))
0194 continue;
0195
0196 if (delay)
0197 usleep(delay);
0198 addr = msg.arg.pagefault.address;
0199 r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr);
0200 if (r < 0)
0201 return NULL;
0202 pages++;
0203 }
0204
0205 ts_diff = timespec_elapsed(start);
0206 PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
0207 pages, ts_diff.tv_sec, ts_diff.tv_nsec,
0208 pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
0209
0210 return NULL;
0211 }
0212
0213 static void setup_demand_paging(struct kvm_vm *vm,
0214 pthread_t *uffd_handler_thread, int pipefd,
0215 int uffd_mode, useconds_t uffd_delay,
0216 struct uffd_handler_args *uffd_args,
0217 void *hva, void *alias, uint64_t len)
0218 {
0219 bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
0220 int uffd;
0221 struct uffdio_api uffdio_api;
0222 struct uffdio_register uffdio_register;
0223 uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
0224 int ret;
0225
0226 PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
0227 is_minor ? "MINOR" : "MISSING",
0228 is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
0229
0230
0231 if (is_minor) {
0232 size_t p;
0233
0234 expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
0235
0236 TEST_ASSERT(alias != NULL, "Alias required for minor faults");
0237 for (p = 0; p < (len / demand_paging_size); ++p) {
0238 memcpy(alias + (p * demand_paging_size),
0239 guest_data_prototype, demand_paging_size);
0240 }
0241 }
0242
0243 uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
0244 TEST_ASSERT(uffd >= 0, __KVM_SYSCALL_ERROR("userfaultfd()", uffd));
0245
0246 uffdio_api.api = UFFD_API;
0247 uffdio_api.features = 0;
0248 ret = ioctl(uffd, UFFDIO_API, &uffdio_api);
0249 TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_API", ret));
0250
0251 uffdio_register.range.start = (uint64_t)hva;
0252 uffdio_register.range.len = len;
0253 uffdio_register.mode = uffd_mode;
0254 ret = ioctl(uffd, UFFDIO_REGISTER, &uffdio_register);
0255 TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_REGISTER", ret));
0256 TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
0257 expected_ioctls, "missing userfaultfd ioctls");
0258
0259 uffd_args->uffd_mode = uffd_mode;
0260 uffd_args->uffd = uffd;
0261 uffd_args->pipefd = pipefd;
0262 uffd_args->delay = uffd_delay;
0263 pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn,
0264 uffd_args);
0265
0266 PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
0267 hva, hva + len);
0268 }
0269
0270 struct test_params {
0271 int uffd_mode;
0272 useconds_t uffd_delay;
0273 enum vm_mem_backing_src_type src_type;
0274 bool partition_vcpu_memory_access;
0275 };
0276
0277 static void run_test(enum vm_guest_mode mode, void *arg)
0278 {
0279 struct test_params *p = arg;
0280 pthread_t *uffd_handler_threads = NULL;
0281 struct uffd_handler_args *uffd_args = NULL;
0282 struct timespec start;
0283 struct timespec ts_diff;
0284 int *pipefds = NULL;
0285 struct kvm_vm *vm;
0286 int r, i;
0287
0288 vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
0289 p->src_type, p->partition_vcpu_memory_access);
0290
0291 demand_paging_size = get_backing_src_pagesz(p->src_type);
0292
0293 guest_data_prototype = malloc(demand_paging_size);
0294 TEST_ASSERT(guest_data_prototype,
0295 "Failed to allocate buffer for guest data pattern");
0296 memset(guest_data_prototype, 0xAB, demand_paging_size);
0297
0298 if (p->uffd_mode) {
0299 uffd_handler_threads =
0300 malloc(nr_vcpus * sizeof(*uffd_handler_threads));
0301 TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
0302
0303 uffd_args = malloc(nr_vcpus * sizeof(*uffd_args));
0304 TEST_ASSERT(uffd_args, "Memory allocation failed");
0305
0306 pipefds = malloc(sizeof(int) * nr_vcpus * 2);
0307 TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
0308
0309 for (i = 0; i < nr_vcpus; i++) {
0310 struct perf_test_vcpu_args *vcpu_args;
0311 void *vcpu_hva;
0312 void *vcpu_alias;
0313
0314 vcpu_args = &perf_test_args.vcpu_args[i];
0315
0316
0317 vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
0318 vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa);
0319
0320
0321
0322
0323
0324 r = pipe2(&pipefds[i * 2],
0325 O_CLOEXEC | O_NONBLOCK);
0326 TEST_ASSERT(!r, "Failed to set up pipefd");
0327
0328 setup_demand_paging(vm, &uffd_handler_threads[i],
0329 pipefds[i * 2], p->uffd_mode,
0330 p->uffd_delay, &uffd_args[i],
0331 vcpu_hva, vcpu_alias,
0332 vcpu_args->pages * perf_test_args.guest_page_size);
0333 }
0334 }
0335
0336 pr_info("Finished creating vCPUs and starting uffd threads\n");
0337
0338 clock_gettime(CLOCK_MONOTONIC, &start);
0339 perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
0340 pr_info("Started all vCPUs\n");
0341
0342 perf_test_join_vcpu_threads(nr_vcpus);
0343 ts_diff = timespec_elapsed(start);
0344 pr_info("All vCPU threads joined\n");
0345
0346 if (p->uffd_mode) {
0347 char c;
0348
0349
0350 for (i = 0; i < nr_vcpus; i++) {
0351 r = write(pipefds[i * 2 + 1], &c, 1);
0352 TEST_ASSERT(r == 1, "Unable to write to pipefd");
0353
0354 pthread_join(uffd_handler_threads[i], NULL);
0355 }
0356 }
0357
0358 pr_info("Total guest execution time: %ld.%.9lds\n",
0359 ts_diff.tv_sec, ts_diff.tv_nsec);
0360 pr_info("Overall demand paging rate: %f pgs/sec\n",
0361 perf_test_args.vcpu_args[0].pages * nr_vcpus /
0362 ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
0363
0364 perf_test_destroy_vm(vm);
0365
0366 free(guest_data_prototype);
0367 if (p->uffd_mode) {
0368 free(uffd_handler_threads);
0369 free(uffd_args);
0370 free(pipefds);
0371 }
0372 }
0373
/*
 * Print the usage text for this test to stdout and terminate the process
 * with exit status 0.
 *
 * @name: program name shown in the usage line.
 */
static void help(char *name)
{
	putchar('\n');
	printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
	       " [-b memory] [-s type] [-v vcpus] [-o]\n", name);
	guest_modes_help();

	/* Fixed text without conversions, so it is written out verbatim. */
	fputs(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
	      " UFFD registration mode: 'MISSING' or 'MINOR'.\n", stdout);
	fputs(" -d: add a delay in usec to the User Fault\n"
	      " FD handler to simulate demand paging\n"
	      " overheads. Ignored without -u.\n", stdout);
	fputs(" -b: specify the size of the memory region which should be\n"
	      " demand paged by each vCPU. e.g. 10M or 3G.\n"
	      " Default: 1G\n", stdout);
	backing_src_help("-s");
	fputs(" -v: specify the number of vCPUs to run.\n", stdout);
	fputs(" -o: Overlap guest memory accesses instead of partitioning\n"
	      " them into a separate region of memory for each vCPU.\n", stdout);
	putchar('\n');

	exit(0);
}
0395
0396 int main(int argc, char *argv[])
0397 {
0398 int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
0399 struct test_params p = {
0400 .src_type = DEFAULT_VM_MEM_SRC,
0401 .partition_vcpu_memory_access = true,
0402 };
0403 int opt;
0404
0405 guest_modes_append_default();
0406
0407 while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) {
0408 switch (opt) {
0409 case 'm':
0410 guest_modes_cmdline(optarg);
0411 break;
0412 case 'u':
0413 if (!strcmp("MISSING", optarg))
0414 p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
0415 else if (!strcmp("MINOR", optarg))
0416 p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR;
0417 TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'.");
0418 break;
0419 case 'd':
0420 p.uffd_delay = strtoul(optarg, NULL, 0);
0421 TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported.");
0422 break;
0423 case 'b':
0424 guest_percpu_mem_size = parse_size(optarg);
0425 break;
0426 case 's':
0427 p.src_type = parse_backing_src_type(optarg);
0428 break;
0429 case 'v':
0430 nr_vcpus = atoi(optarg);
0431 TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
0432 "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
0433 break;
0434 case 'o':
0435 p.partition_vcpu_memory_access = false;
0436 break;
0437 case 'h':
0438 default:
0439 help(argv[0]);
0440 break;
0441 }
0442 }
0443
0444 if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
0445 !backing_src_is_shared(p.src_type)) {
0446 TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
0447 }
0448
0449 for_each_guest_mode(run_test, &p);
0450
0451 return 0;
0452 }
0453
0454 #else
0455
0456 #warning "missing __NR_userfaultfd definition"
0457
0458 int main(void)
0459 {
0460 print_skip("__NR_userfaultfd must be present for userfaultfd test");
0461 return KSFT_SKIP;
0462 }
0463
0464 #endif