// SPDX-License-Identifier: GPL-2.0
/*
 * A memslot-related performance benchmark.
 *
 * Copyright (C) 2021 Oracle and/or its affiliates.
 *
 * Basic guest setup / host vCPU thread code lifted from set_memory_region_test.
 */
#include <pthread.h>
#include <sched.h>
#include <semaphore.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>

#include <linux/compiler.h>

#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>

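/*
 * Layout of the test memory (summarizing the defines below): it starts at
 * MEM_GPA, its first 4 KiB page (MEM_SYNC_GPA) holds the host <-> guest
 * sync area, and the remaining MEM_TEST_SIZE bytes starting at
 * MEM_TEST_GPA are the area the guest code actually reads and writes.
 */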
#define MEM_SIZE        ((512U << 20) + 4096)
#define MEM_SIZE_PAGES      (MEM_SIZE / 4096)
#define MEM_GPA     0x10000000UL
#define MEM_AUX_GPA     MEM_GPA
#define MEM_SYNC_GPA        MEM_AUX_GPA
#define MEM_TEST_GPA        (MEM_AUX_GPA + 4096)
#define MEM_TEST_SIZE       (MEM_SIZE - 4096)
static_assert(MEM_SIZE % 4096 == 0, "invalid mem size");
static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size");

/*
 * 32 MiB is the maximum size that gets well over 100 iterations on 509 slots.
 * Considering that each slot needs to have at least one page, up to
 * 8194 slots in use can then be tested (although with slightly
 * limited resolution).
 */
#define MEM_SIZE_MAP        ((32U << 20) + 4096)
#define MEM_SIZE_MAP_PAGES  (MEM_SIZE_MAP / 4096)
#define MEM_TEST_MAP_SIZE   (MEM_SIZE_MAP - 4096)
#define MEM_TEST_MAP_SIZE_PAGES (MEM_TEST_MAP_SIZE / 4096)
static_assert(MEM_SIZE_MAP % 4096 == 0, "invalid map test region size");
static_assert(MEM_TEST_MAP_SIZE % 4096 == 0, "invalid map test region size");
static_assert(MEM_TEST_MAP_SIZE_PAGES % 2 == 0, "invalid map test region size");
static_assert(MEM_TEST_MAP_SIZE_PAGES > 2, "invalid map test region size");

/*
 * 128 MiB is the minimum size that fills 32k slots with at least one page
 * each while at the same time getting 100+ iterations in such a test.
 */
#define MEM_TEST_UNMAP_SIZE     (128U << 20)
#define MEM_TEST_UNMAP_SIZE_PAGES   (MEM_TEST_UNMAP_SIZE / 4096)
/* 2 MiB chunk size like a typical huge page */
#define MEM_TEST_UNMAP_CHUNK_PAGES  (2U << (20 - 12))
static_assert(MEM_TEST_UNMAP_SIZE <= MEM_TEST_SIZE,
          "invalid unmap test region size");
static_assert(MEM_TEST_UNMAP_SIZE % 4096 == 0,
          "invalid unmap test region size");
static_assert(MEM_TEST_UNMAP_SIZE_PAGES %
          (2 * MEM_TEST_UNMAP_CHUNK_PAGES) == 0,
          "invalid unmap test region size");

/*
 * For the move active test the middle of the test area is placed on
 * a memslot boundary: half lies in the memslot being moved, half in
 * other memslot(s).
 *
 * When running this test with 32k memslots (32764, really), each memslot
 * contains 4 pages.
 * The last one additionally contains the remaining 21 pages of memory,
 * for a total size of 25 pages.
 * Hence, the maximum size here is 50 pages.
 */
#define MEM_TEST_MOVE_SIZE_PAGES    (50)
#define MEM_TEST_MOVE_SIZE      (MEM_TEST_MOVE_SIZE_PAGES * 4096)
#define MEM_TEST_MOVE_GPA_DEST      (MEM_GPA + MEM_SIZE)
static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
          "invalid move test region size");

#define MEM_TEST_VAL_1 0x1122334455667788
#define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00

struct vm_data {
    struct kvm_vm *vm;
    struct kvm_vcpu *vcpu;
    pthread_t vcpu_thread;
    uint32_t nslots;
    uint64_t npages;
    uint64_t pages_per_slot;
    void **hva_slots;
    bool mmio_ok;
    uint64_t mmio_gpa_min;
    uint64_t mmio_gpa_max;
};

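/*
 * Shared between the host and the guest through the page at MEM_SYNC_GPA:
 * start_flag is set by the host to let the guest begin its test loop,
 * exit_flag is set by the host to make the guest finish, and sync_flag
 * implements the host <-> guest rendezvous: the host sets it and
 * busy-waits until the guest atomically clears it at its next sync point.
 * move_area_ptr holds the base GPA of the area written by the memslot
 * move test guest code.
 */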
struct sync_area {
    atomic_bool start_flag;
    atomic_bool exit_flag;
    atomic_bool sync_flag;
    void *move_area_ptr;
};

/*
 * Technically, we also need the atomic bool to be address-free, which
 * is recommended, but not strictly required, by C11 for lockless
 * implementations.
 * However, in practice both GCC and Clang fulfill this requirement on
 * all KVM-supported platforms.
 */
static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");

static sem_t vcpu_ready;

static bool map_unmap_verify;

static bool verbose;
#define pr_info_v(...)              \
    do {                    \
        if (verbose)            \
            pr_info(__VA_ARGS__);   \
    } while (0)

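/*
 * Guest writes to GPAs that are not currently backed by any memslot
 * (which happens while the move tests relocate the last test memslot)
 * reach the host as KVM_EXIT_MMIO; check that such an exit was expected
 * and that its address falls within the announced GPA window.
 */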
static void check_mmio_access(struct vm_data *data, struct kvm_run *run)
{
    TEST_ASSERT(data->mmio_ok, "Unexpected mmio exit");
    TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
    TEST_ASSERT(run->mmio.len == 8,
            "Unexpected exit mmio size = %u", run->mmio.len);
    TEST_ASSERT(run->mmio.phys_addr >= data->mmio_gpa_min &&
            run->mmio.phys_addr <= data->mmio_gpa_max,
            "Unexpected exit mmio address = 0x%llx",
            run->mmio.phys_addr);
}

static void *vcpu_worker(void *__data)
{
    struct vm_data *data = __data;
    struct kvm_vcpu *vcpu = data->vcpu;
    struct kvm_run *run = vcpu->run;
    struct ucall uc;

    while (1) {
        vcpu_run(vcpu);

        switch (get_ucall(vcpu, &uc)) {
        case UCALL_SYNC:
            TEST_ASSERT(uc.args[1] == 0,
                "Unexpected sync ucall, got %lx",
                (ulong)uc.args[1]);
            sem_post(&vcpu_ready);
            continue;
        case UCALL_NONE:
            if (run->exit_reason == KVM_EXIT_MMIO)
                check_mmio_access(data, run);
            else
                goto done;
            break;
        case UCALL_ABORT:
            REPORT_GUEST_ASSERT_1(uc, "val = %lu");
            break;
        case UCALL_DONE:
            goto done;
        default:
            TEST_FAIL("Unknown ucall %lu", uc.cmd);
        }
    }

done:
    return NULL;
}

static void wait_for_vcpu(void)
{
    struct timespec ts;

    TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
            "clock_gettime() failed: %d\n", errno);

    ts.tv_sec += 2;
    TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
            "sem_timedwait() failed: %d\n", errno);
}

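/*
 * Translate a test-memory GPA to its backing HVA.  The test memslots cover
 * GPAs contiguously from MEM_GPA, with pages_per_slot pages in every slot
 * except the last one, which also receives the remainder, so the slot index
 * and the offset within it follow directly from the GPA.  If @rempages is
 * not NULL it returns the number of pages from @gpa (which then has to be
 * page aligned) to the end of its memslot.
 */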
static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
{
    uint64_t gpage, pgoffs;
    uint32_t slot, slotoffs;
    void *base;

    TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
    TEST_ASSERT(gpa < MEM_GPA + data->npages * 4096,
            "Too high gpa to translate");
    gpa -= MEM_GPA;

    gpage = gpa / 4096;
    pgoffs = gpa % 4096;
    slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
    slotoffs = gpage - (slot * data->pages_per_slot);

    if (rempages) {
        uint64_t slotpages;

        if (slot == data->nslots - 1)
            slotpages = data->npages - slot * data->pages_per_slot;
        else
            slotpages = data->pages_per_slot;

        TEST_ASSERT(!pgoffs,
                "Asking for remaining pages in slot but gpa not page aligned");
        *rempages = slotpages - slotoffs;
    }

    base = data->hva_slots[slot];
    return (uint8_t *)base + slotoffs * 4096 + pgoffs;
}

static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
{
    TEST_ASSERT(slot < data->nslots, "Too high slot number");

    return MEM_GPA + slot * data->pages_per_slot * 4096;
}

static struct vm_data *alloc_vm(void)
{
    struct vm_data *data;

    data = malloc(sizeof(*data));
    TEST_ASSERT(data, "malloc(vmdata) failed");

    data->vm = NULL;
    data->vcpu = NULL;
    data->hva_slots = NULL;

    return data;
}

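/*
 * Set up the test VM.  The test memslots are added starting at slot ID 1
 * (slot 0 already holds the memory set up by __vm_create_with_one_vcpu()),
 * so the mempages of test memory are spread over data->nslots =
 * max_mem_slots - 1 slots, with the last slot receiving any remainder.
 * Returns false, passing back a suggested slot count cap in *maxslots,
 * if there are fewer test pages than test memslots.
 */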
static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
               void *guest_code, uint64_t mempages,
               struct timespec *slot_runtime)
{
    uint32_t max_mem_slots;
    uint64_t rempages;
    uint64_t guest_addr;
    uint32_t slot;
    struct timespec tstart;
    struct sync_area *sync;

    max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
    TEST_ASSERT(max_mem_slots > 1,
            "KVM_CAP_NR_MEMSLOTS should be greater than 1");
    TEST_ASSERT(nslots > 1 || nslots == -1,
            "Slot count cap should be greater than 1");
    if (nslots != -1)
        max_mem_slots = min(max_mem_slots, (uint32_t)nslots);
    pr_info_v("Allowed number of memory slots: %"PRIu32"\n", max_mem_slots);

    TEST_ASSERT(mempages > 1,
            "Can't test without any memory");

    data->npages = mempages;
    data->nslots = max_mem_slots - 1;
    data->pages_per_slot = mempages / data->nslots;
    if (!data->pages_per_slot) {
        *maxslots = mempages + 1;
        return false;
    }

    rempages = mempages % data->nslots;
    data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
    TEST_ASSERT(data->hva_slots, "malloc() fail");

    data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code);
    ucall_init(data->vm, NULL);

    pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
        max_mem_slots - 1, data->pages_per_slot, rempages);

    clock_gettime(CLOCK_MONOTONIC, &tstart);
    for (slot = 1, guest_addr = MEM_GPA; slot < max_mem_slots; slot++) {
        uint64_t npages;

        npages = data->pages_per_slot;
        if (slot == max_mem_slots - 1)
            npages += rempages;

        vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
                        guest_addr, slot, npages,
                        0);
        guest_addr += npages * 4096;
    }
    *slot_runtime = timespec_elapsed(tstart);

    for (slot = 0, guest_addr = MEM_GPA; slot < max_mem_slots - 1; slot++) {
        uint64_t npages;
        uint64_t gpa;

        npages = data->pages_per_slot;
        if (slot == max_mem_slots - 2)
            npages += rempages;

        gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr,
                     slot + 1);
        TEST_ASSERT(gpa == guest_addr,
                "vm_phy_pages_alloc() failed\n");

        data->hva_slots[slot] = addr_gpa2hva(data->vm, guest_addr);
        memset(data->hva_slots[slot], 0, npages * 4096);

        guest_addr += npages * 4096;
    }

    virt_map(data->vm, MEM_GPA, MEM_GPA, mempages);

    sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
    atomic_init(&sync->start_flag, false);
    atomic_init(&sync->exit_flag, false);
    atomic_init(&sync->sync_flag, false);

    data->mmio_ok = false;

    return true;
}

static void launch_vm(struct vm_data *data)
{
    pr_info_v("Launching the test VM\n");

    pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data);

    /* Ensure the guest thread is spun up. */
    wait_for_vcpu();
}

static void free_vm(struct vm_data *data)
{
    kvm_vm_free(data->vm);
    free(data->hva_slots);
    free(data);
}

static void wait_guest_exit(struct vm_data *data)
{
    pthread_join(data->vcpu_thread, NULL);
}

static void let_guest_run(struct sync_area *sync)
{
    atomic_store_explicit(&sync->start_flag, true, memory_order_release);
}

static void guest_spin_until_start(void)
{
    struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

    while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire))
        ;
}

static void make_guest_exit(struct sync_area *sync)
{
    atomic_store_explicit(&sync->exit_flag, true, memory_order_release);
}

static bool _guest_should_exit(void)
{
    struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

    return atomic_load_explicit(&sync->exit_flag, memory_order_acquire);
}

#define guest_should_exit() unlikely(_guest_should_exit())

/*
 * noinline so we can easily see how much time the host spends waiting
 * for the guest.
 * For the same reason use alarm() instead of polling clock_gettime()
 * to implement a wait timeout.
 */
static noinline void host_perform_sync(struct sync_area *sync)
{
    alarm(2);

    atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
    while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
        ;

    alarm(0);
}

static bool guest_perform_sync(void)
{
    struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
    bool expected;

    do {
        if (guest_should_exit())
            return false;

        expected = true;
    } while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag,
                            &expected, false,
                            memory_order_acq_rel,
                            memory_order_relaxed));

    return true;
}

static void guest_code_test_memslot_move(void)
{
    struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
    uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);

    GUEST_SYNC(0);

    guest_spin_until_start();

    while (!guest_should_exit()) {
        uintptr_t ptr;

        for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
             ptr += 4096)
            *(uint64_t *)ptr = MEM_TEST_VAL_1;

        /*
         * No host sync here: the MMIO exits are so expensive that the
         * host would spend most of its time waiting for the guest, so
         * instead of measuring memslot move performance we would end
         * up measuring the performance and likelihood of MMIO exits.
         */
    }

    GUEST_DONE();
}

static void guest_code_test_memslot_map(void)
{
    struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

    GUEST_SYNC(0);

    guest_spin_until_start();

    while (1) {
        uintptr_t ptr;

        for (ptr = MEM_TEST_GPA;
             ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += 4096)
            *(uint64_t *)ptr = MEM_TEST_VAL_1;

        if (!guest_perform_sync())
            break;

        for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
             ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += 4096)
            *(uint64_t *)ptr = MEM_TEST_VAL_2;

        if (!guest_perform_sync())
            break;
    }

    GUEST_DONE();
}

static void guest_code_test_memslot_unmap(void)
{
    struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

    GUEST_SYNC(0);

    guest_spin_until_start();

    while (1) {
        uintptr_t ptr = MEM_TEST_GPA;

        /*
         * We can afford to access (map) just a small number of pages
         * per host sync; otherwise the host will spend a significant
         * amount of its time waiting for the guest (instead of doing
         * unmap operations), which would effectively turn this test
         * into a map performance test.
         *
         * Just access a single page to be on the safe side.
         */
        *(uint64_t *)ptr = MEM_TEST_VAL_1;

        if (!guest_perform_sync())
            break;

        ptr += MEM_TEST_UNMAP_SIZE / 2;
        *(uint64_t *)ptr = MEM_TEST_VAL_2;

        if (!guest_perform_sync())
            break;
    }

    GUEST_DONE();
}

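/*
 * The RW test interleaves accesses within each test page: the guest writes
 * MEM_TEST_VAL_1 at offset 0 of every page and then expects to find
 * MEM_TEST_VAL_2, written by the host at offset 2048, in the second half
 * of the same pages (see test_memslot_rw_loop()).
 */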
static void guest_code_test_memslot_rw(void)
{
    GUEST_SYNC(0);

    guest_spin_until_start();

    while (1) {
        uintptr_t ptr;

        for (ptr = MEM_TEST_GPA;
             ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096)
            *(uint64_t *)ptr = MEM_TEST_VAL_1;

        if (!guest_perform_sync())
            break;

        for (ptr = MEM_TEST_GPA + 4096 / 2;
             ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) {
            uint64_t val = *(uint64_t *)ptr;

            GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val);
            *(uint64_t *)ptr = 0;
        }

        if (!guest_perform_sync())
            break;
    }

    GUEST_DONE();
}

static bool test_memslot_move_prepare(struct vm_data *data,
                      struct sync_area *sync,
                      uint64_t *maxslots, bool isactive)
{
    uint64_t movesrcgpa, movetestgpa;

    movesrcgpa = vm_slot2gpa(data, data->nslots - 1);

    if (isactive) {
        uint64_t lastpages;

        vm_gpa2hva(data, movesrcgpa, &lastpages);
        if (lastpages < MEM_TEST_MOVE_SIZE_PAGES / 2) {
            *maxslots = 0;
            return false;
        }
    }

    movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1));
    sync->move_area_ptr = (void *)movetestgpa;

    if (isactive) {
        data->mmio_ok = true;
        data->mmio_gpa_min = movesrcgpa;
        data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1;
    }

    return true;
}

static bool test_memslot_move_prepare_active(struct vm_data *data,
                         struct sync_area *sync,
                         uint64_t *maxslots)
{
    return test_memslot_move_prepare(data, sync, maxslots, true);
}

static bool test_memslot_move_prepare_inactive(struct vm_data *data,
                           struct sync_area *sync,
                           uint64_t *maxslots)
{
    return test_memslot_move_prepare(data, sync, maxslots, false);
}

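/*
 * One iteration of the move benchmark: the last test memslot (memslot ID
 * data->nslots - 1 + 1, since test slots start at ID 1) is moved to just
 * past the end of the test memory (MEM_TEST_MOVE_GPA_DEST) and then back
 * to its original GPA.
 */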
static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
{
    uint64_t movesrcgpa;

    movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
    vm_mem_region_move(data->vm, data->nslots - 1 + 1,
               MEM_TEST_MOVE_GPA_DEST);
    vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa);
}

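/*
 * Drop the host-side backing of @count test pages starting at page offset
 * @offsp by madvise(MADV_DONTNEED)'ing the corresponding HVA ranges,
 * working through the area one memslot-bounded chunk at a time.
 */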
static void test_memslot_do_unmap(struct vm_data *data,
                  uint64_t offsp, uint64_t count)
{
    uint64_t gpa, ctr;

    for (gpa = MEM_TEST_GPA + offsp * 4096, ctr = 0; ctr < count; ) {
        uint64_t npages;
        void *hva;
        int ret;

        hva = vm_gpa2hva(data, gpa, &npages);
        TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa);
        npages = min(npages, count - ctr);
        ret = madvise(hva, npages * 4096, MADV_DONTNEED);
        TEST_ASSERT(!ret,
                "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64,
                hva, gpa);
        ctr += npages;
        gpa += npages * 4096;
    }
    TEST_ASSERT(ctr == count,
            "madvise(MADV_DONTNEED) should exactly cover all of the requested area");
}

static void test_memslot_map_unmap_check(struct vm_data *data,
                     uint64_t offsp, uint64_t valexp)
{
    uint64_t gpa;
    uint64_t *val;

    if (!map_unmap_verify)
        return;

    gpa = MEM_TEST_GPA + offsp * 4096;
    val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
    TEST_ASSERT(*val == valexp,
            "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")",
            *val, valexp, gpa);
    *val = 0;
}

static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
{
    /*
     * Unmap the second half of the test area while guest writes to (maps)
     * the first half.
     */
    test_memslot_do_unmap(data, MEM_TEST_MAP_SIZE_PAGES / 2,
                  MEM_TEST_MAP_SIZE_PAGES / 2);

    /*
     * Wait for the guest to finish writing the first half of the test
     * area, verify the written value on the first and the last page of
     * this area and then unmap it.
     * Meanwhile, the guest is writing to (mapping) the second half of
     * the test area.
     */
    host_perform_sync(sync);
    test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
    test_memslot_map_unmap_check(data,
                     MEM_TEST_MAP_SIZE_PAGES / 2 - 1,
                     MEM_TEST_VAL_1);
    test_memslot_do_unmap(data, 0, MEM_TEST_MAP_SIZE_PAGES / 2);

    /*
     * Wait for the guest to finish writing the second half of the test
     * area and verify the written value on the first and the last page
     * of this area.
     * The area will be unmapped at the beginning of the next loop
     * iteration.
     * Meanwhile, the guest is writing to (mapping) the first half of
     * the test area.
     */
    host_perform_sync(sync);
    test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES / 2,
                     MEM_TEST_VAL_2);
    test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES - 1,
                     MEM_TEST_VAL_2);
}

static void test_memslot_unmap_loop_common(struct vm_data *data,
                       struct sync_area *sync,
                       uint64_t chunk)
{
    uint64_t ctr;

    /*
     * Wait for the guest to finish mapping page(s) in the first half
     * of the test area, verify the written value and then perform unmap
     * of this area.
     * Meanwhile, the guest is writing to (mapping) page(s) in the second
     * half of the test area.
     */
    host_perform_sync(sync);
    test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
    for (ctr = 0; ctr < MEM_TEST_UNMAP_SIZE_PAGES / 2; ctr += chunk)
        test_memslot_do_unmap(data, ctr, chunk);

    /* Likewise, but for the opposite host / guest areas */
    host_perform_sync(sync);
    test_memslot_map_unmap_check(data, MEM_TEST_UNMAP_SIZE_PAGES / 2,
                     MEM_TEST_VAL_2);
    for (ctr = MEM_TEST_UNMAP_SIZE_PAGES / 2;
         ctr < MEM_TEST_UNMAP_SIZE_PAGES; ctr += chunk)
        test_memslot_do_unmap(data, ctr, chunk);
}

static void test_memslot_unmap_loop(struct vm_data *data,
                    struct sync_area *sync)
{
    test_memslot_unmap_loop_common(data, sync, 1);
}

static void test_memslot_unmap_loop_chunked(struct vm_data *data,
                        struct sync_area *sync)
{
    test_memslot_unmap_loop_common(data, sync, MEM_TEST_UNMAP_CHUNK_PAGES);
}

static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
{
    uint64_t gptr;

    for (gptr = MEM_TEST_GPA + 4096 / 2;
         gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096)
        *(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;

    host_perform_sync(sync);

    for (gptr = MEM_TEST_GPA;
         gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) {
        uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
        uint64_t val = *vptr;

        TEST_ASSERT(val == MEM_TEST_VAL_1,
                "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")",
                val, gptr);
        *vptr = 0;
    }

    host_perform_sync(sync);
}

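/*
 * Per-test description: mem_size is the test memory size in pages despite
 * the name (0 selects the default MEM_SIZE_PAGES), prepare is an optional
 * host-side setup hook run before the vCPU is launched, and loop performs
 * a single host-side benchmark iteration, repeated until the per-test time
 * limit expires.
 */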
struct test_data {
    const char *name;
    uint64_t mem_size;
    void (*guest_code)(void);
    bool (*prepare)(struct vm_data *data, struct sync_area *sync,
            uint64_t *maxslots);
    void (*loop)(struct vm_data *data, struct sync_area *sync);
};

static bool test_execute(int nslots, uint64_t *maxslots,
             unsigned int maxtime,
             const struct test_data *tdata,
             uint64_t *nloops,
             struct timespec *slot_runtime,
             struct timespec *guest_runtime)
{
    uint64_t mem_size = tdata->mem_size ? : MEM_SIZE_PAGES;
    struct vm_data *data;
    struct sync_area *sync;
    struct timespec tstart;
    bool ret = true;

    data = alloc_vm();
    if (!prepare_vm(data, nslots, maxslots, tdata->guest_code,
            mem_size, slot_runtime)) {
        ret = false;
        goto exit_free;
    }

    sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);

    if (tdata->prepare &&
        !tdata->prepare(data, sync, maxslots)) {
        ret = false;
        goto exit_free;
    }

    launch_vm(data);

    clock_gettime(CLOCK_MONOTONIC, &tstart);
    let_guest_run(sync);

    while (1) {
        *guest_runtime = timespec_elapsed(tstart);
        if (guest_runtime->tv_sec >= maxtime)
            break;

        tdata->loop(data, sync);

        (*nloops)++;
    }

    make_guest_exit(sync);
    wait_guest_exit(data);

exit_free:
    free_vm(data);

    return ret;
}

static const struct test_data tests[] = {
    {
        .name = "map",
        .mem_size = MEM_SIZE_MAP_PAGES,
        .guest_code = guest_code_test_memslot_map,
        .loop = test_memslot_map_loop,
    },
    {
        .name = "unmap",
        .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
        .guest_code = guest_code_test_memslot_unmap,
        .loop = test_memslot_unmap_loop,
    },
    {
        .name = "unmap chunked",
        .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
        .guest_code = guest_code_test_memslot_unmap,
        .loop = test_memslot_unmap_loop_chunked,
    },
    {
        .name = "move active area",
        .guest_code = guest_code_test_memslot_move,
        .prepare = test_memslot_move_prepare_active,
        .loop = test_memslot_move_loop,
    },
    {
        .name = "move inactive area",
        .guest_code = guest_code_test_memslot_move,
        .prepare = test_memslot_move_prepare_inactive,
        .loop = test_memslot_move_loop,
    },
    {
        .name = "RW",
        .guest_code = guest_code_test_memslot_rw,
        .loop = test_memslot_rw_loop
    },
};

#define NTESTS ARRAY_SIZE(tests)

struct test_args {
    int tfirst;
    int tlast;
    int nslots;
    int seconds;
    int runs;
};

static void help(char *name, struct test_args *targs)
{
    int ctr;

    pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n",
        name);
    pr_info(" -h: print this help screen.\n");
    pr_info(" -v: enable verbose mode (not for benchmarking).\n");
    pr_info(" -d: enable extra debug checks.\n");
    pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
        targs->nslots);
    pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
        targs->tfirst, NTESTS - 1);
    pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n",
        targs->tlast, NTESTS - 1);
    pr_info(" -l: specify the test length in seconds (currently: %i)\n",
        targs->seconds);
    pr_info(" -r: specify the number of runs per test (currently: %i)\n",
        targs->runs);

    pr_info("\nAvailable tests:\n");
    for (ctr = 0; ctr < NTESTS; ctr++)
        pr_info("%d: %s\n", ctr, tests[ctr].name);
}

static bool parse_args(int argc, char *argv[],
               struct test_args *targs)
{
    int opt;

    while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) {
        switch (opt) {
        case 'h':
        default:
            help(argv[0], targs);
            return false;
        case 'v':
            verbose = true;
            break;
        case 'd':
            map_unmap_verify = true;
            break;
        case 's':
            targs->nslots = atoi(optarg);
            if (targs->nslots <= 0 && targs->nslots != -1) {
                pr_info("Slot count cap has to be positive or -1 for no cap\n");
                return false;
            }
            break;
        case 'f':
            targs->tfirst = atoi(optarg);
            if (targs->tfirst < 0) {
                pr_info("First test to run has to be non-negative\n");
                return false;
            }
            break;
        case 'e':
            targs->tlast = atoi(optarg);
            if (targs->tlast < 0 || targs->tlast >= NTESTS) {
                pr_info("Last test to run has to be non-negative and less than %zu\n",
                    NTESTS);
                return false;
            }
            break;
        case 'l':
            targs->seconds = atoi(optarg);
            if (targs->seconds < 0) {
                pr_info("Test length in seconds has to be non-negative\n");
                return false;
            }
            break;
        case 'r':
            targs->runs = atoi(optarg);
            if (targs->runs <= 0) {
                pr_info("Runs per test has to be positive\n");
                return false;
            }
            break;
        }
    }

    if (optind < argc) {
        help(argv[0], targs);
        return false;
    }

    if (targs->tfirst > targs->tlast) {
        pr_info("First test to run cannot be greater than the last test to run\n");
        return false;
    }

    return true;
}

struct test_result {
    struct timespec slot_runtime, guest_runtime, iter_runtime;
    int64_t slottimens, runtimens;
    uint64_t nloops;
};

static bool test_loop(const struct test_data *data,
              const struct test_args *targs,
              struct test_result *rbestslottime,
              struct test_result *rbestruntime)
{
    uint64_t maxslots;
    struct test_result result;

    result.nloops = 0;
    if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
              &result.nloops,
              &result.slot_runtime, &result.guest_runtime)) {
        if (maxslots)
            pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n",
                maxslots);
        else
            pr_info("Memslot count may be too high for this test, try adjusting the cap\n");

        return false;
    }

    pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n",
        result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec,
        result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec);
    if (!result.nloops) {
        pr_info("No full loops done - too short test time or system too loaded?\n");
        return true;
    }

    result.iter_runtime = timespec_div(result.guest_runtime,
                       result.nloops);
    pr_info("Done %"PRIu64" iterations, avg %ld.%.9lds each\n",
        result.nloops,
        result.iter_runtime.tv_sec,
        result.iter_runtime.tv_nsec);
    result.slottimens = timespec_to_ns(result.slot_runtime);
    result.runtimens = timespec_to_ns(result.iter_runtime);

    /*
     * Only rank the slot setup time for tests using the whole test memory
     * area so they are comparable
     */
    if (!data->mem_size &&
        (!rbestslottime->slottimens ||
         result.slottimens < rbestslottime->slottimens))
        *rbestslottime = result;
    if (!rbestruntime->runtimens ||
        result.runtimens < rbestruntime->runtimens)
        *rbestruntime = result;

    return true;
}

int main(int argc, char *argv[])
{
    struct test_args targs = {
        .tfirst = 0,
        .tlast = NTESTS - 1,
        .nslots = -1,
        .seconds = 5,
        .runs = 1,
    };
    struct test_result rbestslottime;
    int tctr;

    /* Tell stdout not to buffer its content */
    setbuf(stdout, NULL);

    if (!parse_args(argc, argv, &targs))
        return -1;

    rbestslottime.slottimens = 0;
    for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
        const struct test_data *data = &tests[tctr];
        unsigned int runctr;
        struct test_result rbestruntime;

        if (tctr > targs.tfirst)
            pr_info("\n");

        pr_info("Testing %s performance with %i runs, %d seconds each\n",
            data->name, targs.runs, targs.seconds);

        rbestruntime.runtimens = 0;
        for (runctr = 0; runctr < targs.runs; runctr++)
            if (!test_loop(data, &targs,
                       &rbestslottime, &rbestruntime))
                break;

        if (rbestruntime.runtimens)
            pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n",
                rbestruntime.iter_runtime.tv_sec,
                rbestruntime.iter_runtime.tv_nsec,
                rbestruntime.nloops);
    }

    if (rbestslottime.slottimens)
        pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n",
            rbestslottime.slot_runtime.tv_sec,
            rbestslottime.slot_runtime.tv_nsec);

    return 0;
}