// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
#include <semaphore.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/compiler.h>

#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>

/*
 * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a
 * 2MB sized and aligned region so that the initial region corresponds to
 * exactly one large page.
 */
#define MEM_REGION_SIZE     0x200000

#ifdef __x86_64__
/*
 * Somewhat arbitrary location and slot, intended to not overlap anything.
 */
#define MEM_REGION_GPA      0xc0000000
#define MEM_REGION_SLOT     10

static const uint64_t MMIO_VAL = 0xbeefull;

extern const uint64_t final_rip_start;
extern const uint64_t final_rip_end;

static sem_t vcpu_ready;

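/*
 * Spin in the guest until the value at MEM_REGION_GPA changes from
 * @spin_val, then sync with the host.  While the memslot is deleted or
 * invalid, reads are handled as MMIO by vcpu_worker() and return MMIO_VAL,
 * which also breaks the loop.
 */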
static inline uint64_t guest_spin_on_val(uint64_t spin_val)
{
    uint64_t val;

    do {
        val = READ_ONCE(*((uint64_t *)MEM_REGION_GPA));
    } while (val == spin_val);

    GUEST_SYNC(0);
    return val;
}

static void *vcpu_worker(void *data)
{
    struct kvm_vcpu *vcpu = data;
    struct kvm_run *run = vcpu->run;
    struct ucall uc;
    uint64_t cmd;

    /*
     * Loop until the guest is done.  Re-enter the guest on all MMIO exits,
     * which will occur if the guest attempts to access a memslot after it
     * has been deleted or while it is being moved.
     */
    while (1) {
        vcpu_run(vcpu);

        if (run->exit_reason == KVM_EXIT_IO) {
            cmd = get_ucall(vcpu, &uc);
            if (cmd != UCALL_SYNC)
                break;

            sem_post(&vcpu_ready);
            continue;
        }

        if (run->exit_reason != KVM_EXIT_MMIO)
            break;

        TEST_ASSERT(!run->mmio.is_write, "Unexpected exit mmio write");
        TEST_ASSERT(run->mmio.len == 8,
                "Unexpected exit mmio size = %u", run->mmio.len);

        TEST_ASSERT(run->mmio.phys_addr == MEM_REGION_GPA,
                "Unexpected exit mmio address = 0x%llx",
                run->mmio.phys_addr);
        memcpy(run->mmio.data, &MMIO_VAL, 8);
    }

    if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
        REPORT_GUEST_ASSERT_1(uc, "val = %lu");

    return NULL;
}

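/*
 * Wait (with a two second timeout) for the vCPU thread to signal, via
 * UCALL_SYNC, that it has entered the guest.
 */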
static void wait_for_vcpu(void)
{
    struct timespec ts;

    TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
            "clock_gettime() failed: %d\n", errno);

    ts.tv_sec += 2;
    TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
            "sem_timedwait() failed: %d\n", errno);

    /* Wait for the vCPU thread to reenter the guest. */
    usleep(100000);
}

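/*
 * Create a VM with a single vCPU, add the THP-backed test memslot, zero its
 * first two pages, and spin up the vCPU worker thread.
 */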
static struct kvm_vm *spawn_vm(struct kvm_vcpu **vcpu, pthread_t *vcpu_thread,
                   void *guest_code)
{
    struct kvm_vm *vm;
    uint64_t *hva;
    uint64_t gpa;

    vm = vm_create_with_one_vcpu(vcpu, guest_code);

    vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
                    MEM_REGION_GPA, MEM_REGION_SLOT,
                    MEM_REGION_SIZE / getpagesize(), 0);

    /*
     * Allocate and map two pages so that the GPA accessed by guest_code()
     * stays valid across the memslot move.
     */
    gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
    TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");

    virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2);

    /* Ditto for the host mapping so that both pages can be zeroed. */
    hva = addr_gpa2hva(vm, MEM_REGION_GPA);
    memset(hva, 0, 2 * 4096);

    pthread_create(vcpu_thread, NULL, vcpu_worker, *vcpu);

    /* Ensure the guest thread is spun up. */
    wait_for_vcpu();

    return vm;
}

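/*
 * Guest half of the MOVE test: the host writes "2" and then "1" into the
 * backing memory while misaligning and re-aligning the memslot; the guest
 * may also observe MMIO_VAL whenever a read hits the window where the slot
 * is invalid.
 */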
static void guest_code_move_memory_region(void)
{
    uint64_t val;

    GUEST_SYNC(0);

    /*
     * Spin until the memory region starts getting moved to a
     * misaligned address.
     * Every region move may or may not trigger MMIO, as the
     * window where the memslot is invalid is usually quite small.
     */
    val = guest_spin_on_val(0);
    GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);

    /* Spin until the misaligning memory region move completes. */
    val = guest_spin_on_val(MMIO_VAL);
    GUEST_ASSERT_1(val == 1 || val == 0, val);

    /* Spin until the memory region starts to get re-aligned. */
    val = guest_spin_on_val(0);
    GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);

    /* Spin until the re-aligning memory region move completes. */
    val = guest_spin_on_val(MMIO_VAL);
    GUEST_ASSERT_1(val == 1, val);

    GUEST_DONE();
}

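/*
 * Host half of the MOVE test: misalign the memslot's base GPA by one page,
 * then restore it, updating the backing memory around each move.
 */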
static void test_move_memory_region(void)
{
    pthread_t vcpu_thread;
    struct kvm_vcpu *vcpu;
    struct kvm_vm *vm;
    uint64_t *hva;

    vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_move_memory_region);

    hva = addr_gpa2hva(vm, MEM_REGION_GPA);

    /*
     * Shift the region's base GPA.  The guest should not see "2" as the
     * hva->gpa translation is misaligned, i.e. the guest is accessing a
     * different host pfn.
     */
    vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA - 4096);
    WRITE_ONCE(*hva, 2);

    /*
     * The guest _might_ see an invalid memslot and trigger MMIO, but it's
     * a tiny window.  Spin and defer the sync until the memslot is
     * restored and guest behavior is once again deterministic.
     */
    usleep(100000);

    /*
     * Note, the value in memory needs to be changed *before* restoring the
     * memslot, else the guest could race the update and see "2".
     */
    WRITE_ONCE(*hva, 1);

    /* Restore the original base, the guest should see "1". */
    vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA);
    wait_for_vcpu();
    /* Deferred sync from when the memslot was misaligned (above). */
    wait_for_vcpu();

    pthread_join(vcpu_thread, NULL);

    kvm_vm_free(vm);
}

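/*
 * Guest half of the DELETE test: reads of the region alternate between the
 * backing memory ("0") and MMIO_VAL as the host deletes and recreates the
 * memslot.  The final spin never exits normally; the host tears down the
 * remaining memslots out from under it.
 */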
static void guest_code_delete_memory_region(void)
{
    uint64_t val;

    GUEST_SYNC(0);

    /* Spin until the memory region is deleted. */
    val = guest_spin_on_val(0);
    GUEST_ASSERT_1(val == MMIO_VAL, val);

    /* Spin until the memory region is recreated. */
    val = guest_spin_on_val(MMIO_VAL);
    GUEST_ASSERT_1(val == 0, val);

    /* Spin until the memory region is deleted. */
    val = guest_spin_on_val(0);
    GUEST_ASSERT_1(val == MMIO_VAL, val);

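    /*
     * Record the RIP range spanning the final spin loop in .rodata, via
     * final_rip_start/final_rip_end, so that the host can verify where the
     * vCPU was executing when the code memslot is deleted out from under it.
     */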
    asm("1:\n\t"
        ".pushsection .rodata\n\t"
        ".global final_rip_start\n\t"
        "final_rip_start: .quad 1b\n\t"
        ".popsection");

    /* Spin indefinitely (until the code memslot is deleted). */
    guest_spin_on_val(MMIO_VAL);

    asm("1:\n\t"
        ".pushsection .rodata\n\t"
        ".global final_rip_end\n\t"
        "final_rip_end: .quad 1b\n\t"
        ".popsection");

    GUEST_ASSERT_1(0, 0);
}

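/*
 * Host half of the DELETE test: delete and recreate the data memslot, then
 * delete all memslots and verify the vCPU fails in a well-defined manner.
 */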
static void test_delete_memory_region(void)
{
    pthread_t vcpu_thread;
    struct kvm_vcpu *vcpu;
    struct kvm_regs regs;
    struct kvm_run *run;
    struct kvm_vm *vm;

    vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_delete_memory_region);

    /* Delete the memory region, the guest should not die. */
    vm_mem_region_delete(vm, MEM_REGION_SLOT);
    wait_for_vcpu();

    /* Recreate the memory region.  The guest should see "0". */
    vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
                    MEM_REGION_GPA, MEM_REGION_SLOT,
                    MEM_REGION_SIZE / getpagesize(), 0);
    wait_for_vcpu();

    /* Delete the region again so that there's only one memslot left. */
    vm_mem_region_delete(vm, MEM_REGION_SLOT);
    wait_for_vcpu();

    /*
     * Delete the primary memslot.  This should cause an emulation error or
     * shutdown due to the page tables getting nuked.
     */
    vm_mem_region_delete(vm, 0);

    pthread_join(vcpu_thread, NULL);

    run = vcpu->run;

    TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN ||
            run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
            "Unexpected exit reason = %d", run->exit_reason);

    vcpu_regs_get(vcpu, &regs);

    /*
     * On AMD, after KVM_EXIT_SHUTDOWN the VMCB has been reinitialized already,
     * so the instruction pointer would point to the reset vector.
     */
    if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR)
        TEST_ASSERT(regs.rip >= final_rip_start &&
                regs.rip < final_rip_end,
                "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx\n",
                final_rip_start, final_rip_end, regs.rip);

    kvm_vm_free(vm);
}

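/* Verify KVM_RUN reports KVM_EXIT_INTERNAL_ERROR when the VM has no memslots. */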
static void test_zero_memory_regions(void)
{
    struct kvm_vcpu *vcpu;
    struct kvm_run *run;
    struct kvm_vm *vm;

    pr_info("Testing KVM_RUN with zero added memory regions\n");

    vm = vm_create_barebones();
    vcpu = __vm_vcpu_add(vm, 0);

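    /*
     * Also exercise the x86-only KVM_SET_NR_MMU_PAGES ioctl with no
     * memslots present; the limit of 64 pages is arbitrary.
     */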
    vm_ioctl(vm, KVM_SET_NR_MMU_PAGES, (void *)64ul);
    vcpu_run(vcpu);

    run = vcpu->run;
    TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
            "Unexpected exit_reason = %u\n", run->exit_reason);

    kvm_vm_free(vm);
}
#endif /* __x86_64__ */

/*
 * Test that memory slots can be added up to KVM_CAP_NR_MEMSLOTS, and that
 * any attempt to add further slots fails with EINVAL.
 */
static void test_add_max_memory_regions(void)
{
    int ret;
    struct kvm_vm *vm;
    uint32_t max_mem_slots;
    uint32_t slot;
    void *mem, *mem_aligned, *mem_extra;
    size_t alignment;

#ifdef __s390x__
    /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
    alignment = 0x100000;
#else
    alignment = 1;
#endif

    max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
    TEST_ASSERT(max_mem_slots > 0,
            "KVM_CAP_NR_MEMSLOTS should be greater than 0");
    pr_info("Allowed number of memory slots: %i\n", max_mem_slots);

    vm = vm_create_barebones();

    /* Check that memory slots can be added up to the maximum allowed */
    pr_info("Adding slots 0..%i, each memory region with %dK size\n",
        (max_mem_slots - 1), MEM_REGION_SIZE >> 10);

    mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
           PROT_READ | PROT_WRITE,
           MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
    TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
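    /* Round the mmap()'d host address up to the required alignment. */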
    mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));

    for (slot = 0; slot < max_mem_slots; slot++)
        vm_set_user_memory_region(vm, slot, 0,
                      ((uint64_t)slot * MEM_REGION_SIZE),
                      MEM_REGION_SIZE,
                      mem_aligned + (uint64_t)slot * MEM_REGION_SIZE);

    /* Check that adding a memory slot beyond the limit fails */
    mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host");

    ret = __vm_set_user_memory_region(vm, max_mem_slots, 0,
                      (uint64_t)max_mem_slots * MEM_REGION_SIZE,
                      MEM_REGION_SIZE, mem_extra);
    TEST_ASSERT(ret == -1 && errno == EINVAL,
            "Adding one more memory slot should fail with EINVAL");

    munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
    munmap(mem_extra, MEM_REGION_SIZE);
    kvm_vm_free(vm);
}

int main(int argc, char *argv[])
{
#ifdef __x86_64__
    int i, loops;
#endif

    /* Tell stdout not to buffer its content */
    setbuf(stdout, NULL);

#ifdef __x86_64__
    /*
     * FIXME: the zero-memslot test fails on aarch64 and s390x because
     * KVM_RUN fails with ENOEXEC or EFAULT.
     */
    test_zero_memory_regions();
#endif

    test_add_max_memory_regions();

#ifdef __x86_64__
    if (argc > 1)
        loops = atoi(argv[1]);
    else
        loops = 10;

    pr_info("Testing MOVE of in-use region, %d loops\n", loops);
    for (i = 0; i < loops; i++)
        test_move_memory_region();

    pr_info("Testing DELETE of in-use region, %d loops\n", loops);
    for (i = 0; i < loops; i++)
        test_delete_memory_region();
#endif

    return 0;
}