// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/kvm_util.c
 *
 * Copyright (C) 2018, Google LLC.
 */

0008 #define _GNU_SOURCE
0009 #include "test_util.h"
0010 #include "kvm_util.h"
0011 #include "processor.h"
0012
0013 #include <assert.h>
0014 #include <sys/mman.h>
0015 #include <sys/types.h>
0016 #include <sys/stat.h>
0017 #include <unistd.h>
0018 #include <linux/kernel.h>
0019
0020 #define KVM_UTIL_MIN_PFN 2
0021
0022 static int vcpu_mmap_sz(void);
0023
0024 int open_path_or_exit(const char *path, int flags)
0025 {
0026 int fd;
0027
0028 fd = open(path, flags);
0029 __TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno);
0030
0031 return fd;
0032 }
0033
/*
 * Open KVM_DEV_PATH if available, otherwise exit the entire program.
 *
 * Input Args:
 *   flags - The flags to pass when opening KVM_DEV_PATH.
 *
 * Return:
 *   The opened file descriptor of /dev/kvm.
 */
0043 static int _open_kvm_dev_path_or_exit(int flags)
0044 {
0045 return open_path_or_exit(KVM_DEV_PATH, flags);
0046 }
0047
0048 int open_kvm_dev_path_or_exit(void)
0049 {
0050 return _open_kvm_dev_path_or_exit(O_RDONLY);
0051 }
0052
/*
 * Capability
 *
 * Input Args:
 *   cap - Capability
 *
 * Output Args: None
 *
 * Return:
 *   On success, the value corresponding to the capability (KVM_CAP_*)
 *   specified by the value of cap.  On failure a TEST_ASSERT failure
 *   is produced.
 *
 * Looks up and returns the value corresponding to the capability
 * (KVM_CAP_*) given by cap.
 */
0069 unsigned int kvm_check_cap(long cap)
0070 {
0071 int ret;
0072 int kvm_fd;
0073
0074 kvm_fd = open_kvm_dev_path_or_exit();
0075 ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap);
0076 TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));
0077
0078 close(kvm_fd);
0079
0080 return (unsigned int)ret;
0081 }
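
/*
 * Illustrative usage (a sketch, not part of this library): a test can gate
 * itself on a capability and then consume the value KVM reports for it.
 * The local variable name below is an assumption made for the example.
 *
 *	uint32_t ring_size;
 *
 *	TEST_REQUIRE(kvm_has_cap(KVM_CAP_DIRTY_LOG_RING));
 *	ring_size = kvm_check_cap(KVM_CAP_DIRTY_LOG_RING);
 *	pr_info("Maximum dirty ring size: %u bytes\n", ring_size);
 */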
0082
0083 void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
0084 {
0085 vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
0086 vm->dirty_ring_size = ring_size;
0087 }
0088
0089 static void vm_open(struct kvm_vm *vm)
0090 {
0091 vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR);
0092
0093 TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT));
0094
0095 vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
0096 TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
0097 }
0098
0099 const char *vm_guest_mode_string(uint32_t i)
0100 {
0101 static const char * const strings[] = {
0102 [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages",
0103 [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages",
0104 [VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages",
0105 [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages",
0106 [VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages",
0107 [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
0108 [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages",
0109 [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
0110 [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
0111 [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
0112 [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages",
0113 [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages",
0114 [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages",
0115 [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages",
0116 [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages",
0117 };
0118 _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
0119 "Missing new mode strings?");
0120
0121 TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);
0122
0123 return strings[i];
0124 }
0125
0126 const struct vm_guest_mode_params vm_guest_mode_params[] = {
0127 [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 },
0128 [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 },
0129 [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 },
0130 [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 },
0131 [VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 },
0132 [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 },
0133 [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 },
0134 [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 },
0135 [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 },
0136 [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 },
0137 [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 },
0138 [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 },
0139 [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 },
0140 [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 },
0141 [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 },
0142 };
0143 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
0144 "Missing new mode params?");
0145
0146 struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages)
0147 {
0148 struct kvm_vm *vm;
0149
0150 pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
0151 vm_guest_mode_string(mode), nr_pages);
0152
0153 vm = calloc(1, sizeof(*vm));
0154 TEST_ASSERT(vm != NULL, "Insufficient Memory");
0155
0156 INIT_LIST_HEAD(&vm->vcpus);
0157 vm->regions.gpa_tree = RB_ROOT;
0158 vm->regions.hva_tree = RB_ROOT;
0159 hash_init(vm->regions.slot_hash);
0160
0161 vm->mode = mode;
0162 vm->type = 0;
0163
0164 vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
0165 vm->va_bits = vm_guest_mode_params[mode].va_bits;
0166 vm->page_size = vm_guest_mode_params[mode].page_size;
0167 vm->page_shift = vm_guest_mode_params[mode].page_shift;
0168
0169
0170 switch (vm->mode) {
0171 case VM_MODE_P52V48_4K:
0172 vm->pgtable_levels = 4;
0173 break;
0174 case VM_MODE_P52V48_64K:
0175 vm->pgtable_levels = 3;
0176 break;
0177 case VM_MODE_P48V48_4K:
0178 vm->pgtable_levels = 4;
0179 break;
0180 case VM_MODE_P48V48_64K:
0181 vm->pgtable_levels = 3;
0182 break;
0183 case VM_MODE_P40V48_4K:
0184 case VM_MODE_P36V48_4K:
0185 vm->pgtable_levels = 4;
0186 break;
0187 case VM_MODE_P40V48_64K:
0188 case VM_MODE_P36V48_64K:
0189 vm->pgtable_levels = 3;
0190 break;
0191 case VM_MODE_P48V48_16K:
0192 case VM_MODE_P40V48_16K:
0193 case VM_MODE_P36V48_16K:
0194 vm->pgtable_levels = 4;
0195 break;
0196 case VM_MODE_P36V47_16K:
0197 vm->pgtable_levels = 3;
0198 break;
0199 case VM_MODE_PXXV48_4K:
0200 #ifdef __x86_64__
0201 kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
0202
/*
 * Ignore KVM support for 5-level paging (vm->va_bits == 57), it doesn't
 * take effect unless CR4.LA57 is set, which it isn't for this VM_MODE.
 */
0207 TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
0208 "Linear address width (%d bits) not supported",
0209 vm->va_bits);
0210 pr_debug("Guest physical address width detected: %d\n",
0211 vm->pa_bits);
0212 vm->pgtable_levels = 4;
0213 vm->va_bits = 48;
0214 #else
0215 TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
0216 #endif
0217 break;
0218 case VM_MODE_P47V64_4K:
0219 vm->pgtable_levels = 5;
0220 break;
0221 case VM_MODE_P44V64_4K:
0222 vm->pgtable_levels = 5;
0223 break;
0224 default:
0225 TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
0226 }
0227
0228 #ifdef __aarch64__
0229 if (vm->pa_bits != 40)
0230 vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
0231 #endif
0232
0233 vm_open(vm);
0234
/* Limit to VA-bits canonical virtual addresses. */
0236 vm->vpages_valid = sparsebit_alloc();
0237 sparsebit_set_num(vm->vpages_valid,
0238 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
0239 sparsebit_set_num(vm->vpages_valid,
0240 (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
0241 (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
0242
/* Limit physical addresses to PA-bits. */
0244 vm->max_gfn = vm_compute_max_gfn(vm);
0245
/* Allocate and setup memory for the guest. */
0247 vm->vpages_mapped = sparsebit_alloc();
0248 if (nr_pages != 0)
0249 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
0250 0, 0, nr_pages, 0);
0251
0252 return vm;
0253 }
0254
0255 static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
0256 uint32_t nr_runnable_vcpus,
0257 uint64_t extra_mem_pages)
0258 {
0259 uint64_t nr_pages;
0260
0261 TEST_ASSERT(nr_runnable_vcpus,
0262 "Use vm_create_barebones() for VMs that _never_ have vCPUs\n");
0263
0264 TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
0265 "nr_vcpus = %d too large for host, max-vcpus = %d",
0266 nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));
0267
/*
 * Arbitrarily allocate 512 pages (2mb when page size is 4kb) for the
 * test code and other per-VM assets that will be loaded into memslot0.
 */
0272 nr_pages = 512;
0273
/* Account for the per-vCPU stacks on behalf of the test. */
0275 nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS;
0276
/*
 * Account for the number of pages needed for the page tables.  The
 * maximum page table size for a memory region will be when the
 * smallest page size is used. Considering each page contains x page
 * table descriptors, the total extra size for page tables (for extra
 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
 * than N/x*2.
 */
0285 nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;
0286
0287 return vm_adjust_num_guest_pages(mode, nr_pages);
0288 }
0289
0290 struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
0291 uint64_t nr_extra_pages)
0292 {
0293 uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus,
0294 nr_extra_pages);
0295 struct kvm_vm *vm;
0296
0297 vm = ____vm_create(mode, nr_pages);
0298
0299 kvm_vm_elf_load(vm, program_invocation_name);
0300
0301 #ifdef __x86_64__
0302 vm_create_irqchip(vm);
0303 #endif
0304 return vm;
0305 }
0306
/*
 * VM Create with customized parameters
 *
 * Input Args:
 *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
 *   nr_vcpus - vCPU count
 *   extra_mem_pages - Non-slot0 physical memory total size, in guest pages
 *   guest_code - Guest entry point
 *   vcpus - Output array for the created vCPUs
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 *
 * Creates a VM with the mode specified by mode, adds nr_vcpus vCPUs that
 * will begin execution at guest_code, and loads the test program itself
 * into guest memory.
 */
0326 struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
0327 uint64_t extra_mem_pages,
0328 void *guest_code, struct kvm_vcpu *vcpus[])
0329 {
0330 struct kvm_vm *vm;
0331 int i;
0332
0333 TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array");
0334
0335 vm = __vm_create(mode, nr_vcpus, extra_mem_pages);
0336
0337 for (i = 0; i < nr_vcpus; ++i)
0338 vcpus[i] = vm_vcpu_add(vm, i, guest_code);
0339
0340 return vm;
0341 }
0342
0343 struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
0344 uint64_t extra_mem_pages,
0345 void *guest_code)
0346 {
0347 struct kvm_vcpu *vcpus[1];
0348 struct kvm_vm *vm;
0349
0350 vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages,
0351 guest_code, vcpus);
0352
0353 *vcpu = vcpus[0];
0354 return vm;
0355 }
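
/*
 * Illustrative end-to-end usage (a sketch, not part of this library): most
 * selftests create a VM with a single vCPU, run it until the guest signals
 * completion via ucall, and then free the VM.  guest_main() and the exact
 * ucall checks are assumptions made for the example.
 *
 *	static void guest_main(void)
 *	{
 *		GUEST_DONE();
 *	}
 *
 *	int main(void)
 *	{
 *		struct kvm_vcpu *vcpu;
 *		struct kvm_vm *vm;
 *		struct ucall uc;
 *
 *		vm = vm_create_with_one_vcpu(&vcpu, guest_main);
 *		vcpu_run(vcpu);
 *		TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE,
 *			    "Unexpected exit from guest");
 *		kvm_vm_free(vm);
 *		return 0;
 *	}
 */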
0356
/*
 * VM Restart
 *
 * Input Args:
 *   vmp - VM that has been released before
 *
 * Output Args: None
 *
 * Reopens the file descriptors associated to the VM and reinstates the
 * global state, such as the irqchip and the memory regions that are mapped
 * into the guest.
 */
0369 void kvm_vm_restart(struct kvm_vm *vmp)
0370 {
0371 int ctr;
0372 struct userspace_mem_region *region;
0373
0374 vm_open(vmp);
0375 if (vmp->has_irqchip)
0376 vm_create_irqchip(vmp);
0377
0378 hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
0380 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
0381 " rc: %i errno: %i\n"
0382 " slot: %u flags: 0x%x\n"
0383 " guest_phys_addr: 0x%llx size: 0x%llx",
0384 ret, errno, region->region.slot,
0385 region->region.flags,
0386 region->region.guest_phys_addr,
0387 region->region.memory_size);
0388 }
0389 }
0390
0391 __weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm,
0392 uint32_t vcpu_id)
0393 {
0394 return __vm_vcpu_add(vm, vcpu_id);
0395 }
0396
0397 struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
0398 {
0399 kvm_vm_restart(vm);
0400
0401 return vm_vcpu_recreate(vm, 0);
0402 }
0403
/*
 * Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Searches for a region with any physical memory that overlaps with
 * any portion of the guest physical addresses from start to end
 * inclusive.  If multiple overlapping regions exist, a pointer to any
 * of the regions is returned.  Null is returned only when no overlapping
 * region exists.
 */
0423 static struct userspace_mem_region *
0424 userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
0425 {
0426 struct rb_node *node;
0427
0428 for (node = vm->regions.gpa_tree.rb_node; node; ) {
0429 struct userspace_mem_region *region =
0430 container_of(node, struct userspace_mem_region, gpa_node);
0431 uint64_t existing_start = region->region.guest_phys_addr;
0432 uint64_t existing_end = region->region.guest_phys_addr
0433 + region->region.memory_size - 1;
0434 if (start <= existing_end && end >= existing_start)
0435 return region;
0436
0437 if (start < existing_start)
0438 node = node->rb_left;
0439 else
0440 node = node->rb_right;
0441 }
0442
0443 return NULL;
0444 }
0445
0446
0447
0448
0449
0450
0451
0452
0453
0454
0455
0456
0457
0458
0459
0460
0461
0462 struct kvm_userspace_memory_region *
0463 kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
0464 uint64_t end)
0465 {
0466 struct userspace_mem_region *region;
0467
0468 region = userspace_mem_region_find(vm, start, end);
0469 if (!region)
0470 return NULL;
0471
return &region->region;
0473 }
0474
0475 __weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
0476 {
0477
0478 }
0479
0480
0481
0482
0483
0484
0485
0486
0487
0488
0489
0490
0491
0492 static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
0493 {
0494 int ret;
0495
0496 if (vcpu->dirty_gfns) {
0497 ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
0498 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
0499 vcpu->dirty_gfns = NULL;
0500 }
0501
0502 ret = munmap(vcpu->run, vcpu_mmap_sz());
0503 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
0504
0505 ret = close(vcpu->fd);
0506 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
0507
0508 list_del(&vcpu->list);
0509
0510 vcpu_arch_free(vcpu);
0511 free(vcpu);
0512 }
0513
0514 void kvm_vm_release(struct kvm_vm *vmp)
0515 {
0516 struct kvm_vcpu *vcpu, *tmp;
0517 int ret;
0518
0519 list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
0520 vm_vcpu_rm(vmp, vcpu);
0521
0522 ret = close(vmp->fd);
0523 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
0524
0525 ret = close(vmp->kvm_fd);
0526 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
0527 }
0528
0529 static void __vm_mem_region_delete(struct kvm_vm *vm,
0530 struct userspace_mem_region *region,
0531 bool unlink)
0532 {
0533 int ret;
0534
0535 if (unlink) {
rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
rb_erase(&region->hva_node, &vm->regions.hva_tree);
hash_del(&region->slot_node);
0539 }
0540
0541 region->region.memory_size = 0;
vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
0543
sparsebit_free(&region->unused_phy_pages);
0545 ret = munmap(region->mmap_start, region->mmap_size);
0546 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
0547
0548 free(region);
0549 }
0550
0551
0552
0553
0554 void kvm_vm_free(struct kvm_vm *vmp)
0555 {
0556 int ctr;
0557 struct hlist_node *node;
0558 struct userspace_mem_region *region;
0559
0560 if (vmp == NULL)
0561 return;
0562
0563
0564 if (vmp->stats_fd) {
0565 free(vmp->stats_desc);
0566 close(vmp->stats_fd);
0567 }
0568
0569
0570 hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
0571 __vm_mem_region_delete(vmp, region, false);
0572
0573
0574 sparsebit_free(&vmp->vpages_valid);
0575 sparsebit_free(&vmp->vpages_mapped);
0576
0577 kvm_vm_release(vmp);
0578
0579
0580 free(vmp);
0581 }
0582
0583 int kvm_memfd_alloc(size_t size, bool hugepages)
0584 {
0585 int memfd_flags = MFD_CLOEXEC;
0586 int fd, r;
0587
0588 if (hugepages)
0589 memfd_flags |= MFD_HUGETLB;
0590
0591 fd = memfd_create("kvm_selftest", memfd_flags);
0592 TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));
0593
0594 r = ftruncate(fd, size);
0595 TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r));
0596
0597 r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
0598 TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
0599
0600 return fd;
0601 }
0602
0603
0604
0605
0606
0607
0608
0609
0610
0611
0612
0613
0614
0615
0616
0617
0618
0619
0620
0621
0622
0623
0624
0625
0626 int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
0627 {
0628 size_t amt;
0629
0630
0631
0632
0633
0634 for (uintptr_t offset = 0; offset < len; offset += amt) {
0635 uintptr_t ptr1 = (uintptr_t)hva + offset;
0636
0637
0638
0639
0640
0641 uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
0642
0643
0644
0645
0646
0647 amt = len - offset;
0648 if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
0649 amt = vm->page_size - (ptr1 % vm->page_size);
0650 if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
0651 amt = vm->page_size - (ptr2 % vm->page_size);
0652
0653 assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
0654 assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
0655
0656
0657
0658
0659
0660
0661 int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
0662 if (ret != 0)
0663 return ret;
0664 }
0665
0666
0667
0668
0669
0670 return 0;
0671 }
0672
0673 static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
0674 struct userspace_mem_region *region)
0675 {
0676 struct rb_node **cur, *parent;
0677
0678 for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
0679 struct userspace_mem_region *cregion;
0680
0681 cregion = container_of(*cur, typeof(*cregion), gpa_node);
0682 parent = *cur;
0683 if (region->region.guest_phys_addr <
0684 cregion->region.guest_phys_addr)
0685 cur = &(*cur)->rb_left;
0686 else {
0687 TEST_ASSERT(region->region.guest_phys_addr !=
0688 cregion->region.guest_phys_addr,
0689 "Duplicate GPA in region tree");
0690
0691 cur = &(*cur)->rb_right;
0692 }
0693 }
0694
rb_link_node(&region->gpa_node, parent, cur);
rb_insert_color(&region->gpa_node, gpa_tree);
0697 }
0698
0699 static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
0700 struct userspace_mem_region *region)
0701 {
0702 struct rb_node **cur, *parent;
0703
0704 for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
0705 struct userspace_mem_region *cregion;
0706
0707 cregion = container_of(*cur, typeof(*cregion), hva_node);
0708 parent = *cur;
0709 if (region->host_mem < cregion->host_mem)
0710 cur = &(*cur)->rb_left;
0711 else {
0712 TEST_ASSERT(region->host_mem !=
0713 cregion->host_mem,
0714 "Duplicate HVA in region tree");
0715
0716 cur = &(*cur)->rb_right;
0717 }
0718 }
0719
rb_link_node(&region->hva_node, parent, cur);
rb_insert_color(&region->hva_node, hva_tree);
0722 }
0723
0724
0725 int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
0726 uint64_t gpa, uint64_t size, void *hva)
0727 {
0728 struct kvm_userspace_memory_region region = {
0729 .slot = slot,
0730 .flags = flags,
0731 .guest_phys_addr = gpa,
0732 .memory_size = size,
0733 .userspace_addr = (uintptr_t)hva,
0734 };
0735
return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region);
0737 }
0738
0739 void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
0740 uint64_t gpa, uint64_t size, void *hva)
0741 {
0742 int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva);
0743
0744 TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)",
0745 errno, strerror(errno));
0746 }
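
/*
 * Illustrative usage (a sketch, not part of this library): the double
 * underscore variant returns the raw ioctl() result, which lets a test
 * verify that KVM rejects bad memslot parameters.  The slot number, GPA
 * and size below are arbitrary assumptions for the example.
 *
 *	int r;
 *
 *	r = __vm_set_user_memory_region(vm, 1, 0, 1, 0x1000, NULL);
 *	TEST_ASSERT(r == -1 && errno == EINVAL,
 *		    "KVM should reject a non-page-aligned GPA");
 */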
0747
/*
 * VM Userspace Memory Region Add
 *
 * Input Args:
 *   vm - Virtual Machine
 *   src_type - Storage source for this region
 *              (e.g. VM_MEM_SRC_ANONYMOUS, VM_MEM_SRC_SHARED_HUGETLB)
 *   guest_paddr - Starting guest physical address
 *   slot - KVM region slot
 *   npages - Number of physical pages
 *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Allocates a memory area of the number of pages specified by npages
 * and maps it to the VM specified by vm, at a starting physical address
 * given by guest_paddr.  The region is created with a KVM region slot
 * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
 * region is created with the flags given by flags.
 */
0770 void vm_userspace_mem_region_add(struct kvm_vm *vm,
0771 enum vm_mem_backing_src_type src_type,
0772 uint64_t guest_paddr, uint32_t slot, uint64_t npages,
0773 uint32_t flags)
0774 {
0775 int ret;
0776 struct userspace_mem_region *region;
0777 size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
0778 size_t alignment;
0779
0780 TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
0781 "Number of guest pages is not compatible with the host. "
0782 "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));
0783
0784 TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
0785 "address not on a page boundary.\n"
0786 " guest_paddr: 0x%lx vm->page_size: 0x%x",
0787 guest_paddr, vm->page_size);
0788 TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
0789 <= vm->max_gfn, "Physical range beyond maximum "
0790 "supported physical address,\n"
0791 " guest_paddr: 0x%lx npages: 0x%lx\n"
0792 " vm->max_gfn: 0x%lx vm->page_size: 0x%x",
0793 guest_paddr, npages, vm->max_gfn, vm->page_size);
0794
0795
0796
0797
0798
0799 region = (struct userspace_mem_region *) userspace_mem_region_find(
0800 vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
0801 if (region != NULL)
0802 TEST_FAIL("overlapping userspace_mem_region already "
0803 "exists\n"
0804 " requested guest_paddr: 0x%lx npages: 0x%lx "
0805 "page_size: 0x%x\n"
0806 " existing guest_paddr: 0x%lx size: 0x%lx",
0807 guest_paddr, npages, vm->page_size,
0808 (uint64_t) region->region.guest_phys_addr,
0809 (uint64_t) region->region.memory_size);
0810
0811
0812 hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
0813 slot) {
0814 if (region->region.slot != slot)
0815 continue;
0816
0817 TEST_FAIL("A mem region with the requested slot "
0818 "already exists.\n"
0819 " requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
0820 " existing slot: %u paddr: 0x%lx size: 0x%lx",
0821 slot, guest_paddr, npages,
0822 region->region.slot,
0823 (uint64_t) region->region.guest_phys_addr,
0824 (uint64_t) region->region.memory_size);
0825 }
0826
0827
0828 region = calloc(1, sizeof(*region));
0829 TEST_ASSERT(region != NULL, "Insufficient Memory");
0830 region->mmap_size = npages * vm->page_size;
0831
0832 #ifdef __s390x__
0833
0834 alignment = 0x100000;
0835 #else
0836 alignment = 1;
0837 #endif
0838
0839
0840
0841
0842
0843
0844
0845 if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
0846 alignment = max(backing_src_pagesz, alignment);
0847
0848 ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
0849
0850
0851 if (alignment > 1)
0852 region->mmap_size += alignment;
0853
0854 region->fd = -1;
0855 if (backing_src_is_shared(src_type))
0856 region->fd = kvm_memfd_alloc(region->mmap_size,
0857 src_type == VM_MEM_SRC_SHARED_HUGETLB);
0858
0859 region->mmap_start = mmap(NULL, region->mmap_size,
0860 PROT_READ | PROT_WRITE,
0861 vm_mem_backing_src_alias(src_type)->flag,
0862 region->fd, 0);
0863 TEST_ASSERT(region->mmap_start != MAP_FAILED,
0864 __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
0865
0866 TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
0867 region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
0868 "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
0869 region->mmap_start, backing_src_pagesz);
0870
0871
0872 region->host_mem = align_ptr_up(region->mmap_start, alignment);
0873
0874
0875 if ((src_type == VM_MEM_SRC_ANONYMOUS ||
0876 src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
0877 ret = madvise(region->host_mem, npages * vm->page_size,
0878 src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
0879 TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
0880 region->host_mem, npages * vm->page_size,
0881 vm_mem_backing_src_alias(src_type)->name);
0882 }
0883
0884 region->unused_phy_pages = sparsebit_alloc();
0885 sparsebit_set_num(region->unused_phy_pages,
0886 guest_paddr >> vm->page_shift, npages);
0887 region->region.slot = slot;
0888 region->region.flags = flags;
0889 region->region.guest_phys_addr = guest_paddr;
0890 region->region.memory_size = npages * vm->page_size;
0891 region->region.userspace_addr = (uintptr_t) region->host_mem;
ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
0893 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
0894 " rc: %i errno: %i\n"
0895 " slot: %u flags: 0x%x\n"
0896 " guest_phys_addr: 0x%lx size: 0x%lx",
0897 ret, errno, slot, flags,
0898 guest_paddr, (uint64_t) region->region.memory_size);
0899
0900
0901 vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
0902 vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
hash_add(vm->regions.slot_hash, &region->slot_node, slot);
0904
0905
0906 if (region->fd >= 0) {
0907 region->mmap_alias = mmap(NULL, region->mmap_size,
0908 PROT_READ | PROT_WRITE,
0909 vm_mem_backing_src_alias(src_type)->flag,
0910 region->fd, 0);
0911 TEST_ASSERT(region->mmap_alias != MAP_FAILED,
0912 __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
0913
0914
0915 region->host_alias = align_ptr_up(region->mmap_alias, alignment);
0916 }
0917 }
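
/*
 * Illustrative usage (a sketch, not part of this library): add a second,
 * THP-backed memslot and identity-map it into the guest's page tables.
 * The slot number, GPA and page count are assumptions for the example.
 *
 *	#define TEST_SLOT	10
 *	#define TEST_GPA	0x100000000ull
 *	#define TEST_NPAGES	512
 *
 *	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
 *				    TEST_GPA, TEST_SLOT, TEST_NPAGES, 0);
 *	virt_map(vm, TEST_GPA, TEST_GPA, TEST_NPAGES);
 */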
0918
0919
0920
0921
0922
0923
0924
0925
0926
0927
0928
0929
0930
0931
0932
0933
0934 struct userspace_mem_region *
0935 memslot2region(struct kvm_vm *vm, uint32_t memslot)
0936 {
0937 struct userspace_mem_region *region;
0938
0939 hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
0940 memslot)
0941 if (region->region.slot == memslot)
0942 return region;
0943
0944 fprintf(stderr, "No mem region with the requested slot found,\n"
0945 " requested slot: %u\n", memslot);
0946 fputs("---- vm dump ----\n", stderr);
0947 vm_dump(stderr, vm, 2);
0948 TEST_FAIL("Mem region not found");
0949 return NULL;
0950 }
0951
0952
0953
0954
0955
0956
0957
0958
0959
0960
0961
0962
0963
0964
0965
0966 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
0967 {
0968 int ret;
0969 struct userspace_mem_region *region;
0970
0971 region = memslot2region(vm, slot);
0972
0973 region->region.flags = flags;
0974
ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
0976
0977 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
0978 " rc: %i errno: %i slot: %u flags: 0x%x",
0979 ret, errno, slot, flags);
0980 }
0981
0982
0983
0984
0985
0986
0987
0988
0989
0990
0991
0992
0993
0994
0995
0996 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
0997 {
0998 struct userspace_mem_region *region;
0999 int ret;
1000
1001 region = memslot2region(vm, slot);
1002
1003 region->region.guest_phys_addr = new_gpa;
1004
ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
1006
1007 TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
1008 "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
1009 ret, errno, slot, new_gpa);
1010 }
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
1026 {
1027 __vm_mem_region_delete(vm, memslot2region(vm, slot), true);
1028 }
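
/*
 * Illustrative usage (a sketch, not part of this library): the memslot
 * helpers above are typically used to toggle dirty logging on an existing
 * slot, move it to a new base GPA, or remove it entirely.  TEST_SLOT and
 * NEW_GPA are assumptions for the example.
 *
 *	vm_mem_region_set_flags(vm, TEST_SLOT, KVM_MEM_LOG_DIRTY_PAGES);
 *	vm_mem_region_move(vm, TEST_SLOT, NEW_GPA);
 *	vm_mem_region_delete(vm, TEST_SLOT);
 */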
1029
1030
1031 static int vcpu_mmap_sz(void)
1032 {
1033 int dev_fd, ret;
1034
1035 dev_fd = open_kvm_dev_path_or_exit();
1036
1037 ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
1038 TEST_ASSERT(ret >= sizeof(struct kvm_run),
1039 KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));
1040
1041 close(dev_fd);
1042
1043 return ret;
1044 }
1045
1046 static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id)
1047 {
1048 struct kvm_vcpu *vcpu;
1049
1050 list_for_each_entry(vcpu, &vm->vcpus, list) {
1051 if (vcpu->id == vcpu_id)
1052 return true;
1053 }
1054
1055 return false;
1056 }
1057
/*
 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id.
 * No additional vCPU setup is done.  Returns the vCPU.
 */
1062 struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
1063 {
1064 struct kvm_vcpu *vcpu;
1065
1066
1067 TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id);
1068
1069
1070 vcpu = calloc(1, sizeof(*vcpu));
1071 TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
1072
1073 vcpu->vm = vm;
1074 vcpu->id = vcpu_id;
1075 vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
1076 TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd));
1077
1078 TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
1079 "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
1080 vcpu_mmap_sz(), sizeof(*vcpu->run));
1081 vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
1082 PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
1083 TEST_ASSERT(vcpu->run != MAP_FAILED,
1084 __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
1085
1086
1087 list_add(&vcpu->list, &vm->vcpus);
1088
1089 return vcpu;
1090 }
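
/*
 * Illustrative usage (a sketch, not part of this library): tests that only
 * exercise VM and vCPU ioctls can create a "barebones" VM, which never runs
 * guest code, and add an uninitialized vCPU to it.
 *
 *	struct kvm_vm *vm = vm_create_barebones();
 *	struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, 0);
 *
 *	... issue ioctls via vm_ioctl()/vcpu_ioctl() ...
 *
 *	kvm_vm_free(vm);
 */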
1091
/*
 * VM Virtual Address Unused Gap
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size (bytes)
 *   vaddr_min - Minimum Virtual Address
 *
 * Output Args: None
 *
 * Return:
 *   Lowest virtual address >= vaddr_min with at least sz unused bytes.
 *   A TEST_ASSERT failure occurs if no area of at least sz bytes is
 *   available.
 *
 * Within the VM specified by vm, locates the lowest starting virtual
 * address >= vaddr_min that has at least sz unallocated bytes.  A
 * TEST_ASSERT failure occurs for invalid input or if no area of at
 * least sz unallocated bytes >= vaddr_min is available.
 */
1112 static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
1113 vm_vaddr_t vaddr_min)
1114 {
1115 uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
1116
1117
1118 uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
1119 if ((pgidx_start * vm->page_size) < vaddr_min)
1120 goto no_va_found;
1121
1122
1123 if (!sparsebit_is_set_num(vm->vpages_valid,
1124 pgidx_start, pages))
1125 pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
1126 pgidx_start, pages);
1127 do {
1128
1129
1130
1131
1132
1133
1134 if (sparsebit_is_clear_num(vm->vpages_mapped,
1135 pgidx_start, pages))
1136 goto va_found;
1137 pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
1138 pgidx_start, pages);
1139 if (pgidx_start == 0)
1140 goto no_va_found;
1141
1142
1143
1144
1145
1146 if (!sparsebit_is_set_num(vm->vpages_valid,
1147 pgidx_start, pages)) {
1148 pgidx_start = sparsebit_next_set_num(
1149 vm->vpages_valid, pgidx_start, pages);
1150 if (pgidx_start == 0)
1151 goto no_va_found;
1152 }
1153 } while (pgidx_start != 0);
1154
1155 no_va_found:
1156 TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);
1157
/* NOT REACHED */
1159 return -1;
1160
1161 va_found:
1162 TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
1163 pgidx_start, pages),
1164 "Unexpected, invalid virtual page index range,\n"
1165 " pgidx_start: 0x%lx\n"
1166 " pages: 0x%lx",
1167 pgidx_start, pages);
1168 TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
1169 pgidx_start, pages),
1170 "Unexpected, pages already mapped,\n"
1171 " pgidx_start: 0x%lx\n"
1172 " pages: 0x%lx",
1173 pgidx_start, pages);
1174
1175 return pgidx_start * vm->page_size;
1176 }
1177
/*
 * VM Virtual Address Allocate
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size in bytes
 *   vaddr_min - Minimum starting virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least sz bytes within the virtual address space of the VM
 * given by vm.  The allocated bytes are mapped to a virtual address >=
 * the address given by vaddr_min.  Note that each allocation uses a
 * unique set of pages, with the minimum real allocation being at least
 * a page.
 */
1197 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
1198 {
1199 uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
1200
1201 virt_pgd_alloc(vm);
1202 vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
1203 KVM_UTIL_MIN_PFN * vm->page_size, 0);
1204
1205
1206
1207
1208
1209 vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
1210
1211
1212 for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
1213 pages--, vaddr += vm->page_size, paddr += vm->page_size) {
1214
1215 virt_pg_map(vm, vaddr, paddr);
1216
1217 sparsebit_set(vm->vpages_mapped,
1218 vaddr >> vm->page_shift);
1219 }
1220
1221 return vaddr_start;
1222 }
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238 vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
1239 {
1240 return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
1241 }
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257 vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
1258 {
1259 return vm_vaddr_alloc_pages(vm, 1);
1260 }
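
/*
 * Illustrative usage (a sketch, not part of this library): allocate one
 * page of guest virtual memory and initialize it from the host via the
 * GVA->HVA translation helper.  The fill pattern is arbitrary.
 *
 *	vm_vaddr_t gva = vm_vaddr_alloc_page(vm);
 *	uint8_t *hva = addr_gva2hva(vm, gva);
 *
 *	memset(hva, 0xaa, vm->page_size);
 */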
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
1279 unsigned int npages)
1280 {
1281 size_t page_size = vm->page_size;
1282 size_t size = npages * page_size;
1283
1284 TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
1285 TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
1286
1287 while (npages--) {
1288 virt_pg_map(vm, vaddr, paddr);
1289 vaddr += page_size;
1290 paddr += page_size;
1291 }
1292 }
1293
/*
 * Address VM Physical to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 *
 * Locates the memory region containing the VM physical address given
 * by gpa, within the VM given by vm.  When found, the host virtual
 * address providing the memory to the VM physical address is returned.
 * A TEST_ASSERT failure occurs if no region containing gpa exists.
 */
1311 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
1312 {
1313 struct userspace_mem_region *region;
1314
1315 region = userspace_mem_region_find(vm, gpa, gpa);
1316 if (!region) {
1317 TEST_FAIL("No vm physical memory at 0x%lx", gpa);
1318 return NULL;
1319 }
1320
1321 return (void *)((uintptr_t)region->host_mem
1322 + (gpa - region->region.guest_phys_addr));
1323 }
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
1343 {
1344 struct rb_node *node;
1345
1346 for (node = vm->regions.hva_tree.rb_node; node; ) {
1347 struct userspace_mem_region *region =
1348 container_of(node, struct userspace_mem_region, hva_node);
1349
1350 if (hva >= region->host_mem) {
1351 if (hva <= (region->host_mem
1352 + region->region.memory_size - 1))
1353 return (vm_paddr_t)((uintptr_t)
1354 region->region.guest_phys_addr
1355 + (hva - (uintptr_t)region->host_mem));
1356
1357 node = node->rb_right;
1358 } else
1359 node = node->rb_left;
1360 }
1361
1362 TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
1363 return -1;
1364 }
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385 void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
1386 {
1387 struct userspace_mem_region *region;
1388 uintptr_t offset;
1389
1390 region = userspace_mem_region_find(vm, gpa, gpa);
1391 if (!region)
1392 return NULL;
1393
1394 if (!region->host_alias)
1395 return NULL;
1396
1397 offset = gpa - region->region.guest_phys_addr;
1398 return (void *) ((uintptr_t) region->host_alias + offset);
1399 }
1400
1401
1402 void vm_create_irqchip(struct kvm_vm *vm)
1403 {
1404 vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
1405
1406 vm->has_irqchip = true;
1407 }
1408
1409 int _vcpu_run(struct kvm_vcpu *vcpu)
1410 {
1411 int rc;
1412
1413 do {
1414 rc = __vcpu_run(vcpu);
1415 } while (rc == -1 && errno == EINTR);
1416
1417 assert_on_unhandled_exception(vcpu);
1418
1419 return rc;
1420 }
1421
1422
1423
1424
1425
1426 void vcpu_run(struct kvm_vcpu *vcpu)
1427 {
1428 int ret = _vcpu_run(vcpu);
1429
1430 TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret));
1431 }
1432
1433 void vcpu_run_complete_io(struct kvm_vcpu *vcpu)
1434 {
1435 int ret;
1436
1437 vcpu->run->immediate_exit = 1;
1438 ret = __vcpu_run(vcpu);
1439 vcpu->run->immediate_exit = 0;
1440
1441 TEST_ASSERT(ret == -1 && errno == EINTR,
1442 "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
1443 ret, errno);
1444 }
1445
/*
 * Get the list of guest registers which are supported for
 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls.  Returns a kvm_reg_list pointer,
 * it is the caller's responsibility to free the list.
 */
1451 struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
1452 {
1453 struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
1454 int ret;
1455
ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &reg_list_n);
1457 TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
1458
1459 reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
1460 reg_list->n = reg_list_n.n;
1461 vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list);
1462 return reg_list;
1463 }
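
/*
 * Illustrative usage (a sketch, not part of this library): walk the
 * returned list and fetch each register's current value with
 * vcpu_get_reg().  The local variable names are assumptions.
 *
 *	struct kvm_reg_list *list = vcpu_get_reg_list(vcpu);
 *	uint64_t i, val;
 *
 *	for (i = 0; i < list->n; i++)
 *		vcpu_get_reg(vcpu, list->reg[i], &val);
 *	free(list);
 */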
1464
1465 void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
1466 {
1467 uint32_t page_size = vcpu->vm->page_size;
1468 uint32_t size = vcpu->vm->dirty_ring_size;
1469
1470 TEST_ASSERT(size > 0, "Should enable dirty ring first");
1471
1472 if (!vcpu->dirty_gfns) {
1473 void *addr;
1474
1475 addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd,
1476 page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
1477 TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");
1478
1479 addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd,
1480 page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
1481 TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
1482
1483 addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
1484 page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
1485 TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
1486
1487 vcpu->dirty_gfns = addr;
1488 vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
1489 }
1490
1491 return vcpu->dirty_gfns;
1492 }
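
/*
 * Illustrative usage (a sketch, not part of this library, and ignoring the
 * memory-ordering a real harvester needs): walk the mapped ring, collect
 * entries whose KVM_DIRTY_GFN_F_DIRTY flag is set, mark them collected,
 * then ask KVM to recycle them.  "next" is assumed to be a ring cursor
 * maintained by the caller across calls.
 *
 *	struct kvm_dirty_gfn *gfns = vcpu_map_dirty_ring(vcpu);
 *	struct kvm_dirty_gfn *cur = &gfns[next % vcpu->dirty_gfns_count];
 *
 *	while (cur->flags & KVM_DIRTY_GFN_F_DIRTY) {
 *		pr_debug("dirty: slot %u offset %llu\n", cur->slot, cur->offset);
 *		cur->flags = KVM_DIRTY_GFN_F_RESET;
 *		cur = &gfns[++next % vcpu->dirty_gfns_count];
 *	}
 *	vm_ioctl(vcpu->vm, KVM_RESET_DIRTY_RINGS, NULL);
 */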
1493
1494
1495
1496
1497
1498 int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
1499 {
1500 struct kvm_device_attr attribute = {
1501 .group = group,
1502 .attr = attr,
1503 .flags = 0,
1504 };
1505
1506 return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
1507 }
1508
1509 int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type)
1510 {
1511 struct kvm_create_device create_dev = {
1512 .type = type,
1513 .flags = KVM_CREATE_DEVICE_TEST,
1514 };
1515
1516 return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
1517 }
1518
1519 int __kvm_create_device(struct kvm_vm *vm, uint64_t type)
1520 {
1521 struct kvm_create_device create_dev = {
1522 .type = type,
1523 .fd = -1,
1524 .flags = 0,
1525 };
1526 int err;
1527
1528 err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
1529 TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value");
1530 return err ? : create_dev.fd;
1531 }
1532
1533 int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val)
1534 {
1535 struct kvm_device_attr kvmattr = {
1536 .group = group,
1537 .attr = attr,
1538 .flags = 0,
1539 .addr = (uintptr_t)val,
1540 };
1541
1542 return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr);
1543 }
1544
1545 int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val)
1546 {
1547 struct kvm_device_attr kvmattr = {
1548 .group = group,
1549 .attr = attr,
1550 .flags = 0,
1551 .addr = (uintptr_t)val,
1552 };
1553
1554 return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr);
1555 }
1556
1557
1558
1559
1560
1561 int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
1562 {
1563 struct kvm_irq_level irq_level = {
1564 .irq = irq,
1565 .level = level,
1566 };
1567
1568 return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
1569 }
1570
1571 void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
1572 {
1573 int ret = _kvm_irq_line(vm, irq, level);
1574
1575 TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
1576 }
1577
1578 struct kvm_irq_routing *kvm_gsi_routing_create(void)
1579 {
1580 struct kvm_irq_routing *routing;
1581 size_t size;
1582
1583 size = sizeof(struct kvm_irq_routing);
1584
1585 size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
1586 routing = calloc(1, size);
1587 assert(routing);
1588
1589 return routing;
1590 }
1591
1592 void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
1593 uint32_t gsi, uint32_t pin)
1594 {
1595 int i;
1596
1597 assert(routing);
1598 assert(routing->nr < KVM_MAX_IRQ_ROUTES);
1599
1600 i = routing->nr;
1601 routing->entries[i].gsi = gsi;
1602 routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
1603 routing->entries[i].flags = 0;
1604 routing->entries[i].u.irqchip.irqchip = 0;
1605 routing->entries[i].u.irqchip.pin = pin;
1606 routing->nr++;
1607 }
1608
1609 int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
1610 {
1611 int ret;
1612
1613 assert(routing);
1614 ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
1615 free(routing);
1616
1617 return ret;
1618 }
1619
1620 void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
1621 {
1622 int ret;
1623
1624 ret = _kvm_gsi_routing_write(vm, routing);
1625 TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret));
1626 }
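
/*
 * Illustrative usage (a sketch, not part of this library): build a routing
 * table that wires GSIs 0..31 straight through to the in-kernel irqchip
 * pins and commit it; kvm_gsi_routing_write() frees the table.
 *
 *	struct kvm_irq_routing *routing = kvm_gsi_routing_create();
 *	uint32_t i;
 *
 *	for (i = 0; i < 32; i++)
 *		kvm_gsi_routing_irqchip_add(routing, i, i);
 *	kvm_gsi_routing_write(vm, routing);
 */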
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
1644 {
1645 int ctr;
1646 struct userspace_mem_region *region;
1647 struct kvm_vcpu *vcpu;
1648
1649 fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
1650 fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
1651 fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
1652 fprintf(stream, "%*sMem Regions:\n", indent, "");
1653 hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
1654 fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
1655 "host_virt: %p\n", indent + 2, "",
1656 (uint64_t) region->region.guest_phys_addr,
1657 (uint64_t) region->region.memory_size,
1658 region->host_mem);
1659 fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
1660 sparsebit_dump(stream, region->unused_phy_pages, 0);
1661 }
1662 fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
1663 sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
1664 fprintf(stream, "%*spgd_created: %u\n", indent, "",
1665 vm->pgd_created);
1666 if (vm->pgd_created) {
1667 fprintf(stream, "%*sVirtual Translation Tables:\n",
1668 indent + 2, "");
1669 virt_dump(stream, vm, indent + 4);
1670 }
1671 fprintf(stream, "%*sVCPUs:\n", indent, "");
1672
1673 list_for_each_entry(vcpu, &vm->vcpus, list)
1674 vcpu_dump(stream, vcpu, indent + 2);
1675 }
1676
1677
1678 static struct exit_reason {
1679 unsigned int reason;
1680 const char *name;
1681 } exit_reasons_known[] = {
1682 {KVM_EXIT_UNKNOWN, "UNKNOWN"},
1683 {KVM_EXIT_EXCEPTION, "EXCEPTION"},
1684 {KVM_EXIT_IO, "IO"},
1685 {KVM_EXIT_HYPERCALL, "HYPERCALL"},
1686 {KVM_EXIT_DEBUG, "DEBUG"},
1687 {KVM_EXIT_HLT, "HLT"},
1688 {KVM_EXIT_MMIO, "MMIO"},
1689 {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
1690 {KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
1691 {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
1692 {KVM_EXIT_INTR, "INTR"},
1693 {KVM_EXIT_SET_TPR, "SET_TPR"},
1694 {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
1695 {KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
1696 {KVM_EXIT_S390_RESET, "S390_RESET"},
1697 {KVM_EXIT_DCR, "DCR"},
1698 {KVM_EXIT_NMI, "NMI"},
1699 {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
1700 {KVM_EXIT_OSI, "OSI"},
1701 {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
1702 {KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
1703 {KVM_EXIT_X86_RDMSR, "RDMSR"},
1704 {KVM_EXIT_X86_WRMSR, "WRMSR"},
1705 {KVM_EXIT_XEN, "XEN"},
1706 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT
1707 {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
1708 #endif
1709 };
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726 const char *exit_reason_str(unsigned int exit_reason)
1727 {
1728 unsigned int n1;
1729
1730 for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
1731 if (exit_reason == exit_reasons_known[n1].reason)
1732 return exit_reasons_known[n1].name;
1733 }
1734
1735 return "Unknown";
1736 }
1737
/*
 * Physical Contiguous Page Allocator
 *
 * Input Args:
 *   vm - Virtual Machine
 *   num - number of pages
 *   paddr_min - Physical address minimum
 *   memslot - Memory region to allocate page from
 *
 * Output Args: None
 *
 * Return:
 *   Starting physical address
 *
 * Within the VM specified by vm, locates a range of available physical
 * pages at or above paddr_min.  If found, the pages are marked as in use
 * and their base address is returned.  A TEST_ASSERT failure occurs if
 * not enough pages are available at or above paddr_min.
 */
1757 vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
1758 vm_paddr_t paddr_min, uint32_t memslot)
1759 {
1760 struct userspace_mem_region *region;
1761 sparsebit_idx_t pg, base;
1762
1763 TEST_ASSERT(num > 0, "Must allocate at least one page");
1764
1765 TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
1766 "not divisible by page size.\n"
1767 " paddr_min: 0x%lx page_size: 0x%x",
1768 paddr_min, vm->page_size);
1769
1770 region = memslot2region(vm, memslot);
1771 base = pg = paddr_min >> vm->page_shift;
1772
1773 do {
1774 for (; pg < base + num; ++pg) {
1775 if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
1776 base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
1777 break;
1778 }
1779 }
1780 } while (pg && pg != base + num);
1781
1782 if (pg == 0) {
1783 fprintf(stderr, "No guest physical page available, "
1784 "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
1785 paddr_min, vm->page_size, memslot);
1786 fputs("---- vm dump ----\n", stderr);
1787 vm_dump(stderr, vm, 2);
1788 abort();
1789 }
1790
1791 for (pg = base; pg < base + num; ++pg)
1792 sparsebit_clear(region->unused_phy_pages, pg);
1793
1794 return base * vm->page_size;
1795 }
1796
1797 vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
1798 uint32_t memslot)
1799 {
1800 return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
1801 }
1802
1803
1804 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
1805
1806 vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
1807 {
1808 return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
1809 }
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823 void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
1824 {
1825 return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
1826 }
1827
1828 unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm)
1829 {
1830 return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
1831 }
1832
1833 static unsigned int vm_calc_num_pages(unsigned int num_pages,
1834 unsigned int page_shift,
1835 unsigned int new_page_shift,
1836 bool ceil)
1837 {
1838 unsigned int n = 1 << (new_page_shift - page_shift);
1839
1840 if (page_shift >= new_page_shift)
1841 return num_pages * (1 << (page_shift - new_page_shift));
1842
1843 return num_pages / n + !!(ceil && num_pages % n);
1844 }
1845
1846 static inline int getpageshift(void)
1847 {
1848 return __builtin_ffs(getpagesize()) - 1;
1849 }
1850
1851 unsigned int
1852 vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
1853 {
1854 return vm_calc_num_pages(num_guest_pages,
1855 vm_guest_mode_params[mode].page_shift,
1856 getpageshift(), true);
1857 }
1858
1859 unsigned int
1860 vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
1861 {
1862 return vm_calc_num_pages(num_host_pages, getpageshift(),
1863 vm_guest_mode_params[mode].page_shift, false);
1864 }
1865
1866 unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
1867 {
1868 unsigned int n;
1869 n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
1870 return vm_adjust_num_guest_pages(mode, n);
1871 }
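
/*
 * Worked example (an illustration, not code from this library): with
 * VM_MODE_P52V48_64K the guest page size is 64KiB, so a 1MiB region needs
 * DIV_ROUND_UP(0x100000, 0x10000) = 16 guest pages.  On a 4KiB host,
 * vm_adjust_num_guest_pages() round-trips that through host pages
 * (16 guest pages == 256 host pages) and returns 16 unchanged, since the
 * two page sizes already divide evenly.
 */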
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888 struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
1889 struct kvm_stats_header *header)
1890 {
1891 struct kvm_stats_desc *stats_desc;
1892 ssize_t desc_size, total_size, ret;
1893
1894 desc_size = get_stats_descriptor_size(header);
1895 total_size = header->num_desc * desc_size;
1896
1897 stats_desc = calloc(header->num_desc, desc_size);
1898 TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");
1899
1900 ret = pread(stats_fd, stats_desc, total_size, header->desc_offset);
1901 TEST_ASSERT(ret == total_size, "Read KVM stats descriptors");
1902
1903 return stats_desc;
1904 }
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920 void read_stat_data(int stats_fd, struct kvm_stats_header *header,
1921 struct kvm_stats_desc *desc, uint64_t *data,
1922 size_t max_elements)
1923 {
1924 size_t nr_elements = min_t(ssize_t, desc->size, max_elements);
1925 size_t size = nr_elements * sizeof(*data);
1926 ssize_t ret;
1927
1928 TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name);
1929 TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name);
1930
1931 ret = pread(stats_fd, data, size,
1932 header->data_offset + desc->offset);
1933
1934 TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)",
1935 desc->name, errno, strerror(errno));
1936 TEST_ASSERT(ret == size,
1937 "pread() on stat '%s' read %ld bytes, wanted %lu bytes",
desc->name, ret, size);
1939 }
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954 void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
1955 size_t max_elements)
1956 {
1957 struct kvm_stats_desc *desc;
1958 size_t size_desc;
1959 int i;
1960
1961 if (!vm->stats_fd) {
1962 vm->stats_fd = vm_get_stats_fd(vm);
1963 read_stats_header(vm->stats_fd, &vm->stats_header);
1964 vm->stats_desc = read_stats_descriptors(vm->stats_fd,
1965 &vm->stats_header);
1966 }
1967
1968 size_desc = get_stats_descriptor_size(&vm->stats_header);
1969
1970 for (i = 0; i < vm->stats_header.num_desc; ++i) {
1971 desc = (void *)vm->stats_desc + (i * size_desc);
1972
1973 if (strcmp(desc->name, stat_name))
1974 continue;
1975
1976 read_stat_data(vm->stats_fd, &vm->stats_header, desc,
1977 data, max_elements);
1978
1979 break;
1980 }
1981 }
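
/*
 * Illustrative usage (a sketch, not part of this library): read a single
 * binary-stats value for the VM by name.  The stat name shown is an
 * assumption; the set of available names depends on the architecture and
 * kernel version.
 *
 *	uint64_t pages_4k;
 *
 *	__vm_get_stat(vm, "pages_4k", &pages_4k, 1);
 *	pr_info("4KiB pages mapped: %lu\n", pages_4k);
 */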