0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * tools/testing/selftests/kvm/lib/x86_64/processor.c
0004  *
0005  * Copyright (C) 2018, Google LLC.
0006  */
0007 
0008 #include "test_util.h"
0009 #include "kvm_util.h"
0010 #include "processor.h"
0011 
0012 #ifndef NUM_INTERRUPTS
0013 #define NUM_INTERRUPTS 256
0014 #endif
0015 
0016 #define DEFAULT_CODE_SELECTOR 0x8
0017 #define DEFAULT_DATA_SELECTOR 0x10
0018 
0019 #define MAX_NR_CPUID_ENTRIES 100
0020 
0021 vm_vaddr_t exception_handlers;
0022 
0023 static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
0024 {
0025     fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
0026         "rcx: 0x%.16llx rdx: 0x%.16llx\n",
0027         indent, "",
0028         regs->rax, regs->rbx, regs->rcx, regs->rdx);
0029     fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
0030         "rsp: 0x%.16llx rbp: 0x%.16llx\n",
0031         indent, "",
0032         regs->rsi, regs->rdi, regs->rsp, regs->rbp);
0033     fprintf(stream, "%*sr8:  0x%.16llx r9:  0x%.16llx "
0034         "r10: 0x%.16llx r11: 0x%.16llx\n",
0035         indent, "",
0036         regs->r8, regs->r9, regs->r10, regs->r11);
0037     fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
0038         "r14: 0x%.16llx r15: 0x%.16llx\n",
0039         indent, "",
0040         regs->r12, regs->r13, regs->r14, regs->r15);
0041     fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
0042         indent, "",
0043         regs->rip, regs->rflags);
0044 }
0045 
0046 static void segment_dump(FILE *stream, struct kvm_segment *segment,
0047              uint8_t indent)
0048 {
0049     fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
0050         "selector: 0x%.4x type: 0x%.2x\n",
0051         indent, "", segment->base, segment->limit,
0052         segment->selector, segment->type);
0053     fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
0054         "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
0055         indent, "", segment->present, segment->dpl,
0056         segment->db, segment->s, segment->l);
0057     fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
0058         "unusable: 0x%.2x padding: 0x%.2x\n",
0059         indent, "", segment->g, segment->avl,
0060         segment->unusable, segment->padding);
0061 }
0062 
0063 static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
0064             uint8_t indent)
0065 {
0066     fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
0067         "padding: 0x%.4x 0x%.4x 0x%.4x\n",
0068         indent, "", dtable->base, dtable->limit,
0069         dtable->padding[0], dtable->padding[1], dtable->padding[2]);
0070 }
0071 
0072 static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent)
0073 {
0074     unsigned int i;
0075 
0076     fprintf(stream, "%*scs:\n", indent, "");
0077     segment_dump(stream, &sregs->cs, indent + 2);
0078     fprintf(stream, "%*sds:\n", indent, "");
0079     segment_dump(stream, &sregs->ds, indent + 2);
0080     fprintf(stream, "%*ses:\n", indent, "");
0081     segment_dump(stream, &sregs->es, indent + 2);
0082     fprintf(stream, "%*sfs:\n", indent, "");
0083     segment_dump(stream, &sregs->fs, indent + 2);
0084     fprintf(stream, "%*sgs:\n", indent, "");
0085     segment_dump(stream, &sregs->gs, indent + 2);
0086     fprintf(stream, "%*sss:\n", indent, "");
0087     segment_dump(stream, &sregs->ss, indent + 2);
0088     fprintf(stream, "%*str:\n", indent, "");
0089     segment_dump(stream, &sregs->tr, indent + 2);
0090     fprintf(stream, "%*sldt:\n", indent, "");
0091     segment_dump(stream, &sregs->ldt, indent + 2);
0092 
0093     fprintf(stream, "%*sgdt:\n", indent, "");
0094     dtable_dump(stream, &sregs->gdt, indent + 2);
0095     fprintf(stream, "%*sidt:\n", indent, "");
0096     dtable_dump(stream, &sregs->idt, indent + 2);
0097 
0098     fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
0099         "cr3: 0x%.16llx cr4: 0x%.16llx\n",
0100         indent, "",
0101         sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
0102     fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
0103         "apic_base: 0x%.16llx\n",
0104         indent, "",
0105         sregs->cr8, sregs->efer, sregs->apic_base);
0106 
0107     fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
0108     for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
0109         fprintf(stream, "%*s%.16llx\n", indent + 2, "",
0110             sregs->interrupt_bitmap[i]);
0111     }
0112 }
0113 
0114 void virt_arch_pgd_alloc(struct kvm_vm *vm)
0115 {
0116     TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
0117         "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
0118 
0119     /* If needed, create page map l4 table. */
0120     if (!vm->pgd_created) {
0121         vm->pgd = vm_alloc_page_table(vm);
0122         vm->pgd_created = true;
0123     }
0124 }
0125 
0126 static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
0127               int level)
0128 {
0129     uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift);
0130     int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
0131 
0132     return &page_table[index];
0133 }
0134 
0135 static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
0136                        uint64_t pt_pfn,
0137                        uint64_t vaddr,
0138                        uint64_t paddr,
0139                        int current_level,
0140                        int target_level)
0141 {
0142     uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level);
0143 
0144     if (!(*pte & PTE_PRESENT_MASK)) {
0145         *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
0146         if (current_level == target_level)
0147             *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
0148         else
0149             *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
0150     } else {
0151         /*
0152          * Entry already present.  Assert that the caller doesn't want
0153          * a hugepage at this level, and that there isn't a hugepage at
0154          * this level.
0155          */
0156         TEST_ASSERT(current_level != target_level,
0157                 "Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
0158                 current_level, vaddr);
0159         TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
0160                 "Cannot create page table at level: %u, vaddr: 0x%lx\n",
0161                 current_level, vaddr);
0162     }
0163     return pte;
0164 }
0165 
0166 void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
0167 {
0168     const uint64_t pg_size = PG_LEVEL_SIZE(level);
0169     uint64_t *pml4e, *pdpe, *pde;
0170     uint64_t *pte;
0171 
0172     TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
0173             "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
0174 
0175     TEST_ASSERT((vaddr % pg_size) == 0,
0176             "Virtual address not aligned,\n"
0177             "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
0178     TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
0179             "Invalid virtual address, vaddr: 0x%lx", vaddr);
0180     TEST_ASSERT((paddr % pg_size) == 0,
0181             "Physical address not aligned,\n"
0182             "  paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
0183     TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
0184             "Physical address beyond maximum supported,\n"
0185             "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
0186             paddr, vm->max_gfn, vm->page_size);
0187 
0188     /*
0189      * Allocate upper level page tables, if not already present.  Return
0190      * early if a hugepage was created.
0191      */
0192     pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
0193                       vaddr, paddr, PG_LEVEL_512G, level);
0194     if (*pml4e & PTE_LARGE_MASK)
0195         return;
0196 
0197     pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, PG_LEVEL_1G, level);
0198     if (*pdpe & PTE_LARGE_MASK)
0199         return;
0200 
0201     pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, PG_LEVEL_2M, level);
0202     if (*pde & PTE_LARGE_MASK)
0203         return;
0204 
0205     /* Fill in page table entry. */
0206     pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K);
0207     TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
0208             "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
0209     *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
0210 }
0211 
0212 void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
0213 {
0214     __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
0215 }
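
/*
 * Usage sketch (illustrative, assuming a VM created in VM_MODE_PXXV48_4K and
 * suitably aligned guest addresses): tests normally go through virt_map()
 * for 4K mappings and call __virt_pg_map() directly only for hugepages.
 *
 *     virt_map(vm, gva, gpa, 1);                       // one 4K page
 *     __virt_pg_map(vm, gva_2m, gpa_2m, PG_LEVEL_2M);  // one 2M hugepage
 */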
0216 
0217 static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm,
0218                       struct kvm_vcpu *vcpu,
0219                       uint64_t vaddr)
0220 {
0221     uint16_t index[4];
0222     uint64_t *pml4e, *pdpe, *pde;
0223     uint64_t *pte;
0224     struct kvm_sregs sregs;
0225     uint64_t rsvd_mask = 0;
0226 
0227     /* Set the high bits in the reserved mask. */
0228     if (vm->pa_bits < 52)
0229         rsvd_mask = GENMASK_ULL(51, vm->pa_bits);
0230 
0231     /*
0232      * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries
0233      * with 4-Level Paging and 5-Level Paging".
0234      * If IA32_EFER.NXE = 0 and the P flag of a paging-structure entry is 1,
0235      * the XD flag (bit 63) is reserved.
0236      */
0237     vcpu_sregs_get(vcpu, &sregs);
0238     if ((sregs.efer & EFER_NX) == 0) {
0239         rsvd_mask |= PTE_NX_MASK;
0240     }
0241 
0242     TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
0243         "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
0244     TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
0245         (vaddr >> vm->page_shift)),
0246         "Invalid virtual address, vaddr: 0x%lx",
0247         vaddr);
0248     /*
0249      * Based on the mode check above there are 48 bits in the vaddr, so
0250      * shift 16 to sign extend the last bit (bit-47),
0251      */
0252     TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
0253         "Canonical check failed.  The virtual address is invalid.");
0254 
0255     index[0] = (vaddr >> 12) & 0x1ffu;
0256     index[1] = (vaddr >> 21) & 0x1ffu;
0257     index[2] = (vaddr >> 30) & 0x1ffu;
0258     index[3] = (vaddr >> 39) & 0x1ffu;
0259 
0260     pml4e = addr_gpa2hva(vm, vm->pgd);
0261     TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK,
0262         "Expected pml4e to be present for gva: 0x%08lx", vaddr);
0263     TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0,
0264         "Unexpected reserved bits set.");
0265 
0266     pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
0267     TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK,
0268         "Expected pdpe to be present for gva: 0x%08lx", vaddr);
0269     TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK),
0270         "Expected pdpe to map a pde not a 1-GByte page.");
0271     TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0,
0272         "Unexpected reserved bits set.");
0273 
0274     pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
0275     TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK,
0276         "Expected pde to be present for gva: 0x%08lx", vaddr);
0277     TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK),
0278         "Expected pde to map a pte not a 2-MByte page.");
0279     TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0,
0280         "Unexpected reserved bits set.");
0281 
0282     pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
0283     TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK,
0284         "Expected pte to be present for gva: 0x%08lx", vaddr);
0285 
0286     return &pte[index[0]];
0287 }
0288 
0289 uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
0290                  uint64_t vaddr)
0291 {
0292     uint64_t *pte = _vm_get_page_table_entry(vm, vcpu, vaddr);
0293 
0294     return *(uint64_t *)pte;
0295 }
0296 
0297 void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
0298                  uint64_t vaddr, uint64_t pte)
0299 {
0300     uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpu, vaddr);
0301 
0302     *(uint64_t *)new_pte = pte;
0303 }
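
/*
 * Sketch of how a test might use the two accessors above, e.g. to make a
 * guest page read-only before poking it from the guest (test_gva is a
 * placeholder for a previously mapped guest virtual address):
 *
 *     uint64_t pte = vm_get_page_table_entry(vm, vcpu, test_gva);
 *
 *     vm_set_page_table_entry(vm, vcpu, test_gva, pte & ~PTE_WRITABLE_MASK);
 */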
0304 
0305 void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
0306 {
0307     uint64_t *pml4e, *pml4e_start;
0308     uint64_t *pdpe, *pdpe_start;
0309     uint64_t *pde, *pde_start;
0310     uint64_t *pte, *pte_start;
0311 
0312     if (!vm->pgd_created)
0313         return;
0314 
0315     fprintf(stream, "%*s                                          "
0316         "                no\n", indent, "");
0317     fprintf(stream, "%*s      index hvaddr         gpaddr         "
0318         "addr         w exec dirty\n",
0319         indent, "");
0320     pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
0321     for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
0322         pml4e = &pml4e_start[n1];
0323         if (!(*pml4e & PTE_PRESENT_MASK))
0324             continue;
0325         fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
0326             " %u\n",
0327             indent, "",
0328             pml4e - pml4e_start, pml4e,
0329             addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
0330             !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
0331 
0332         pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
0333         for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
0334             pdpe = &pdpe_start[n2];
0335             if (!(*pdpe & PTE_PRESENT_MASK))
0336                 continue;
0337             fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10llx "
0338                 "%u  %u\n",
0339                 indent, "",
0340                 pdpe - pdpe_start, pdpe,
0341                 addr_hva2gpa(vm, pdpe),
0342                 PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
0343                 !!(*pdpe & PTE_NX_MASK));
0344 
0345             pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
0346             for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
0347                 pde = &pde_start[n3];
0348                 if (!(*pde & PTE_PRESENT_MASK))
0349                     continue;
0350                 fprintf(stream, "%*spde   0x%-3zx %p "
0351                     "0x%-12lx 0x%-10llx %u  %u\n",
0352                     indent, "", pde - pde_start, pde,
0353                     addr_hva2gpa(vm, pde),
0354                     PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
0355                     !!(*pde & PTE_NX_MASK));
0356 
0357                 pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
0358                 for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
0359                     pte = &pte_start[n4];
0360                     if (!(*pte & PTE_PRESENT_MASK))
0361                         continue;
0362                     fprintf(stream, "%*spte   0x%-3zx %p "
0363                         "0x%-12lx 0x%-10llx %u  %u "
0364                         "    %u    0x%-10lx\n",
0365                         indent, "",
0366                         pte - pte_start, pte,
0367                         addr_hva2gpa(vm, pte),
0368                         PTE_GET_PFN(*pte),
0369                         !!(*pte & PTE_WRITABLE_MASK),
0370                         !!(*pte & PTE_NX_MASK),
0371                         !!(*pte & PTE_DIRTY_MASK),
0372                         ((uint64_t) n1 << 27)
0373                             | ((uint64_t) n2 << 18)
0374                             | ((uint64_t) n3 << 9)
0375                             | ((uint64_t) n4));
0376                 }
0377             }
0378         }
0379     }
0380 }
0381 
0382 /*
0383  * Set Unusable Segment
0384  *
0385  * Input Args: None
0386  *
0387  * Output Args:
0388  *   segp - Pointer to segment register
0389  *
0390  * Return: None
0391  *
0392  * Sets the segment register pointed to by @segp to an unusable state.
0393  */
0394 static void kvm_seg_set_unusable(struct kvm_segment *segp)
0395 {
0396     memset(segp, 0, sizeof(*segp));
0397     segp->unusable = true;
0398 }
0399 
0400 static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
0401 {
0402     void *gdt = addr_gva2hva(vm, vm->gdt);
0403     struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
0404 
0405     desc->limit0 = segp->limit & 0xFFFF;
0406     desc->base0 = segp->base & 0xFFFF;
0407     desc->base1 = segp->base >> 16;
0408     desc->type = segp->type;
0409     desc->s = segp->s;
0410     desc->dpl = segp->dpl;
0411     desc->p = segp->present;
0412     desc->limit1 = segp->limit >> 16;
0413     desc->avl = segp->avl;
0414     desc->l = segp->l;
0415     desc->db = segp->db;
0416     desc->g = segp->g;
0417     desc->base2 = segp->base >> 24;
0418     if (!segp->s)
0419         desc->base3 = segp->base >> 32;
0420 }
0421 
0422 
0423 /*
0424  * Set Long Mode Flat Kernel Code Segment
0425  *
0426  * Input Args:
0427  *   vm - VM whose GDT is being filled, or NULL to only write segp
0428  *   selector - selector value
0429  *
0430  * Output Args:
0431  *   segp - Pointer to KVM segment
0432  *
0433  * Return: None
0434  *
0435  * Sets up the KVM segment pointed to by @segp, to be a code segment
0436  * with the selector value given by @selector.
0437  */
0438 static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
0439     struct kvm_segment *segp)
0440 {
0441     memset(segp, 0, sizeof(*segp));
0442     segp->selector = selector;
0443     segp->limit = 0xFFFFFFFFu;
0444     segp->s = 0x1; /* kTypeCodeData */
0445     segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
0446                       * | kFlagCodeReadable
0447                       */
0448     segp->g = true;
0449     segp->l = true;
0450     segp->present = 1;
0451     if (vm)
0452         kvm_seg_fill_gdt_64bit(vm, segp);
0453 }
0454 
0455 /*
0456  * Set Long Mode Flat Kernel Data Segment
0457  *
0458  * Input Args:
0459  *   vm - VM whose GDT is being filled, or NULL to only write segp
0460  *   selector - selector value
0461  *
0462  * Output Args:
0463  *   segp - Pointer to KVM segment
0464  *
0465  * Return: None
0466  *
0467  * Sets up the KVM segment pointed to by @segp, to be a data segment
0468  * with the selector value given by @selector.
0469  */
0470 static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
0471     struct kvm_segment *segp)
0472 {
0473     memset(segp, 0, sizeof(*segp));
0474     segp->selector = selector;
0475     segp->limit = 0xFFFFFFFFu;
0476     segp->s = 0x1; /* kTypeCodeData */
0477     segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
0478                       * | kFlagDataWritable
0479                       */
0480     segp->g = true;
0481     segp->present = true;
0482     if (vm)
0483         kvm_seg_fill_gdt_64bit(vm, segp);
0484 }
0485 
0486 vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
0487 {
0488     uint16_t index[4];
0489     uint64_t *pml4e, *pdpe, *pde;
0490     uint64_t *pte;
0491 
0492     TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
0493         "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
0494 
0495     index[0] = (gva >> 12) & 0x1ffu;
0496     index[1] = (gva >> 21) & 0x1ffu;
0497     index[2] = (gva >> 30) & 0x1ffu;
0498     index[3] = (gva >> 39) & 0x1ffu;
0499 
0500     if (!vm->pgd_created)
0501         goto unmapped_gva;
0502     pml4e = addr_gpa2hva(vm, vm->pgd);
0503     if (!(pml4e[index[3]] & PTE_PRESENT_MASK))
0504         goto unmapped_gva;
0505 
0506     pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
0507     if (!(pdpe[index[2]] & PTE_PRESENT_MASK))
0508         goto unmapped_gva;
0509 
0510     pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
0511     if (!(pde[index[1]] & PTE_PRESENT_MASK))
0512         goto unmapped_gva;
0513 
0514     pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
0515     if (!(pte[index[0]] & PTE_PRESENT_MASK))
0516         goto unmapped_gva;
0517 
0518     return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK);
0519 
0520 unmapped_gva:
0521     TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
0522     exit(EXIT_FAILURE);
0523 }
0524 
0525 static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
0526 {
0527     if (!vm->gdt)
0528         vm->gdt = vm_vaddr_alloc_page(vm);
0529 
0530     dt->base = vm->gdt;
0531     dt->limit = getpagesize();
0532 }
0533 
0534 static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
0535                 int selector)
0536 {
0537     if (!vm->tss)
0538         vm->tss = vm_vaddr_alloc_page(vm);
0539 
0540     memset(segp, 0, sizeof(*segp));
0541     segp->base = vm->tss;
0542     segp->limit = 0x67;
0543     segp->selector = selector;
0544     segp->type = 0xb;
0545     segp->present = 1;
0546     kvm_seg_fill_gdt_64bit(vm, segp);
0547 }
0548 
0549 static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
0550 {
0551     struct kvm_sregs sregs;
0552 
0553     /* Set mode specific system register values. */
0554     vcpu_sregs_get(vcpu, &sregs);
0555 
0556     sregs.idt.limit = 0;
0557 
0558     kvm_setup_gdt(vm, &sregs.gdt);
0559 
0560     switch (vm->mode) {
0561     case VM_MODE_PXXV48_4K:
0562         sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
0563         sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
0564         sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
0565 
0566         kvm_seg_set_unusable(&sregs.ldt);
0567         kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
0568         kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
0569         kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
0570         kvm_setup_tss_64bit(vm, &sregs.tr, 0x18);
0571         break;
0572 
0573     default:
0574         TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
0575     }
0576 
0577     sregs.cr3 = vm->pgd;
0578     vcpu_sregs_set(vcpu, &sregs);
0579 }
0580 
0581 void __vm_xsave_require_permission(int bit, const char *name)
0582 {
0583     int kvm_fd;
0584     u64 bitmask;
0585     long rc;
0586     struct kvm_device_attr attr = {
0587         .group = 0,
0588         .attr = KVM_X86_XCOMP_GUEST_SUPP,
0589         .addr = (unsigned long) &bitmask
0590     };
0591 
0592     TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));
0593 
0594     kvm_fd = open_kvm_dev_path_or_exit();
0595     rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
0596     close(kvm_fd);
0597 
0598     if (rc == -1 && (errno == ENXIO || errno == EINVAL))
0599         __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported");
0600 
0601     TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
0602 
0603     __TEST_REQUIRE(bitmask & (1ULL << bit),
0604                "Required XSAVE feature '%s' not supported", name);
0605 
0606     TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit));
0607 
0608     rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
0609     TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
0610     TEST_ASSERT(bitmask & (1ULL << bit),
0611             "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx",
0612             bitmask);
0613 }
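
/*
 * Sketch: a test that needs a dynamically enabled XSAVE feature asks for it
 * before creating vCPUs, e.g. for AMX tile data (bit 18 per the SDM; the
 * string is only used in failure messages):
 *
 *     __vm_xsave_require_permission(18, "XTILEDATA");
 */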
0614 
0615 struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
0616                   void *guest_code)
0617 {
0618     struct kvm_mp_state mp_state;
0619     struct kvm_regs regs;
0620     vm_vaddr_t stack_vaddr;
0621     struct kvm_vcpu *vcpu;
0622 
0623     stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
0624                      DEFAULT_GUEST_STACK_VADDR_MIN);
0625 
0626     vcpu = __vm_vcpu_add(vm, vcpu_id);
0627     vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
0628     vcpu_setup(vm, vcpu);
0629 
0630     /* Setup guest general purpose registers */
0631     vcpu_regs_get(vcpu, &regs);
0632     regs.rflags = regs.rflags | 0x2;
0633     regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
0634     regs.rip = (unsigned long) guest_code;
0635     vcpu_regs_set(vcpu, &regs);
0636 
0637     /* Setup the MP state */
0638     mp_state.mp_state = 0;
0639     vcpu_mp_state_set(vcpu, &mp_state);
0640 
0641     return vcpu;
0642 }
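
/*
 * This is the arch hook behind vm_vcpu_add(); most tests never call it
 * directly and instead rely on the generic helpers, e.g. (guest_main being a
 * test-supplied guest function):
 *
 *     struct kvm_vcpu *vcpu;
 *     struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, guest_main);
 */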
0643 
0644 struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id)
0645 {
0646     struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
0647 
0648     vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
0649 
0650     return vcpu;
0651 }
0652 
0653 void vcpu_arch_free(struct kvm_vcpu *vcpu)
0654 {
0655     if (vcpu->cpuid)
0656         free(vcpu->cpuid);
0657 }
0658 
0659 const struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
0660 {
0661     static struct kvm_cpuid2 *cpuid;
0662     int kvm_fd;
0663 
0664     if (cpuid)
0665         return cpuid;
0666 
0667     cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
0668     kvm_fd = open_kvm_dev_path_or_exit();
0669 
0670     kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
0671 
0672     close(kvm_fd);
0673     return cpuid;
0674 }
0675 
0676 bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
0677            struct kvm_x86_cpu_feature feature)
0678 {
0679     const struct kvm_cpuid_entry2 *entry;
0680     int i;
0681 
0682     for (i = 0; i < cpuid->nent; i++) {
0683         entry = &cpuid->entries[i];
0684 
0685         /*
0686          * The output registers in kvm_cpuid_entry2 are in alphabetical
0687          * order, but kvm_x86_cpu_feature matches that mess, so yay
0688          * pointer shenanigans!
0689          */
0690         if (entry->function == feature.function &&
0691             entry->index == feature.index)
0692             return (&entry->eax)[feature.reg] & BIT(feature.bit);
0693     }
0694 
0695     return false;
0696 }
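
/*
 * Tests usually reach kvm_cpuid_has() through the kvm_cpu_has() wrapper that
 * plugs in kvm_get_supported_cpuid(), as in the X86_FEATURE_XFD check in
 * __vm_xsave_require_permission() above, e.g.:
 *
 *     TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
 */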
0697 
0698 uint64_t kvm_get_feature_msr(uint64_t msr_index)
0699 {
0700     struct {
0701         struct kvm_msrs header;
0702         struct kvm_msr_entry entry;
0703     } buffer = {};
0704     int r, kvm_fd;
0705 
0706     buffer.header.nmsrs = 1;
0707     buffer.entry.index = msr_index;
0708     kvm_fd = open_kvm_dev_path_or_exit();
0709 
0710     r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
0711     TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r));
0712 
0713     close(kvm_fd);
0714     return buffer.entry.data;
0715 }
0716 
0717 void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
0718 {
0719     TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID");
0720 
0721     /* Allow overriding the default CPUID. */
0722     if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) {
0723         free(vcpu->cpuid);
0724         vcpu->cpuid = NULL;
0725     }
0726 
0727     if (!vcpu->cpuid)
0728         vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent);
0729 
0730     memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent));
0731     vcpu_set_cpuid(vcpu);
0732 }
0733 
0734 void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr)
0735 {
0736     struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, 0x80000008);
0737 
0738     entry->eax = (entry->eax & ~0xff) | maxphyaddr;
0739     vcpu_set_cpuid(vcpu);
0740 }
0741 
0742 void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
0743 {
0744     struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function);
0745 
0746     entry->eax = 0;
0747     entry->ebx = 0;
0748     entry->ecx = 0;
0749     entry->edx = 0;
0750     vcpu_set_cpuid(vcpu);
0751 }
0752 
0753 void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
0754                      struct kvm_x86_cpu_feature feature,
0755                      bool set)
0756 {
0757     struct kvm_cpuid_entry2 *entry;
0758     u32 *reg;
0759 
0760     entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
0761     reg = (&entry->eax) + feature.reg;
0762 
0763     if (set)
0764         *reg |= BIT(feature.bit);
0765     else
0766         *reg &= ~BIT(feature.bit);
0767 
0768     vcpu_set_cpuid(vcpu);
0769 }
0770 
0771 uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index)
0772 {
0773     struct {
0774         struct kvm_msrs header;
0775         struct kvm_msr_entry entry;
0776     } buffer = {};
0777 
0778     buffer.header.nmsrs = 1;
0779     buffer.entry.index = msr_index;
0780 
0781     vcpu_msrs_get(vcpu, &buffer.header);
0782 
0783     return buffer.entry.data;
0784 }
0785 
0786 int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value)
0787 {
0788     struct {
0789         struct kvm_msrs header;
0790         struct kvm_msr_entry entry;
0791     } buffer = {};
0792 
0793     memset(&buffer, 0, sizeof(buffer));
0794     buffer.header.nmsrs = 1;
0795     buffer.entry.index = msr_index;
0796     buffer.entry.data = msr_value;
0797 
0798     return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header);
0799 }
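
/*
 * Usage sketch (MSR_IA32_MISC_ENABLE is illustrative): vcpu_get_msr() asserts
 * on failure, while _vcpu_set_msr() returns the raw KVM_SET_MSRS result so a
 * test can check whether the write was accepted (1 means one MSR written):
 *
 *     uint64_t val = vcpu_get_msr(vcpu, MSR_IA32_MISC_ENABLE);
 *     int r = _vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE, val);
 *
 *     TEST_ASSERT(r == 1, "KVM_SET_MSRS should have set one MSR");
 */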
0800 
0801 void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
0802 {
0803     va_list ap;
0804     struct kvm_regs regs;
0805 
0806     TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
0807             "  num: %u\n",
0808             num);
0809 
0810     va_start(ap, num);
0811     vcpu_regs_get(vcpu, &regs);
0812 
0813     if (num >= 1)
0814         regs.rdi = va_arg(ap, uint64_t);
0815 
0816     if (num >= 2)
0817         regs.rsi = va_arg(ap, uint64_t);
0818 
0819     if (num >= 3)
0820         regs.rdx = va_arg(ap, uint64_t);
0821 
0822     if (num >= 4)
0823         regs.rcx = va_arg(ap, uint64_t);
0824 
0825     if (num >= 5)
0826         regs.r8 = va_arg(ap, uint64_t);
0827 
0828     if (num >= 6)
0829         regs.r9 = va_arg(ap, uint64_t);
0830 
0831     vcpu_regs_set(vcpu, &regs);
0832     va_end(ap);
0833 }
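
/*
 * The arguments land in the System V AMD64 parameter registers, so after a
 * call like
 *
 *     vcpu_args_set(vcpu, 2, 0xdeadbeefull, 42);
 *
 * the guest's entry function sees 0xdeadbeef in RDI (first parameter) and 42
 * in RSI (second parameter).
 */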
0834 
0835 void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
0836 {
0837     struct kvm_regs regs;
0838     struct kvm_sregs sregs;
0839 
0840     fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id);
0841 
0842     fprintf(stream, "%*sregs:\n", indent + 2, "");
0843     vcpu_regs_get(vcpu, &regs);
0844     regs_dump(stream, &regs, indent + 4);
0845 
0846     fprintf(stream, "%*ssregs:\n", indent + 2, "");
0847     vcpu_sregs_get(vcpu, &sregs);
0848     sregs_dump(stream, &sregs, indent + 4);
0849 }
0850 
0851 static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs)
0852 {
0853     struct kvm_msr_list *list;
0854     struct kvm_msr_list nmsrs;
0855     int kvm_fd, r;
0856 
0857     kvm_fd = open_kvm_dev_path_or_exit();
0858 
0859     nmsrs.nmsrs = 0;
0860     if (!feature_msrs)
0861         r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
0862     else
0863         r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs);
0864 
0865     TEST_ASSERT(r == -1 && errno == E2BIG,
0866             "Expected -E2BIG, got rc: %i errno: %i (%s)",
0867             r, errno, strerror(errno));
0868 
0869     list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0]));
0870     TEST_ASSERT(list, "-ENOMEM when allocating MSR index list");
0871     list->nmsrs = nmsrs.nmsrs;
0872 
0873     if (!feature_msrs)
0874         kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
0875     else
0876         kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
0877     close(kvm_fd);
0878 
0879     TEST_ASSERT(list->nmsrs == nmsrs.nmsrs,
0880             "Number of MSRs in list changed, was %d, now %d",
0881             nmsrs.nmsrs, list->nmsrs);
0882     return list;
0883 }
0884 
0885 const struct kvm_msr_list *kvm_get_msr_index_list(void)
0886 {
0887     static const struct kvm_msr_list *list;
0888 
0889     if (!list)
0890         list = __kvm_get_msr_index_list(false);
0891     return list;
0892 }
0893 
0894 
0895 const struct kvm_msr_list *kvm_get_feature_msr_index_list(void)
0896 {
0897     static const struct kvm_msr_list *list;
0898 
0899     if (!list)
0900         list = __kvm_get_msr_index_list(true);
0901     return list;
0902 }
0903 
0904 bool kvm_msr_is_in_save_restore_list(uint32_t msr_index)
0905 {
0906     const struct kvm_msr_list *list = kvm_get_msr_index_list();
0907     int i;
0908 
0909     for (i = 0; i < list->nmsrs; ++i) {
0910         if (list->indices[i] == msr_index)
0911             return true;
0912     }
0913 
0914     return false;
0915 }
0916 
0917 static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu,
0918                   struct kvm_x86_state *state)
0919 {
0920     int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2);
0921 
0922     if (size) {
0923         state->xsave = malloc(size);
0924         vcpu_xsave2_get(vcpu, state->xsave);
0925     } else {
0926         state->xsave = malloc(sizeof(struct kvm_xsave));
0927         vcpu_xsave_get(vcpu, state->xsave);
0928     }
0929 }
0930 
0931 struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu)
0932 {
0933     const struct kvm_msr_list *msr_list = kvm_get_msr_index_list();
0934     struct kvm_x86_state *state;
0935     int i;
0936 
0937     static int nested_size = -1;
0938 
0939     if (nested_size == -1) {
0940         nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
0941         TEST_ASSERT(nested_size <= sizeof(state->nested_),
0942                 "Nested state size too big, %i > %zi",
0943                 nested_size, sizeof(state->nested_));
0944     }
0945 
0946     /*
0947      * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
0948      * guest state is consistent only after userspace re-enters the
0949      * kernel with KVM_RUN.  Complete IO prior to migrating state
0950      * to a new VM.
0951      */
0952     vcpu_run_complete_io(vcpu);
0953 
0954     state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0]));
0955 
0956     vcpu_events_get(vcpu, &state->events);
0957     vcpu_mp_state_get(vcpu, &state->mp_state);
0958     vcpu_regs_get(vcpu, &state->regs);
0959     vcpu_save_xsave_state(vcpu, state);
0960 
0961     if (kvm_has_cap(KVM_CAP_XCRS))
0962         vcpu_xcrs_get(vcpu, &state->xcrs);
0963 
0964     vcpu_sregs_get(vcpu, &state->sregs);
0965 
0966     if (nested_size) {
0967         state->nested.size = sizeof(state->nested_);
0968 
0969         vcpu_nested_state_get(vcpu, &state->nested);
0970         TEST_ASSERT(state->nested.size <= nested_size,
0971                 "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
0972                 state->nested.size, nested_size);
0973     } else {
0974         state->nested.size = 0;
0975     }
0976 
0977     state->msrs.nmsrs = msr_list->nmsrs;
0978     for (i = 0; i < msr_list->nmsrs; i++)
0979         state->msrs.entries[i].index = msr_list->indices[i];
0980     vcpu_msrs_get(vcpu, &state->msrs);
0981 
0982     vcpu_debugregs_get(vcpu, &state->debugregs);
0983 
0984     return state;
0985 }
0986 
0987 void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state)
0988 {
0989     vcpu_sregs_set(vcpu, &state->sregs);
0990     vcpu_msrs_set(vcpu, &state->msrs);
0991 
0992     if (kvm_has_cap(KVM_CAP_XCRS))
0993         vcpu_xcrs_set(vcpu, &state->xcrs);
0994 
0995     vcpu_xsave_set(vcpu,  state->xsave);
0996     vcpu_events_set(vcpu, &state->events);
0997     vcpu_mp_state_set(vcpu, &state->mp_state);
0998     vcpu_debugregs_set(vcpu, &state->debugregs);
0999     vcpu_regs_set(vcpu, &state->regs);
1000 
1001     if (state->nested.size)
1002         vcpu_nested_state_set(vcpu, &state->nested);
1003 }
1004 
1005 void kvm_x86_state_cleanup(struct kvm_x86_state *state)
1006 {
1007     free(state->xsave);
1008     free(state);
1009 }
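
/*
 * Typical save/restore flow in a migration-style test (a sketch; it assumes
 * the generic kvm_vm_release() and vm_recreate_with_one_vcpu() helpers from
 * kvm_util):
 *
 *     state = vcpu_save_state(vcpu);
 *     kvm_vm_release(vm);
 *     vcpu = vm_recreate_with_one_vcpu(vm);
 *     vcpu_load_state(vcpu, state);
 *     kvm_x86_state_cleanup(state);
 */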
1010 
1011 static bool cpu_vendor_string_is(const char *vendor)
1012 {
1013     const uint32_t *chunk = (const uint32_t *)vendor;
1014     uint32_t eax, ebx, ecx, edx;
1015 
1016     cpuid(0, &eax, &ebx, &ecx, &edx);
1017     return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
1018 }
1019 
1020 bool is_intel_cpu(void)
1021 {
1022     return cpu_vendor_string_is("GenuineIntel");
1023 }
1024 
1025 /*
1026  * Exclude early K5 samples with a vendor string of "AMDisbetter!"
1027  */
1028 bool is_amd_cpu(void)
1029 {
1030     return cpu_vendor_string_is("AuthenticAMD");
1031 }
1032 
1033 void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
1034 {
1035     const struct kvm_cpuid_entry2 *entry;
1036     bool pae;
1037 
1038     /* SDM 4.1.4 */
1039     if (kvm_get_cpuid_max_extended() < 0x80000008) {
1040         pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
1041         *pa_bits = pae ? 36 : 32;
1042         *va_bits = 32;
1043     } else {
1044         entry = kvm_get_supported_cpuid_entry(0x80000008);
1045         *pa_bits = entry->eax & 0xff;
1046         *va_bits = (entry->eax >> 8) & 0xff;
1047     }
1048 }
1049 
1050 struct idt_entry {
1051     uint16_t offset0;
1052     uint16_t selector;
1053     uint16_t ist : 3;
1054     uint16_t : 5;
1055     uint16_t type : 4;
1056     uint16_t : 1;
1057     uint16_t dpl : 2;
1058     uint16_t p : 1;
1059     uint16_t offset1;
1060     uint32_t offset2; uint32_t reserved;
1061 };
1062 
1063 static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
1064               int dpl, unsigned short selector)
1065 {
1066     struct idt_entry *base =
1067         (struct idt_entry *)addr_gva2hva(vm, vm->idt);
1068     struct idt_entry *e = &base[vector];
1069 
1070     memset(e, 0, sizeof(*e));
1071     e->offset0 = addr;
1072     e->selector = selector;
1073     e->ist = 0;
1074     e->type = 14;
1075     e->dpl = dpl;
1076     e->p = 1;
1077     e->offset1 = addr >> 16;
1078     e->offset2 = addr >> 32;
1079 }
1080 
1081 
1082 static bool kvm_fixup_exception(struct ex_regs *regs)
1083 {
1084     if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10)
1085         return false;
1086 
1087     if (regs->vector == DE_VECTOR)
1088         return false;
1089 
1090     regs->rip = regs->r11;
1091     regs->r9 = regs->vector;
1092     return true;
1093 }
1094 
1095 void kvm_exit_unexpected_vector(uint32_t value)
1096 {
1097     ucall(UCALL_UNHANDLED, 1, value);
1098 }
1099 
1100 void route_exception(struct ex_regs *regs)
1101 {
1102     typedef void(*handler)(struct ex_regs *);
1103     handler *handlers = (handler *)exception_handlers;
1104 
1105     if (handlers && handlers[regs->vector]) {
1106         handlers[regs->vector](regs);
1107         return;
1108     }
1109 
1110     if (kvm_fixup_exception(regs))
1111         return;
1112 
1113     kvm_exit_unexpected_vector(regs->vector);
1114 }
1115 
1116 void vm_init_descriptor_tables(struct kvm_vm *vm)
1117 {
1118     extern void *idt_handlers;
1119     int i;
1120 
1121     vm->idt = vm_vaddr_alloc_page(vm);
1122     vm->handlers = vm_vaddr_alloc_page(vm);
0123     /* Handlers have the same address in both address spaces. */
1124     for (i = 0; i < NUM_INTERRUPTS; i++)
1125         set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
1126             DEFAULT_CODE_SELECTOR);
1127 }
1128 
1129 void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
1130 {
1131     struct kvm_vm *vm = vcpu->vm;
1132     struct kvm_sregs sregs;
1133 
1134     vcpu_sregs_get(vcpu, &sregs);
1135     sregs.idt.base = vm->idt;
1136     sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
1137     sregs.gdt.base = vm->gdt;
1138     sregs.gdt.limit = getpagesize() - 1;
1139     kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs);
1140     vcpu_sregs_set(vcpu, &sregs);
1141     *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
1142 }
1143 
1144 void vm_install_exception_handler(struct kvm_vm *vm, int vector,
1145                    void (*handler)(struct ex_regs *))
1146 {
1147     vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
1148 
1149     handlers[vector] = (vm_vaddr_t)handler;
1150 }
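
/*
 * Sketch of the usual setup in a test that expects a vectored event
 * (guest_gp_handler is a test-supplied handler, GP_VECTOR comes from
 * processor.h); the descriptor tables must be initialized first:
 *
 *     vm_init_descriptor_tables(vm);
 *     vcpu_init_descriptor_tables(vcpu);
 *     vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
 */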
1151 
1152 void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
1153 {
1154     struct ucall uc;
1155 
1156     if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) {
1157         uint64_t vector = uc.args[0];
1158 
1159         TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)",
1160               vector);
1161     }
1162 }
1163 
1164 const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
1165                            uint32_t function, uint32_t index)
1166 {
1167     int i;
1168 
1169     for (i = 0; i < cpuid->nent; i++) {
1170         if (cpuid->entries[i].function == function &&
1171             cpuid->entries[i].index == index)
1172             return &cpuid->entries[i];
1173     }
1174 
1175     TEST_FAIL("CPUID function 0x%x index 0x%x not found", function, index);
1176 
1177     return NULL;
1178 }
1179 
1180 uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
1181                uint64_t a3)
1182 {
1183     uint64_t r;
1184 
1185     asm volatile("vmcall"
1186              : "=a"(r)
1187              : "a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
1188     return r;
1189 }
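
/*
 * Guest-side sketch: the wrapper simply issues VMCALL with the number in RAX
 * and up to four arguments, e.g. (argument meanings are defined by the
 * specific hypercall, here KVM_HC_SEND_IPI from uapi/linux/kvm_para.h):
 *
 *     uint64_t ret = kvm_hypercall(KVM_HC_SEND_IPI, ipi_bitmap_low,
 *                                  ipi_bitmap_high, min_apic_id, icr);
 */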
1190 
1191 const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
1192 {
1193     static struct kvm_cpuid2 *cpuid;
1194     int kvm_fd;
1195 
1196     if (cpuid)
1197         return cpuid;
1198 
1199     cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
1200     kvm_fd = open_kvm_dev_path_or_exit();
1201 
1202     kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
1203 
1204     close(kvm_fd);
1205     return cpuid;
1206 }
1207 
1208 void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu)
1209 {
1210     static struct kvm_cpuid2 *cpuid_full;
1211     const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
1212     int i, nent = 0;
1213 
1214     if (!cpuid_full) {
1215         cpuid_sys = kvm_get_supported_cpuid();
1216         cpuid_hv = kvm_get_supported_hv_cpuid();
1217 
1218         cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent);
1219         if (!cpuid_full) {
1220             perror("malloc");
1221             abort();
1222         }
1223 
1224         /* Need to skip KVM CPUID leaves 0x400000xx */
1225         for (i = 0; i < cpuid_sys->nent; i++) {
1226             if (cpuid_sys->entries[i].function >= 0x40000000 &&
1227                 cpuid_sys->entries[i].function < 0x40000100)
1228                 continue;
1229             cpuid_full->entries[nent] = cpuid_sys->entries[i];
1230             nent++;
1231         }
1232 
1233         memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
1234                cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
1235         cpuid_full->nent = nent + cpuid_hv->nent;
1236     }
1237 
1238     vcpu_init_cpuid(vcpu, cpuid_full);
1239 }
1240 
1241 const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu)
1242 {
1243     struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
1244 
1245     vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
1246 
1247     return cpuid;
1248 }
1249 
1250 unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
1251 {
1252     const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
1253     unsigned long ht_gfn, max_gfn, max_pfn;
1254     uint32_t eax, ebx, ecx, edx, max_ext_leaf;
1255 
1256     max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
1257 
1258     /* Avoid reserved HyperTransport region on AMD processors.  */
1259     if (!is_amd_cpu())
1260         return max_gfn;
1261 
1262     /* On parts with <40 physical address bits, the area is fully hidden */
1263     if (vm->pa_bits < 40)
1264         return max_gfn;
1265 
1266     /* Before family 17h, the HyperTransport area is just below 1T.  */
1267     ht_gfn = (1 << 28) - num_ht_pages;
1268     cpuid(1, &eax, &ebx, &ecx, &edx);
1269     if (x86_family(eax) < 0x17)
1270         goto done;
1271 
1272     /*
1273      * Otherwise it's at the top of the physical address space, possibly
1274      * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX.  Use
1275      * the old conservative value if MAXPHYADDR is not enumerated.
1276      */
1277     cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
1278     max_ext_leaf = eax;
1279     if (max_ext_leaf < 0x80000008)
1280         goto done;
1281 
1282     cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
1283     max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1;
1284     if (max_ext_leaf >= 0x8000001f) {
1285         cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
1286         max_pfn >>= (ebx >> 6) & 0x3f;
1287     }
1288 
1289     ht_gfn = max_pfn - num_ht_pages;
1290 done:
1291     return min(max_gfn, ht_gfn - 1);
1292 }
1293 
1294 /* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
1295 bool vm_is_unrestricted_guest(struct kvm_vm *vm)
1296 {
1297     char val = 'N';
1298     size_t count;
1299     FILE *f;
1300 
1301     /* Ensure that a KVM vendor-specific module is loaded. */
1302     if (vm == NULL)
1303         close(open_kvm_dev_path_or_exit());
1304 
1305     f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
1306     if (f) {
1307         count = fread(&val, sizeof(char), 1, f);
1308         TEST_ASSERT(count == 1, "Unable to read from param file.");
1309         fclose(f);
1310     }
1311 
1312     return val == 'Y';
1313 }