// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 */

0016 #include <linux/highmem.h>
0017 #include <linux/hrtimer.h>
0018 #include <linux/kernel.h>
0019 #include <linux/kvm_host.h>
0020 #include <linux/module.h>
0021 #include <linux/moduleparam.h>
0022 #include <linux/mod_devicetable.h>
0023 #include <linux/mm.h>
0024 #include <linux/objtool.h>
0025 #include <linux/sched.h>
0026 #include <linux/sched/smt.h>
0027 #include <linux/slab.h>
0028 #include <linux/tboot.h>
0029 #include <linux/trace_events.h>
0030 #include <linux/entry-kvm.h>
0031
0032 #include <asm/apic.h>
0033 #include <asm/asm.h>
0034 #include <asm/cpu.h>
0035 #include <asm/cpu_device_id.h>
0036 #include <asm/debugreg.h>
0037 #include <asm/desc.h>
0038 #include <asm/fpu/api.h>
0039 #include <asm/fpu/xstate.h>
0040 #include <asm/idtentry.h>
0041 #include <asm/io.h>
0042 #include <asm/irq_remapping.h>
0043 #include <asm/kexec.h>
0044 #include <asm/perf_event.h>
0045 #include <asm/mmu_context.h>
0046 #include <asm/mshyperv.h>
0047 #include <asm/mwait.h>
0048 #include <asm/spec-ctrl.h>
0049 #include <asm/virtext.h>
0050 #include <asm/vmx.h>
0051
0052 #include "capabilities.h"
0053 #include "cpuid.h"
0054 #include "evmcs.h"
0055 #include "hyperv.h"
0056 #include "kvm_onhyperv.h"
0057 #include "irq.h"
0058 #include "kvm_cache_regs.h"
0059 #include "lapic.h"
0060 #include "mmu.h"
0061 #include "nested.h"
0062 #include "pmu.h"
0063 #include "sgx.h"
0064 #include "trace.h"
0065 #include "vmcs.h"
0066 #include "vmcs12.h"
0067 #include "vmx.h"
0068 #include "x86.h"
0069
0070 MODULE_AUTHOR("Qumranet");
0071 MODULE_LICENSE("GPL");
0072
0073 #ifdef MODULE
0074 static const struct x86_cpu_id vmx_cpu_id[] = {
0075 X86_MATCH_FEATURE(X86_FEATURE_VMX, NULL),
0076 {}
0077 };
0078 MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
0079 #endif
0080
0081 bool __read_mostly enable_vpid = 1;
0082 module_param_named(vpid, enable_vpid, bool, 0444);
0083
0084 static bool __read_mostly enable_vnmi = 1;
0085 module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
0086
0087 bool __read_mostly flexpriority_enabled = 1;
0088 module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
0089
0090 bool __read_mostly enable_ept = 1;
0091 module_param_named(ept, enable_ept, bool, S_IRUGO);
0092
0093 bool __read_mostly enable_unrestricted_guest = 1;
0094 module_param_named(unrestricted_guest,
0095 enable_unrestricted_guest, bool, S_IRUGO);
0096
0097 bool __read_mostly enable_ept_ad_bits = 1;
0098 module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
0099
0100 static bool __read_mostly emulate_invalid_guest_state = true;
0101 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
0102
0103 static bool __read_mostly fasteoi = 1;
0104 module_param(fasteoi, bool, S_IRUGO);
0105
0106 module_param(enable_apicv, bool, S_IRUGO);
0107
0108 bool __read_mostly enable_ipiv = true;
0109 module_param(enable_ipiv, bool, 0444);
0110
0111
0112
0113
0114
0115
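/*
 * If nested=1, nested virtualization is supported, i.e. guests may use VMX
 * and be a hypervisor for their own guests. If nested=0, guests may not use
 * VMX instructions.
 */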
0116 static bool __read_mostly nested = 1;
0117 module_param(nested, bool, S_IRUGO);
0118
0119 bool __read_mostly enable_pml = 1;
0120 module_param_named(pml, enable_pml, bool, S_IRUGO);
0121
0122 static bool __read_mostly error_on_inconsistent_vmcs_config = true;
0123 module_param(error_on_inconsistent_vmcs_config, bool, 0444);
0124
0125 static bool __read_mostly dump_invalid_vmcs = 0;
0126 module_param(dump_invalid_vmcs, bool, 0644);
0127
0128 #define MSR_BITMAP_MODE_X2APIC 1
0129 #define MSR_BITMAP_MODE_X2APIC_APICV 2
0130
0131 #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
0132
0133
0134 static int __read_mostly cpu_preemption_timer_multi;
0135 static bool __read_mostly enable_preemption_timer = 1;
0136 #ifdef CONFIG_X86_64
0137 module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
0138 #endif
0139
0140 extern bool __read_mostly allow_smaller_maxphyaddr;
0141 module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
0142
0143 #define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
0144 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
0145 #define KVM_VM_CR0_ALWAYS_ON \
0146 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
0147
0148 #define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
0149 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
0150 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
0151
0152 #define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
0153
0154 #define MSR_IA32_RTIT_STATUS_MASK (~(RTIT_STATUS_FILTEREN | \
0155 RTIT_STATUS_CONTEXTEN | RTIT_STATUS_TRIGGEREN | \
0156 RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
0157 RTIT_STATUS_BYTECNT))
0158
0159
0160
0161
0162
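/*
 * List of MSRs that can be directly passed to the guest.
 * In addition to these, x2APIC, PT and LBR MSRs are handled specially
 * (see is_valid_passthrough_msr()).
 */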
0163 static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
0164 MSR_IA32_SPEC_CTRL,
0165 MSR_IA32_PRED_CMD,
0166 MSR_IA32_TSC,
0167 #ifdef CONFIG_X86_64
0168 MSR_FS_BASE,
0169 MSR_GS_BASE,
0170 MSR_KERNEL_GS_BASE,
0171 MSR_IA32_XFD,
0172 MSR_IA32_XFD_ERR,
0173 #endif
0174 MSR_IA32_SYSENTER_CS,
0175 MSR_IA32_SYSENTER_ESP,
0176 MSR_IA32_SYSENTER_EIP,
0177 MSR_CORE_C1_RES,
0178 MSR_CORE_C3_RESIDENCY,
0179 MSR_CORE_C6_RESIDENCY,
0180 MSR_CORE_C7_RESIDENCY,
0181 };
0182
0193
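/*
 * Pause-Loop Exiting (PLE) tuning:
 * ple_gap:    upper bound on the time between two successive executions of
 *             PAUSE for them to still count as one spin loop; 0 disables PLE.
 * ple_window: upper bound on the time a vCPU may spin in a PAUSE loop before
 *             a PLE VM-exit is triggered; grown, shrunk and capped by the
 *             _grow/_shrink/_max parameters below.
 * Time is measured in TSC-rate units; see the SDM's Pause-Loop Exiting
 * sections for details.
 */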
0194 static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
0195 module_param(ple_gap, uint, 0444);
0196
0197 static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
0198 module_param(ple_window, uint, 0444);
0199
0200
0201 static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
0202 module_param(ple_window_grow, uint, 0444);
0203
0204
0205 static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
0206 module_param(ple_window_shrink, uint, 0444);
0207
0208
0209 static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
0210 module_param(ple_window_max, uint, 0444);
0211
0212
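/* Default is SYSTEM mode, 1 for host-guest mode. */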
0213 int __read_mostly pt_mode = PT_MODE_SYSTEM;
0214 module_param(pt_mode, int, S_IRUGO);
0215
0216 static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
0217 static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
0218 static DEFINE_MUTEX(vmx_l1d_flush_mutex);
0219
0220
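/* Storage for pre-module-init parameter parsing. */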
0221 static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO;
0222
0223 static const struct {
0224 const char *option;
0225 bool for_parse;
0226 } vmentry_l1d_param[] = {
0227 [VMENTER_L1D_FLUSH_AUTO] = {"auto", true},
0228 [VMENTER_L1D_FLUSH_NEVER] = {"never", true},
0229 [VMENTER_L1D_FLUSH_COND] = {"cond", true},
0230 [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true},
0231 [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
0232 [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
0233 };
0234
0235 #define L1D_CACHE_ORDER 4
0236 static void *vmx_l1d_flush_pages;
0237
0238
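/* Control for disabling CPU fill buffer clear (MCU_OPT_CTRL.FB_CLEAR_DIS). */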
0239 static bool __read_mostly vmx_fb_clear_ctrl_available;
0240
0241 static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
0242 {
0243 struct page *page;
0244 unsigned int i;
0245
0246 if (!boot_cpu_has_bug(X86_BUG_L1TF)) {
0247 l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
0248 return 0;
0249 }
0250
0251 if (!enable_ept) {
0252 l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
0253 return 0;
0254 }
0255
0256 if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
0257 u64 msr;
0258
0259 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
0260 if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
0261 l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
0262 return 0;
0263 }
0264 }
0265
0266
0267 if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
0268 switch (l1tf_mitigation) {
0269 case L1TF_MITIGATION_OFF:
0270 l1tf = VMENTER_L1D_FLUSH_NEVER;
0271 break;
0272 case L1TF_MITIGATION_FLUSH_NOWARN:
0273 case L1TF_MITIGATION_FLUSH:
0274 case L1TF_MITIGATION_FLUSH_NOSMT:
0275 l1tf = VMENTER_L1D_FLUSH_COND;
0276 break;
0277 case L1TF_MITIGATION_FULL:
0278 case L1TF_MITIGATION_FULL_FORCE:
0279 l1tf = VMENTER_L1D_FLUSH_ALWAYS;
0280 break;
0281 }
0282 } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) {
0283 l1tf = VMENTER_L1D_FLUSH_ALWAYS;
0284 }
0285
0286 if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
0287 !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
0288
0289
0290
0291
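		/*
		 * The vmx_l1d_flush_pages allocation is not tied to a VM's
		 * lifetime and so should not be charged to a memcg.
		 */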
0292 page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
0293 if (!page)
0294 return -ENOMEM;
0295 vmx_l1d_flush_pages = page_address(page);
0296
0297
0298
0299
0300
0301
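		/*
		 * Initialize each page with a different pattern in order to
		 * protect against KSM merging the pages in the nested
		 * virtualization case.
		 */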
0302 for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) {
0303 memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1,
0304 PAGE_SIZE);
0305 }
0306 }
0307
0308 l1tf_vmx_mitigation = l1tf;
0309
0310 if (l1tf != VMENTER_L1D_FLUSH_NEVER)
0311 static_branch_enable(&vmx_l1d_should_flush);
0312 else
0313 static_branch_disable(&vmx_l1d_should_flush);
0314
0315 if (l1tf == VMENTER_L1D_FLUSH_COND)
0316 static_branch_enable(&vmx_l1d_flush_cond);
0317 else
0318 static_branch_disable(&vmx_l1d_flush_cond);
0319 return 0;
0320 }
0321
0322 static int vmentry_l1d_flush_parse(const char *s)
0323 {
0324 unsigned int i;
0325
0326 if (s) {
0327 for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
0328 if (vmentry_l1d_param[i].for_parse &&
0329 sysfs_streq(s, vmentry_l1d_param[i].option))
0330 return i;
0331 }
0332 }
0333 return -EINVAL;
0334 }
0335
0336 static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
0337 {
0338 int l1tf, ret;
0339
0340 l1tf = vmentry_l1d_flush_parse(s);
0341 if (l1tf < 0)
0342 return l1tf;
0343
0344 if (!boot_cpu_has(X86_BUG_L1TF))
0345 return 0;
0346
0347
0348
0349
0350
0351
0352
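	/*
	 * Has vmx_init() run already? If not, this is the pre-init parameter
	 * parsing: just store the value and let vmx_init() do the proper
	 * setup after enable_ept and the various L1TF MSR checks have been
	 * determined.
	 */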
0353 if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) {
0354 vmentry_l1d_flush_param = l1tf;
0355 return 0;
0356 }
0357
0358 mutex_lock(&vmx_l1d_flush_mutex);
0359 ret = vmx_setup_l1d_flush(l1tf);
0360 mutex_unlock(&vmx_l1d_flush_mutex);
0361 return ret;
0362 }
0363
0364 static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
0365 {
0366 if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
0367 return sprintf(s, "???\n");
0368
0369 return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
0370 }
0371
0372 static void vmx_setup_fb_clear_ctrl(void)
0373 {
0374 u64 msr;
0375
0376 if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) &&
0377 !boot_cpu_has_bug(X86_BUG_MDS) &&
0378 !boot_cpu_has_bug(X86_BUG_TAA)) {
0379 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
0380 if (msr & ARCH_CAP_FB_CLEAR_CTRL)
0381 vmx_fb_clear_ctrl_available = true;
0382 }
0383 }
0384
0385 static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
0386 {
0387 u64 msr;
0388
0389 if (!vmx->disable_fb_clear)
0390 return;
0391
0392 msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL);
0393 msr |= FB_CLEAR_DIS;
0394 native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
0395
0396 vmx->msr_ia32_mcu_opt_ctrl = msr;
0397 }
0398
0399 static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
0400 {
0401 if (!vmx->disable_fb_clear)
0402 return;
0403
0404 vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
0405 native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
0406 }
0407
0408 static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
0409 {
0410 vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
0411
0412
0413
0414
0415
0416
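	/*
	 * Leave FB_CLEAR enabled (i.e. don't set FB_CLEAR_DIS) if the guest
	 * is enumerated FB_CLEAR itself, or if it is reported immune to the
	 * relevant data-sampling issues (MDS, TAA, PSDP, FBSDP, SBDR/SSDP).
	 */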
0417 if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) ||
0418 ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) &&
0419 (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) &&
0420 (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) &&
0421 (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) &&
0422 (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO)))
0423 vmx->disable_fb_clear = false;
0424 }
0425
0426 static const struct kernel_param_ops vmentry_l1d_flush_ops = {
0427 .set = vmentry_l1d_flush_set,
0428 .get = vmentry_l1d_flush_get,
0429 };
0430 module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
0431
0432 static u32 vmx_segment_access_rights(struct kvm_segment *var);
0433
0434 void vmx_vmexit(void);
0435
0436 #define vmx_insn_failed(fmt...) \
0437 do { \
0438 WARN_ONCE(1, fmt); \
0439 pr_warn_ratelimited(fmt); \
0440 } while (0)
0441
0442 asmlinkage void vmread_error(unsigned long field, bool fault)
0443 {
0444 if (fault)
0445 kvm_spurious_fault();
0446 else
0447 vmx_insn_failed("kvm: vmread failed: field=%lx\n", field);
0448 }
0449
0450 noinline void vmwrite_error(unsigned long field, unsigned long value)
0451 {
0452 vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%u\n",
0453 field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
0454 }
0455
0456 noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
0457 {
0458 vmx_insn_failed("kvm: vmclear failed: %p/%llx err=%u\n",
0459 vmcs, phys_addr, vmcs_read32(VM_INSTRUCTION_ERROR));
0460 }
0461
0462 noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
0463 {
0464 vmx_insn_failed("kvm: vmptrld failed: %p/%llx err=%u\n",
0465 vmcs, phys_addr, vmcs_read32(VM_INSTRUCTION_ERROR));
0466 }
0467
0468 noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
0469 {
0470 vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
0471 ext, vpid, gva);
0472 }
0473
0474 noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
0475 {
0476 vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
0477 ext, eptp, gpa);
0478 }
0479
0480 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
0481 DEFINE_PER_CPU(struct vmcs *, current_vmcs);
0482
0483
0484
0485
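/*
 * We maintain a per-CPU linked list of VMCSs loaded on that CPU. This is
 * needed when a CPU is brought down and we must VMCLEAR all VMCSs loaded
 * on it.
 */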
0486 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
0487
0488 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
0489 static DEFINE_SPINLOCK(vmx_vpid_lock);
0490
0491 struct vmcs_config vmcs_config;
0492 struct vmx_capability vmx_capability;
0493
0494 #define VMX_SEGMENT_FIELD(seg) \
0495 [VCPU_SREG_##seg] = { \
0496 .selector = GUEST_##seg##_SELECTOR, \
0497 .base = GUEST_##seg##_BASE, \
0498 .limit = GUEST_##seg##_LIMIT, \
0499 .ar_bytes = GUEST_##seg##_AR_BYTES, \
0500 }
0501
0502 static const struct kvm_vmx_segment_field {
0503 unsigned selector;
0504 unsigned base;
0505 unsigned limit;
0506 unsigned ar_bytes;
0507 } kvm_vmx_segment_fields[] = {
0508 VMX_SEGMENT_FIELD(CS),
0509 VMX_SEGMENT_FIELD(DS),
0510 VMX_SEGMENT_FIELD(ES),
0511 VMX_SEGMENT_FIELD(FS),
0512 VMX_SEGMENT_FIELD(GS),
0513 VMX_SEGMENT_FIELD(SS),
0514 VMX_SEGMENT_FIELD(TR),
0515 VMX_SEGMENT_FIELD(LDTR),
0516 };
0517
0518 static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
0519 {
0520 vmx->segment_cache.bitmask = 0;
0521 }
0522
0523 static unsigned long host_idt_base;
0524
0525 #if IS_ENABLED(CONFIG_HYPERV)
0526 static bool __read_mostly enlightened_vmcs = true;
0527 module_param(enlightened_vmcs, bool, 0444);
0528
0529 static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
0530 {
0531 struct hv_enlightened_vmcs *evmcs;
0532 struct hv_partition_assist_pg **p_hv_pa_pg =
0533 &to_kvm_hv(vcpu->kvm)->hv_pa_pg;
0534
0535
0536
0537
0538 if (!*p_hv_pa_pg)
0539 *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
0540
0541 if (!*p_hv_pa_pg)
0542 return -ENOMEM;
0543
0544 evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;
0545
0546 evmcs->partition_assist_page =
0547 __pa(*p_hv_pa_pg);
0548 evmcs->hv_vm_id = (unsigned long)vcpu->kvm;
0549 evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
0550
0551 return 0;
0552 }
0553
0554 #endif
0555
0556
0557
0558
0559
0560
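/*
 * CPUID.1.EAX family/model/stepping signatures (with reserved bits cleared)
 * on which the VMX preemption timer is known to misbehave; checked by
 * cpu_has_broken_vmx_preemption_timer() below.
 */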
static u32 vmx_preemption_cpu_tfms[] = {
	0x000206E6,
	0x00020652,
	0x00020655,
	0x000106E5,
	0x000106A0,
	0x000106A1,
	0x000106A4,
	0x000106A5,
	0x000306A8,
};
0590
0591 static inline bool cpu_has_broken_vmx_preemption_timer(void)
0592 {
0593 u32 eax = cpuid_eax(0x00000001), i;
0594
0595
0596 eax &= ~(0x3U << 14 | 0xfU << 28);
0597 for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
0598 if (eax == vmx_preemption_cpu_tfms[i])
0599 return true;
0600
0601 return false;
0602 }
0603
0604 static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
0605 {
0606 return flexpriority_enabled && lapic_in_kernel(vcpu);
0607 }
0608
0609 static int possible_passthrough_msr_slot(u32 msr)
0610 {
0611 u32 i;
0612
0613 for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++)
0614 if (vmx_possible_passthrough_msrs[i] == msr)
0615 return i;
0616
0617 return -ENOENT;
0618 }
0619
0620 static bool is_valid_passthrough_msr(u32 msr)
0621 {
0622 bool r;
0623
0624 switch (msr) {
0625 case 0x800 ... 0x8ff:
0626
0627 return true;
0628 case MSR_IA32_RTIT_STATUS:
0629 case MSR_IA32_RTIT_OUTPUT_BASE:
0630 case MSR_IA32_RTIT_OUTPUT_MASK:
0631 case MSR_IA32_RTIT_CR3_MATCH:
0632 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
0633
0634 case MSR_LBR_SELECT:
0635 case MSR_LBR_TOS:
0636 case MSR_LBR_INFO_0 ... MSR_LBR_INFO_0 + 31:
0637 case MSR_LBR_NHM_FROM ... MSR_LBR_NHM_FROM + 31:
0638 case MSR_LBR_NHM_TO ... MSR_LBR_NHM_TO + 31:
0639 case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
0640 case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
0641
0642 return true;
0643 }
0644
0645 r = possible_passthrough_msr_slot(msr) != -ENOENT;
0646
0647 WARN(!r, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
0648
0649 return r;
0650 }
0651
0652 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
0653 {
0654 int i;
0655
0656 i = kvm_find_user_return_msr(msr);
0657 if (i >= 0)
0658 return &vmx->guest_uret_msrs[i];
0659 return NULL;
0660 }
0661
0662 static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
0663 struct vmx_uret_msr *msr, u64 data)
0664 {
0665 unsigned int slot = msr - vmx->guest_uret_msrs;
0666 int ret = 0;
0667
0668 if (msr->load_into_hardware) {
0669 preempt_disable();
0670 ret = kvm_set_user_return_msr(slot, data, msr->mask);
0671 preempt_enable();
0672 }
0673 if (!ret)
0674 msr->data = data;
0675 return ret;
0676 }
0677
0678 #ifdef CONFIG_KEXEC_CORE
0679 static void crash_vmclear_local_loaded_vmcss(void)
0680 {
0681 int cpu = raw_smp_processor_id();
0682 struct loaded_vmcs *v;
0683
0684 list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
0685 loaded_vmcss_on_cpu_link)
0686 vmcs_clear(v->vmcs);
0687 }
0688 #endif
0689
0690 static void __loaded_vmcs_clear(void *arg)
0691 {
0692 struct loaded_vmcs *loaded_vmcs = arg;
0693 int cpu = raw_smp_processor_id();
0694
0695 if (loaded_vmcs->cpu != cpu)
0696 return;
0697 if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
0698 per_cpu(current_vmcs, cpu) = NULL;
0699
0700 vmcs_clear(loaded_vmcs->vmcs);
0701 if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
0702 vmcs_clear(loaded_vmcs->shadow_vmcs);
0703
0704 list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
0705
0706
0707
0708
0709
0710
0711
0712
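	/*
	 * Ensure all writes to loaded_vmcs, including deleting it from its
	 * current percpu list, complete before setting loaded_vmcs->cpu = -1,
	 * otherwise a different cpu can see loaded_vmcs->cpu == -1 first and
	 * add loaded_vmcs to its percpu list before it's deleted from this
	 * cpu's list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs().
	 */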
0713 smp_wmb();
0714
0715 loaded_vmcs->cpu = -1;
0716 loaded_vmcs->launched = 0;
0717 }
0718
0719 void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
0720 {
0721 int cpu = loaded_vmcs->cpu;
0722
0723 if (cpu != -1)
0724 smp_call_function_single(cpu,
0725 __loaded_vmcs_clear, loaded_vmcs, 1);
0726 }
0727
0728 static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
0729 unsigned field)
0730 {
0731 bool ret;
0732 u32 mask = 1 << (seg * SEG_FIELD_NR + field);
0733
0734 if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) {
0735 kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS);
0736 vmx->segment_cache.bitmask = 0;
0737 }
0738 ret = vmx->segment_cache.bitmask & mask;
0739 vmx->segment_cache.bitmask |= mask;
0740 return ret;
0741 }
0742
0743 static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
0744 {
0745 u16 *p = &vmx->segment_cache.seg[seg].selector;
0746
0747 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
0748 *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
0749 return *p;
0750 }
0751
0752 static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
0753 {
0754 ulong *p = &vmx->segment_cache.seg[seg].base;
0755
0756 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
0757 *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
0758 return *p;
0759 }
0760
0761 static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
0762 {
0763 u32 *p = &vmx->segment_cache.seg[seg].limit;
0764
0765 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
0766 *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
0767 return *p;
0768 }
0769
0770 static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
0771 {
0772 u32 *p = &vmx->segment_cache.seg[seg].ar;
0773
0774 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
0775 *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
0776 return *p;
0777 }
0778
0779 void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
0780 {
0781 u32 eb;
0782
0783 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
0784 (1u << DB_VECTOR) | (1u << AC_VECTOR);
0785
0786
0787
0788
0789
0790
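	/*
	 * Guest access to VMware backdoor ports could legitimately trigger
	 * #GP because of the TSS I/O permission bitmap. Intercept those #GPs
	 * and allow the access anyway, as VMware does.
	 */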
0791 if (enable_vmware_backdoor)
0792 eb |= (1u << GP_VECTOR);
0793 if ((vcpu->guest_debug &
0794 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
0795 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
0796 eb |= 1u << BP_VECTOR;
0797 if (to_vmx(vcpu)->rmode.vm86_active)
0798 eb = ~0;
0799 if (!vmx_need_pf_intercept(vcpu))
0800 eb &= ~(1u << PF_VECTOR);
0801
0802
0803
0804
0805
0806
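	/*
	 * When running a nested L2 guest and L1 specified an exception bitmap
	 * for it, trap the same exceptions and pass the same page-fault
	 * error-code mask and match values.
	 */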
0807 if (is_guest_mode(vcpu))
0808 eb |= get_vmcs12(vcpu)->exception_bitmap;
0809 else {
0810 int mask = 0, match = 0;
0811
0812 if (enable_ept && (eb & (1u << PF_VECTOR))) {
0813
0814
0815
0816
0817
0818
0819
0820 mask = PFERR_PRESENT_MASK | PFERR_RSVD_MASK;
0821 match = PFERR_PRESENT_MASK;
0822 }
0823 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, mask);
0824 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, match);
0825 }
0826
0827
0828
0829
0830
0831
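	/*
	 * Disabling xfd write interception indicates that dynamic xfeatures
	 * might be used in the guest. Always trap #NM in this case so the
	 * guest's xfd_err can be saved in a timely manner.
	 */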
0832 if (vcpu->arch.xfd_no_write_intercept)
0833 eb |= (1u << NM_VECTOR);
0834
0835 vmcs_write32(EXCEPTION_BITMAP, eb);
0836 }
0837
0838
0839
0840
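/*
 * Check if an MSR write is intercepted for the currently loaded MSR bitmap.
 */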
0841 static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
0842 {
0843 if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
0844 return true;
0845
0846 return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, msr);
0847 }
0848
0849 unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
0850 {
0851 unsigned int flags = 0;
0852
0853 if (vmx->loaded_vmcs->launched)
0854 flags |= VMX_RUN_VMRESUME;
0855
0856
0857
0858
0859
0860
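	/*
	 * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free
	 * to change it directly without causing a VM-exit. In that case read
	 * it back after VM-exit and store it in vmx->spec_ctrl.
	 */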
0861 if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
0862 flags |= VMX_RUN_SAVE_SPEC_CTRL;
0863
0864 return flags;
0865 }
0866
0867 static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
0868 unsigned long entry, unsigned long exit)
0869 {
0870 vm_entry_controls_clearbit(vmx, entry);
0871 vm_exit_controls_clearbit(vmx, exit);
0872 }
0873
0874 int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr)
0875 {
0876 unsigned int i;
0877
0878 for (i = 0; i < m->nr; ++i) {
0879 if (m->val[i].index == msr)
0880 return i;
0881 }
0882 return -ENOENT;
0883 }
0884
0885 static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
0886 {
0887 int i;
0888 struct msr_autoload *m = &vmx->msr_autoload;
0889
0890 switch (msr) {
0891 case MSR_EFER:
0892 if (cpu_has_load_ia32_efer()) {
0893 clear_atomic_switch_msr_special(vmx,
0894 VM_ENTRY_LOAD_IA32_EFER,
0895 VM_EXIT_LOAD_IA32_EFER);
0896 return;
0897 }
0898 break;
0899 case MSR_CORE_PERF_GLOBAL_CTRL:
0900 if (cpu_has_load_perf_global_ctrl()) {
0901 clear_atomic_switch_msr_special(vmx,
0902 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
0903 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
0904 return;
0905 }
0906 break;
0907 }
0908 i = vmx_find_loadstore_msr_slot(&m->guest, msr);
0909 if (i < 0)
0910 goto skip_guest;
0911 --m->guest.nr;
0912 m->guest.val[i] = m->guest.val[m->guest.nr];
0913 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
0914
0915 skip_guest:
0916 i = vmx_find_loadstore_msr_slot(&m->host, msr);
0917 if (i < 0)
0918 return;
0919
0920 --m->host.nr;
0921 m->host.val[i] = m->host.val[m->host.nr];
0922 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
0923 }
0924
0925 static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
0926 unsigned long entry, unsigned long exit,
0927 unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
0928 u64 guest_val, u64 host_val)
0929 {
0930 vmcs_write64(guest_val_vmcs, guest_val);
0931 if (host_val_vmcs != HOST_IA32_EFER)
0932 vmcs_write64(host_val_vmcs, host_val);
0933 vm_entry_controls_setbit(vmx, entry);
0934 vm_exit_controls_setbit(vmx, exit);
0935 }
0936
0937 static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
0938 u64 guest_val, u64 host_val, bool entry_only)
0939 {
0940 int i, j = 0;
0941 struct msr_autoload *m = &vmx->msr_autoload;
0942
0943 switch (msr) {
0944 case MSR_EFER:
0945 if (cpu_has_load_ia32_efer()) {
0946 add_atomic_switch_msr_special(vmx,
0947 VM_ENTRY_LOAD_IA32_EFER,
0948 VM_EXIT_LOAD_IA32_EFER,
0949 GUEST_IA32_EFER,
0950 HOST_IA32_EFER,
0951 guest_val, host_val);
0952 return;
0953 }
0954 break;
0955 case MSR_CORE_PERF_GLOBAL_CTRL:
0956 if (cpu_has_load_perf_global_ctrl()) {
0957 add_atomic_switch_msr_special(vmx,
0958 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
0959 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
0960 GUEST_IA32_PERF_GLOBAL_CTRL,
0961 HOST_IA32_PERF_GLOBAL_CTRL,
0962 guest_val, host_val);
0963 return;
0964 }
0965 break;
0966 case MSR_IA32_PEBS_ENABLE:
0967
0968
0969
0970
0971
0972 wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
0973 }
0974
0975 i = vmx_find_loadstore_msr_slot(&m->guest, msr);
0976 if (!entry_only)
0977 j = vmx_find_loadstore_msr_slot(&m->host, msr);
0978
0979 if ((i < 0 && m->guest.nr == MAX_NR_LOADSTORE_MSRS) ||
0980 (j < 0 && m->host.nr == MAX_NR_LOADSTORE_MSRS)) {
0981 printk_once(KERN_WARNING "Not enough msr switch entries. "
0982 "Can't add msr %x\n", msr);
0983 return;
0984 }
0985 if (i < 0) {
0986 i = m->guest.nr++;
0987 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
0988 }
0989 m->guest.val[i].index = msr;
0990 m->guest.val[i].value = guest_val;
0991
0992 if (entry_only)
0993 return;
0994
0995 if (j < 0) {
0996 j = m->host.nr++;
0997 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
0998 }
0999 m->host.val[j].index = msr;
1000 m->host.val[j].value = host_val;
1001 }
1002
1003 static bool update_transition_efer(struct vcpu_vmx *vmx)
1004 {
1005 u64 guest_efer = vmx->vcpu.arch.efer;
1006 u64 ignore_bits = 0;
1007 int i;
1008
1009
1010 if (!enable_ept)
1011 guest_efer |= EFER_NX;
1012
1013
1014
1015
1016 ignore_bits |= EFER_SCE;
1017 #ifdef CONFIG_X86_64
1018 ignore_bits |= EFER_LMA | EFER_LME;
1019
1020 if (guest_efer & EFER_LMA)
1021 ignore_bits &= ~(u64)EFER_SCE;
1022 #endif
1023
1024
1025
1026
1027
1028
1029 if (cpu_has_load_ia32_efer() ||
1030 (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
1031 if (!(guest_efer & EFER_LMA))
1032 guest_efer &= ~EFER_LME;
1033 if (guest_efer != host_efer)
1034 add_atomic_switch_msr(vmx, MSR_EFER,
1035 guest_efer, host_efer, false);
1036 else
1037 clear_atomic_switch_msr(vmx, MSR_EFER);
1038 return false;
1039 }
1040
1041 i = kvm_find_user_return_msr(MSR_EFER);
1042 if (i < 0)
1043 return false;
1044
1045 clear_atomic_switch_msr(vmx, MSR_EFER);
1046
1047 guest_efer &= ~ignore_bits;
1048 guest_efer |= host_efer & ignore_bits;
1049
1050 vmx->guest_uret_msrs[i].data = guest_efer;
1051 vmx->guest_uret_msrs[i].mask = ~ignore_bits;
1052
1053 return true;
1054 }
1055
1056 #ifdef CONFIG_X86_32
1057
1058
1059
1060
1061
1062 static unsigned long segment_base(u16 selector)
1063 {
1064 struct desc_struct *table;
1065 unsigned long v;
1066
1067 if (!(selector & ~SEGMENT_RPL_MASK))
1068 return 0;
1069
1070 table = get_current_gdt_ro();
1071
1072 if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
1073 u16 ldt_selector = kvm_read_ldt();
1074
1075 if (!(ldt_selector & ~SEGMENT_RPL_MASK))
1076 return 0;
1077
1078 table = (struct desc_struct *)segment_base(ldt_selector);
1079 }
1080 v = get_desc_base(&table[selector >> 3]);
1081 return v;
1082 }
1083 #endif
1084
1085 static inline bool pt_can_write_msr(struct vcpu_vmx *vmx)
1086 {
1087 return vmx_pt_mode_is_host_guest() &&
1088 !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
1089 }
1090
1091 static inline bool pt_output_base_valid(struct kvm_vcpu *vcpu, u64 base)
1092 {
1093
1094 return kvm_vcpu_is_legal_aligned_gpa(vcpu, base, 128);
1095 }
1096
1097 static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range)
1098 {
1099 u32 i;
1100
1101 wrmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
1102 wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
1103 wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
1104 wrmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
1105 for (i = 0; i < addr_range; i++) {
1106 wrmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
1107 wrmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
1108 }
1109 }
1110
1111 static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range)
1112 {
1113 u32 i;
1114
1115 rdmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
1116 rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
1117 rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
1118 rdmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
1119 for (i = 0; i < addr_range; i++) {
1120 rdmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
1121 rdmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
1122 }
1123 }
1124
1125 static void pt_guest_enter(struct vcpu_vmx *vmx)
1126 {
1127 if (vmx_pt_mode_is_system())
1128 return;
1129
1130
1131
1132
1133
1134 rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
1135 if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
1136 wrmsrl(MSR_IA32_RTIT_CTL, 0);
1137 pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.num_address_ranges);
1138 pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.num_address_ranges);
1139 }
1140 }
1141
1142 static void pt_guest_exit(struct vcpu_vmx *vmx)
1143 {
1144 if (vmx_pt_mode_is_system())
1145 return;
1146
1147 if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
1148 pt_save_msr(&vmx->pt_desc.guest, vmx->pt_desc.num_address_ranges);
1149 pt_load_msr(&vmx->pt_desc.host, vmx->pt_desc.num_address_ranges);
1150 }
1151
1152
1153
1154
1155
1156 if (vmx->pt_desc.host.ctl)
1157 wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
1158 }
1159
1160 void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
1161 unsigned long fs_base, unsigned long gs_base)
1162 {
1163 if (unlikely(fs_sel != host->fs_sel)) {
1164 if (!(fs_sel & 7))
1165 vmcs_write16(HOST_FS_SELECTOR, fs_sel);
1166 else
1167 vmcs_write16(HOST_FS_SELECTOR, 0);
1168 host->fs_sel = fs_sel;
1169 }
1170 if (unlikely(gs_sel != host->gs_sel)) {
1171 if (!(gs_sel & 7))
1172 vmcs_write16(HOST_GS_SELECTOR, gs_sel);
1173 else
1174 vmcs_write16(HOST_GS_SELECTOR, 0);
1175 host->gs_sel = gs_sel;
1176 }
1177 if (unlikely(fs_base != host->fs_base)) {
1178 vmcs_writel(HOST_FS_BASE, fs_base);
1179 host->fs_base = fs_base;
1180 }
1181 if (unlikely(gs_base != host->gs_base)) {
1182 vmcs_writel(HOST_GS_BASE, gs_base);
1183 host->gs_base = gs_base;
1184 }
1185 }
1186
1187 void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
1188 {
1189 struct vcpu_vmx *vmx = to_vmx(vcpu);
1190 struct vmcs_host_state *host_state;
1191 #ifdef CONFIG_X86_64
1192 int cpu = raw_smp_processor_id();
1193 #endif
1194 unsigned long fs_base, gs_base;
1195 u16 fs_sel, gs_sel;
1196 int i;
1197
1198 vmx->req_immediate_exit = false;
1199
1200
1201
1202
1203
1204
1205 if (!vmx->guest_uret_msrs_loaded) {
1206 vmx->guest_uret_msrs_loaded = true;
1207 for (i = 0; i < kvm_nr_uret_msrs; ++i) {
1208 if (!vmx->guest_uret_msrs[i].load_into_hardware)
1209 continue;
1210
1211 kvm_set_user_return_msr(i,
1212 vmx->guest_uret_msrs[i].data,
1213 vmx->guest_uret_msrs[i].mask);
1214 }
1215 }
1216
1217 if (vmx->nested.need_vmcs12_to_shadow_sync)
1218 nested_sync_vmcs12_to_shadow(vcpu);
1219
1220 if (vmx->guest_state_loaded)
1221 return;
1222
1223 host_state = &vmx->loaded_vmcs->host_state;
1224
1225
1226
1227
1228
1229 host_state->ldt_sel = kvm_read_ldt();
1230
1231 #ifdef CONFIG_X86_64
1232 savesegment(ds, host_state->ds_sel);
1233 savesegment(es, host_state->es_sel);
1234
1235 gs_base = cpu_kernelmode_gs_base(cpu);
1236 if (likely(is_64bit_mm(current->mm))) {
1237 current_save_fsgs();
1238 fs_sel = current->thread.fsindex;
1239 gs_sel = current->thread.gsindex;
1240 fs_base = current->thread.fsbase;
1241 vmx->msr_host_kernel_gs_base = current->thread.gsbase;
1242 } else {
1243 savesegment(fs, fs_sel);
1244 savesegment(gs, gs_sel);
1245 fs_base = read_msr(MSR_FS_BASE);
1246 vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
1247 }
1248
1249 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
1250 #else
1251 savesegment(fs, fs_sel);
1252 savesegment(gs, gs_sel);
1253 fs_base = segment_base(fs_sel);
1254 gs_base = segment_base(gs_sel);
1255 #endif
1256
1257 vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
1258 vmx->guest_state_loaded = true;
1259 }
1260
1261 static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
1262 {
1263 struct vmcs_host_state *host_state;
1264
1265 if (!vmx->guest_state_loaded)
1266 return;
1267
1268 host_state = &vmx->loaded_vmcs->host_state;
1269
1270 ++vmx->vcpu.stat.host_state_reload;
1271
1272 #ifdef CONFIG_X86_64
1273 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
1274 #endif
1275 if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
1276 kvm_load_ldt(host_state->ldt_sel);
1277 #ifdef CONFIG_X86_64
1278 load_gs_index(host_state->gs_sel);
1279 #else
1280 loadsegment(gs, host_state->gs_sel);
1281 #endif
1282 }
1283 if (host_state->fs_sel & 7)
1284 loadsegment(fs, host_state->fs_sel);
1285 #ifdef CONFIG_X86_64
1286 if (unlikely(host_state->ds_sel | host_state->es_sel)) {
1287 loadsegment(ds, host_state->ds_sel);
1288 loadsegment(es, host_state->es_sel);
1289 }
1290 #endif
1291 invalidate_tss_limit();
1292 #ifdef CONFIG_X86_64
1293 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
1294 #endif
1295 load_fixmap_gdt(raw_smp_processor_id());
1296 vmx->guest_state_loaded = false;
1297 vmx->guest_uret_msrs_loaded = false;
1298 }
1299
1300 #ifdef CONFIG_X86_64
1301 static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
1302 {
1303 preempt_disable();
1304 if (vmx->guest_state_loaded)
1305 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
1306 preempt_enable();
1307 return vmx->msr_guest_kernel_gs_base;
1308 }
1309
1310 static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
1311 {
1312 preempt_disable();
1313 if (vmx->guest_state_loaded)
1314 wrmsrl(MSR_KERNEL_GS_BASE, data);
1315 preempt_enable();
1316 vmx->msr_guest_kernel_gs_base = data;
1317 }
1318 #endif
1319
1320 void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
1321 struct loaded_vmcs *buddy)
1322 {
1323 struct vcpu_vmx *vmx = to_vmx(vcpu);
1324 bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
1325 struct vmcs *prev;
1326
1327 if (!already_loaded) {
1328 loaded_vmcs_clear(vmx->loaded_vmcs);
1329 local_irq_disable();
1330
1331
1332
1333
1334
1335
1336
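		/*
		 * Ensure loaded_vmcs->cpu is read before adding loaded_vmcs to
		 * this cpu's percpu list, otherwise it may not yet be deleted
		 * from its previous cpu's percpu list. Pairs with the smp_wmb()
		 * in __loaded_vmcs_clear().
		 */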
1337 smp_rmb();
1338
1339 list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
1340 &per_cpu(loaded_vmcss_on_cpu, cpu));
1341 local_irq_enable();
1342 }
1343
1344 prev = per_cpu(current_vmcs, cpu);
1345 if (prev != vmx->loaded_vmcs->vmcs) {
1346 per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
1347 vmcs_load(vmx->loaded_vmcs->vmcs);
1348
1349
1350
1351
1352
1353
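		/*
		 * No indirect branch prediction barrier is needed when
		 * switching the active VMCS within a guest, e.g. on nested
		 * VM-Enter; the L1 VMM can protect itself with retpolines,
		 * IBPB or IBRS.
		 */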
1354 if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev))
1355 indirect_branch_prediction_barrier();
1356 }
1357
1358 if (!already_loaded) {
1359 void *gdt = get_current_gdt_ro();
1360
1361
1362
1363
1364
1365 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1366
1367
1368
1369
1370
1371 vmcs_writel(HOST_TR_BASE,
1372 (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
1373 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);
1374
1375 if (IS_ENABLED(CONFIG_IA32_EMULATION) || IS_ENABLED(CONFIG_X86_32)) {
1376
1377 vmcs_writel(HOST_IA32_SYSENTER_ESP,
1378 (unsigned long)(cpu_entry_stack(cpu) + 1));
1379 }
1380
1381 vmx->loaded_vmcs->cpu = cpu;
1382 }
1383 }
1384
1385
1386
1387
1388
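/*
 * Switch to the specified vcpu, until a matching vcpu_put(). Assumes the
 * vcpu mutex is already taken.
 */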
1389 static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1390 {
1391 struct vcpu_vmx *vmx = to_vmx(vcpu);
1392
1393 vmx_vcpu_load_vmcs(vcpu, cpu, NULL);
1394
1395 vmx_vcpu_pi_load(vcpu, cpu);
1396
1397 vmx->host_debugctlmsr = get_debugctlmsr();
1398 }
1399
1400 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
1401 {
1402 vmx_vcpu_pi_put(vcpu);
1403
1404 vmx_prepare_switch_to_host(to_vmx(vcpu));
1405 }
1406
1407 bool vmx_emulation_required(struct kvm_vcpu *vcpu)
1408 {
1409 return emulate_invalid_guest_state && !vmx_guest_state_valid(vcpu);
1410 }
1411
1412 unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
1413 {
1414 struct vcpu_vmx *vmx = to_vmx(vcpu);
1415 unsigned long rflags, save_rflags;
1416
1417 if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) {
1418 kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
1419 rflags = vmcs_readl(GUEST_RFLAGS);
1420 if (vmx->rmode.vm86_active) {
1421 rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
1422 save_rflags = vmx->rmode.save_rflags;
1423 rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
1424 }
1425 vmx->rflags = rflags;
1426 }
1427 return vmx->rflags;
1428 }
1429
1430 void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1431 {
1432 struct vcpu_vmx *vmx = to_vmx(vcpu);
1433 unsigned long old_rflags;
1434
1435 if (is_unrestricted_guest(vcpu)) {
1436 kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
1437 vmx->rflags = rflags;
1438 vmcs_writel(GUEST_RFLAGS, rflags);
1439 return;
1440 }
1441
1442 old_rflags = vmx_get_rflags(vcpu);
1443 vmx->rflags = rflags;
1444 if (vmx->rmode.vm86_active) {
1445 vmx->rmode.save_rflags = rflags;
1446 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
1447 }
1448 vmcs_writel(GUEST_RFLAGS, rflags);
1449
1450 if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM)
1451 vmx->emulation_required = vmx_emulation_required(vcpu);
1452 }
1453
1454 static bool vmx_get_if_flag(struct kvm_vcpu *vcpu)
1455 {
1456 return vmx_get_rflags(vcpu) & X86_EFLAGS_IF;
1457 }
1458
1459 u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
1460 {
1461 u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
1462 int ret = 0;
1463
1464 if (interruptibility & GUEST_INTR_STATE_STI)
1465 ret |= KVM_X86_SHADOW_INT_STI;
1466 if (interruptibility & GUEST_INTR_STATE_MOV_SS)
1467 ret |= KVM_X86_SHADOW_INT_MOV_SS;
1468
1469 return ret;
1470 }
1471
1472 void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
1473 {
1474 u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
1475 u32 interruptibility = interruptibility_old;
1476
1477 interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
1478
1479 if (mask & KVM_X86_SHADOW_INT_MOV_SS)
1480 interruptibility |= GUEST_INTR_STATE_MOV_SS;
1481 else if (mask & KVM_X86_SHADOW_INT_STI)
1482 interruptibility |= GUEST_INTR_STATE_STI;
1483
1484 if ((interruptibility != interruptibility_old))
1485 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
1486 }
1487
1488 static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
1489 {
1490 struct vcpu_vmx *vmx = to_vmx(vcpu);
1491 unsigned long value;
1492
1493
1494
1495
1496
1497 if (data & vmx->pt_desc.ctl_bitmask)
1498 return 1;
1499
1500
1501
1502
1503
1504 if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) &&
1505 ((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN))
1506 return 1;
1507
1508
1509
1510
1511
1512
1513 if ((data & RTIT_CTL_TRACEEN) && !(data & RTIT_CTL_TOPA) &&
1514 !(data & RTIT_CTL_FABRIC_EN) &&
1515 !intel_pt_validate_cap(vmx->pt_desc.caps,
1516 PT_CAP_single_range_output))
1517 return 1;
1518
1519
1520
1521
1522
1523 value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods);
1524 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) &&
1525 !test_bit((data & RTIT_CTL_MTC_RANGE) >>
1526 RTIT_CTL_MTC_RANGE_OFFSET, &value))
1527 return 1;
1528 value = intel_pt_validate_cap(vmx->pt_desc.caps,
1529 PT_CAP_cycle_thresholds);
1530 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
1531 !test_bit((data & RTIT_CTL_CYC_THRESH) >>
1532 RTIT_CTL_CYC_THRESH_OFFSET, &value))
1533 return 1;
1534 value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_periods);
1535 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
1536 !test_bit((data & RTIT_CTL_PSB_FREQ) >>
1537 RTIT_CTL_PSB_FREQ_OFFSET, &value))
1538 return 1;
1539
1540
1541
1542
1543
1544 value = (data & RTIT_CTL_ADDR0) >> RTIT_CTL_ADDR0_OFFSET;
1545 if ((value && (vmx->pt_desc.num_address_ranges < 1)) || (value > 2))
1546 return 1;
1547 value = (data & RTIT_CTL_ADDR1) >> RTIT_CTL_ADDR1_OFFSET;
1548 if ((value && (vmx->pt_desc.num_address_ranges < 2)) || (value > 2))
1549 return 1;
1550 value = (data & RTIT_CTL_ADDR2) >> RTIT_CTL_ADDR2_OFFSET;
1551 if ((value && (vmx->pt_desc.num_address_ranges < 3)) || (value > 2))
1552 return 1;
1553 value = (data & RTIT_CTL_ADDR3) >> RTIT_CTL_ADDR3_OFFSET;
1554 if ((value && (vmx->pt_desc.num_address_ranges < 4)) || (value > 2))
1555 return 1;
1556
1557 return 0;
1558 }
1559
1560 static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
1561 void *insn, int insn_len)
1562 {
1563
1564
1565
1566
1567
1568
1569
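	/*
	 * Emulation of instructions in SGX enclaves is impossible as RIP does
	 * not point at the failing instruction, and even if it did, the code
	 * stream is inaccessible. Inject #UD instead of exiting to userspace
	 * so that guest userspace can't DoS the guest simply by triggering
	 * emulation (enclaves are CPL3 only).
	 */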
1570 if (to_vmx(vcpu)->exit_reason.enclave_mode) {
1571 kvm_queue_exception(vcpu, UD_VECTOR);
1572 return false;
1573 }
1574 return true;
1575 }
1576
1577 static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
1578 {
1579 union vmx_exit_reason exit_reason = to_vmx(vcpu)->exit_reason;
1580 unsigned long rip, orig_rip;
1581 u32 instr_len;
1582
1583
1584
1585
1586
1587
1588
1589
1590
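	/*
	 * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig relies on
	 * undefined behavior: the SDM doesn't mandate that the field be set
	 * on EPT misconfig. Real hardware updates it in practice, but other
	 * hypervisors (namely Hyper-V) don't, so when running on top of a
	 * hypervisor fall back to emulation to skip the instruction.
	 */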
1591 if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
1592 exit_reason.basic != EXIT_REASON_EPT_MISCONFIG) {
1593 instr_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
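		/*
		 * If no instruction length was provided for this exit, don't
		 * try to advance RIP; just clear the interrupt shadow below.
		 */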
1611 if (!instr_len)
1612 goto rip_updated;
1613
1614 WARN(exit_reason.enclave_mode,
1615 "KVM: skipping instruction after SGX enclave VM-Exit");
1616
1617 orig_rip = kvm_rip_read(vcpu);
1618 rip = orig_rip + instr_len;
1619 #ifdef CONFIG_X86_64
1620
1621
1622
1623
1624
1625 if (unlikely(((rip ^ orig_rip) >> 31) == 3) && !is_64_bit_mode(vcpu))
1626 rip = (u32)rip;
1627 #endif
1628 kvm_rip_write(vcpu, rip);
1629 } else {
1630 if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
1631 return 0;
1632 }
1633
1634 rip_updated:
1635
1636 vmx_set_interrupt_shadow(vcpu, 0);
1637
1638 return 1;
1639 }
1640
1641
1642
1643
1644
1645 static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu)
1646 {
1647 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1648 struct vcpu_vmx *vmx = to_vmx(vcpu);
1649
1650 if (!is_guest_mode(vcpu))
1651 return;
1652
1653
1654
1655
1656
1657
1658
1659
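	/*
	 * Per the SDM, MTF takes priority over debug-trap exceptions besides
	 * T-bit traps. As instruction emulation is completed (i.e. at the
	 * instruction boundary), any pending #DB must be a debug trap.
	 * Record the pending MTF state here so it can be delivered by
	 * vmx_check_nested_events().
	 */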
1660 if (nested_cpu_has_mtf(vmcs12) &&
1661 (!vcpu->arch.exception.pending ||
1662 vcpu->arch.exception.nr == DB_VECTOR))
1663 vmx->nested.mtf_pending = true;
1664 else
1665 vmx->nested.mtf_pending = false;
1666 }
1667
1668 static int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu)
1669 {
1670 vmx_update_emulated_instruction(vcpu);
1671 return skip_emulated_instruction(vcpu);
1672 }
1673
1674 static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
1675 {
1676
1677
1678
1679
1680
1681
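	/*
	 * Ensure the HLT activity state is cleared in the VMCS. There is no
	 * need to skip the instruction: if the HLT state is set, the HLT
	 * instruction has already executed and RIP has already been advanced.
	 */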
1682 if (kvm_hlt_in_guest(vcpu->kvm) &&
1683 vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
1684 vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
1685 }
1686
1687 static void vmx_queue_exception(struct kvm_vcpu *vcpu)
1688 {
1689 struct vcpu_vmx *vmx = to_vmx(vcpu);
1690 unsigned nr = vcpu->arch.exception.nr;
1691 bool has_error_code = vcpu->arch.exception.has_error_code;
1692 u32 error_code = vcpu->arch.exception.error_code;
1693 u32 intr_info = nr | INTR_INFO_VALID_MASK;
1694
1695 kvm_deliver_exception_payload(vcpu);
1696
1697 if (has_error_code) {
1698 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
1699 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
1700 }
1701
1702 if (vmx->rmode.vm86_active) {
1703 int inc_eip = 0;
1704 if (kvm_exception_is_soft(nr))
1705 inc_eip = vcpu->arch.event_exit_inst_len;
1706 kvm_inject_realmode_interrupt(vcpu, nr, inc_eip);
1707 return;
1708 }
1709
1710 WARN_ON_ONCE(vmx->emulation_required);
1711
1712 if (kvm_exception_is_soft(nr)) {
1713 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
1714 vmx->vcpu.arch.event_exit_inst_len);
1715 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
1716 } else
1717 intr_info |= INTR_TYPE_HARD_EXCEPTION;
1718
1719 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
1720
1721 vmx_clear_hlt(vcpu);
1722 }
1723
1724 static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr,
1725 bool load_into_hardware)
1726 {
1727 struct vmx_uret_msr *uret_msr;
1728
1729 uret_msr = vmx_find_uret_msr(vmx, msr);
1730 if (!uret_msr)
1731 return;
1732
1733 uret_msr->load_into_hardware = load_into_hardware;
1734 }
1735
1736
1737
1738
1739
1740
1741
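/*
 * Configure the user-return MSRs that must be loaded into hardware when
 * running the guest. Omitting an MSR here does NOT mean it isn't emulated,
 * only that it won't be loaded into hardware for guest execution.
 */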
1742 static void vmx_setup_uret_msrs(struct vcpu_vmx *vmx)
1743 {
1744 #ifdef CONFIG_X86_64
1745 bool load_syscall_msrs;
1746
1747
1748
1749
1750
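	/*
	 * The SYSCALL MSRs are only needed for a long mode guest, and only
	 * if EFER.SCE is set.
	 */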
1751 load_syscall_msrs = is_long_mode(&vmx->vcpu) &&
1752 (vmx->vcpu.arch.efer & EFER_SCE);
1753
1754 vmx_setup_uret_msr(vmx, MSR_STAR, load_syscall_msrs);
1755 vmx_setup_uret_msr(vmx, MSR_LSTAR, load_syscall_msrs);
1756 vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK, load_syscall_msrs);
1757 #endif
1758 vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx));
1759
1760 vmx_setup_uret_msr(vmx, MSR_TSC_AUX,
1761 guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
1762 guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID));
1763
1764
1765
1766
1767
1768
1769
1770 vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM));
1771
1772
1773
1774
1775
1776 vmx->guest_uret_msrs_loaded = false;
1777 }
1778
1779 u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
1780 {
1781 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1782
1783 if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING))
1784 return vmcs12->tsc_offset;
1785
1786 return 0;
1787 }
1788
1789 u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
1790 {
1791 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1792
1793 if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING) &&
1794 nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING))
1795 return vmcs12->tsc_multiplier;
1796
1797 return kvm_caps.default_tsc_scaling_ratio;
1798 }
1799
1800 static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1801 {
1802 vmcs_write64(TSC_OFFSET, offset);
1803 }
1804
1805 static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
1806 {
1807 vmcs_write64(TSC_MULTIPLIER, multiplier);
1808 }
1809
1810
1811
1812
1813
1814
1815
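/*
 * Whether the vCPU may use VMX instructions and MSRs (i.e. nested VMX):
 * requires both the "nested" module parameter and VMX in the guest's CPUID.
 */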
1816 bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
1817 {
1818 return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
1819 }
1820
1821 static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
1822 uint64_t val)
1823 {
1824 uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits;
1825
1826 return !(val & ~valid_bits);
1827 }
1828
1829 static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
1830 {
1831 switch (msr->index) {
1832 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
1833 if (!nested)
1834 return 1;
1835 return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
1836 case MSR_IA32_PERF_CAPABILITIES:
1837 msr->data = vmx_get_perf_capabilities();
1838 return 0;
1839 default:
1840 return KVM_MSR_RET_INVALID;
1841 }
1842 }
1843
1844
1845
1846
1847
1848
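/*
 * Read an MSR value (of 'msr_info->index') into 'msr_info->data'.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */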
1849 static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1850 {
1851 struct vcpu_vmx *vmx = to_vmx(vcpu);
1852 struct vmx_uret_msr *msr;
1853 u32 index;
1854
1855 switch (msr_info->index) {
1856 #ifdef CONFIG_X86_64
1857 case MSR_FS_BASE:
1858 msr_info->data = vmcs_readl(GUEST_FS_BASE);
1859 break;
1860 case MSR_GS_BASE:
1861 msr_info->data = vmcs_readl(GUEST_GS_BASE);
1862 break;
1863 case MSR_KERNEL_GS_BASE:
1864 msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
1865 break;
1866 #endif
1867 case MSR_EFER:
1868 return kvm_get_msr_common(vcpu, msr_info);
1869 case MSR_IA32_TSX_CTRL:
1870 if (!msr_info->host_initiated &&
1871 !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
1872 return 1;
1873 goto find_uret_msr;
1874 case MSR_IA32_UMWAIT_CONTROL:
1875 if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
1876 return 1;
1877
1878 msr_info->data = vmx->msr_ia32_umwait_control;
1879 break;
1880 case MSR_IA32_SPEC_CTRL:
1881 if (!msr_info->host_initiated &&
1882 !guest_has_spec_ctrl_msr(vcpu))
1883 return 1;
1884
1885 msr_info->data = to_vmx(vcpu)->spec_ctrl;
1886 break;
1887 case MSR_IA32_SYSENTER_CS:
1888 msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
1889 break;
1890 case MSR_IA32_SYSENTER_EIP:
1891 msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP);
1892 break;
1893 case MSR_IA32_SYSENTER_ESP:
1894 msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
1895 break;
1896 case MSR_IA32_BNDCFGS:
1897 if (!kvm_mpx_supported() ||
1898 (!msr_info->host_initiated &&
1899 !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
1900 return 1;
1901 msr_info->data = vmcs_read64(GUEST_BNDCFGS);
1902 break;
1903 case MSR_IA32_MCG_EXT_CTL:
1904 if (!msr_info->host_initiated &&
1905 !(vmx->msr_ia32_feature_control &
1906 FEAT_CTL_LMCE_ENABLED))
1907 return 1;
1908 msr_info->data = vcpu->arch.mcg_ext_ctl;
1909 break;
1910 case MSR_IA32_FEAT_CTL:
1911 msr_info->data = vmx->msr_ia32_feature_control;
1912 break;
1913 case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3:
1914 if (!msr_info->host_initiated &&
1915 !guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC))
1916 return 1;
1917 msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash
1918 [msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
1919 break;
1920 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
1921 if (!nested_vmx_allowed(vcpu))
1922 return 1;
1923 if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
1924 &msr_info->data))
1925 return 1;
1926
1927
1928
1929
1930
1931
1932
1933 if (!msr_info->host_initiated &&
1934 vmx->nested.enlightened_vmcs_enabled)
1935 nested_evmcs_filter_control_msr(msr_info->index,
1936 &msr_info->data);
1937 break;
1938 case MSR_IA32_RTIT_CTL:
1939 if (!vmx_pt_mode_is_host_guest())
1940 return 1;
1941 msr_info->data = vmx->pt_desc.guest.ctl;
1942 break;
1943 case MSR_IA32_RTIT_STATUS:
1944 if (!vmx_pt_mode_is_host_guest())
1945 return 1;
1946 msr_info->data = vmx->pt_desc.guest.status;
1947 break;
1948 case MSR_IA32_RTIT_CR3_MATCH:
1949 if (!vmx_pt_mode_is_host_guest() ||
1950 !intel_pt_validate_cap(vmx->pt_desc.caps,
1951 PT_CAP_cr3_filtering))
1952 return 1;
1953 msr_info->data = vmx->pt_desc.guest.cr3_match;
1954 break;
1955 case MSR_IA32_RTIT_OUTPUT_BASE:
1956 if (!vmx_pt_mode_is_host_guest() ||
1957 (!intel_pt_validate_cap(vmx->pt_desc.caps,
1958 PT_CAP_topa_output) &&
1959 !intel_pt_validate_cap(vmx->pt_desc.caps,
1960 PT_CAP_single_range_output)))
1961 return 1;
1962 msr_info->data = vmx->pt_desc.guest.output_base;
1963 break;
1964 case MSR_IA32_RTIT_OUTPUT_MASK:
1965 if (!vmx_pt_mode_is_host_guest() ||
1966 (!intel_pt_validate_cap(vmx->pt_desc.caps,
1967 PT_CAP_topa_output) &&
1968 !intel_pt_validate_cap(vmx->pt_desc.caps,
1969 PT_CAP_single_range_output)))
1970 return 1;
1971 msr_info->data = vmx->pt_desc.guest.output_mask;
1972 break;
1973 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
1974 index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
1975 if (!vmx_pt_mode_is_host_guest() ||
1976 (index >= 2 * vmx->pt_desc.num_address_ranges))
1977 return 1;
1978 if (index % 2)
1979 msr_info->data = vmx->pt_desc.guest.addr_b[index / 2];
1980 else
1981 msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
1982 break;
1983 case MSR_IA32_DEBUGCTLMSR:
1984 msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
1985 break;
1986 default:
1987 find_uret_msr:
1988 msr = vmx_find_uret_msr(vmx, msr_info->index);
1989 if (msr) {
1990 msr_info->data = msr->data;
1991 break;
1992 }
1993 return kvm_get_msr_common(vcpu, msr_info);
1994 }
1995
1996 return 0;
1997 }
1998
1999 static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu,
2000 u64 data)
2001 {
2002 #ifdef CONFIG_X86_64
2003 if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
2004 return (u32)data;
2005 #endif
2006 return (unsigned long)data;
2007 }
2008
2009 static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu)
2010 {
2011 u64 debugctl = vmx_supported_debugctl();
2012
2013 if (!intel_pmu_lbr_is_enabled(vcpu))
2014 debugctl &= ~DEBUGCTLMSR_LBR_MASK;
2015
2016 if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
2017 debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
2018
2019 return debugctl;
2020 }
2021
2022
2023
2024
2025
2026
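/*
 * Write an MSR value into the appropriate "register".
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */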
2027 static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2028 {
2029 struct vcpu_vmx *vmx = to_vmx(vcpu);
2030 struct vmx_uret_msr *msr;
2031 int ret = 0;
2032 u32 msr_index = msr_info->index;
2033 u64 data = msr_info->data;
2034 u32 index;
2035
2036 switch (msr_index) {
2037 case MSR_EFER:
2038 ret = kvm_set_msr_common(vcpu, msr_info);
2039 break;
2040 #ifdef CONFIG_X86_64
2041 case MSR_FS_BASE:
2042 vmx_segment_cache_clear(vmx);
2043 vmcs_writel(GUEST_FS_BASE, data);
2044 break;
2045 case MSR_GS_BASE:
2046 vmx_segment_cache_clear(vmx);
2047 vmcs_writel(GUEST_GS_BASE, data);
2048 break;
2049 case MSR_KERNEL_GS_BASE:
2050 vmx_write_guest_kernel_gs_base(vmx, data);
2051 break;
2052 case MSR_IA32_XFD:
2053 ret = kvm_set_msr_common(vcpu, msr_info);
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063 if (!ret && data) {
2064 vmx_disable_intercept_for_msr(vcpu, MSR_IA32_XFD,
2065 MSR_TYPE_RW);
2066 vcpu->arch.xfd_no_write_intercept = true;
2067 vmx_update_exception_bitmap(vcpu);
2068 }
2069 break;
2070 #endif
2071 case MSR_IA32_SYSENTER_CS:
2072 if (is_guest_mode(vcpu))
2073 get_vmcs12(vcpu)->guest_sysenter_cs = data;
2074 vmcs_write32(GUEST_SYSENTER_CS, data);
2075 break;
2076 case MSR_IA32_SYSENTER_EIP:
2077 if (is_guest_mode(vcpu)) {
2078 data = nested_vmx_truncate_sysenter_addr(vcpu, data);
2079 get_vmcs12(vcpu)->guest_sysenter_eip = data;
2080 }
2081 vmcs_writel(GUEST_SYSENTER_EIP, data);
2082 break;
2083 case MSR_IA32_SYSENTER_ESP:
2084 if (is_guest_mode(vcpu)) {
2085 data = nested_vmx_truncate_sysenter_addr(vcpu, data);
2086 get_vmcs12(vcpu)->guest_sysenter_esp = data;
2087 }
2088 vmcs_writel(GUEST_SYSENTER_ESP, data);
2089 break;
2090 case MSR_IA32_DEBUGCTLMSR: {
2091 u64 invalid = data & ~vcpu_supported_debugctl(vcpu);
2092 if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) {
2093 if (report_ignored_msrs)
2094 vcpu_unimpl(vcpu, "%s: BTF|LBR in IA32_DEBUGCTLMSR 0x%llx, nop\n",
2095 __func__, data);
2096 data &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
2097 invalid &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
2098 }
2099
2100 if (invalid)
2101 return 1;
2102
2103 if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
2104 VM_EXIT_SAVE_DEBUG_CONTROLS)
2105 get_vmcs12(vcpu)->guest_ia32_debugctl = data;
2106
2107 vmcs_write64(GUEST_IA32_DEBUGCTL, data);
2108 if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
2109 (data & DEBUGCTLMSR_LBR))
2110 intel_pmu_create_guest_lbr_event(vcpu);
2111 return 0;
2112 }
2113 case MSR_IA32_BNDCFGS:
2114 if (!kvm_mpx_supported() ||
2115 (!msr_info->host_initiated &&
2116 !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
2117 return 1;
2118 if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
2119 (data & MSR_IA32_BNDCFGS_RSVD))
2120 return 1;
2121
2122 if (is_guest_mode(vcpu) &&
2123 ((vmx->nested.msrs.entry_ctls_high & VM_ENTRY_LOAD_BNDCFGS) ||
2124 (vmx->nested.msrs.exit_ctls_high & VM_EXIT_CLEAR_BNDCFGS)))
2125 get_vmcs12(vcpu)->guest_bndcfgs = data;
2126
2127 vmcs_write64(GUEST_BNDCFGS, data);
2128 break;
2129 case MSR_IA32_UMWAIT_CONTROL:
2130 if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
2131 return 1;
2132
2133
2134 if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32)))
2135 return 1;
2136
2137 vmx->msr_ia32_umwait_control = data;
2138 break;
2139 case MSR_IA32_SPEC_CTRL:
2140 if (!msr_info->host_initiated &&
2141 !guest_has_spec_ctrl_msr(vcpu))
2142 return 1;
2143
2144 if (kvm_spec_ctrl_test_value(data))
2145 return 1;
2146
2147 vmx->spec_ctrl = data;
2148 if (!data)
2149 break;
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163 vmx_disable_intercept_for_msr(vcpu,
2164 MSR_IA32_SPEC_CTRL,
2165 MSR_TYPE_RW);
2166 break;
2167 case MSR_IA32_TSX_CTRL:
2168 if (!msr_info->host_initiated &&
2169 !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
2170 return 1;
2171 if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
2172 return 1;
2173 goto find_uret_msr;
2174 case MSR_IA32_PRED_CMD:
2175 if (!msr_info->host_initiated &&
2176 !guest_has_pred_cmd_msr(vcpu))
2177 return 1;
2178
2179 if (data & ~PRED_CMD_IBPB)
2180 return 1;
2181 if (!boot_cpu_has(X86_FEATURE_IBPB))
2182 return 1;
2183 if (!data)
2184 break;
2185
2186 wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199 vmx_disable_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W);
2200 break;
2201 case MSR_IA32_CR_PAT:
2202 if (!kvm_pat_valid(data))
2203 return 1;
2204
2205 if (is_guest_mode(vcpu) &&
2206 get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
2207 get_vmcs12(vcpu)->guest_ia32_pat = data;
2208
2209 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
2210 vmcs_write64(GUEST_IA32_PAT, data);
2211 vcpu->arch.pat = data;
2212 break;
2213 }
2214 ret = kvm_set_msr_common(vcpu, msr_info);
2215 break;
2216 case MSR_IA32_MCG_EXT_CTL:
2217 if ((!msr_info->host_initiated &&
2218 !(to_vmx(vcpu)->msr_ia32_feature_control &
2219 FEAT_CTL_LMCE_ENABLED)) ||
2220 (data & ~MCG_EXT_CTL_LMCE_EN))
2221 return 1;
2222 vcpu->arch.mcg_ext_ctl = data;
2223 break;
2224 case MSR_IA32_FEAT_CTL:
2225 if (!vmx_feature_control_msr_valid(vcpu, data) ||
2226 (to_vmx(vcpu)->msr_ia32_feature_control &
2227 FEAT_CTL_LOCKED && !msr_info->host_initiated))
2228 return 1;
2229 vmx->msr_ia32_feature_control = data;
2230 if (msr_info->host_initiated && data == 0)
2231 vmx_leave_nested(vcpu);
2232
2233 /* SGX may be enabled/disabled by guest's firmware */
2234 vmx_write_encls_bitmap(vcpu, NULL);
2235 break;
2236 case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3:
2237 /*
2238 * On real hardware, the LE hash MSRs are writable before
2239 * the firmware sets bit 0 in MSR 0x7a ("activating" SGX),
2240 * at which point SGX related bits in IA32_FEATURE_CONTROL
2241 * become writable.
2242 *
2243 * KVM does not emulate SGX activation for simplicity, so
2244 * allow writes to the LE hash MSRs if IA32_FEATURE_CONTROL
2245 * is unlocked. This is technically not architectural
2246 * behavior, but it's close enough.
2247 */
2248 if (!msr_info->host_initiated &&
2249 (!guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC) ||
2250 ((vmx->msr_ia32_feature_control & FEAT_CTL_LOCKED) &&
2251 !(vmx->msr_ia32_feature_control & FEAT_CTL_SGX_LC_ENABLED))))
2252 return 1;
2253 vmx->msr_ia32_sgxlepubkeyhash
2254 [msr_index - MSR_IA32_SGXLEPUBKEYHASH0] = data;
2255 break;
2256 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2257 if (!msr_info->host_initiated)
2258 return 1;
2259 if (!nested_vmx_allowed(vcpu))
2260 return 1;
2261 return vmx_set_vmx_msr(vcpu, msr_index, data);
2262 case MSR_IA32_RTIT_CTL:
2263 if (!vmx_pt_mode_is_host_guest() ||
2264 vmx_rtit_ctl_check(vcpu, data) ||
2265 vmx->nested.vmxon)
2266 return 1;
2267 vmcs_write64(GUEST_IA32_RTIT_CTL, data);
2268 vmx->pt_desc.guest.ctl = data;
2269 pt_update_intercept_for_msr(vcpu);
2270 break;
2271 case MSR_IA32_RTIT_STATUS:
2272 if (!pt_can_write_msr(vmx))
2273 return 1;
2274 if (data & MSR_IA32_RTIT_STATUS_MASK)
2275 return 1;
2276 vmx->pt_desc.guest.status = data;
2277 break;
2278 case MSR_IA32_RTIT_CR3_MATCH:
2279 if (!pt_can_write_msr(vmx))
2280 return 1;
2281 if (!intel_pt_validate_cap(vmx->pt_desc.caps,
2282 PT_CAP_cr3_filtering))
2283 return 1;
2284 vmx->pt_desc.guest.cr3_match = data;
2285 break;
2286 case MSR_IA32_RTIT_OUTPUT_BASE:
2287 if (!pt_can_write_msr(vmx))
2288 return 1;
2289 if (!intel_pt_validate_cap(vmx->pt_desc.caps,
2290 PT_CAP_topa_output) &&
2291 !intel_pt_validate_cap(vmx->pt_desc.caps,
2292 PT_CAP_single_range_output))
2293 return 1;
2294 if (!pt_output_base_valid(vcpu, data))
2295 return 1;
2296 vmx->pt_desc.guest.output_base = data;
2297 break;
2298 case MSR_IA32_RTIT_OUTPUT_MASK:
2299 if (!pt_can_write_msr(vmx))
2300 return 1;
2301 if (!intel_pt_validate_cap(vmx->pt_desc.caps,
2302 PT_CAP_topa_output) &&
2303 !intel_pt_validate_cap(vmx->pt_desc.caps,
2304 PT_CAP_single_range_output))
2305 return 1;
2306 vmx->pt_desc.guest.output_mask = data;
2307 break;
2308 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
2309 if (!pt_can_write_msr(vmx))
2310 return 1;
2311 index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
2312 if (index >= 2 * vmx->pt_desc.num_address_ranges)
2313 return 1;
2314 if (is_noncanonical_address(data, vcpu))
2315 return 1;
2316 if (index % 2)
2317 vmx->pt_desc.guest.addr_b[index / 2] = data;
2318 else
2319 vmx->pt_desc.guest.addr_a[index / 2] = data;
2320 break;
2321 case MSR_IA32_PERF_CAPABILITIES:
2322 if (data && !vcpu_to_pmu(vcpu)->version)
2323 return 1;
2324 if (data & PMU_CAP_LBR_FMT) {
2325 if ((data & PMU_CAP_LBR_FMT) !=
2326 (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT))
2327 return 1;
2328 if (!cpuid_model_is_consistent(vcpu))
2329 return 1;
2330 }
2331 if (data & PERF_CAP_PEBS_FORMAT) {
2332 if ((data & PERF_CAP_PEBS_MASK) !=
2333 (vmx_get_perf_capabilities() & PERF_CAP_PEBS_MASK))
2334 return 1;
2335 if (!guest_cpuid_has(vcpu, X86_FEATURE_DS))
2336 return 1;
2337 if (!guest_cpuid_has(vcpu, X86_FEATURE_DTES64))
2338 return 1;
2339 if (!cpuid_model_is_consistent(vcpu))
2340 return 1;
2341 }
2342 ret = kvm_set_msr_common(vcpu, msr_info);
2343 break;
2344
2345 default:
2346 find_uret_msr:
2347 msr = vmx_find_uret_msr(vmx, msr_index);
2348 if (msr)
2349 ret = vmx_set_guest_uret_msr(vmx, msr, data);
2350 else
2351 ret = kvm_set_msr_common(vcpu, msr_info);
2352 }
2353
2354
2355 if (msr_index == MSR_IA32_ARCH_CAPABILITIES)
2356 vmx_update_fb_clear_dis(vcpu, vmx);
2357
2358 return ret;
2359 }
2360
2361 static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
2362 {
2363 unsigned long guest_owned_bits;
2364
2365 kvm_register_mark_available(vcpu, reg);
2366
2367 switch (reg) {
2368 case VCPU_REGS_RSP:
2369 vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
2370 break;
2371 case VCPU_REGS_RIP:
2372 vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
2373 break;
2374 case VCPU_EXREG_PDPTR:
2375 if (enable_ept)
2376 ept_save_pdptrs(vcpu);
2377 break;
2378 case VCPU_EXREG_CR0:
2379 guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
2380
2381 vcpu->arch.cr0 &= ~guest_owned_bits;
2382 vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & guest_owned_bits;
2383 break;
2384 case VCPU_EXREG_CR3:
2385 /*
2386 * When intercepting CR3 loads, e.g. for shadow paging, KVM's
2387 * CR3 is loaded into hardware, not the guest's CR3.
2388 */
2389 if (!(exec_controls_get(to_vmx(vcpu)) & CPU_BASED_CR3_LOAD_EXITING))
2390 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
2391 break;
2392 case VCPU_EXREG_CR4:
2393 guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
2394
2395 vcpu->arch.cr4 &= ~guest_owned_bits;
2396 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & guest_owned_bits;
2397 break;
2398 default:
2399 KVM_BUG_ON(1, vcpu->kvm);
2400 break;
2401 }
2402 }
2403
2404 static __init int cpu_has_kvm_support(void)
2405 {
2406 return cpu_has_vmx();
2407 }
2408
2409 static __init int vmx_disabled_by_bios(void)
2410 {
2411 return !boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
2412 !boot_cpu_has(X86_FEATURE_VMX);
2413 }
2414
2415 static int kvm_cpu_vmxon(u64 vmxon_pointer)
2416 {
2417 u64 msr;
2418
2419 cr4_set_bits(X86_CR4_VMXE);
2420
2421 asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t"
2422 _ASM_EXTABLE(1b, %l[fault])
2423 : : [vmxon_pointer] "m"(vmxon_pointer)
2424 : : fault);
2425 return 0;
2426
2427 fault:
2428 WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
2429 rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
2430 cr4_clear_bits(X86_CR4_VMXE);
2431
2432 return -EFAULT;
2433 }
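/*
 * Note on kvm_cpu_vmxon() above: CR4.VMXE must be set before VMXON executes,
 * and a faulting VMXON (e.g. because firmware left IA32_FEAT_CTL locked with
 * VMX disabled) is caught via the exception-table entry rather than crashing,
 * so CR4.VMXE can be cleared again and -EFAULT returned.
 */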
2434
2435 static int vmx_hardware_enable(void)
2436 {
2437 int cpu = raw_smp_processor_id();
2438 u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
2439 int r;
2440
2441 if (cr4_read_shadow() & X86_CR4_VMXE)
2442 return -EBUSY;
2443
2444 /*
2445 * This can happen if we hot-added a CPU but failed to allocate
2446 * the VP assist page for it.
2447 */
2448 if (static_branch_unlikely(&enable_evmcs) &&
2449 !hv_get_vp_assist_page(cpu))
2450 return -EFAULT;
2451
2452 intel_pt_handle_vmx(1);
2453
2454 r = kvm_cpu_vmxon(phys_addr);
2455 if (r) {
2456 intel_pt_handle_vmx(0);
2457 return r;
2458 }
2459
2460 if (enable_ept)
2461 ept_sync_global();
2462
2463 return 0;
2464 }
2465
2466 static void vmclear_local_loaded_vmcss(void)
2467 {
2468 int cpu = raw_smp_processor_id();
2469 struct loaded_vmcs *v, *n;
2470
2471 list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
2472 loaded_vmcss_on_cpu_link)
2473 __loaded_vmcs_clear(v);
2474 }
2475
2476 static void vmx_hardware_disable(void)
2477 {
2478 vmclear_local_loaded_vmcss();
2479
2480 if (cpu_vmxoff())
2481 kvm_spurious_fault();
2482
2483 intel_pt_handle_vmx(0);
2484 }
2485
2486 /*
2487 * There is no X86_FEATURE for SGX yet, but anyway we need to query CPUID
2488 * directly instead of going through cpu_has(), to ensure KVM is trapping
2489 * ENCLS whenever it's supported in hardware. It does not matter whether
2490 * the host OS supports or has enabled SGX.
2491 */
2492 static bool cpu_has_sgx(void)
2493 {
2494 return cpuid_eax(0) >= 0x12 && (cpuid_eax(0x12) & BIT(0));
2495 }
2496
2497 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
2498 u32 msr, u32 *result)
2499 {
2500 u32 vmx_msr_low, vmx_msr_high;
2501 u32 ctl = ctl_min | ctl_opt;
2502
2503 rdmsr(msr, vmx_msr_low, vmx_msr_high);
2504
2505 ctl &= vmx_msr_high;
2506 ctl |= vmx_msr_low;
2507
2508
2509 if (ctl_min & ~ctl)
2510 return -EIO;
2511
2512 *result = ctl;
2513 return 0;
2514 }
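/*
 * Worked example for adjust_vmx_controls() above: the capability MSR's low
 * word gives the allowed-0 settings (bits that must be 1) and the high word
 * the allowed-1 settings (bits that may be 1). With ctl_min containing HLT
 * exiting and ctl_opt containing the MSR-bitmap control, a CPU whose high
 * word clears the MSR-bitmap bit simply loses the optional feature, whereas
 * one whose high word clears HLT exiting fails the min check and -EIO is
 * returned.
 */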
2515
2516 static __init u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
2517 {
2518 u64 allowed;
2519
2520 rdmsrl(msr, allowed);
2521
2522 return ctl_opt & allowed;
2523 }
2524
2525 static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
2526 struct vmx_capability *vmx_cap)
2527 {
2528 u32 vmx_msr_low, vmx_msr_high;
2529 u32 min, opt, min2, opt2;
2530 u32 _pin_based_exec_control = 0;
2531 u32 _cpu_based_exec_control = 0;
2532 u32 _cpu_based_2nd_exec_control = 0;
2533 u64 _cpu_based_3rd_exec_control = 0;
2534 u32 _vmexit_control = 0;
2535 u32 _vmentry_control = 0;
2536 int i;
2537
2538 /*
2539 * VM-Entry/VM-Exit control pairs that must either both be set or both
2540 * be cleared. An inconsistent configuration is rejected (if
2541 * error_on_inconsistent_vmcs_config) or sanitized by clearing both.
2542 */
2543 struct {
2544 u32 entry_control;
2545 u32 exit_control;
2546 } const vmcs_entry_exit_pairs[] = {
2547 { VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL },
2548 { VM_ENTRY_LOAD_IA32_PAT, VM_EXIT_LOAD_IA32_PAT },
2549 { VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER },
2550 { VM_ENTRY_LOAD_BNDCFGS, VM_EXIT_CLEAR_BNDCFGS },
2551 { VM_ENTRY_LOAD_IA32_RTIT_CTL, VM_EXIT_CLEAR_IA32_RTIT_CTL },
2552 };
2553
2554 memset(vmcs_conf, 0, sizeof(*vmcs_conf));
2555 min = CPU_BASED_HLT_EXITING |
2556 #ifdef CONFIG_X86_64
2557 CPU_BASED_CR8_LOAD_EXITING |
2558 CPU_BASED_CR8_STORE_EXITING |
2559 #endif
2560 CPU_BASED_CR3_LOAD_EXITING |
2561 CPU_BASED_CR3_STORE_EXITING |
2562 CPU_BASED_UNCOND_IO_EXITING |
2563 CPU_BASED_MOV_DR_EXITING |
2564 CPU_BASED_USE_TSC_OFFSETTING |
2565 CPU_BASED_MWAIT_EXITING |
2566 CPU_BASED_MONITOR_EXITING |
2567 CPU_BASED_INVLPG_EXITING |
2568 CPU_BASED_RDPMC_EXITING;
2569
2570 opt = CPU_BASED_TPR_SHADOW |
2571 CPU_BASED_USE_MSR_BITMAPS |
2572 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS |
2573 CPU_BASED_ACTIVATE_TERTIARY_CONTROLS;
2574 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
2575 &_cpu_based_exec_control) < 0)
2576 return -EIO;
2577 #ifdef CONFIG_X86_64
2578 if (_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)
2579 _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
2580 ~CPU_BASED_CR8_STORE_EXITING;
2581 #endif
2582 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
2583 min2 = 0;
2584 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
2585 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
2586 SECONDARY_EXEC_WBINVD_EXITING |
2587 SECONDARY_EXEC_ENABLE_VPID |
2588 SECONDARY_EXEC_ENABLE_EPT |
2589 SECONDARY_EXEC_UNRESTRICTED_GUEST |
2590 SECONDARY_EXEC_PAUSE_LOOP_EXITING |
2591 SECONDARY_EXEC_DESC |
2592 SECONDARY_EXEC_ENABLE_RDTSCP |
2593 SECONDARY_EXEC_ENABLE_INVPCID |
2594 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2595 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
2596 SECONDARY_EXEC_SHADOW_VMCS |
2597 SECONDARY_EXEC_XSAVES |
2598 SECONDARY_EXEC_RDSEED_EXITING |
2599 SECONDARY_EXEC_RDRAND_EXITING |
2600 SECONDARY_EXEC_ENABLE_PML |
2601 SECONDARY_EXEC_TSC_SCALING |
2602 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
2603 SECONDARY_EXEC_PT_USE_GPA |
2604 SECONDARY_EXEC_PT_CONCEAL_VMX |
2605 SECONDARY_EXEC_ENABLE_VMFUNC |
2606 SECONDARY_EXEC_BUS_LOCK_DETECTION |
2607 SECONDARY_EXEC_NOTIFY_VM_EXITING;
2608 if (cpu_has_sgx())
2609 opt2 |= SECONDARY_EXEC_ENCLS_EXITING;
2610 if (adjust_vmx_controls(min2, opt2,
2611 MSR_IA32_VMX_PROCBASED_CTLS2,
2612 &_cpu_based_2nd_exec_control) < 0)
2613 return -EIO;
2614 }
2615 #ifndef CONFIG_X86_64
2616 if (!(_cpu_based_2nd_exec_control &
2617 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
2618 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
2619 #endif
2620
2621 if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
2622 _cpu_based_2nd_exec_control &= ~(
2623 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2624 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
2625 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
2626
2627 rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
2628 &vmx_cap->ept, &vmx_cap->vpid);
2629
2630 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
2631 /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
2632 enabled */
2633 _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
2634 CPU_BASED_CR3_STORE_EXITING |
2635 CPU_BASED_INVLPG_EXITING);
2636 } else if (vmx_cap->ept) {
2637 pr_warn_once("EPT capabilities should not exist when the "
2638 "'enable EPT' VM-execution control cannot be set\n");
2639
2640 if (error_on_inconsistent_vmcs_config)
2641 return -EIO;
2642
2643 vmx_cap->ept = 0;
2644 }
2645 if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
2646 vmx_cap->vpid) {
2647 pr_warn_once("VPID capabilities should not exist when the "
2648 "'enable VPID' VM-execution control cannot be set\n");
2649
2650 if (error_on_inconsistent_vmcs_config)
2651 return -EIO;
2652
2653 vmx_cap->vpid = 0;
2654 }
2655
2656 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS) {
2657 u64 opt3 = TERTIARY_EXEC_IPI_VIRT;
2658
2659 _cpu_based_3rd_exec_control = adjust_vmx_controls64(opt3,
2660 MSR_IA32_VMX_PROCBASED_CTLS3);
2661 }
2662
2663 min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
2664 #ifdef CONFIG_X86_64
2665 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
2666 #endif
2667 opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
2668 VM_EXIT_LOAD_IA32_PAT |
2669 VM_EXIT_LOAD_IA32_EFER |
2670 VM_EXIT_CLEAR_BNDCFGS |
2671 VM_EXIT_PT_CONCEAL_PIP |
2672 VM_EXIT_CLEAR_IA32_RTIT_CTL;
2673 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
2674 &_vmexit_control) < 0)
2675 return -EIO;
2676
2677 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
2678 opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
2679 PIN_BASED_VMX_PREEMPTION_TIMER;
2680 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
2681 &_pin_based_exec_control) < 0)
2682 return -EIO;
2683
2684 if (cpu_has_broken_vmx_preemption_timer())
2685 _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
2686 if (!(_cpu_based_2nd_exec_control &
2687 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
2688 _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
2689
2690 min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
2691 opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
2692 VM_ENTRY_LOAD_IA32_PAT |
2693 VM_ENTRY_LOAD_IA32_EFER |
2694 VM_ENTRY_LOAD_BNDCFGS |
2695 VM_ENTRY_PT_CONCEAL_PIP |
2696 VM_ENTRY_LOAD_IA32_RTIT_CTL;
2697 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
2698 &_vmentry_control) < 0)
2699 return -EIO;
2700
2701 for (i = 0; i < ARRAY_SIZE(vmcs_entry_exit_pairs); i++) {
2702 u32 n_ctrl = vmcs_entry_exit_pairs[i].entry_control;
2703 u32 x_ctrl = vmcs_entry_exit_pairs[i].exit_control;
2704
2705 if (!(_vmentry_control & n_ctrl) == !(_vmexit_control & x_ctrl))
2706 continue;
2707
2708 pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, entry = %x, exit = %x\n",
2709 _vmentry_control & n_ctrl, _vmexit_control & x_ctrl);
2710
2711 if (error_on_inconsistent_vmcs_config)
2712 return -EIO;
2713
2714 _vmentry_control &= ~n_ctrl;
2715 _vmexit_control &= ~x_ctrl;
2716 }
2717
2718 /*
2719 * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
2720 * can't be used due to an errata where VM Exit may incorrectly clear
2721 * IA32_PERF_GLOBAL_CTRL[34:32]. Work around the errata by using the
2722 * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL instead.
2723 */
2724 if (boot_cpu_data.x86 == 0x6) {
2725 switch (boot_cpu_data.x86_model) {
2726 case 26:
2727 case 30:
2728 case 37:
2729 case 44:
2730 case 46:
2731 _vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
2732 _vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
2733 pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
2734 "does not work properly. Using workaround\n");
2735 break;
2736 default:
2737 break;
2738 }
2739 }
2740
2741
2742 rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
2743
2744 /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
2745 if ((vmx_msr_high & 0x1fff) > PAGE_SIZE)
2746 return -EIO;
2747
2748 #ifdef CONFIG_X86_64
2749 /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
2750 if (vmx_msr_high & (1u<<16))
2751 return -EIO;
2752 #endif
2753
2754 /* Require Write-Back (WB) memory type for VMCS accesses. */
2755 if (((vmx_msr_high >> 18) & 15) != 6)
2756 return -EIO;
2757
2758 vmcs_conf->size = vmx_msr_high & 0x1fff;
2759 vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
2760
2761 vmcs_conf->revision_id = vmx_msr_low;
2762
2763 vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
2764 vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
2765 vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
2766 vmcs_conf->cpu_based_3rd_exec_ctrl = _cpu_based_3rd_exec_control;
2767 vmcs_conf->vmexit_ctrl = _vmexit_control;
2768 vmcs_conf->vmentry_ctrl = _vmentry_control;
2769
2770 #if IS_ENABLED(CONFIG_HYPERV)
2771 if (enlightened_vmcs)
2772 evmcs_sanitize_exec_ctrls(vmcs_conf);
2773 #endif
2774
2775 return 0;
2776 }
2777
2778 struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
2779 {
2780 int node = cpu_to_node(cpu);
2781 struct page *pages;
2782 struct vmcs *vmcs;
2783
2784 pages = __alloc_pages_node(node, flags, 0);
2785 if (!pages)
2786 return NULL;
2787 vmcs = page_address(pages);
2788 memset(vmcs, 0, vmcs_config.size);
2789
2790 /* KVM supports Enlightened VMCS v1 only */
2791 if (static_branch_unlikely(&enable_evmcs))
2792 vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
2793 else
2794 vmcs->hdr.revision_id = vmcs_config.revision_id;
2795
2796 if (shadow)
2797 vmcs->hdr.shadow_vmcs = 1;
2798 return vmcs;
2799 }
2800
2801 void free_vmcs(struct vmcs *vmcs)
2802 {
2803 free_page((unsigned long)vmcs);
2804 }
2805
2806 /*
2807 * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded.
2808 */
2809 void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
2810 {
2811 if (!loaded_vmcs->vmcs)
2812 return;
2813 loaded_vmcs_clear(loaded_vmcs);
2814 free_vmcs(loaded_vmcs->vmcs);
2815 loaded_vmcs->vmcs = NULL;
2816 if (loaded_vmcs->msr_bitmap)
2817 free_page((unsigned long)loaded_vmcs->msr_bitmap);
2818 WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
2819 }
2820
2821 int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
2822 {
2823 loaded_vmcs->vmcs = alloc_vmcs(false);
2824 if (!loaded_vmcs->vmcs)
2825 return -ENOMEM;
2826
2827 vmcs_clear(loaded_vmcs->vmcs);
2828
2829 loaded_vmcs->shadow_vmcs = NULL;
2830 loaded_vmcs->hv_timer_soft_disabled = false;
2831 loaded_vmcs->cpu = -1;
2832 loaded_vmcs->launched = 0;
2833
2834 if (cpu_has_vmx_msr_bitmap()) {
2835 loaded_vmcs->msr_bitmap = (unsigned long *)
2836 __get_free_page(GFP_KERNEL_ACCOUNT);
2837 if (!loaded_vmcs->msr_bitmap)
2838 goto out_vmcs;
2839 memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
2840 }
2841
2842 memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
2843 memset(&loaded_vmcs->controls_shadow, 0,
2844 sizeof(struct vmcs_controls_shadow));
2845
2846 return 0;
2847
2848 out_vmcs:
2849 free_loaded_vmcs(loaded_vmcs);
2850 return -ENOMEM;
2851 }
2852
2853 static void free_kvm_area(void)
2854 {
2855 int cpu;
2856
2857 for_each_possible_cpu(cpu) {
2858 free_vmcs(per_cpu(vmxarea, cpu));
2859 per_cpu(vmxarea, cpu) = NULL;
2860 }
2861 }
2862
2863 static __init int alloc_kvm_area(void)
2864 {
2865 int cpu;
2866
2867 for_each_possible_cpu(cpu) {
2868 struct vmcs *vmcs;
2869
2870 vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL);
2871 if (!vmcs) {
2872 free_kvm_area();
2873 return -ENOMEM;
2874 }
2875
2876 /*
2877 * When eVMCS is enabled, alloc_vmcs_cpu() sets
2878 * vmcs->hdr.revision_id to KVM_EVMCS_VERSION instead of
2879 * the revision_id reported by MSR_IA32_VMX_BASIC.
2880 *
2881 * However, even though not explicitly documented by
2882 * TLFS, the VMXArea passed as the VMXON argument should
2883 * still be marked with the revision_id reported by the
2884 * physical CPU.
2885 */
2886 if (static_branch_unlikely(&enable_evmcs))
2887 vmcs->hdr.revision_id = vmcs_config.revision_id;
2888
2889 per_cpu(vmxarea, cpu) = vmcs;
2890 }
2891 return 0;
2892 }
2893
2894 static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
2895 struct kvm_segment *save)
2896 {
2897 if (!emulate_invalid_guest_state) {
2898 /*
2899 * CS and SS RPL should be equal during guest entry according
2900 * to the VMX spec, but in reality it is not always so. Since the
2901 * vcpu is in the middle of the transition from real mode to
2902 * protected mode, it is safe to assume that RPL 0 is a good
2903 * default value.
2904 */
2905 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
2906 save->selector &= ~SEGMENT_RPL_MASK;
2907 save->dpl = save->selector & SEGMENT_RPL_MASK;
2908 save->s = 1;
2909 }
2910 __vmx_set_segment(vcpu, save, seg);
2911 }
2912
2913 static void enter_pmode(struct kvm_vcpu *vcpu)
2914 {
2915 unsigned long flags;
2916 struct vcpu_vmx *vmx = to_vmx(vcpu);
2917
2918 /*
2919 * Update the real mode segment cache. It may not be up-to-date if a
2920 * segment register was written while the vcpu was in guest mode.
2921 */
2922 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
2923 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
2924 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
2925 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
2926 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
2927 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
2928
2929 vmx->rmode.vm86_active = 0;
2930
2931 __vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
2932
2933 flags = vmcs_readl(GUEST_RFLAGS);
2934 flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
2935 flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
2936 vmcs_writel(GUEST_RFLAGS, flags);
2937
2938 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
2939 (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME));
2940
2941 vmx_update_exception_bitmap(vcpu);
2942
2943 fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
2944 fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
2945 fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
2946 fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
2947 fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
2948 fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
2949 }
2950
2951 static void fix_rmode_seg(int seg, struct kvm_segment *save)
2952 {
2953 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
2954 struct kvm_segment var = *save;
2955
2956 var.dpl = 0x3;
2957 if (seg == VCPU_SREG_CS)
2958 var.type = 0x3;
2959
2960 if (!emulate_invalid_guest_state) {
2961 var.selector = var.base >> 4;
2962 var.base = var.base & 0xffff0;
2963 var.limit = 0xffff;
2964 var.g = 0;
2965 var.db = 0;
2966 var.present = 1;
2967 var.s = 1;
2968 var.l = 0;
2969 var.unusable = 0;
2970 var.type = 0x3;
2971 var.avl = 0;
2972 if (save->base & 0xf)
2973 printk_once(KERN_WARNING "kvm: segment base is not "
2974 "paragraph aligned when entering "
2975 "protected mode (seg=%d)", seg);
2976 }
2977
2978 vmcs_write16(sf->selector, var.selector);
2979 vmcs_writel(sf->base, var.base);
2980 vmcs_write32(sf->limit, var.limit);
2981 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
2982 }
2983
2984 static void enter_rmode(struct kvm_vcpu *vcpu)
2985 {
2986 unsigned long flags;
2987 struct vcpu_vmx *vmx = to_vmx(vcpu);
2988 struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
2989
2990 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
2991 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
2992 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
2993 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
2994 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
2995 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
2996 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
2997
2998 vmx->rmode.vm86_active = 1;
2999
3000 /*
3001 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
3002 * the vcpu. Warn the user that an update is overdue.
3003 */
3004 if (!kvm_vmx->tss_addr)
3005 printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
3006 "called before entering vcpu\n");
3007
3008 vmx_segment_cache_clear(vmx);
3009
3010 vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
3011 vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
3012 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
3013
3014 flags = vmcs_readl(GUEST_RFLAGS);
3015 vmx->rmode.save_rflags = flags;
3016
3017 flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
3018
3019 vmcs_writel(GUEST_RFLAGS, flags);
3020 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
3021 vmx_update_exception_bitmap(vcpu);
3022
3023 fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
3024 fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
3025 fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
3026 fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
3027 fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
3028 fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
3029 }
3030
3031 int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
3032 {
3033 struct vcpu_vmx *vmx = to_vmx(vcpu);
3034
3035 /* Nothing to do if hardware doesn't support EFER. */
3036 if (!vmx_find_uret_msr(vmx, MSR_EFER))
3037 return 0;
3038
3039 vcpu->arch.efer = efer;
3040 if (efer & EFER_LMA)
3041 vm_entry_controls_setbit(vmx, VM_ENTRY_IA32E_MODE);
3042 else
3043 vm_entry_controls_clearbit(vmx, VM_ENTRY_IA32E_MODE);
3044
3045 vmx_setup_uret_msrs(vmx);
3046 return 0;
3047 }
3048
3049 #ifdef CONFIG_X86_64
3050
3051 static void enter_lmode(struct kvm_vcpu *vcpu)
3052 {
3053 u32 guest_tr_ar;
3054
3055 vmx_segment_cache_clear(to_vmx(vcpu));
3056
3057 guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
3058 if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
3059 pr_debug_ratelimited("%s: tss fixup for long mode. \n",
3060 __func__);
3061 vmcs_write32(GUEST_TR_AR_BYTES,
3062 (guest_tr_ar & ~VMX_AR_TYPE_MASK)
3063 | VMX_AR_TYPE_BUSY_64_TSS);
3064 }
3065 vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
3066 }
3067
3068 static void exit_lmode(struct kvm_vcpu *vcpu)
3069 {
3070 vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
3071 }
3072
3073 #endif
3074
3075 static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
3076 {
3077 struct vcpu_vmx *vmx = to_vmx(vcpu);
3078
3079
3080
3081
3082
3083
3084
3085
3086 if (enable_ept) {
3087 ept_sync_global();
3088 } else if (enable_vpid) {
3089 if (cpu_has_vmx_invvpid_global()) {
3090 vpid_sync_vcpu_global();
3091 } else {
3092 vpid_sync_vcpu_single(vmx->vpid);
3093 vpid_sync_vcpu_single(vmx->nested.vpid02);
3094 }
3095 }
3096 }
3097
3098 static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
3099 {
3100 if (is_guest_mode(vcpu))
3101 return nested_get_vpid02(vcpu);
3102 return to_vmx(vcpu)->vpid;
3103 }
3104
3105 static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
3106 {
3107 struct kvm_mmu *mmu = vcpu->arch.mmu;
3108 u64 root_hpa = mmu->root.hpa;
3109
3110 /* No flush required if the current context is invalid. */
3111 if (!VALID_PAGE(root_hpa))
3112 return;
3113
3114 if (enable_ept)
3115 ept_sync_context(construct_eptp(vcpu, root_hpa,
3116 mmu->root_role.level));
3117 else
3118 vpid_sync_context(vmx_get_current_vpid(vcpu));
3119 }
3120
3121 static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
3122 {
3123 /*
3124 * vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in
3125 * vmx_flush_tlb_guest() for an explanation of why this is ok.
3126 */
3127 vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr);
3128 }
3129
3130 static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
3131 {
3132 /*
3133 * vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a
3134 * vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit are
3135 * required to flush GVA->{G,H}PA mappings from the TLB if vpid is
3136 * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed),
3137 * i.e. no explicit INVVPID is necessary.
3138 */
3139 vpid_sync_context(vmx_get_current_vpid(vcpu));
3140 }
3141
3142 void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu)
3143 {
3144 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
3145
3146 if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR))
3147 return;
3148
3149 if (is_pae_paging(vcpu)) {
3150 vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
3151 vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
3152 vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
3153 vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
3154 }
3155 }
3156
3157 void ept_save_pdptrs(struct kvm_vcpu *vcpu)
3158 {
3159 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
3160
3161 if (WARN_ON_ONCE(!is_pae_paging(vcpu)))
3162 return;
3163
3164 mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
3165 mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
3166 mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
3167 mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
3168
3169 kvm_register_mark_available(vcpu, VCPU_EXREG_PDPTR);
3170 }
3171
3172 #define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
3173 CPU_BASED_CR3_STORE_EXITING)
3174
3175 void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
3176 {
3177 struct vcpu_vmx *vmx = to_vmx(vcpu);
3178 unsigned long hw_cr0, old_cr0_pg;
3179 u32 tmp;
3180
3181 old_cr0_pg = kvm_read_cr0_bits(vcpu, X86_CR0_PG);
3182
3183 hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
3184 if (is_unrestricted_guest(vcpu))
3185 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
3186 else {
3187 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
3188 if (!enable_ept)
3189 hw_cr0 |= X86_CR0_WP;
3190
3191 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
3192 enter_pmode(vcpu);
3193
3194 if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
3195 enter_rmode(vcpu);
3196 }
3197
3198 vmcs_writel(CR0_READ_SHADOW, cr0);
3199 vmcs_writel(GUEST_CR0, hw_cr0);
3200 vcpu->arch.cr0 = cr0;
3201 kvm_register_mark_available(vcpu, VCPU_EXREG_CR0);
3202
3203 #ifdef CONFIG_X86_64
3204 if (vcpu->arch.efer & EFER_LME) {
3205 if (!old_cr0_pg && (cr0 & X86_CR0_PG))
3206 enter_lmode(vcpu);
3207 else if (old_cr0_pg && !(cr0 & X86_CR0_PG))
3208 exit_lmode(vcpu);
3209 }
3210 #endif
3211
3212 if (enable_ept && !is_unrestricted_guest(vcpu)) {
3213 /*
3214 * Ensure KVM has an up-to-date snapshot of the guest's CR3. If
3215 * the code below _enables_ CR3 exiting, vmx_cache_reg() will
3216 * (correctly) stop reading vmcs.GUEST_CR3 because it thinks
3217 * KVM's CR3 is installed.
3218 */
3219 if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
3220 vmx_cache_reg(vcpu, VCPU_EXREG_CR3);
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237 if (!(cr0 & X86_CR0_PG)) {
3238 exec_controls_setbit(vmx, CR3_EXITING_BITS);
3239 } else if (!is_guest_mode(vcpu)) {
3240 exec_controls_clearbit(vmx, CR3_EXITING_BITS);
3241 } else {
3242 tmp = exec_controls_get(vmx);
3243 tmp &= ~CR3_EXITING_BITS;
3244 tmp |= get_vmcs12(vcpu)->cpu_based_vm_exec_control & CR3_EXITING_BITS;
3245 exec_controls_set(vmx, tmp);
3246 }
3247
3248
3249 if ((old_cr0_pg ^ cr0) & X86_CR0_PG)
3250 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
3251
3252
3253
3254
3255
3256 if (!(old_cr0_pg & X86_CR0_PG) && (cr0 & X86_CR0_PG))
3257 kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
3258 }
3259
3260 /* depends on vcpu->arch.cr0 being set to its new value */
3261 vmx->emulation_required = vmx_emulation_required(vcpu);
3262 }
3263
3264 static int vmx_get_max_tdp_level(void)
3265 {
3266 if (cpu_has_vmx_ept_5levels())
3267 return 5;
3268 return 4;
3269 }
3270
3271 u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level)
3272 {
3273 u64 eptp = VMX_EPTP_MT_WB;
3274
3275 eptp |= (root_level == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
3276
3277 if (enable_ept_ad_bits &&
3278 (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
3279 eptp |= VMX_EPTP_AD_ENABLE_BIT;
3280 eptp |= root_hpa;
3281
3282 return eptp;
3283 }
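/*
 * Example EPTP layout as built above: bits 2:0 = memory type (6 = WB),
 * bits 5:3 = page-walk length minus one, bit 6 = enable accessed/dirty
 * flags, bits 51:12 = the root HPA. E.g. a 4-level EPT root at 0x1234000
 * with A/D enabled yields 0x1234000 | 0x6 | (3 << 3) | (1 << 6) = 0x123405e.
 */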
3284
3285 static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
3286 int root_level)
3287 {
3288 struct kvm *kvm = vcpu->kvm;
3289 bool update_guest_cr3 = true;
3290 unsigned long guest_cr3;
3291 u64 eptp;
3292
3293 if (enable_ept) {
3294 eptp = construct_eptp(vcpu, root_hpa, root_level);
3295 vmcs_write64(EPT_POINTER, eptp);
3296
3297 hv_track_root_tdp(vcpu, root_hpa);
3298
3299 if (!enable_unrestricted_guest && !is_paging(vcpu))
3300 guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
3301 else if (kvm_register_is_dirty(vcpu, VCPU_EXREG_CR3))
3302 guest_cr3 = vcpu->arch.cr3;
3303 else
3304 update_guest_cr3 = false;
3305 vmx_ept_load_pdptrs(vcpu);
3306 } else {
3307 guest_cr3 = root_hpa | kvm_get_active_pcid(vcpu);
3308 }
3309
3310 if (update_guest_cr3)
3311 vmcs_writel(GUEST_CR3, guest_cr3);
3312 }
3313
3314
3315 static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
3316 {
3317
3318
3319
3320
3321
3322 if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu))
3323 return false;
3324
3325 if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
3326 return false;
3327
3328 return true;
3329 }
3330
3331 void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
3332 {
3333 unsigned long old_cr4 = vcpu->arch.cr4;
3334 struct vcpu_vmx *vmx = to_vmx(vcpu);
3335
3336
3337
3338
3339
3340 unsigned long hw_cr4;
3341
3342 hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
3343 if (is_unrestricted_guest(vcpu))
3344 hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
3345 else if (vmx->rmode.vm86_active)
3346 hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
3347 else
3348 hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
3349
3350 if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
3351 if (cr4 & X86_CR4_UMIP) {
3352 secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
3353 hw_cr4 &= ~X86_CR4_UMIP;
3354 } else if (!is_guest_mode(vcpu) ||
3355 !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) {
3356 secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC);
3357 }
3358 }
3359
3360 vcpu->arch.cr4 = cr4;
3361 kvm_register_mark_available(vcpu, VCPU_EXREG_CR4);
3362
3363 if (!is_unrestricted_guest(vcpu)) {
3364 if (enable_ept) {
3365 if (!is_paging(vcpu)) {
3366 hw_cr4 &= ~X86_CR4_PAE;
3367 hw_cr4 |= X86_CR4_PSE;
3368 } else if (!(cr4 & X86_CR4_PAE)) {
3369 hw_cr4 &= ~X86_CR4_PAE;
3370 }
3371 }
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384 if (!is_paging(vcpu))
3385 hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
3386 }
3387
3388 vmcs_writel(CR4_READ_SHADOW, cr4);
3389 vmcs_writel(GUEST_CR4, hw_cr4);
3390
3391 if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
3392 kvm_update_cpuid_runtime(vcpu);
3393 }
3394
3395 void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
3396 {
3397 struct vcpu_vmx *vmx = to_vmx(vcpu);
3398 u32 ar;
3399
3400 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3401 *var = vmx->rmode.segs[seg];
3402 if (seg == VCPU_SREG_TR
3403 || var->selector == vmx_read_guest_seg_selector(vmx, seg))
3404 return;
3405 var->base = vmx_read_guest_seg_base(vmx, seg);
3406 var->selector = vmx_read_guest_seg_selector(vmx, seg);
3407 return;
3408 }
3409 var->base = vmx_read_guest_seg_base(vmx, seg);
3410 var->limit = vmx_read_guest_seg_limit(vmx, seg);
3411 var->selector = vmx_read_guest_seg_selector(vmx, seg);
3412 ar = vmx_read_guest_seg_ar(vmx, seg);
3413 var->unusable = (ar >> 16) & 1;
3414 var->type = ar & 15;
3415 var->s = (ar >> 4) & 1;
3416 var->dpl = (ar >> 5) & 3;
3417
3418
3419
3420
3421
3422
3423
3424 var->present = !var->unusable;
3425 var->avl = (ar >> 12) & 1;
3426 var->l = (ar >> 13) & 1;
3427 var->db = (ar >> 14) & 1;
3428 var->g = (ar >> 15) & 1;
3429 }
3430
3431 static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
3432 {
3433 struct kvm_segment s;
3434
3435 if (to_vmx(vcpu)->rmode.vm86_active) {
3436 vmx_get_segment(vcpu, &s, seg);
3437 return s.base;
3438 }
3439 return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
3440 }
3441
3442 int vmx_get_cpl(struct kvm_vcpu *vcpu)
3443 {
3444 struct vcpu_vmx *vmx = to_vmx(vcpu);
3445
3446 if (unlikely(vmx->rmode.vm86_active))
3447 return 0;
3448 else {
3449 int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
3450 return VMX_AR_DPL(ar);
3451 }
3452 }
3453
3454 static u32 vmx_segment_access_rights(struct kvm_segment *var)
3455 {
3456 u32 ar;
3457
3458 if (var->unusable || !var->present)
3459 ar = 1 << 16;
3460 else {
3461 ar = var->type & 15;
3462 ar |= (var->s & 1) << 4;
3463 ar |= (var->dpl & 3) << 5;
3464 ar |= (var->present & 1) << 7;
3465 ar |= (var->avl & 1) << 12;
3466 ar |= (var->l & 1) << 13;
3467 ar |= (var->db & 1) << 14;
3468 ar |= (var->g & 1) << 15;
3469 }
3470
3471 return ar;
3472 }
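/*
 * The encoding produced above mirrors the VMCS access-rights layout:
 * bits 3:0 type, bit 4 S, bits 6:5 DPL, bit 7 P, bit 12 AVL, bit 13 L,
 * bit 14 D/B, bit 15 G, bit 16 "segment unusable". An unusable or
 * not-present segment is therefore encoded simply as 1 << 16.
 */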
3473
3474 void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
3475 {
3476 struct vcpu_vmx *vmx = to_vmx(vcpu);
3477 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
3478
3479 vmx_segment_cache_clear(vmx);
3480
3481 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3482 vmx->rmode.segs[seg] = *var;
3483 if (seg == VCPU_SREG_TR)
3484 vmcs_write16(sf->selector, var->selector);
3485 else if (var->s)
3486 fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
3487 return;
3488 }
3489
3490 vmcs_writel(sf->base, var->base);
3491 vmcs_write32(sf->limit, var->limit);
3492 vmcs_write16(sf->selector, var->selector);
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505 if (is_unrestricted_guest(vcpu) && (seg != VCPU_SREG_LDTR))
3506 var->type |= 0x1;
3507
3508 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
3509 }
3510
3511 static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
3512 {
3513 __vmx_set_segment(vcpu, var, seg);
3514
3515 to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu);
3516 }
3517
3518 static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
3519 {
3520 u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
3521
3522 *db = (ar >> 14) & 1;
3523 *l = (ar >> 13) & 1;
3524 }
3525
3526 static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
3527 {
3528 dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
3529 dt->address = vmcs_readl(GUEST_IDTR_BASE);
3530 }
3531
3532 static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
3533 {
3534 vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
3535 vmcs_writel(GUEST_IDTR_BASE, dt->address);
3536 }
3537
3538 static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
3539 {
3540 dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
3541 dt->address = vmcs_readl(GUEST_GDTR_BASE);
3542 }
3543
3544 static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
3545 {
3546 vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
3547 vmcs_writel(GUEST_GDTR_BASE, dt->address);
3548 }
3549
3550 static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
3551 {
3552 struct kvm_segment var;
3553 u32 ar;
3554
3555 vmx_get_segment(vcpu, &var, seg);
3556 var.dpl = 0x3;
3557 if (seg == VCPU_SREG_CS)
3558 var.type = 0x3;
3559 ar = vmx_segment_access_rights(&var);
3560
3561 if (var.base != (var.selector << 4))
3562 return false;
3563 if (var.limit != 0xffff)
3564 return false;
3565 if (ar != 0xf3)
3566 return false;
3567
3568 return true;
3569 }
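/*
 * rmode_segment_valid() encodes the virtual-8086 invariants required when
 * "unrestricted guest" is unavailable: base == selector << 4, a 64KiB
 * limit, and access rights 0xf3 (present, DPL 3, read/write data,
 * accessed), which is exactly what fix_rmode_seg() installs.
 */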
3570
3571 static bool code_segment_valid(struct kvm_vcpu *vcpu)
3572 {
3573 struct kvm_segment cs;
3574 unsigned int cs_rpl;
3575
3576 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
3577 cs_rpl = cs.selector & SEGMENT_RPL_MASK;
3578
3579 if (cs.unusable)
3580 return false;
3581 if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
3582 return false;
3583 if (!cs.s)
3584 return false;
3585 if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
3586 if (cs.dpl > cs_rpl)
3587 return false;
3588 } else {
3589 if (cs.dpl != cs_rpl)
3590 return false;
3591 }
3592 if (!cs.present)
3593 return false;
3594
3595
3596 return true;
3597 }
3598
3599 static bool stack_segment_valid(struct kvm_vcpu *vcpu)
3600 {
3601 struct kvm_segment ss;
3602 unsigned int ss_rpl;
3603
3604 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
3605 ss_rpl = ss.selector & SEGMENT_RPL_MASK;
3606
3607 if (ss.unusable)
3608 return true;
3609 if (ss.type != 3 && ss.type != 7)
3610 return false;
3611 if (!ss.s)
3612 return false;
3613 if (ss.dpl != ss_rpl)
3614 return false;
3615 if (!ss.present)
3616 return false;
3617
3618 return true;
3619 }
3620
3621 static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
3622 {
3623 struct kvm_segment var;
3624 unsigned int rpl;
3625
3626 vmx_get_segment(vcpu, &var, seg);
3627 rpl = var.selector & SEGMENT_RPL_MASK;
3628
3629 if (var.unusable)
3630 return true;
3631 if (!var.s)
3632 return false;
3633 if (!var.present)
3634 return false;
3635 if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
3636 if (var.dpl < rpl)
3637 return false;
3638 }
3639
3640
3641
3642
3643 return true;
3644 }
3645
3646 static bool tr_valid(struct kvm_vcpu *vcpu)
3647 {
3648 struct kvm_segment tr;
3649
3650 vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
3651
3652 if (tr.unusable)
3653 return false;
3654 if (tr.selector & SEGMENT_TI_MASK)
3655 return false;
3656 if (tr.type != 3 && tr.type != 11)
3657 return false;
3658 if (!tr.present)
3659 return false;
3660
3661 return true;
3662 }
3663
3664 static bool ldtr_valid(struct kvm_vcpu *vcpu)
3665 {
3666 struct kvm_segment ldtr;
3667
3668 vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
3669
3670 if (ldtr.unusable)
3671 return true;
3672 if (ldtr.selector & SEGMENT_TI_MASK)
3673 return false;
3674 if (ldtr.type != 2)
3675 return false;
3676 if (!ldtr.present)
3677 return false;
3678
3679 return true;
3680 }
3681
3682 static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
3683 {
3684 struct kvm_segment cs, ss;
3685
3686 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
3687 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
3688
3689 return ((cs.selector & SEGMENT_RPL_MASK) ==
3690 (ss.selector & SEGMENT_RPL_MASK));
3691 }
3692
3693 /*
3694 * Check if guest state is valid. Returns true if valid, false if
3695 * not.
3696 * We assume that registers are always usable.
3697 */
3698 bool __vmx_guest_state_valid(struct kvm_vcpu *vcpu)
3699 {
3700 /* real mode guest state checks */
3701 if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
3702 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
3703 return false;
3704 if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
3705 return false;
3706 if (!rmode_segment_valid(vcpu, VCPU_SREG_DS))
3707 return false;
3708 if (!rmode_segment_valid(vcpu, VCPU_SREG_ES))
3709 return false;
3710 if (!rmode_segment_valid(vcpu, VCPU_SREG_FS))
3711 return false;
3712 if (!rmode_segment_valid(vcpu, VCPU_SREG_GS))
3713 return false;
3714 } else {
3715 /* protected mode guest state checks */
3716 if (!cs_ss_rpl_check(vcpu))
3717 return false;
3718 if (!code_segment_valid(vcpu))
3719 return false;
3720 if (!stack_segment_valid(vcpu))
3721 return false;
3722 if (!data_segment_valid(vcpu, VCPU_SREG_DS))
3723 return false;
3724 if (!data_segment_valid(vcpu, VCPU_SREG_ES))
3725 return false;
3726 if (!data_segment_valid(vcpu, VCPU_SREG_FS))
3727 return false;
3728 if (!data_segment_valid(vcpu, VCPU_SREG_GS))
3729 return false;
3730 if (!tr_valid(vcpu))
3731 return false;
3732 if (!ldtr_valid(vcpu))
3733 return false;
3734 }
3735
3736
3737
3738
3739
3740 return true;
3741 }
3742
3743 static int init_rmode_tss(struct kvm *kvm, void __user *ua)
3744 {
3745 const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
3746 u16 data;
3747 int i;
3748
3749 for (i = 0; i < 3; i++) {
3750 if (__copy_to_user(ua + PAGE_SIZE * i, zero_page, PAGE_SIZE))
3751 return -EFAULT;
3752 }
3753
3754 data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
3755 if (__copy_to_user(ua + TSS_IOPB_BASE_OFFSET, &data, sizeof(u16)))
3756 return -EFAULT;
3757
3758 data = ~0;
3759 if (__copy_to_user(ua + RMODE_TSS_SIZE - 1, &data, sizeof(u8)))
3760 return -EFAULT;
3761
3762 return 0;
3763 }
3764
3765 static int init_rmode_identity_map(struct kvm *kvm)
3766 {
3767 struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
3768 int i, r = 0;
3769 void __user *uaddr;
3770 u32 tmp;
3771
3772
3773 mutex_lock(&kvm->slots_lock);
3774
3775 if (likely(kvm_vmx->ept_identity_pagetable_done))
3776 goto out;
3777
3778 if (!kvm_vmx->ept_identity_map_addr)
3779 kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
3780
3781 uaddr = __x86_set_memory_region(kvm,
3782 IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
3783 kvm_vmx->ept_identity_map_addr,
3784 PAGE_SIZE);
3785 if (IS_ERR(uaddr)) {
3786 r = PTR_ERR(uaddr);
3787 goto out;
3788 }
3789
3790
3791 for (i = 0; i < (PAGE_SIZE / sizeof(tmp)); i++) {
3792 tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
3793 _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
3794 if (__copy_to_user(uaddr + i * sizeof(tmp), &tmp, sizeof(tmp))) {
3795 r = -EFAULT;
3796 goto out;
3797 }
3798 }
3799 kvm_vmx->ept_identity_pagetable_done = true;
3800
3801 out:
3802 mutex_unlock(&kvm->slots_lock);
3803 return r;
3804 }
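/*
 * Each entry written above is a 4MiB PSE PDE identity-mapping guest physical
 * address (i << 22): entry 0 is _PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
 * _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE = 0xe7, entry 1 is
 * 0x400000 | 0xe7, and so on, so a single page of PDEs covers the low 4GiB.
 */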
3805
3806 static void seg_setup(int seg)
3807 {
3808 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
3809 unsigned int ar;
3810
3811 vmcs_write16(sf->selector, 0);
3812 vmcs_writel(sf->base, 0);
3813 vmcs_write32(sf->limit, 0xffff);
3814 ar = 0x93;
3815 if (seg == VCPU_SREG_CS)
3816 ar |= 0x08;
3817
3818 vmcs_write32(sf->ar_bytes, ar);
3819 }
3820
3821 static int alloc_apic_access_page(struct kvm *kvm)
3822 {
3823 struct page *page;
3824 void __user *hva;
3825 int ret = 0;
3826
3827 mutex_lock(&kvm->slots_lock);
3828 if (kvm->arch.apic_access_memslot_enabled)
3829 goto out;
3830 hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
3831 APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
3832 if (IS_ERR(hva)) {
3833 ret = PTR_ERR(hva);
3834 goto out;
3835 }
3836
3837 page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
3838 if (is_error_page(page)) {
3839 ret = -EFAULT;
3840 goto out;
3841 }
3842
3843 /*
3844 * Do not pin the page in memory, so that memory hot-unplug
3845 * is able to migrate it.
3846 */
3847 put_page(page);
3848 kvm->arch.apic_access_memslot_enabled = true;
3849 out:
3850 mutex_unlock(&kvm->slots_lock);
3851 return ret;
3852 }
3853
3854 int allocate_vpid(void)
3855 {
3856 int vpid;
3857
3858 if (!enable_vpid)
3859 return 0;
3860 spin_lock(&vmx_vpid_lock);
3861 vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
3862 if (vpid < VMX_NR_VPIDS)
3863 __set_bit(vpid, vmx_vpid_bitmap);
3864 else
3865 vpid = 0;
3866 spin_unlock(&vmx_vpid_lock);
3867 return vpid;
3868 }
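/*
 * VPID 0 is reserved for the host and doubles as the "allocation failed" /
 * "VPID disabled" value; when a vCPU runs with vpid == 0, the "enable VPID"
 * control is left clear and hardware flushes linear mappings on every
 * VM-Enter/VM-Exit, so no explicit INVVPID is needed for that vCPU.
 */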
3869
3870 void free_vpid(int vpid)
3871 {
3872 if (!enable_vpid || vpid == 0)
3873 return;
3874 spin_lock(&vmx_vpid_lock);
3875 __clear_bit(vpid, vmx_vpid_bitmap);
3876 spin_unlock(&vmx_vpid_lock);
3877 }
3878
3879 static void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx)
3880 {
3881 /*
3882 * When KVM is a nested hypervisor on top of Hyper-V and uses
3883 * Enlightened VMCS, L1 needs to be notified that the MSR bitmap has
3884 * changed, i.e. the relevant eVMCS clean field must be dirtied.
3885 */
3886 if (static_branch_unlikely(&enable_evmcs))
3887 evmcs_touch_msr_bitmap();
3888
3889 vmx->nested.force_msr_bitmap_recalc = true;
3890 }
3891
3892 void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
3893 {
3894 struct vcpu_vmx *vmx = to_vmx(vcpu);
3895 unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
3896
3897 if (!cpu_has_vmx_msr_bitmap())
3898 return;
3899
3900 vmx_msr_bitmap_l01_changed(vmx);
3901
3902
3903
3904
3905
3906 if (is_valid_passthrough_msr(msr)) {
3907 int idx = possible_passthrough_msr_slot(msr);
3908
3909 if (idx != -ENOENT) {
3910 if (type & MSR_TYPE_R)
3911 clear_bit(idx, vmx->shadow_msr_intercept.read);
3912 if (type & MSR_TYPE_W)
3913 clear_bit(idx, vmx->shadow_msr_intercept.write);
3914 }
3915 }
3916
3917 if ((type & MSR_TYPE_R) &&
3918 !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ)) {
3919 vmx_set_msr_bitmap_read(msr_bitmap, msr);
3920 type &= ~MSR_TYPE_R;
3921 }
3922
3923 if ((type & MSR_TYPE_W) &&
3924 !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE)) {
3925 vmx_set_msr_bitmap_write(msr_bitmap, msr);
3926 type &= ~MSR_TYPE_W;
3927 }
3928
3929 if (type & MSR_TYPE_R)
3930 vmx_clear_msr_bitmap_read(msr_bitmap, msr);
3931
3932 if (type & MSR_TYPE_W)
3933 vmx_clear_msr_bitmap_write(msr_bitmap, msr);
3934 }
3935
3936 void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
3937 {
3938 struct vcpu_vmx *vmx = to_vmx(vcpu);
3939 unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
3940
3941 if (!cpu_has_vmx_msr_bitmap())
3942 return;
3943
3944 vmx_msr_bitmap_l01_changed(vmx);
3945
3946
3947
3948
3949
3950 if (is_valid_passthrough_msr(msr)) {
3951 int idx = possible_passthrough_msr_slot(msr);
3952
3953 if (idx != -ENOENT) {
3954 if (type & MSR_TYPE_R)
3955 set_bit(idx, vmx->shadow_msr_intercept.read);
3956 if (type & MSR_TYPE_W)
3957 set_bit(idx, vmx->shadow_msr_intercept.write);
3958 }
3959 }
3960
3961 if (type & MSR_TYPE_R)
3962 vmx_set_msr_bitmap_read(msr_bitmap, msr);
3963
3964 if (type & MSR_TYPE_W)
3965 vmx_set_msr_bitmap_write(msr_bitmap, msr);
3966 }
3967
3968 static void vmx_reset_x2apic_msrs(struct kvm_vcpu *vcpu, u8 mode)
3969 {
3970 unsigned long *msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
3971 unsigned long read_intercept;
3972 int msr;
3973
3974 read_intercept = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
3975
3976 for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
3977 unsigned int read_idx = msr / BITS_PER_LONG;
3978 unsigned int write_idx = read_idx + (0x800 / sizeof(long));
3979
3980 msr_bitmap[read_idx] = read_intercept;
3981 msr_bitmap[write_idx] = ~0ul;
3982 }
3983 }
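/*
 * Layout assumed above: the 4KiB MSR bitmap holds read-intercept bits for
 * MSRs 0x0-0x1fff in its first 0x400 bytes and the corresponding
 * write-intercept bits 0x800 bytes later, so for the x2APIC range
 * (0x800-0x8ff) write_idx is simply read_idx + 0x800 / sizeof(long).
 * Writes stay intercepted here (~0ul) and reads are passed through only in
 * APICv mode; the caller then selectively re-opens individual registers.
 */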
3984
3985 static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
3986 {
3987 struct vcpu_vmx *vmx = to_vmx(vcpu);
3988 u8 mode;
3989
3990 if (!cpu_has_vmx_msr_bitmap())
3991 return;
3992
3993 if (cpu_has_secondary_exec_ctrls() &&
3994 (secondary_exec_controls_get(vmx) &
3995 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
3996 mode = MSR_BITMAP_MODE_X2APIC;
3997 if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
3998 mode |= MSR_BITMAP_MODE_X2APIC_APICV;
3999 } else {
4000 mode = 0;
4001 }
4002
4003 if (mode == vmx->x2apic_msr_bitmap_mode)
4004 return;
4005
4006 vmx->x2apic_msr_bitmap_mode = mode;
4007
4008 vmx_reset_x2apic_msrs(vcpu, mode);
4009
4010
4011
4012
4013
4014 vmx_set_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW,
4015 !(mode & MSR_BITMAP_MODE_X2APIC));
4016
4017 if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
4018 vmx_enable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_RW);
4019 vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
4020 vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
4021 if (enable_ipiv)
4022 vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_ICR), MSR_TYPE_RW);
4023 }
4024 }
4025
4026 void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
4027 {
4028 struct vcpu_vmx *vmx = to_vmx(vcpu);
4029 bool flag = !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
4030 u32 i;
4031
4032 vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_STATUS, MSR_TYPE_RW, flag);
4033 vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_OUTPUT_BASE, MSR_TYPE_RW, flag);
4034 vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_OUTPUT_MASK, MSR_TYPE_RW, flag);
4035 vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_CR3_MATCH, MSR_TYPE_RW, flag);
4036 for (i = 0; i < vmx->pt_desc.num_address_ranges; i++) {
4037 vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_ADDR0_A + i * 2, MSR_TYPE_RW, flag);
4038 vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_ADDR0_B + i * 2, MSR_TYPE_RW, flag);
4039 }
4040 }
4041
4042 static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
4043 {
4044 struct vcpu_vmx *vmx = to_vmx(vcpu);
4045 void *vapic_page;
4046 u32 vppr;
4047 int rvi;
4048
4049 if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
4050 !nested_cpu_has_vid(get_vmcs12(vcpu)) ||
4051 WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn))
4052 return false;
4053
4054 rvi = vmx_get_rvi();
4055
4056 vapic_page = vmx->nested.virtual_apic_map.hva;
4057 vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
4058
4059 return ((rvi & 0xf0) > (vppr & 0xf0));
4060 }
4061
4062 static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
4063 {
4064 struct vcpu_vmx *vmx = to_vmx(vcpu);
4065 u32 i;
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075 for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
4076 u32 msr = vmx_possible_passthrough_msrs[i];
4077
4078 if (!test_bit(i, vmx->shadow_msr_intercept.read))
4079 vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_R);
4080
4081 if (!test_bit(i, vmx->shadow_msr_intercept.write))
4082 vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_W);
4083 }
4084
4085
4086 if (vmx_pt_mode_is_host_guest())
4087 pt_update_intercept_for_msr(vcpu);
4088 }
4089
4090 static inline void kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
4091 int pi_vec)
4092 {
4093 #ifdef CONFIG_SMP
4094 if (vcpu->mode == IN_GUEST_MODE) {
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121 if (vcpu != kvm_get_running_vcpu())
4122 apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
4123 return;
4124 }
4125 #endif
4126
4127
4128
4129
4130
4131 kvm_vcpu_wake_up(vcpu);
4132 }
4133
4134 static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
4135 int vector)
4136 {
4137 struct vcpu_vmx *vmx = to_vmx(vcpu);
4138
4139 if (is_guest_mode(vcpu) &&
4140 vector == vmx->nested.posted_intr_nv) {
4141
4142
4143
4144
4145 vmx->nested.pi_pending = true;
4146 kvm_make_request(KVM_REQ_EVENT, vcpu);
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158 smp_mb__after_atomic();
4159
4160
4161 kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_NESTED_VECTOR);
4162 return 0;
4163 }
4164 return -1;
4165 }
4166
4167 /*
4168 * Send an interrupt to a vcpu via posted interrupts: if the target vcpu
4169 * is running in non-root mode, send the notification vector and hardware
4170 * syncs the PIR to the vIRR atomically; if the target vcpu isn't running,
4171 * kick it so that it picks up the interrupt from the PIR on the next VM-Enter.
4172 */
4173 static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
4174 {
4175 struct vcpu_vmx *vmx = to_vmx(vcpu);
4176 int r;
4177
4178 r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
4179 if (!r)
4180 return 0;
4181
4182
4183 if (!vcpu->arch.apic->apicv_active)
4184 return -1;
4185
4186 if (pi_test_and_set_pir(vector, &vmx->pi_desc))
4187 return 0;
4188
4189
4190 if (pi_test_and_set_on(&vmx->pi_desc))
4191 return 0;
4192
4193
4194
4195
4196
4197
4198
4199 kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_VECTOR);
4200 return 0;
4201 }
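/*
 * The delivery protocol above: the vector is set in the PIR, then the
 * descriptor's ON bit is set; if either was already set a notification is
 * already pending and nothing more is needed, otherwise the notification
 * IPI (or a wakeup, if the vCPU is not IN_GUEST_MODE) makes the target
 * sync the PIR into the vIRR.
 */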
4202
4203 static void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
4204 int trig_mode, int vector)
4205 {
4206 struct kvm_vcpu *vcpu = apic->vcpu;
4207
4208 if (vmx_deliver_posted_interrupt(vcpu, vector)) {
4209 kvm_lapic_set_irr(vector, apic);
4210 kvm_make_request(KVM_REQ_EVENT, vcpu);
4211 kvm_vcpu_kick(vcpu);
4212 } else {
4213 trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
4214 trig_mode, vector);
4215 }
4216 }
4217
4218 /*
4219 * Set up the vmcs's constant host-state fields, i.e., host-state fields that
4220 * will not change in the lifetime of the guest.
4221 * Note that host-state that does change is set elsewhere. E.g., host-state
4222 * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
4223 */
4224 void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
4225 {
4226 u32 low32, high32;
4227 unsigned long tmpl;
4228 unsigned long cr0, cr3, cr4;
4229
4230 cr0 = read_cr0();
4231 WARN_ON(cr0 & X86_CR0_TS);
4232 vmcs_writel(HOST_CR0, cr0);
4233
4234
4235
4236
4237
4238 cr3 = __read_cr3();
4239 vmcs_writel(HOST_CR3, cr3);
4240 vmx->loaded_vmcs->host_state.cr3 = cr3;
4241
4242
4243 cr4 = cr4_read_shadow();
4244 vmcs_writel(HOST_CR4, cr4);
4245 vmx->loaded_vmcs->host_state.cr4 = cr4;
4246
4247 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);
4248 #ifdef CONFIG_X86_64
4249
4250
4251
4252
4253
4254 vmcs_write16(HOST_DS_SELECTOR, 0);
4255 vmcs_write16(HOST_ES_SELECTOR, 0);
4256 #else
4257 vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);
4258 vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);
4259 #endif
4260 vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);
4261 vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);
4262
4263 vmcs_writel(HOST_IDTR_BASE, host_idt_base);
4264
4265 vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit);
4266
4267 rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
4268 vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
4269
4270
4271
4272
4273
4274
4275
4276 if (!IS_ENABLED(CONFIG_IA32_EMULATION) && !IS_ENABLED(CONFIG_X86_32))
4277 vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);
4278
4279 rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
4280 vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl);
4281
4282 if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
4283 rdmsr(MSR_IA32_CR_PAT, low32, high32);
4284 vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
4285 }
4286
4287 if (cpu_has_load_ia32_efer())
4288 vmcs_write64(HOST_IA32_EFER, host_efer);
4289 }
4290
4291 void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
4292 {
4293 struct kvm_vcpu *vcpu = &vmx->vcpu;
4294
4295 vcpu->arch.cr4_guest_owned_bits = KVM_POSSIBLE_CR4_GUEST_BITS &
4296 ~vcpu->arch.cr4_guest_rsvd_bits;
4297 if (!enable_ept) {
4298 vcpu->arch.cr4_guest_owned_bits &= ~X86_CR4_TLBFLUSH_BITS;
4299 vcpu->arch.cr4_guest_owned_bits &= ~X86_CR4_PDPTR_BITS;
4300 }
4301 if (is_guest_mode(&vmx->vcpu))
4302 vcpu->arch.cr4_guest_owned_bits &=
4303 ~get_vmcs12(vcpu)->cr4_guest_host_mask;
4304 vmcs_writel(CR4_GUEST_HOST_MASK, ~vcpu->arch.cr4_guest_owned_bits);
4305 }
4306
4307 static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
4308 {
4309 u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
4310
4311 if (!kvm_vcpu_apicv_active(&vmx->vcpu))
4312 pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
4313
4314 if (!enable_vnmi)
4315 pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
4316
4317 if (!enable_preemption_timer)
4318 pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
4319
4320 return pin_based_exec_ctrl;
4321 }
4322
4323 static u32 vmx_vmentry_ctrl(void)
4324 {
4325 u32 vmentry_ctrl = vmcs_config.vmentry_ctrl;
4326
4327 if (vmx_pt_mode_is_system())
4328 vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
4329 VM_ENTRY_LOAD_IA32_RTIT_CTL);
4330
4331 return vmentry_ctrl &
4332 ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER);
4333 }
4334
4335 static u32 vmx_vmexit_ctrl(void)
4336 {
4337 u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
4338
4339 if (vmx_pt_mode_is_system())
4340 vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP |
4341 VM_EXIT_CLEAR_IA32_RTIT_CTL);
4342
4343 return vmexit_ctrl &
4344 ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
4345 }
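/*
 * In both vmx_vmentry_ctrl() and vmx_vmexit_ctrl() above, the
 * IA32_PERF_GLOBAL_CTRL and IA32_EFER load bits are stripped because loading
 * of EFER and PERF_GLOBAL_CTRL is toggled dynamically at run time rather
 * than being part of the constant VMCS configuration.
 */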
4346
4347 static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
4348 {
4349 struct vcpu_vmx *vmx = to_vmx(vcpu);
4350
4351 if (is_guest_mode(vcpu)) {
4352 vmx->nested.update_vmcs01_apicv_status = true;
4353 return;
4354 }
4355
4356 pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
4357
4358 if (kvm_vcpu_apicv_active(vcpu)) {
4359 secondary_exec_controls_setbit(vmx,
4360 SECONDARY_EXEC_APIC_REGISTER_VIRT |
4361 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
4362 if (enable_ipiv)
4363 tertiary_exec_controls_setbit(vmx, TERTIARY_EXEC_IPI_VIRT);
4364 } else {
4365 secondary_exec_controls_clearbit(vmx,
4366 SECONDARY_EXEC_APIC_REGISTER_VIRT |
4367 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
4368 if (enable_ipiv)
4369 tertiary_exec_controls_clearbit(vmx, TERTIARY_EXEC_IPI_VIRT);
4370 }
4371
4372 vmx_update_msr_bitmap_x2apic(vcpu);
4373 }
4374
4375 static u32 vmx_exec_control(struct vcpu_vmx *vmx)
4376 {
4377 u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
4378
4379 if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
4380 exec_control &= ~CPU_BASED_MOV_DR_EXITING;
4381
4382 if (!cpu_need_tpr_shadow(&vmx->vcpu)) {
4383 exec_control &= ~CPU_BASED_TPR_SHADOW;
4384 #ifdef CONFIG_X86_64
4385 exec_control |= CPU_BASED_CR8_STORE_EXITING |
4386 CPU_BASED_CR8_LOAD_EXITING;
4387 #endif
4388 }
4389 if (!enable_ept)
4390 exec_control |= CPU_BASED_CR3_STORE_EXITING |
4391 CPU_BASED_CR3_LOAD_EXITING |
4392 CPU_BASED_INVLPG_EXITING;
4393 if (kvm_mwait_in_guest(vmx->vcpu.kvm))
4394 exec_control &= ~(CPU_BASED_MWAIT_EXITING |
4395 CPU_BASED_MONITOR_EXITING);
4396 if (kvm_hlt_in_guest(vmx->vcpu.kvm))
4397 exec_control &= ~CPU_BASED_HLT_EXITING;
4398 return exec_control;
4399 }
4400
4401 static u64 vmx_tertiary_exec_control(struct vcpu_vmx *vmx)
4402 {
4403 u64 exec_control = vmcs_config.cpu_based_3rd_exec_ctrl;
4404
4405
4406
4407
4408
4409 if (!enable_ipiv || !kvm_vcpu_apicv_active(&vmx->vcpu))
4410 exec_control &= ~TERTIARY_EXEC_IPI_VIRT;
4411
4412 return exec_control;
4413 }
4414
4415 /*
4416  * Adjust a single secondary execution control bit to intercept/allow an
4417  * instruction in the guest.  This is usually done based on whether or not a
4418  * feature has been exposed to the guest in order to correctly emulate faults.
4419  */
4420 static inline void
4421 vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
4422 u32 control, bool enabled, bool exiting)
4423 {
4424 /*
4425  * When the control is for an opt-in feature, clear the control if the
4426  * feature is not exposed to the guest, i.e. not enabled.  If the control
4427  * is opt-out, i.e. an exiting control, clear the control if the feature
4428  * _is_ exposed to the guest, i.e. exiting/interception is disabled for
4429  * the associated instruction.  Note, the caller is responsible for
4430  * presetting exec_control to set all supported bits.
4431  */
4432 if (enabled == exiting)
4433 *exec_control &= ~control;
4434
4435 /*
4436  * Update the nested MSR settings so that a nested VMM can/can't set
4437  * controls for features that are/aren't exposed to the guest.
4438  */
4439 if (nested) {
4440 if (enabled)
4441 vmx->nested.msrs.secondary_ctls_high |= control;
4442 else
4443 vmx->nested.msrs.secondary_ctls_high &= ~control;
4444 }
4445 }
4446
4447 /*
4448  * Wrapper macro for the common case of adjusting a secondary execution control
4449  * based on a single guest CPUID bit, with a dedicated feature bit.  This also
4450  * verifies that the control is actually supported by KVM and hardware.
4451  */
4452 #define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting) \
4453 ({ \
4454 bool __enabled; \
4455 \
4456 if (cpu_has_vmx_##name()) { \
4457 __enabled = guest_cpuid_has(&(vmx)->vcpu, \
4458 X86_FEATURE_##feat_name); \
4459 vmx_adjust_secondary_exec_control(vmx, exec_control, \
4460 SECONDARY_EXEC_##ctrl_name, __enabled, exiting); \
4461 } \
4462 })
4463
4464 /* More macro magic for ENABLE_/opt-in versus _EXITING/opt-out controls. */
4465 #define vmx_adjust_sec_exec_feature(vmx, exec_control, lname, uname) \
4466 vmx_adjust_sec_exec_control(vmx, exec_control, lname, uname, ENABLE_##uname, false)
4467
4468 #define vmx_adjust_sec_exec_exiting(vmx, exec_control, lname, uname) \
4469 vmx_adjust_sec_exec_control(vmx, exec_control, lname, uname, uname##_EXITING, true)
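 
/*
 * For illustration only (derived from the macros above, not additional
 * control logic): the opt-out wrapper expands as follows for the
 * RDRAND-exiting control,
 *
 *	vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
 *
 * which becomes, after both macros are expanded,
 *
 *	if (cpu_has_vmx_rdrand()) {
 *		bool __enabled = guest_cpuid_has(&(vmx)->vcpu,
 *						 X86_FEATURE_RDRAND);
 *		vmx_adjust_secondary_exec_control(vmx, &exec_control,
 *				SECONDARY_EXEC_RDRAND_EXITING,
 *				__enabled, true);
 *	}
 *
 * i.e. the exiting control is cleared (RDRAND runs uninterrupted) only when
 * the feature is exposed to the guest, and the nested VMX controls are
 * updated to match.
 */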
4470
4471 static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
4472 {
4473 struct kvm_vcpu *vcpu = &vmx->vcpu;
4474
4475 u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
4476
4477 if (vmx_pt_mode_is_system())
4478 exec_control &= ~(SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX);
4479 if (!cpu_need_virtualize_apic_accesses(vcpu))
4480 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
4481 if (vmx->vpid == 0)
4482 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
4483 if (!enable_ept) {
4484 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
4485 enable_unrestricted_guest = 0;
4486 }
4487 if (!enable_unrestricted_guest)
4488 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
4489 if (kvm_pause_in_guest(vmx->vcpu.kvm))
4490 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
4491 if (!kvm_vcpu_apicv_active(vcpu))
4492 exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
4493 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
4494 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
4495
4496 /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP,
4497  * in vmx_set_cr4.  */
4498 exec_control &= ~SECONDARY_EXEC_DESC;
4499
4500 /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
4501    (handle_vmptrld).
4502    We can NOT enable shadow_vmcs here because we don't have yet
4503    a current VMCS12
4504 */
4505 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
4506
4507 /*
4508  * PML is toggled dynamically as dirty logging is enabled/disabled for
4509  * memslots; keep it disabled here unless CPU dirty logging is already
4510  * active for this VM.
4511  */
4512 if (!vcpu->kvm->arch.cpu_dirty_logging_count)
4513 exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
4514
4515 if (cpu_has_vmx_xsaves()) {
4516 /* Exposing XSAVES only when XSAVE is exposed */
4517 bool xsaves_enabled =
4518 boot_cpu_has(X86_FEATURE_XSAVE) &&
4519 guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
4520 guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
4521
4522 vcpu->arch.xsaves_enabled = xsaves_enabled;
4523
4524 vmx_adjust_secondary_exec_control(vmx, &exec_control,
4525 SECONDARY_EXEC_XSAVES,
4526 xsaves_enabled, false);
4527 }
4528
4529 /*
4530  * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either
4531  * feature is exposed to the guest.  This creates a virtualization hole
4532  * if both are supported in hardware but only one is exposed to the
4533  * guest, but letting the guest execute RDTSCP or RDPID when either one
4534  * is advertised is preferable to emulating the feature(s) via #UD, or
4535  * worse, injecting #UD without notice.
4536  */
4537 if (cpu_has_vmx_rdtscp()) {
4538 bool rdpid_or_rdtscp_enabled =
4539 guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
4540 guest_cpuid_has(vcpu, X86_FEATURE_RDPID);
4541
4542 vmx_adjust_secondary_exec_control(vmx, &exec_control,
4543 SECONDARY_EXEC_ENABLE_RDTSCP,
4544 rdpid_or_rdtscp_enabled, false);
4545 }
4546 vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
4547
4548 vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
4549 vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdseed, RDSEED);
4550
4551 vmx_adjust_sec_exec_control(vmx, &exec_control, waitpkg, WAITPKG,
4552 ENABLE_USR_WAIT_PAUSE, false);
4553
4554 if (!vcpu->kvm->arch.bus_lock_detection_enabled)
4555 exec_control &= ~SECONDARY_EXEC_BUS_LOCK_DETECTION;
4556
4557 if (!kvm_notify_vmexit_enabled(vcpu->kvm))
4558 exec_control &= ~SECONDARY_EXEC_NOTIFY_VM_EXITING;
4559
4560 return exec_control;
4561 }
4562
4563 static inline int vmx_get_pid_table_order(struct kvm *kvm)
4564 {
4565 return get_order(kvm->arch.max_vcpu_ids * sizeof(*to_kvm_vmx(kvm)->pid_table));
4566 }
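 
/*
 * Sizing example (illustrative values, not defaults): each PID-pointer
 * table entry is a u64, so a VM created with, say, kvm->arch.max_vcpu_ids
 * of 256 needs 256 * 8 = 2048 bytes and vmx_get_pid_table_order() returns
 * 0 (a single page); 1024 IDs would need 8 KiB, i.e. order 1.
 */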
4567
4568 static int vmx_alloc_ipiv_pid_table(struct kvm *kvm)
4569 {
4570 struct page *pages;
4571 struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
4572
4573 if (!irqchip_in_kernel(kvm) || !enable_ipiv)
4574 return 0;
4575
4576 if (kvm_vmx->pid_table)
4577 return 0;
4578
4579 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, vmx_get_pid_table_order(kvm));
4580 if (!pages)
4581 return -ENOMEM;
4582
4583 kvm_vmx->pid_table = (void *)page_address(pages);
4584 return 0;
4585 }
4586
4587 static int vmx_vcpu_precreate(struct kvm *kvm)
4588 {
4589 return vmx_alloc_ipiv_pid_table(kvm);
4590 }
4591
4592 #define VMX_XSS_EXIT_BITMAP 0
4593
4594 static void init_vmcs(struct vcpu_vmx *vmx)
4595 {
4596 struct kvm *kvm = vmx->vcpu.kvm;
4597 struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
4598
4599 if (nested)
4600 nested_vmx_set_vmcs_shadowing_bitmap();
4601
4602 if (cpu_has_vmx_msr_bitmap())
4603 vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
4604
4605 vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA);
4606
4607
4608 pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
4609
4610 exec_controls_set(vmx, vmx_exec_control(vmx));
4611
4612 if (cpu_has_secondary_exec_ctrls())
4613 secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx));
4614
4615 if (cpu_has_tertiary_exec_ctrls())
4616 tertiary_exec_controls_set(vmx, vmx_tertiary_exec_control(vmx));
4617
4618 if (enable_apicv && lapic_in_kernel(&vmx->vcpu)) {
4619 vmcs_write64(EOI_EXIT_BITMAP0, 0);
4620 vmcs_write64(EOI_EXIT_BITMAP1, 0);
4621 vmcs_write64(EOI_EXIT_BITMAP2, 0);
4622 vmcs_write64(EOI_EXIT_BITMAP3, 0);
4623
4624 vmcs_write16(GUEST_INTR_STATUS, 0);
4625
4626 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
4627 vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
4628 }
4629
4630 if (vmx_can_use_ipiv(&vmx->vcpu)) {
4631 vmcs_write64(PID_POINTER_TABLE, __pa(kvm_vmx->pid_table));
4632 vmcs_write16(LAST_PID_POINTER_INDEX, kvm->arch.max_vcpu_ids - 1);
4633 }
4634
4635 if (!kvm_pause_in_guest(kvm)) {
4636 vmcs_write32(PLE_GAP, ple_gap);
4637 vmx->ple_window = ple_window;
4638 vmx->ple_window_dirty = true;
4639 }
4640
4641 if (kvm_notify_vmexit_enabled(kvm))
4642 vmcs_write32(NOTIFY_WINDOW, kvm->arch.notify_window);
4643
4644 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
4645 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
4646 vmcs_write32(CR3_TARGET_COUNT, 0);
4647
4648 vmcs_write16(HOST_FS_SELECTOR, 0);
4649 vmcs_write16(HOST_GS_SELECTOR, 0);
4650 vmx_set_constant_host_state(vmx);
4651 vmcs_writel(HOST_FS_BASE, 0);
4652 vmcs_writel(HOST_GS_BASE, 0);
4653
4654 if (cpu_has_vmx_vmfunc())
4655 vmcs_write64(VM_FUNCTION_CONTROL, 0);
4656
4657 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
4658 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
4659 vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
4660 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
4661 vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
4662
4663 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
4664 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
4665
4666 vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
4667
4668
4669 vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
4670
4671 vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
4672 vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits);
4673
4674 set_cr4_guest_host_mask(vmx);
4675
4676 if (vmx->vpid != 0)
4677 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
4678
4679 if (cpu_has_vmx_xsaves())
4680 vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
4681
4682 if (enable_pml) {
4683 vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
4684 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
4685 }
4686
4687 vmx_write_encls_bitmap(&vmx->vcpu, NULL);
4688
4689 if (vmx_pt_mode_is_host_guest()) {
4690 memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc));
4691
4692 vmx->pt_desc.guest.output_mask = 0x7F;
4693 vmcs_write64(GUEST_IA32_RTIT_CTL, 0);
4694 }
4695
4696 vmcs_write32(GUEST_SYSENTER_CS, 0);
4697 vmcs_writel(GUEST_SYSENTER_ESP, 0);
4698 vmcs_writel(GUEST_SYSENTER_EIP, 0);
4699 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
4700
4701 if (cpu_has_vmx_tpr_shadow()) {
4702 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
4703 if (cpu_need_tpr_shadow(&vmx->vcpu))
4704 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
4705 __pa(vmx->vcpu.arch.apic->regs));
4706 vmcs_write32(TPR_THRESHOLD, 0);
4707 }
4708
4709 vmx_setup_uret_msrs(vmx);
4710 }
4711
4712 static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
4713 {
4714 struct vcpu_vmx *vmx = to_vmx(vcpu);
4715
4716 init_vmcs(vmx);
4717
4718 if (nested)
4719 memcpy(&vmx->nested.msrs, &vmcs_config.nested, sizeof(vmx->nested.msrs));
4720
4721 vcpu_setup_sgx_lepubkeyhash(vcpu);
4722
4723 vmx->nested.posted_intr_nv = -1;
4724 vmx->nested.vmxon_ptr = INVALID_GPA;
4725 vmx->nested.current_vmptr = INVALID_GPA;
4726 vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
4727
4728 vcpu->arch.microcode_version = 0x100000000ULL;
4729 vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;
4730
4731 /*
4732  * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
4733  * or POSTED_INTR_WAKEUP_VECTOR.
4734  */
4735 vmx->pi_desc.nv = POSTED_INTR_VECTOR;
4736 vmx->pi_desc.sn = 1;
4737 }
4738
4739 static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
4740 {
4741 struct vcpu_vmx *vmx = to_vmx(vcpu);
4742
4743 if (!init_event)
4744 __vmx_vcpu_reset(vcpu);
4745
4746 vmx->rmode.vm86_active = 0;
4747 vmx->spec_ctrl = 0;
4748
4749 vmx->msr_ia32_umwait_control = 0;
4750
4751 vmx->hv_deadline_tsc = -1;
4752 kvm_set_cr8(vcpu, 0);
4753
4754 vmx_segment_cache_clear(vmx);
4755 kvm_register_mark_available(vcpu, VCPU_EXREG_SEGMENTS);
4756
4757 seg_setup(VCPU_SREG_CS);
4758 vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
4759 vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
4760
4761 seg_setup(VCPU_SREG_DS);
4762 seg_setup(VCPU_SREG_ES);
4763 seg_setup(VCPU_SREG_FS);
4764 seg_setup(VCPU_SREG_GS);
4765 seg_setup(VCPU_SREG_SS);
4766
4767 vmcs_write16(GUEST_TR_SELECTOR, 0);
4768 vmcs_writel(GUEST_TR_BASE, 0);
4769 vmcs_write32(GUEST_TR_LIMIT, 0xffff);
4770 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
4771
4772 vmcs_write16(GUEST_LDTR_SELECTOR, 0);
4773 vmcs_writel(GUEST_LDTR_BASE, 0);
4774 vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
4775 vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
4776
4777 vmcs_writel(GUEST_GDTR_BASE, 0);
4778 vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
4779
4780 vmcs_writel(GUEST_IDTR_BASE, 0);
4781 vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
4782
4783 vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
4784 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
4785 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
4786 if (kvm_mpx_supported())
4787 vmcs_write64(GUEST_BNDCFGS, 0);
4788
4789 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
4790
4791 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
4792
4793 vpid_sync_context(vmx->vpid);
4794
4795 vmx_update_fb_clear_dis(vcpu, vmx);
4796 }
4797
4798 static void vmx_enable_irq_window(struct kvm_vcpu *vcpu)
4799 {
4800 exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
4801 }
4802
4803 static void vmx_enable_nmi_window(struct kvm_vcpu *vcpu)
4804 {
4805 if (!enable_vnmi ||
4806 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
4807 vmx_enable_irq_window(vcpu);
4808 return;
4809 }
4810
4811 exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
4812 }
4813
4814 static void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
4815 {
4816 struct vcpu_vmx *vmx = to_vmx(vcpu);
4817 uint32_t intr;
4818 int irq = vcpu->arch.interrupt.nr;
4819
4820 trace_kvm_inj_virq(irq, vcpu->arch.interrupt.soft, reinjected);
4821
4822 ++vcpu->stat.irq_injections;
4823 if (vmx->rmode.vm86_active) {
4824 int inc_eip = 0;
4825 if (vcpu->arch.interrupt.soft)
4826 inc_eip = vcpu->arch.event_exit_inst_len;
4827 kvm_inject_realmode_interrupt(vcpu, irq, inc_eip);
4828 return;
4829 }
4830 intr = irq | INTR_INFO_VALID_MASK;
4831 if (vcpu->arch.interrupt.soft) {
4832 intr |= INTR_TYPE_SOFT_INTR;
4833 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
4834 vmx->vcpu.arch.event_exit_inst_len);
4835 } else
4836 intr |= INTR_TYPE_EXT_INTR;
4837 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
4838
4839 vmx_clear_hlt(vcpu);
4840 }
4841
4842 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
4843 {
4844 struct vcpu_vmx *vmx = to_vmx(vcpu);
4845
4846 if (!enable_vnmi) {
4847 /*
4848  * Tracking the NMI-blocked state in software is built upon
4849  * finding the next open IRQ window. This, in turn, depends on
4850  * well-behaving guests: They have to keep IRQs disabled at
4851  * least as long as the NMI handler runs. Otherwise we may
4852  * cause NMI nesting, maybe breaking the guest. But as this is
4853  * highly unlikely, we can live with the residual risk.
4854  */
4855 vmx->loaded_vmcs->soft_vnmi_blocked = 1;
4856 vmx->loaded_vmcs->vnmi_blocked_time = 0;
4857 }
4858
4859 ++vcpu->stat.nmi_injections;
4860 vmx->loaded_vmcs->nmi_known_unmasked = false;
4861
4862 if (vmx->rmode.vm86_active) {
4863 kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0);
4864 return;
4865 }
4866
4867 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
4868 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
4869
4870 vmx_clear_hlt(vcpu);
4871 }
4872
4873 bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
4874 {
4875 struct vcpu_vmx *vmx = to_vmx(vcpu);
4876 bool masked;
4877
4878 if (!enable_vnmi)
4879 return vmx->loaded_vmcs->soft_vnmi_blocked;
4880 if (vmx->loaded_vmcs->nmi_known_unmasked)
4881 return false;
4882 masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
4883 vmx->loaded_vmcs->nmi_known_unmasked = !masked;
4884 return masked;
4885 }
4886
4887 void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
4888 {
4889 struct vcpu_vmx *vmx = to_vmx(vcpu);
4890
4891 if (!enable_vnmi) {
4892 if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
4893 vmx->loaded_vmcs->soft_vnmi_blocked = masked;
4894 vmx->loaded_vmcs->vnmi_blocked_time = 0;
4895 }
4896 } else {
4897 vmx->loaded_vmcs->nmi_known_unmasked = !masked;
4898 if (masked)
4899 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
4900 GUEST_INTR_STATE_NMI);
4901 else
4902 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
4903 GUEST_INTR_STATE_NMI);
4904 }
4905 }
4906
4907 bool vmx_nmi_blocked(struct kvm_vcpu *vcpu)
4908 {
4909 if (is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu))
4910 return false;
4911
4912 if (!enable_vnmi && to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
4913 return true;
4914
4915 return (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
4916 (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI |
4917 GUEST_INTR_STATE_NMI));
4918 }
4919
4920 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
4921 {
4922 if (to_vmx(vcpu)->nested.nested_run_pending)
4923 return -EBUSY;
4924
4925 /* An NMI must not be injected into L2 if it's supposed to VM-Exit.  */
4926 if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu))
4927 return -EBUSY;
4928
4929 return !vmx_nmi_blocked(vcpu);
4930 }
4931
4932 bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu)
4933 {
4934 if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
4935 return false;
4936
4937 return !(vmx_get_rflags(vcpu) & X86_EFLAGS_IF) ||
4938 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
4939 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
4940 }
4941
4942 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
4943 {
4944 if (to_vmx(vcpu)->nested.nested_run_pending)
4945 return -EBUSY;
4946
4947 /*
4948  * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
4949  * e.g. if the IRQ arrived asynchronously after checking nested events.
4950  */
4951 if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
4952 return -EBUSY;
4953
4954 return !vmx_interrupt_blocked(vcpu);
4955 }
4956
4957 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
4958 {
4959 void __user *ret;
4960
4961 if (enable_unrestricted_guest)
4962 return 0;
4963
4964 mutex_lock(&kvm->slots_lock);
4965 ret = __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
4966 PAGE_SIZE * 3);
4967 mutex_unlock(&kvm->slots_lock);
4968
4969 if (IS_ERR(ret))
4970 return PTR_ERR(ret);
4971
4972 to_kvm_vmx(kvm)->tss_addr = addr;
4973
4974 return init_rmode_tss(kvm, ret);
4975 }
4976
4977 static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
4978 {
4979 to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
4980 return 0;
4981 }
4982
4983 static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
4984 {
4985 switch (vec) {
4986 case BP_VECTOR:
4987 /*
4988  * Update instruction length as we may reinject the exception
4989  * from user space while in guest debugging mode.
4990  */
4991 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
4992 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
4993 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
4994 return false;
4995 fallthrough;
4996 case DB_VECTOR:
4997 return !(vcpu->guest_debug &
4998 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP));
4999 case DE_VECTOR:
5000 case OF_VECTOR:
5001 case BR_VECTOR:
5002 case UD_VECTOR:
5003 case DF_VECTOR:
5004 case SS_VECTOR:
5005 case GP_VECTOR:
5006 case MF_VECTOR:
5007 return true;
5008 }
5009 return false;
5010 }
5011
5012 static int handle_rmode_exception(struct kvm_vcpu *vcpu,
5013 int vec, u32 err_code)
5014 {
5015 /*
5016  * Instruction with address size override prefix opcode 0x67
5017  * Cause the #SS fault with 0 error code in VM86 mode.
5018  */
5019 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
5020 if (kvm_emulate_instruction(vcpu, 0)) {
5021 if (vcpu->arch.halt_request) {
5022 vcpu->arch.halt_request = 0;
5023 return kvm_emulate_halt_noskip(vcpu);
5024 }
5025 return 1;
5026 }
5027 return 0;
5028 }
5029
5030 /*
5031  * Forward all other exceptions that are valid in real mode.
5032  * FIXME: Breaks guest debugging in real mode, needs to be fixed with
5033  *        the required debugging infrastructure rework.
5034  */
5035 kvm_queue_exception(vcpu, vec);
5036 return 1;
5037 }
5038
5039 static int handle_machine_check(struct kvm_vcpu *vcpu)
5040 {
5041 /* handled by vmx_vcpu_run() */
5042 return 1;
5043 }
5044
5045 /*
5046  * If the host has split lock detection disabled, then #AC is
5047  * unconditionally injected into the guest, which is the pre split lock
5048  * detection behaviour.
5049  *
5050  * If the host has split lock detection enabled then #AC is
5051  * only injected into the guest when:
5052  *  - Guest CPL == 3 (user mode)
5053  *  - Guest has #AC detection enabled in CR0.AM
5054  *  - Guest EFLAGS has AC bit set
5055  */
5056 bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu)
5057 {
5058 if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
5059 return true;
5060
5061 return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
5062 (kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
5063 }
5064
5065 static int handle_exception_nmi(struct kvm_vcpu *vcpu)
5066 {
5067 struct vcpu_vmx *vmx = to_vmx(vcpu);
5068 struct kvm_run *kvm_run = vcpu->run;
5069 u32 intr_info, ex_no, error_code;
5070 unsigned long cr2, dr6;
5071 u32 vect_info;
5072
5073 vect_info = vmx->idt_vectoring_info;
5074 intr_info = vmx_get_intr_info(vcpu);
5075
5076 if (is_machine_check(intr_info) || is_nmi(intr_info))
5077 return 1;
5078
5079 /*
5080  * Queue the exception here instead of in handle_nm_fault_irqoff().
5081  * This ensures the nested_vmx check is not skipped so vmexit can
5082  * be reflected to L1 (when it intercepts #NM) before reaching this
5083  * point.
5084  */
5085 if (is_nm_fault(intr_info)) {
5086 kvm_queue_exception(vcpu, NM_VECTOR);
5087 return 1;
5088 }
5089
5090 if (is_invalid_opcode(intr_info))
5091 return handle_ud(vcpu);
5092
5093 error_code = 0;
5094 if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
5095 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
5096
5097 if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
5098 WARN_ON_ONCE(!enable_vmware_backdoor);
5099
5100 /*
5101  * VMware backdoor emulation on #GP interception only handles IN{S},
5102  * OUT{S}, and RDPMC, none of which generate a non-zero error code.
5103  * Inject #GP directly if the error code is non-zero instead of emulating.
5104  */
5105 if (error_code) {
5106 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
5107 return 1;
5108 }
5109 return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
5110 }
5111
5112 /*
5113  * The #PF with PFEC.RSVD = 1 indicates the guest is accessing
5114  * MMIO, it is better to report an internal error.
5115  * See the comments in vmx_handle_exit.
5116  */
5117 if ((vect_info & VECTORING_INFO_VALID_MASK) &&
5118 !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
5119 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5120 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
5121 vcpu->run->internal.ndata = 4;
5122 vcpu->run->internal.data[0] = vect_info;
5123 vcpu->run->internal.data[1] = intr_info;
5124 vcpu->run->internal.data[2] = error_code;
5125 vcpu->run->internal.data[3] = vcpu->arch.last_vmentry_cpu;
5126 return 0;
5127 }
5128
5129 if (is_page_fault(intr_info)) {
5130 cr2 = vmx_get_exit_qual(vcpu);
5131 if (enable_ept && !vcpu->arch.apf.host_apf_flags) {
5132 /*
5133  * EPT will cause page fault only if we need to
5134  * detect illegal GPAs.
5135  */
5136 WARN_ON_ONCE(!allow_smaller_maxphyaddr);
5137 kvm_fixup_and_inject_pf_error(vcpu, cr2, error_code);
5138 return 1;
5139 } else
5140 return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
5141 }
5142
5143 ex_no = intr_info & INTR_INFO_VECTOR_MASK;
5144
5145 if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
5146 return handle_rmode_exception(vcpu, ex_no, error_code);
5147
5148 switch (ex_no) {
5149 case DB_VECTOR:
5150 dr6 = vmx_get_exit_qual(vcpu);
5151 if (!(vcpu->guest_debug &
5152 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
5153 /*
5154  * If the #DB was due to ICEBP, a.k.a. INT1, skip the instruction.
5155  * ICEBP generates a trap-like #DB, but despite its interception
5156  * control being tied to #DB, is an instruction intercept, i.e. the
5157  * VM-Exit occurs on the ICEBP itself.  Use the inner "skip" helper
5158  * to avoid single-step #DB and MTF updates, as ICEBP is higher
5159  * priority.  Note, skipping ICEBP still clears STI and MOVSS
5160  * blocking.
5161  *
5162  * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS if
5163  * single-step is enabled in RFLAGS and STI or MOVSS blocking is
5164  * active, as the CPU doesn't set the bit on VM-Exit due to #DB
5165  * interception.  VM-Entry has a consistency check that a
5166  * single-step #DB is pending in this scenario as the previous
5167  * instruction cannot have toggled RFLAGS.TF 0=>1 (STI and POP/MOV
5168  * don't modify RFLAGS), therefore the one instruction delay when
5169  * activating single-step breakpoints must have already expired.
5170  * Note, the CPU sets/clears BS as appropriate for all other
5171  * VM-Exit types.
5172  */
5173 if (is_icebp(intr_info))
5174 WARN_ON(!skip_emulated_instruction(vcpu));
5175 else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
5176 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
5177 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)))
5178 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
5179 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
5180
5181 kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
5182 return 1;
5183 }
5184 kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW;
5185 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
5186 fallthrough;
5187 case BP_VECTOR:
5188 /*
5189  * Update instruction length as we may reinject #BP from
5190  * user space while in guest debugging mode. Reading it for
5191  * #DB as well causes no harm, it is not used in that case.
5192  */
5193 vmx->vcpu.arch.event_exit_inst_len =
5194 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
5195 kvm_run->exit_reason = KVM_EXIT_DEBUG;
5196 kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
5197 kvm_run->debug.arch.exception = ex_no;
5198 break;
5199 case AC_VECTOR:
5200 if (vmx_guest_inject_ac(vcpu)) {
5201 kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
5202 return 1;
5203 }
5204
5205 /*
5206  * Handle split lock. Depending on detection mode this will
5207  * either warn and disable split lock detection for this
5208  * task or force SIGBUS on it.
5209  */
5210 if (handle_guest_split_lock(kvm_rip_read(vcpu)))
5211 return 1;
5212 fallthrough;
5213 default:
5214 kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
5215 kvm_run->ex.exception = ex_no;
5216 kvm_run->ex.error_code = error_code;
5217 break;
5218 }
5219 return 0;
5220 }
5221
5222 static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu)
5223 {
5224 ++vcpu->stat.irq_exits;
5225 return 1;
5226 }
5227
5228 static int handle_triple_fault(struct kvm_vcpu *vcpu)
5229 {
5230 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
5231 vcpu->mmio_needed = 0;
5232 return 0;
5233 }
5234
5235 static int handle_io(struct kvm_vcpu *vcpu)
5236 {
5237 unsigned long exit_qualification;
5238 int size, in, string;
5239 unsigned port;
5240
5241 exit_qualification = vmx_get_exit_qual(vcpu);
5242 string = (exit_qualification & 16) != 0;
5243
5244 ++vcpu->stat.io_exits;
5245
5246 if (string)
5247 return kvm_emulate_instruction(vcpu, 0);
5248
5249 port = exit_qualification >> 16;
5250 size = (exit_qualification & 7) + 1;
5251 in = (exit_qualification & 8) != 0;
5252
5253 return kvm_fast_pio(vcpu, size, port, in);
5254 }
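 
/*
 * The decode above follows the VM-exit qualification layout for I/O
 * instructions (roughly: bits 2:0 hold the access size minus one, bit 3
 * is the direction with 1 meaning IN, bit 4 is set for string
 * instructions, and bits 31:16 hold the port number).  For example, with
 * DX = 0x71, "in %dx, %al" exits with qualification 0x00710008:
 * size = 1, in = 1, string = 0, port = 0x71.
 */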
5255
5256 static void
5257 vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
5258 {
5259 /*
5260  * Patch in the VMCALL instruction. (0f 01 c1)
5261  */
5262 hypercall[0] = 0x0f;
5263 hypercall[1] = 0x01;
5264 hypercall[2] = 0xc1;
5265 }
5266
5267
5268 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
5269 {
5270 if (is_guest_mode(vcpu)) {
5271 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5272 unsigned long orig_val = val;
5273
5274 /*
5275  * We get here when L2 changed cr0 in a way that did not change
5276  * any of L1's shadowed bits (see nested_vmx_exit_handled_cr),
5277  * but did change L0 shadowed bits. So we first calculate the
5278  * effective cr0 value that L1 would like to write into the
5279  * hardware. It consists of the L2-owned bits from the new
5280  * value combined with the L1-owned bits from L1's guest_cr0.
5281  */
5282 val = (val & ~vmcs12->cr0_guest_host_mask) |
5283 (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
5284
5285 if (!nested_guest_cr0_valid(vcpu, val))
5286 return 1;
5287
5288 if (kvm_set_cr0(vcpu, val))
5289 return 1;
5290 vmcs_writel(CR0_READ_SHADOW, orig_val);
5291 return 0;
5292 } else {
5293 if (to_vmx(vcpu)->nested.vmxon &&
5294 !nested_host_cr0_valid(vcpu, val))
5295 return 1;
5296
5297 return kvm_set_cr0(vcpu, val);
5298 }
5299 }
5300
5301 static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
5302 {
5303 if (is_guest_mode(vcpu)) {
5304 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5305 unsigned long orig_val = val;
5306
5307 /* analogously to handle_set_cr0 */
5308 val = (val & ~vmcs12->cr4_guest_host_mask) |
5309 (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
5310 if (kvm_set_cr4(vcpu, val))
5311 return 1;
5312 vmcs_writel(CR4_READ_SHADOW, orig_val);
5313 return 0;
5314 } else
5315 return kvm_set_cr4(vcpu, val);
5316 }
5317
5318 static int handle_desc(struct kvm_vcpu *vcpu)
5319 {
5320 WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
5321 return kvm_emulate_instruction(vcpu, 0);
5322 }
5323
5324 static int handle_cr(struct kvm_vcpu *vcpu)
5325 {
5326 unsigned long exit_qualification, val;
5327 int cr;
5328 int reg;
5329 int err;
5330 int ret;
5331
5332 exit_qualification = vmx_get_exit_qual(vcpu);
5333 cr = exit_qualification & 15;
5334 reg = (exit_qualification >> 8) & 15;
5335 switch ((exit_qualification >> 4) & 3) {
5336 case 0:
5337 val = kvm_register_read(vcpu, reg);
5338 trace_kvm_cr_write(cr, val);
5339 switch (cr) {
5340 case 0:
5341 err = handle_set_cr0(vcpu, val);
5342 return kvm_complete_insn_gp(vcpu, err);
5343 case 3:
5344 WARN_ON_ONCE(enable_unrestricted_guest);
5345
5346 err = kvm_set_cr3(vcpu, val);
5347 return kvm_complete_insn_gp(vcpu, err);
5348 case 4:
5349 err = handle_set_cr4(vcpu, val);
5350 return kvm_complete_insn_gp(vcpu, err);
5351 case 8: {
5352 u8 cr8_prev = kvm_get_cr8(vcpu);
5353 u8 cr8 = (u8)val;
5354 err = kvm_set_cr8(vcpu, cr8);
5355 ret = kvm_complete_insn_gp(vcpu, err);
5356 if (lapic_in_kernel(vcpu))
5357 return ret;
5358 if (cr8_prev <= cr8)
5359 return ret;
5360 /*
5361  * TODO: we might be squashing a
5362  * KVM_GUESTDBG_SINGLESTEP-triggered
5363  * KVM_EXIT_DEBUG here.
5364  */
5365 vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
5366 return 0;
5367 }
5368 }
5369 break;
5370 case 2:
5371 KVM_BUG(1, vcpu->kvm, "Guest always owns CR0.TS");
5372 return -EIO;
5373 case 1:
5374 switch (cr) {
5375 case 3:
5376 WARN_ON_ONCE(enable_unrestricted_guest);
5377
5378 val = kvm_read_cr3(vcpu);
5379 kvm_register_write(vcpu, reg, val);
5380 trace_kvm_cr_read(cr, val);
5381 return kvm_skip_emulated_instruction(vcpu);
5382 case 8:
5383 val = kvm_get_cr8(vcpu);
5384 kvm_register_write(vcpu, reg, val);
5385 trace_kvm_cr_read(cr, val);
5386 return kvm_skip_emulated_instruction(vcpu);
5387 }
5388 break;
5389 case 3:
5390 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
5391 trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
5392 kvm_lmsw(vcpu, val);
5393
5394 return kvm_skip_emulated_instruction(vcpu);
5395 default:
5396 break;
5397 }
5398 vcpu->run->exit_reason = 0;
5399 vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
5400 (int)(exit_qualification >> 4) & 3, cr);
5401 return 0;
5402 }
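 
/*
 * For reference, the exit qualification decoded above packs a CR access as
 * follows (per the VMX exit-qualification format for control-register
 * accesses): bits 3:0 are the control register number, bits 5:4 the access
 * type (0 = MOV to CR, 1 = MOV from CR, 2 = CLTS, 3 = LMSW), bits 11:8 the
 * GPR used by MOV, and bits 31:16 the LMSW source data.  E.g. a guest
 * "mov %rax, %cr4" exits with qualification 0x0004: cr = 4, type = 0,
 * reg = 0 (RAX).
 */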
5403
5404 static int handle_dr(struct kvm_vcpu *vcpu)
5405 {
5406 unsigned long exit_qualification;
5407 int dr, dr7, reg;
5408 int err = 1;
5409
5410 exit_qualification = vmx_get_exit_qual(vcpu);
5411 dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
5412
5413 /* First, if DR does not exist, trigger UD */
5414 if (!kvm_require_dr(vcpu, dr))
5415 return 1;
5416
5417 if (vmx_get_cpl(vcpu) > 0)
5418 goto out;
5419
5420 dr7 = vmcs_readl(GUEST_DR7);
5421 if (dr7 & DR7_GD) {
5422 /*
5423  * As the vm-exit takes precedence over the debug trap, we
5424  * need to emulate the latter, either for the host or the
5425  * guest debugging itself.
5426  */
5427 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
5428 vcpu->run->debug.arch.dr6 = DR6_BD | DR6_ACTIVE_LOW;
5429 vcpu->run->debug.arch.dr7 = dr7;
5430 vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
5431 vcpu->run->debug.arch.exception = DB_VECTOR;
5432 vcpu->run->exit_reason = KVM_EXIT_DEBUG;
5433 return 0;
5434 } else {
5435 kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BD);
5436 return 1;
5437 }
5438 }
5439
5440 if (vcpu->guest_debug == 0) {
5441 exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
5442
5443 /*
5444  * No more DR vmexits; force a reload of the debug registers
5445  * and reenter on this instruction.  The next vmexit will
5446  * retrieve the full state of the debug registers.
5447  */
5448 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
5449 return 1;
5450 }
5451
5452 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
5453 if (exit_qualification & TYPE_MOV_FROM_DR) {
5454 unsigned long val;
5455
5456 kvm_get_dr(vcpu, dr, &val);
5457 kvm_register_write(vcpu, reg, val);
5458 err = 0;
5459 } else {
5460 err = kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg));
5461 }
5462
5463 out:
5464 return kvm_complete_insn_gp(vcpu, err);
5465 }
5466
5467 static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
5468 {
5469 get_debugreg(vcpu->arch.db[0], 0);
5470 get_debugreg(vcpu->arch.db[1], 1);
5471 get_debugreg(vcpu->arch.db[2], 2);
5472 get_debugreg(vcpu->arch.db[3], 3);
5473 get_debugreg(vcpu->arch.dr6, 6);
5474 vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
5475
5476 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
5477 exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
5478
5479 /*
5480  * exc_debug expects dr6 to be cleared after it runs, avoid that it sees
5481  * a stale dr6 from the guest.
5482  */
5483 set_debugreg(DR6_RESERVED, 6);
5484 }
5485
5486 static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
5487 {
5488 vmcs_writel(GUEST_DR7, val);
5489 }
5490
5491 static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
5492 {
5493 kvm_apic_update_ppr(vcpu);
5494 return 1;
5495 }
5496
5497 static int handle_interrupt_window(struct kvm_vcpu *vcpu)
5498 {
5499 exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
5500
5501 kvm_make_request(KVM_REQ_EVENT, vcpu);
5502
5503 ++vcpu->stat.irq_window_exits;
5504 return 1;
5505 }
5506
5507 static int handle_invlpg(struct kvm_vcpu *vcpu)
5508 {
5509 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
5510
5511 kvm_mmu_invlpg(vcpu, exit_qualification);
5512 return kvm_skip_emulated_instruction(vcpu);
5513 }
5514
5515 static int handle_apic_access(struct kvm_vcpu *vcpu)
5516 {
5517 if (likely(fasteoi)) {
5518 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
5519 int access_type, offset;
5520
5521 access_type = exit_qualification & APIC_ACCESS_TYPE;
5522 offset = exit_qualification & APIC_ACCESS_OFFSET;
5523
5524 /*
5525  * A sane guest writes EOI with MOV and doesn't care about the written
5526  * value, so short-circuit that case to avoid heavy instruction emulation.
5527  */
5528 if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
5529 (offset == APIC_EOI)) {
5530 kvm_lapic_set_eoi(vcpu);
5531 return kvm_skip_emulated_instruction(vcpu);
5532 }
5533 }
5534 return kvm_emulate_instruction(vcpu, 0);
5535 }
5536
5537 static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
5538 {
5539 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
5540 int vector = exit_qualification & 0xff;
5541
5542 /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
5543 kvm_apic_set_eoi_accelerated(vcpu, vector);
5544 return 1;
5545 }
5546
5547 static int handle_apic_write(struct kvm_vcpu *vcpu)
5548 {
5549 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
5550
5551 /*
5552  * APIC-write VM-Exit is trap-like, KVM doesn't need to advance RIP and
5553  * hardware has done any necessary aliasing, offset adjustments, etc...
5554  * for the access.  I.e. the correct value has already been written to
5555  * the vAPIC page for the correct 16-byte chunk.  KVM needs only to
5556  * retrieve the register value and emulate the access.
5557  */
5558 u32 offset = exit_qualification & 0xff0;
5559
5560 kvm_apic_write_nodecode(vcpu, offset);
5561 return 1;
5562 }
5563
5564 static int handle_task_switch(struct kvm_vcpu *vcpu)
5565 {
5566 struct vcpu_vmx *vmx = to_vmx(vcpu);
5567 unsigned long exit_qualification;
5568 bool has_error_code = false;
5569 u32 error_code = 0;
5570 u16 tss_selector;
5571 int reason, type, idt_v, idt_index;
5572
5573 idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
5574 idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
5575 type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
5576
5577 exit_qualification = vmx_get_exit_qual(vcpu);
5578
5579 reason = (u32)exit_qualification >> 30;
5580 if (reason == TASK_SWITCH_GATE && idt_v) {
5581 switch (type) {
5582 case INTR_TYPE_NMI_INTR:
5583 vcpu->arch.nmi_injected = false;
5584 vmx_set_nmi_mask(vcpu, true);
5585 break;
5586 case INTR_TYPE_EXT_INTR:
5587 case INTR_TYPE_SOFT_INTR:
5588 kvm_clear_interrupt_queue(vcpu);
5589 break;
5590 case INTR_TYPE_HARD_EXCEPTION:
5591 if (vmx->idt_vectoring_info &
5592 VECTORING_INFO_DELIVER_CODE_MASK) {
5593 has_error_code = true;
5594 error_code =
5595 vmcs_read32(IDT_VECTORING_ERROR_CODE);
5596 }
5597 fallthrough;
5598 case INTR_TYPE_SOFT_EXCEPTION:
5599 kvm_clear_exception_queue(vcpu);
5600 break;
5601 default:
5602 break;
5603 }
5604 }
5605 tss_selector = exit_qualification;
5606
5607 if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
5608 type != INTR_TYPE_EXT_INTR &&
5609 type != INTR_TYPE_NMI_INTR))
5610 WARN_ON(!skip_emulated_instruction(vcpu));
5611
5612 /*
5613  * TODO: What about debug traps on tss switch?
5614  *       Are we supposed to inject them and update dr6?
5615  */
5616 return kvm_task_switch(vcpu, tss_selector,
5617 type == INTR_TYPE_SOFT_INTR ? idt_index : -1,
5618 reason, has_error_code, error_code);
5619 }
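 
/*
 * For reference, the task-switch exit qualification used above carries the
 * new TSS selector in bits 15:0 and the switch source in bits 31:30
 * (0 = CALL, 1 = IRET, 2 = JMP, 3 = task gate in the IDT, which is what
 * the TASK_SWITCH_GATE check corresponds to); the actual switch is then
 * deferred to the common emulator via kvm_task_switch().
 */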
5620
5621 static int handle_ept_violation(struct kvm_vcpu *vcpu)
5622 {
5623 unsigned long exit_qualification;
5624 gpa_t gpa;
5625 u64 error_code;
5626
5627 exit_qualification = vmx_get_exit_qual(vcpu);
5628
5629 /*
5630  * EPT violation happened while executing iret from NMI,
5631  * "blocked by NMI" bit has to be set before next VM entry.
5632  * There are errata that may cause this bit to not be set:
5633  * AAK134, BY25.
5634  */
5635 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
5636 enable_vnmi &&
5637 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
5638 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
5639
5640 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
5641 trace_kvm_page_fault(gpa, exit_qualification);
5642
5643 /* Is it a read fault? */
5644 error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
5645 ? PFERR_USER_MASK : 0;
5646 /* Is it a write fault? */
5647 error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
5648 ? PFERR_WRITE_MASK : 0;
5649 /* Is it a fetch fault? */
5650 error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
5651 ? PFERR_FETCH_MASK : 0;
5652 /* ept page table entry is present? */
5653 error_code |= (exit_qualification & EPT_VIOLATION_RWX_MASK)
5654 ? PFERR_PRESENT_MASK : 0;
5655
5656 error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ?
5657 PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
5658
5659 vcpu->arch.exit_qualification = exit_qualification;
5660
5661 /*
5662  * Check that the GPA doesn't exceed physical memory limits, as that is
5663  * a guest page fault.  We have to emulate the instruction here, because
5664  * if the illegal address is that of a paging structure, then
5665  * EPT_VIOLATION_ACC_WRITE bit is set.  Alternatively, if supported we
5666  * would also use advanced VM-exit information for EPT violations to
5667  * reconstruct the page fault error code.
5668  */
5669 if (unlikely(allow_smaller_maxphyaddr && kvm_vcpu_is_illegal_gpa(vcpu, gpa)))
5670 return kvm_emulate_instruction(vcpu, 0);
5671
5672 return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
5673 }
5674
5675 static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
5676 {
5677 gpa_t gpa;
5678
5679 if (!vmx_can_emulate_instruction(vcpu, EMULTYPE_PF, NULL, 0))
5680 return 1;
5681
5682 /*
5683  * A nested guest cannot optimize MMIO vmexits, because we have an
5684  * nGPA here instead of the required GPA.
5685  */
5686 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
5687 if (!is_guest_mode(vcpu) &&
5688 !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
5689 trace_kvm_fast_mmio(gpa);
5690 return kvm_skip_emulated_instruction(vcpu);
5691 }
5692
5693 return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
5694 }
5695
5696 static int handle_nmi_window(struct kvm_vcpu *vcpu)
5697 {
5698 if (KVM_BUG_ON(!enable_vnmi, vcpu->kvm))
5699 return -EIO;
5700
5701 exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
5702 ++vcpu->stat.nmi_window_exits;
5703 kvm_make_request(KVM_REQ_EVENT, vcpu);
5704
5705 return 1;
5706 }
5707
5708 static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
5709 {
5710 struct vcpu_vmx *vmx = to_vmx(vcpu);
5711
5712 return vmx->emulation_required && !vmx->rmode.vm86_active &&
5713 (vcpu->arch.exception.pending || vcpu->arch.exception.injected);
5714 }
5715
5716 static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
5717 {
5718 struct vcpu_vmx *vmx = to_vmx(vcpu);
5719 bool intr_window_requested;
5720 unsigned count = 130;
5721
5722 intr_window_requested = exec_controls_get(vmx) &
5723 CPU_BASED_INTR_WINDOW_EXITING;
5724
5725 while (vmx->emulation_required && count-- != 0) {
5726 if (intr_window_requested && !vmx_interrupt_blocked(vcpu))
5727 return handle_interrupt_window(&vmx->vcpu);
5728
5729 if (kvm_test_request(KVM_REQ_EVENT, vcpu))
5730 return 1;
5731
5732 if (!kvm_emulate_instruction(vcpu, 0))
5733 return 0;
5734
5735 if (vmx_emulation_required_with_pending_exception(vcpu)) {
5736 kvm_prepare_emulation_failure_exit(vcpu);
5737 return 0;
5738 }
5739
5740 if (vcpu->arch.halt_request) {
5741 vcpu->arch.halt_request = 0;
5742 return kvm_emulate_halt_noskip(vcpu);
5743 }
5744
5745 /*
5746  * Note, return 1 and not 0, vcpu_run() will invoke
5747  * xfer_to_guest_mode() which will create a proper return
5748  * code.
5749  */
5750 if (__xfer_to_guest_mode_work_pending())
5751 return 1;
5752 }
5753
5754 return 1;
5755 }
5756
5757 static int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu)
5758 {
5759 if (vmx_emulation_required_with_pending_exception(vcpu)) {
5760 kvm_prepare_emulation_failure_exit(vcpu);
5761 return 0;
5762 }
5763
5764 return 1;
5765 }
5766
5767 static void grow_ple_window(struct kvm_vcpu *vcpu)
5768 {
5769 struct vcpu_vmx *vmx = to_vmx(vcpu);
5770 unsigned int old = vmx->ple_window;
5771
5772 vmx->ple_window = __grow_ple_window(old, ple_window,
5773 ple_window_grow,
5774 ple_window_max);
5775
5776 if (vmx->ple_window != old) {
5777 vmx->ple_window_dirty = true;
5778 trace_kvm_ple_window_update(vcpu->vcpu_id,
5779 vmx->ple_window, old);
5780 }
5781 }
5782
5783 static void shrink_ple_window(struct kvm_vcpu *vcpu)
5784 {
5785 struct vcpu_vmx *vmx = to_vmx(vcpu);
5786 unsigned int old = vmx->ple_window;
5787
5788 vmx->ple_window = __shrink_ple_window(old, ple_window,
5789 ple_window_shrink,
5790 ple_window);
5791
5792 if (vmx->ple_window != old) {
5793 vmx->ple_window_dirty = true;
5794 trace_kvm_ple_window_update(vcpu->vcpu_id,
5795 vmx->ple_window, old);
5796 }
5797 }
5798
5799 /*
5800  * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
5801  * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
5802  */
5803 static int handle_pause(struct kvm_vcpu *vcpu)
5804 {
5805 if (!kvm_pause_in_guest(vcpu->kvm))
5806 grow_ple_window(vcpu);
5807
5808 /*
5809  * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting"
5810  * VM-execution control is ignored if CPL > 0. OTOH, KVM
5811  * never set PAUSE_EXITING and just set PLE if supported,
5812  * so the vcpu must be CPL=0 if it gets a PAUSE exit.
5813  */
5814 kvm_vcpu_on_spin(vcpu, true);
5815 return kvm_skip_emulated_instruction(vcpu);
5816 }
5817
5818 static int handle_monitor_trap(struct kvm_vcpu *vcpu)
5819 {
5820 return 1;
5821 }
5822
5823 static int handle_invpcid(struct kvm_vcpu *vcpu)
5824 {
5825 u32 vmx_instruction_info;
5826 unsigned long type;
5827 gva_t gva;
5828 struct {
5829 u64 pcid;
5830 u64 gla;
5831 } operand;
5832 int gpr_index;
5833
5834 if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
5835 kvm_queue_exception(vcpu, UD_VECTOR);
5836 return 1;
5837 }
5838
5839 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5840 gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
5841 type = kvm_register_read(vcpu, gpr_index);
5842
5843 /* According to the Intel instruction reference, the memory operand
5844  * is read even if it isn't needed (e.g., for type==all)
5845  */
5846 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
5847 vmx_instruction_info, false,
5848 sizeof(operand), &gva))
5849 return 1;
5850
5851 return kvm_handle_invpcid(vcpu, type, gva);
5852 }
5853
5854 static int handle_pml_full(struct kvm_vcpu *vcpu)
5855 {
5856 unsigned long exit_qualification;
5857
5858 trace_kvm_pml_full(vcpu->vcpu_id);
5859
5860 exit_qualification = vmx_get_exit_qual(vcpu);
5861
5862 /*
5863  * PML buffer FULL happened while executing iret from NMI,
5864  * "blocked by NMI" bit has to be set before next VM entry.
5865  */
5866 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
5867 enable_vnmi &&
5868 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
5869 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
5870 GUEST_INTR_STATE_NMI);
5871
5872 /*
5873  * PML buffer already flushed at beginning of VMEXIT. Nothing to do
5874  * here, and there's no userspace involvement needed for PML.
5875  */
5876 return 1;
5877 }
5878
5879 static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu)
5880 {
5881 struct vcpu_vmx *vmx = to_vmx(vcpu);
5882
5883 if (!vmx->req_immediate_exit &&
5884 !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) {
5885 kvm_lapic_expired_hv_timer(vcpu);
5886 return EXIT_FASTPATH_REENTER_GUEST;
5887 }
5888
5889 return EXIT_FASTPATH_NONE;
5890 }
5891
5892 static int handle_preemption_timer(struct kvm_vcpu *vcpu)
5893 {
5894 handle_fastpath_preemption_timer(vcpu);
5895 return 1;
5896 }
5897
5898 /*
5899  * When nested=0, all VMX instruction VM Exits filter here.  The handlers
5900  * are overwritten by nested_vmx_setup() when nested=1.
5901  */
5902 static int handle_vmx_instruction(struct kvm_vcpu *vcpu)
5903 {
5904 kvm_queue_exception(vcpu, UD_VECTOR);
5905 return 1;
5906 }
5907
5908 #ifndef CONFIG_X86_SGX_KVM
5909 static int handle_encls(struct kvm_vcpu *vcpu)
5910 {
5911 /*
5912  * SGX virtualization is disabled.  There is no software enable bit for
5913  * SGX, so KVM intercepts all ENCLS leafs and injects a #UD to prevent
5914  * the guest from executing ENCLS (when SGX is supported by hardware).
5915  */
5916 kvm_queue_exception(vcpu, UD_VECTOR);
5917 return 1;
5918 }
5919 #endif
5920
5921 static int handle_bus_lock_vmexit(struct kvm_vcpu *vcpu)
5922 {
5923 /*
5924  * Hardware may or may not set the BUS_LOCK_DETECTED flag on BUS_LOCK
5925  * VM-Exits. Unconditionally set the flag here and leave the handling to
5926  * vmx_handle_exit().
5927  */
5928 to_vmx(vcpu)->exit_reason.bus_lock_detected = true;
5929 return 1;
5930 }
5931
5932 static int handle_notify(struct kvm_vcpu *vcpu)
5933 {
5934 unsigned long exit_qual = vmx_get_exit_qual(vcpu);
5935 bool context_invalid = exit_qual & NOTIFY_VM_CONTEXT_INVALID;
5936
5937 ++vcpu->stat.notify_window_exits;
5938
5939 /*
5940  * Notify VM exit happened while executing iret from NMI,
5941  * "blocked by NMI" bit has to be set before next VM entry.
5942  */
5943 if (enable_vnmi && (exit_qual & INTR_INFO_UNBLOCK_NMI))
5944 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
5945 GUEST_INTR_STATE_NMI);
5946
5947 if (vcpu->kvm->arch.notify_vmexit_flags & KVM_X86_NOTIFY_VMEXIT_USER ||
5948 context_invalid) {
5949 vcpu->run->exit_reason = KVM_EXIT_NOTIFY;
5950 vcpu->run->notify.flags = context_invalid ?
5951 KVM_NOTIFY_CONTEXT_INVALID : 0;
5952 return 0;
5953 }
5954
5955 return 1;
5956 }
5957
5958 /*
5959  * The exit handlers return 1 if the exit was handled fully and guest
5960  * execution may resume.  Otherwise they set the kvm_run parameter to
5961  * indicate what needs to be done to userspace and return 0.
5962  */
5963 static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
5964 [EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi,
5965 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
5966 [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
5967 [EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
5968 [EXIT_REASON_IO_INSTRUCTION] = handle_io,
5969 [EXIT_REASON_CR_ACCESS] = handle_cr,
5970 [EXIT_REASON_DR_ACCESS] = handle_dr,
5971 [EXIT_REASON_CPUID] = kvm_emulate_cpuid,
5972 [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr,
5973 [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr,
5974 [EXIT_REASON_INTERRUPT_WINDOW] = handle_interrupt_window,
5975 [EXIT_REASON_HLT] = kvm_emulate_halt,
5976 [EXIT_REASON_INVD] = kvm_emulate_invd,
5977 [EXIT_REASON_INVLPG] = handle_invlpg,
5978 [EXIT_REASON_RDPMC] = kvm_emulate_rdpmc,
5979 [EXIT_REASON_VMCALL] = kvm_emulate_hypercall,
5980 [EXIT_REASON_VMCLEAR] = handle_vmx_instruction,
5981 [EXIT_REASON_VMLAUNCH] = handle_vmx_instruction,
5982 [EXIT_REASON_VMPTRLD] = handle_vmx_instruction,
5983 [EXIT_REASON_VMPTRST] = handle_vmx_instruction,
5984 [EXIT_REASON_VMREAD] = handle_vmx_instruction,
5985 [EXIT_REASON_VMRESUME] = handle_vmx_instruction,
5986 [EXIT_REASON_VMWRITE] = handle_vmx_instruction,
5987 [EXIT_REASON_VMOFF] = handle_vmx_instruction,
5988 [EXIT_REASON_VMON] = handle_vmx_instruction,
5989 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
5990 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
5991 [EXIT_REASON_APIC_WRITE] = handle_apic_write,
5992 [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
5993 [EXIT_REASON_WBINVD] = kvm_emulate_wbinvd,
5994 [EXIT_REASON_XSETBV] = kvm_emulate_xsetbv,
5995 [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
5996 [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
5997 [EXIT_REASON_GDTR_IDTR] = handle_desc,
5998 [EXIT_REASON_LDTR_TR] = handle_desc,
5999 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
6000 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
6001 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
6002 [EXIT_REASON_MWAIT_INSTRUCTION] = kvm_emulate_mwait,
6003 [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap,
6004 [EXIT_REASON_MONITOR_INSTRUCTION] = kvm_emulate_monitor,
6005 [EXIT_REASON_INVEPT] = handle_vmx_instruction,
6006 [EXIT_REASON_INVVPID] = handle_vmx_instruction,
6007 [EXIT_REASON_RDRAND] = kvm_handle_invalid_op,
6008 [EXIT_REASON_RDSEED] = kvm_handle_invalid_op,
6009 [EXIT_REASON_PML_FULL] = handle_pml_full,
6010 [EXIT_REASON_INVPCID] = handle_invpcid,
6011 [EXIT_REASON_VMFUNC] = handle_vmx_instruction,
6012 [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
6013 [EXIT_REASON_ENCLS] = handle_encls,
6014 [EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit,
6015 [EXIT_REASON_NOTIFY] = handle_notify,
6016 };
6017
6018 static const int kvm_vmx_max_exit_handlers =
6019 ARRAY_SIZE(kvm_vmx_exit_handlers);
6020
6021 static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
6022 u64 *info1, u64 *info2,
6023 u32 *intr_info, u32 *error_code)
6024 {
6025 struct vcpu_vmx *vmx = to_vmx(vcpu);
6026
6027 *reason = vmx->exit_reason.full;
6028 *info1 = vmx_get_exit_qual(vcpu);
6029 if (!(vmx->exit_reason.failed_vmentry)) {
6030 *info2 = vmx->idt_vectoring_info;
6031 *intr_info = vmx_get_intr_info(vcpu);
6032 if (is_exception_with_error_code(*intr_info))
6033 *error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
6034 else
6035 *error_code = 0;
6036 } else {
6037 *info2 = 0;
6038 *intr_info = 0;
6039 *error_code = 0;
6040 }
6041 }
6042
6043 static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
6044 {
6045 if (vmx->pml_pg) {
6046 __free_page(vmx->pml_pg);
6047 vmx->pml_pg = NULL;
6048 }
6049 }
6050
6051 static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
6052 {
6053 struct vcpu_vmx *vmx = to_vmx(vcpu);
6054 u64 *pml_buf;
6055 u16 pml_idx;
6056
6057 pml_idx = vmcs_read16(GUEST_PML_INDEX);
6058
6059 /* Do nothing if PML buffer is empty */
6060 if (pml_idx == (PML_ENTITY_NUM - 1))
6061 return;
6062
6063 /* PML index always points to next available PML buffer entity */
6064 if (pml_idx >= PML_ENTITY_NUM)
6065 pml_idx = 0;
6066 else
6067 pml_idx++;
6068
6069 pml_buf = page_address(vmx->pml_pg);
6070 for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
6071 u64 gpa;
6072
6073 gpa = pml_buf[pml_idx];
6074 WARN_ON(gpa & (PAGE_SIZE - 1));
6075 kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
6076 }
6077
6078 /* reset PML index */
6079 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
6080 }
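 
/*
 * A short worked example of the index handling above, assuming
 * PML_ENTITY_NUM == 512: GUEST_PML_INDEX starts at 511 and hardware
 * decrements it after logging each GPA, so 511 means the buffer is empty,
 * a wrapped value >= 512 (0xffff) means it is completely full and entries
 * 0..511 are valid, and any other value N means entries N+1..511 are
 * valid.  After draining, the index is reset to 511.
 */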
6081
6082 static void vmx_dump_sel(char *name, uint32_t sel)
6083 {
6084 pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
6085 name, vmcs_read16(sel),
6086 vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
6087 vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
6088 vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
6089 }
6090
6091 static void vmx_dump_dtsel(char *name, uint32_t limit)
6092 {
6093 pr_err("%s limit=0x%08x, base=0x%016lx\n",
6094 name, vmcs_read32(limit),
6095 vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
6096 }
6097
6098 static void vmx_dump_msrs(char *name, struct vmx_msrs *m)
6099 {
6100 unsigned int i;
6101 struct vmx_msr_entry *e;
6102
6103 pr_err("MSR %s:\n", name);
6104 for (i = 0, e = m->val; i < m->nr; ++i, ++e)
6105 pr_err(" %2d: msr=0x%08x value=0x%016llx\n", i, e->index, e->value);
6106 }
6107
6108 void dump_vmcs(struct kvm_vcpu *vcpu)
6109 {
6110 struct vcpu_vmx *vmx = to_vmx(vcpu);
6111 u32 vmentry_ctl, vmexit_ctl;
6112 u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control;
6113 u64 tertiary_exec_control;
6114 unsigned long cr4;
6115 int efer_slot;
6116
6117 if (!dump_invalid_vmcs) {
6118 pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n");
6119 return;
6120 }
6121
6122 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
6123 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
6124 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
6125 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
6126 cr4 = vmcs_readl(GUEST_CR4);
6127
6128 if (cpu_has_secondary_exec_ctrls())
6129 secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
6130 else
6131 secondary_exec_control = 0;
6132
6133 if (cpu_has_tertiary_exec_ctrls())
6134 tertiary_exec_control = vmcs_read64(TERTIARY_VM_EXEC_CONTROL);
6135 else
6136 tertiary_exec_control = 0;
6137
6138 pr_err("VMCS %p, last attempted VM-entry on CPU %d\n",
6139 vmx->loaded_vmcs->vmcs, vcpu->arch.last_vmentry_cpu);
6140 pr_err("*** Guest State ***\n");
6141 pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
6142 vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
6143 vmcs_readl(CR0_GUEST_HOST_MASK));
6144 pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
6145 cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
6146 pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
6147 if (cpu_has_vmx_ept()) {
6148 pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n",
6149 vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1));
6150 pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n",
6151 vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3));
6152 }
6153 pr_err("RSP = 0x%016lx RIP = 0x%016lx\n",
6154 vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
6155 pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n",
6156 vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
6157 pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
6158 vmcs_readl(GUEST_SYSENTER_ESP),
6159 vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
6160 vmx_dump_sel("CS: ", GUEST_CS_SELECTOR);
6161 vmx_dump_sel("DS: ", GUEST_DS_SELECTOR);
6162 vmx_dump_sel("SS: ", GUEST_SS_SELECTOR);
6163 vmx_dump_sel("ES: ", GUEST_ES_SELECTOR);
6164 vmx_dump_sel("FS: ", GUEST_FS_SELECTOR);
6165 vmx_dump_sel("GS: ", GUEST_GS_SELECTOR);
6166 vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
6167 vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
6168 vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
6169 vmx_dump_sel("TR: ", GUEST_TR_SELECTOR);
6170 efer_slot = vmx_find_loadstore_msr_slot(&vmx->msr_autoload.guest, MSR_EFER);
6171 if (vmentry_ctl & VM_ENTRY_LOAD_IA32_EFER)
6172 pr_err("EFER= 0x%016llx\n", vmcs_read64(GUEST_IA32_EFER));
6173 else if (efer_slot >= 0)
6174 pr_err("EFER= 0x%016llx (autoload)\n",
6175 vmx->msr_autoload.guest.val[efer_slot].value);
6176 else if (vmentry_ctl & VM_ENTRY_IA32E_MODE)
6177 pr_err("EFER= 0x%016llx (effective)\n",
6178 vcpu->arch.efer | (EFER_LMA | EFER_LME));
6179 else
6180 pr_err("EFER= 0x%016llx (effective)\n",
6181 vcpu->arch.efer & ~(EFER_LMA | EFER_LME));
6182 if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PAT)
6183 pr_err("PAT = 0x%016llx\n", vmcs_read64(GUEST_IA32_PAT));
6184 pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n",
6185 vmcs_read64(GUEST_IA32_DEBUGCTL),
6186 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
6187 if (cpu_has_load_perf_global_ctrl() &&
6188 vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
6189 pr_err("PerfGlobCtl = 0x%016llx\n",
6190 vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
6191 if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
6192 pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
6193 pr_err("Interruptibility = %08x ActivityState = %08x\n",
6194 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
6195 vmcs_read32(GUEST_ACTIVITY_STATE));
6196 if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
6197 pr_err("InterruptStatus = %04x\n",
6198 vmcs_read16(GUEST_INTR_STATUS));
6199 if (vmcs_read32(VM_ENTRY_MSR_LOAD_COUNT) > 0)
6200 vmx_dump_msrs("guest autoload", &vmx->msr_autoload.guest);
6201 if (vmcs_read32(VM_EXIT_MSR_STORE_COUNT) > 0)
6202 vmx_dump_msrs("guest autostore", &vmx->msr_autostore.guest);
6203
6204 pr_err("*** Host State ***\n");
6205 pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
6206 vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
6207 pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
6208 vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
6209 vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
6210 vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
6211 vmcs_read16(HOST_TR_SELECTOR));
6212 pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
6213 vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
6214 vmcs_readl(HOST_TR_BASE));
6215 pr_err("GDTBase=%016lx IDTBase=%016lx\n",
6216 vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
6217 pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
6218 vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
6219 vmcs_readl(HOST_CR4));
6220 pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
6221 vmcs_readl(HOST_IA32_SYSENTER_ESP),
6222 vmcs_read32(HOST_IA32_SYSENTER_CS),
6223 vmcs_readl(HOST_IA32_SYSENTER_EIP));
6224 if (vmexit_ctl & VM_EXIT_LOAD_IA32_EFER)
6225 pr_err("EFER= 0x%016llx\n", vmcs_read64(HOST_IA32_EFER));
6226 if (vmexit_ctl & VM_EXIT_LOAD_IA32_PAT)
6227 pr_err("PAT = 0x%016llx\n", vmcs_read64(HOST_IA32_PAT));
6228 if (cpu_has_load_perf_global_ctrl() &&
6229 vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
6230 pr_err("PerfGlobCtl = 0x%016llx\n",
6231 vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
6232 if (vmcs_read32(VM_EXIT_MSR_LOAD_COUNT) > 0)
6233 vmx_dump_msrs("host autoload", &vmx->msr_autoload.host);
6234
6235 pr_err("*** Control State ***\n");
6236 pr_err("CPUBased=0x%08x SecondaryExec=0x%08x TertiaryExec=0x%016llx\n",
6237 cpu_based_exec_ctrl, secondary_exec_control, tertiary_exec_control);
6238 pr_err("PinBased=0x%08x EntryControls=%08x ExitControls=%08x\n",
6239 pin_based_exec_ctrl, vmentry_ctl, vmexit_ctl);
6240 pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
6241 vmcs_read32(EXCEPTION_BITMAP),
6242 vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
6243 vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
6244 pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
6245 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
6246 vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
6247 vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
6248 pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
6249 vmcs_read32(VM_EXIT_INTR_INFO),
6250 vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
6251 vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
6252 pr_err(" reason=%08x qualification=%016lx\n",
6253 vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
6254 pr_err("IDTVectoring: info=%08x errcode=%08x\n",
6255 vmcs_read32(IDT_VECTORING_INFO_FIELD),
6256 vmcs_read32(IDT_VECTORING_ERROR_CODE));
6257 pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET));
6258 if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
6259 pr_err("TSC Multiplier = 0x%016llx\n",
6260 vmcs_read64(TSC_MULTIPLIER));
6261 if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) {
6262 if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
6263 u16 status = vmcs_read16(GUEST_INTR_STATUS);
6264 pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff);
6265 }
6266 pr_cont("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
6267 if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
6268 pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR));
6269 pr_cont("virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR));
6270 }
6271 if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
6272 pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
6273 if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
6274 pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
6275 if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
6276 pr_err("PLE Gap=%08x Window=%08x\n",
6277 vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
6278 if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
6279 pr_err("Virtual processor ID = 0x%04x\n",
6280 vmcs_read16(VIRTUAL_PROCESSOR_ID));
6281 }
6282
6283 /*
6284  * The guest has exited.  See if we can fix it or if we need userspace
6285  * assistance.
6286  */
6287 static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
6288 {
6289 struct vcpu_vmx *vmx = to_vmx(vcpu);
6290 union vmx_exit_reason exit_reason = vmx->exit_reason;
6291 u32 vectoring_info = vmx->idt_vectoring_info;
6292 u16 exit_handler_index;
6293
6294 /*
6295  * Flush the PML buffer of logged GPAs so that dirty_bitmap is up to
6296  * date.  This also means kvm_vm_ioctl_get_dirty_log() only needs to
6297  * kick vCPUs out of guest mode before querying dirty_bitmap, as a
6298  * vCPU back in root mode has already had its PML buffer flushed.
6299  *
6300  * Note, PML is never enabled in hardware while running L2.
6301  */
6302 if (enable_pml && !is_guest_mode(vcpu))
6303 vmx_flush_pml_buffer(vcpu);
6304
6305 /*
6306  * KVM should never reach this point with a pending nested VM-Enter.
6307  * More specifically, short-circuiting VM-Entry to emulate L2 due to
6308  * invalid guest state should never happen as that would mean KVM
6309  * knowingly allowed a nested VM-Enter with an invalid vmcs12.
6310  */
6311 if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm))
6312 return -EIO;
6313
6314 if (is_guest_mode(vcpu)) {
6315 /*
6316  * PML is never enabled when running L2, bail immediately if a
6317  * PML full exit occurs as something is horribly wrong.
6318  */
6319 if (exit_reason.basic == EXIT_REASON_PML_FULL)
6320 goto unexpected_vmexit;
6321
6322 /*
6323  * The host physical addresses of some pages of guest memory
6324  * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
6325  * Page). The CPU may write to these pages via their host
6326  * physical address while L2 is running, bypassing any
6327  * address-translation-based dirty tracking (e.g. EPT write
6328  * protection).
6329  *
6330  * Mark them dirty on every exit from L2 to prevent them from
6331  * getting out of sync with dirty tracking.
6332  */
6333 nested_mark_vmcs12_pages_dirty(vcpu);
6334
6335 /*
6336  * Synthesize a triple fault if L2 state is invalid.  In normal
6337  * operation, nested VM-Enter rejects any attempt to enter L2 with
6338  * invalid state; however, those checks are skipped when guest state
6339  * is stashed for later retrieval, e.g. when nested state is restored
6340  * from userspace.
6341  *
6342  * If the invalid state lingers until a nested VM-Exit is needed, KVM
6343  * cannot faithfully emulate L2, so report a synthetic
6344  * EXIT_REASON_TRIPLE_FAULT to L1 rather than running L2 with bad state.
6345  */
6346 if (vmx->emulation_required) {
6347 nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
6348 return 1;
6349 }
6350
6351 if (nested_vmx_reflect_vmexit(vcpu))
6352 return 1;
6353 }
6354
6355 /* If guest state is invalid, start emulating.  L2 is handled above. */
6356 if (vmx->emulation_required)
6357 return handle_invalid_guest_state(vcpu);
6358
6359 if (exit_reason.failed_vmentry) {
6360 dump_vmcs(vcpu);
6361 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
6362 vcpu->run->fail_entry.hardware_entry_failure_reason
6363 = exit_reason.full;
6364 vcpu->run->fail_entry.cpu = vcpu->arch.last_vmentry_cpu;
6365 return 0;
6366 }
6367
6368 if (unlikely(vmx->fail)) {
6369 dump_vmcs(vcpu);
6370 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
6371 vcpu->run->fail_entry.hardware_entry_failure_reason
6372 = vmcs_read32(VM_INSTRUCTION_ERROR);
6373 vcpu->run->fail_entry.cpu = vcpu->arch.last_vmentry_cpu;
6374 return 0;
6375 }
6376
6377 /*
6378  * Note: do not try to "fix" EXIT_REASON_EPT_MISCONFIG if it was
6379  * caused by event delivery, since that indicates the guest is
6380  * accessing MMIO; the VM-exit would simply be triggered again
6381  * after returning to the guest, causing an infinite loop.
6382  * Report an internal error to userspace instead.
6383  */
6384 if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
6385 (exit_reason.basic != EXIT_REASON_EXCEPTION_NMI &&
6386 exit_reason.basic != EXIT_REASON_EPT_VIOLATION &&
6387 exit_reason.basic != EXIT_REASON_PML_FULL &&
6388 exit_reason.basic != EXIT_REASON_APIC_ACCESS &&
6389 exit_reason.basic != EXIT_REASON_TASK_SWITCH &&
6390 exit_reason.basic != EXIT_REASON_NOTIFY)) {
6391 int ndata = 3;
6392
6393 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6394 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
6395 vcpu->run->internal.data[0] = vectoring_info;
6396 vcpu->run->internal.data[1] = exit_reason.full;
6397 vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
6398 if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) {
6399 vcpu->run->internal.data[ndata++] =
6400 vmcs_read64(GUEST_PHYSICAL_ADDRESS);
6401 }
6402 vcpu->run->internal.data[ndata++] = vcpu->arch.last_vmentry_cpu;
6403 vcpu->run->internal.ndata = ndata;
6404 return 0;
6405 }
6406
6407 if (unlikely(!enable_vnmi &&
6408 vmx->loaded_vmcs->soft_vnmi_blocked)) {
6409 if (!vmx_interrupt_blocked(vcpu)) {
6410 vmx->loaded_vmcs->soft_vnmi_blocked = 0;
6411 } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
6412 vcpu->arch.nmi_pending) {
6413 /*
6414  * The CPU doesn't help us find the end of an NMI-blocked
6415  * window if the guest runs with IRQs disabled, so pull the
6416  * trigger after one second of futile waiting and inform
6417  * the user about it.
6418  */
6419 printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
6420 "state on VCPU %d after 1 s timeout\n",
6421 __func__, vcpu->vcpu_id);
6422 vmx->loaded_vmcs->soft_vnmi_blocked = 0;
6423 }
6424 }
6425
6426 if (exit_fastpath != EXIT_FASTPATH_NONE)
6427 return 1;
6428
6429 if (exit_reason.basic >= kvm_vmx_max_exit_handlers)
6430 goto unexpected_vmexit;
6431 #ifdef CONFIG_RETPOLINE
6432 if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
6433 return kvm_emulate_wrmsr(vcpu);
6434 else if (exit_reason.basic == EXIT_REASON_PREEMPTION_TIMER)
6435 return handle_preemption_timer(vcpu);
6436 else if (exit_reason.basic == EXIT_REASON_INTERRUPT_WINDOW)
6437 return handle_interrupt_window(vcpu);
6438 else if (exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
6439 return handle_external_interrupt(vcpu);
6440 else if (exit_reason.basic == EXIT_REASON_HLT)
6441 return kvm_emulate_halt(vcpu);
6442 else if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG)
6443 return handle_ept_misconfig(vcpu);
6444 #endif
6445
6446 exit_handler_index = array_index_nospec((u16)exit_reason.basic,
6447 kvm_vmx_max_exit_handlers);
6448 if (!kvm_vmx_exit_handlers[exit_handler_index])
6449 goto unexpected_vmexit;
6450
6451 return kvm_vmx_exit_handlers[exit_handler_index](vcpu);
6452
6453 unexpected_vmexit:
6454 vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
6455 exit_reason.full);
6456 dump_vmcs(vcpu);
6457 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6458 vcpu->run->internal.suberror =
6459 KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
6460 vcpu->run->internal.ndata = 2;
6461 vcpu->run->internal.data[0] = exit_reason.full;
6462 vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
6463 return 0;
6464 }
6465
6466 static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
6467 {
6468 int ret = __vmx_handle_exit(vcpu, exit_fastpath);
6469
6470
6471
6472
6473
6474 if (to_vmx(vcpu)->exit_reason.bus_lock_detected) {
6475 if (ret > 0)
6476 vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK;
6477
6478 vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK;
6479 return 0;
6480 }
6481 return ret;
6482 }
6483
6484
6485
6486
6487
6488
6489
6490
6491
6492
6493
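/*
 * Flush the L1 data cache before VM-Enter as an L1TF mitigation: use the
 * MSR_IA32_FLUSH_CMD interface when the CPU provides it, otherwise fall
 * back to a software sequence that reads through the dedicated
 * vmx_l1d_flush_pages to displace the cache contents.
 */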
6494 static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
6495 {
6496 int size = PAGE_SIZE << L1D_CACHE_ORDER;
6497
6498
6499
6500
6501
6502 if (static_branch_likely(&vmx_l1d_flush_cond)) {
6503 bool flush_l1d;
6504
6505
6506
6507
6508
6509
6510 flush_l1d = vcpu->arch.l1tf_flush_l1d;
6511 vcpu->arch.l1tf_flush_l1d = false;
6512
6513
6514
6515
6516
6517 flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
6518 kvm_clear_cpu_l1tf_flush_l1d();
6519
6520 if (!flush_l1d)
6521 return;
6522 }
6523
6524 vcpu->stat.l1d_flush++;
6525
6526 if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
6527 native_wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
6528 return;
6529 }
6530
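/*
 * Software fallback: the first loop touches one byte per page so the
 * translations are present in the TLB, CPUID serializes, and the second
 * loop then reads every 64-byte cache line of the flush pages; the final
 * LFENCE orders the loads.
 */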
6531 asm volatile(
6532
6533 "xorl %%eax, %%eax\n"
6534 ".Lpopulate_tlb:\n\t"
6535 "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
6536 "addl $4096, %%eax\n\t"
6537 "cmpl %%eax, %[size]\n\t"
6538 "jne .Lpopulate_tlb\n\t"
6539 "xorl %%eax, %%eax\n\t"
6540 "cpuid\n\t"
6541
6542 "xorl %%eax, %%eax\n"
6543 ".Lfill_cache:\n"
6544 "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
6545 "addl $64, %%eax\n\t"
6546 "cmpl %%eax, %[size]\n\t"
6547 "jne .Lfill_cache\n\t"
6548 "lfence\n"
6549 :: [flush_pages] "r" (vmx_l1d_flush_pages),
6550 [size] "r" (size)
6551 : "eax", "ebx", "ecx", "edx");
6552 }
6553
6554 static void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
6555 {
6556 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6557 int tpr_threshold;
6558
6559 if (is_guest_mode(vcpu) &&
6560 nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
6561 return;
6562
6563 tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr;
6564 if (is_guest_mode(vcpu))
6565 to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold;
6566 else
6567 vmcs_write32(TPR_THRESHOLD, tpr_threshold);
6568 }
6569
6570 void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
6571 {
6572 struct vcpu_vmx *vmx = to_vmx(vcpu);
6573 u32 sec_exec_control;
6574
6575 if (!lapic_in_kernel(vcpu))
6576 return;
6577
6578 if (!flexpriority_enabled &&
6579 !cpu_has_vmx_virtualize_x2apic_mode())
6580 return;
6581
6582
6583 if (is_guest_mode(vcpu)) {
6584 vmx->nested.change_vmcs01_virtual_apic_mode = true;
6585 return;
6586 }
6587
6588 sec_exec_control = secondary_exec_controls_get(vmx);
6589 sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
6590 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
6591
6592 switch (kvm_get_apic_mode(vcpu)) {
6593 case LAPIC_MODE_INVALID:
6594 WARN_ONCE(true, "Invalid local APIC state");
6595 break;
6596 case LAPIC_MODE_DISABLED:
6597 break;
6598 case LAPIC_MODE_XAPIC:
6599 if (flexpriority_enabled) {
6600 sec_exec_control |=
6601 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6602 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
6603
6604
6605
6606
6607
6608
6609
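/*
 * Also flush the current TLB context: reloading the APIC-access page by
 * itself presumably does not invalidate stale translations the guest may
 * have created for that address while APIC virtualization was disabled.
 */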
6610 kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
6611 }
6612 break;
6613 case LAPIC_MODE_X2APIC:
6614 if (cpu_has_vmx_virtualize_x2apic_mode())
6615 sec_exec_control |=
6616 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
6617 break;
6618 }
6619 secondary_exec_controls_set(vmx, sec_exec_control);
6620
6621 vmx_update_msr_bitmap_x2apic(vcpu);
6622 }
6623
6624 static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
6625 {
6626 struct page *page;
6627
6628
6629 if (is_guest_mode(vcpu)) {
6630 to_vmx(vcpu)->nested.reload_vmcs01_apic_access_page = true;
6631 return;
6632 }
6633
6634 if (!(secondary_exec_controls_get(to_vmx(vcpu)) &
6635 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
6636 return;
6637
6638 page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
6639 if (is_error_page(page))
6640 return;
6641
6642 vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page));
6643 vmx_flush_tlb_current(vcpu);
6644
6645
6646
6647
6648
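/*
 * Drop the reference right away; the APIC-access page is intentionally
 * not pinned, and a later migration of the page is expected to trigger
 * another KVM_REQ_APIC_PAGE_RELOAD and re-run this function.
 */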
6649 put_page(page);
6650 }
6651
6652 static void vmx_hwapic_isr_update(int max_isr)
6653 {
6654 u16 status;
6655 u8 old;
6656
6657 if (max_isr == -1)
6658 max_isr = 0;
6659
6660 status = vmcs_read16(GUEST_INTR_STATUS);
6661 old = status >> 8;
6662 if (max_isr != old) {
6663 status &= 0xff;
6664 status |= max_isr << 8;
6665 vmcs_write16(GUEST_INTR_STATUS, status);
6666 }
6667 }
6668
6669 static void vmx_set_rvi(int vector)
6670 {
6671 u16 status;
6672 u8 old;
6673
6674 if (vector == -1)
6675 vector = 0;
6676
6677 status = vmcs_read16(GUEST_INTR_STATUS);
6678 old = (u8)status & 0xff;
6679 if ((u8)vector != old) {
6680 status &= ~0xff;
6681 status |= (u8)vector;
6682 vmcs_write16(GUEST_INTR_STATUS, status);
6683 }
6684 }
6685
6686 static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
6687 {
6688
6689
6690
6691
6692
6693
6694
6695
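/*
 * RVI is only programmed here for L1; while L2 is active, interrupt
 * injection is presumably re-evaluated by the nested code on the next
 * VM-Enter instead.
 */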
6696 if (!is_guest_mode(vcpu))
6697 vmx_set_rvi(max_irr);
6698 }
6699
6700 static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
6701 {
6702 struct vcpu_vmx *vmx = to_vmx(vcpu);
6703 int max_irr;
6704 bool got_posted_interrupt;
6705
6706 if (KVM_BUG_ON(!enable_apicv, vcpu->kvm))
6707 return -EIO;
6708
6709 if (pi_test_on(&vmx->pi_desc)) {
6710 pi_clear_on(&vmx->pi_desc);
6711
6712
6713
6714
6715 smp_mb__after_atomic();
6716 got_posted_interrupt =
6717 kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
6718 } else {
6719 max_irr = kvm_lapic_find_highest_irr(vcpu);
6720 got_posted_interrupt = false;
6721 }
6722
6723
6724
6725
6726
6727
6728
6729
6730
6731
6732
6733
6734
6735
6736
6737
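/*
 * If L1 is running with APICv active, newly gathered interrupts can be
 * signalled by writing RVI directly.  Otherwise (L2 active or APICv
 * inhibited for this vCPU) raise KVM_REQ_EVENT so the interrupt is
 * evaluated and injected via the normal path on the next entry.
 */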
6738 if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu))
6739 vmx_set_rvi(max_irr);
6740 else if (got_posted_interrupt)
6741 kvm_make_request(KVM_REQ_EVENT, vcpu);
6742
6743 return max_irr;
6744 }
6745
6746 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
6747 {
6748 if (!kvm_vcpu_apicv_active(vcpu))
6749 return;
6750
6751 vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
6752 vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
6753 vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
6754 vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
6755 }
6756
6757 static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
6758 {
6759 struct vcpu_vmx *vmx = to_vmx(vcpu);
6760
6761 pi_clear_on(&vmx->pi_desc);
6762 memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
6763 }
6764
6765 void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
6766
6767 static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
6768 unsigned long entry)
6769 {
6770 bool is_nmi = entry == (unsigned long)asm_exc_nmi_noist;
6771
6772 kvm_before_interrupt(vcpu, is_nmi ? KVM_HANDLING_NMI : KVM_HANDLING_IRQ);
6773 vmx_do_interrupt_nmi_irqoff(entry);
6774 kvm_after_interrupt(vcpu);
6775 }
6776
6777 static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
6778 {
6779
6780
6781
6782
6783
6784
6785
6786
6787
6788
6789
6790
6791
6792
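/*
 * If the guest may have a non-zero XFD, a #NM can carry its cause in
 * MSR_IA32_XFD_ERR; read and stash it while IRQs are still disabled,
 * presumably before anything on the host side can clobber the MSR.
 */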
6793 if (vcpu->arch.guest_fpu.fpstate->xfd)
6794 rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
6795 }
6796
6797 static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
6798 {
6799 const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_noist;
6800 u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
6801
6802
6803 if (is_page_fault(intr_info))
6804 vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags();
6805
6806 else if (is_nm_fault(intr_info))
6807 handle_nm_fault_irqoff(&vmx->vcpu);
6808
6809 else if (is_machine_check(intr_info))
6810 kvm_machine_check();
6811
6812 else if (is_nmi(intr_info))
6813 handle_interrupt_nmi_irqoff(&vmx->vcpu, nmi_entry);
6814 }
6815
6816 static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
6817 {
6818 u32 intr_info = vmx_get_intr_info(vcpu);
6819 unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
6820 gate_desc *desc = (gate_desc *)host_idt_base + vector;
6821
6822 if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
6823 "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
6824 return;
6825
6826 handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
6827 vcpu->arch.at_instruction_boundary = true;
6828 }
6829
6830 static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
6831 {
6832 struct vcpu_vmx *vmx = to_vmx(vcpu);
6833
6834 if (vmx->emulation_required)
6835 return;
6836
6837 if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
6838 handle_external_interrupt_irqoff(vcpu);
6839 else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI)
6840 handle_exception_nmi_irqoff(vmx);
6841 }
6842
6843
6844
6845
6846
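/*
 * Report which MSRs KVM handles for VMX guests even without hardware
 * support: SMBASE only when unrestricted guest or invalid-guest-state
 * emulation is available, the VMX capability MSRs only when nesting is
 * enabled, and the AMD-specific MSRs never.
 */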
6847 static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
6848 {
6849 switch (index) {
6850 case MSR_IA32_SMBASE:
6851
6852
6853
6854
6855 return enable_unrestricted_guest || emulate_invalid_guest_state;
6856 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
6857 return nested;
6858 case MSR_AMD64_VIRT_SPEC_CTRL:
6859 case MSR_AMD64_TSC_RATIO:
6860
6861 return false;
6862 default:
6863 return true;
6864 }
6865 }
6866
6867 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
6868 {
6869 u32 exit_intr_info;
6870 bool unblock_nmi;
6871 u8 vector;
6872 bool idtv_info_valid;
6873
6874 idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
6875
6876 if (enable_vnmi) {
6877 if (vmx->loaded_vmcs->nmi_known_unmasked)
6878 return;
6879
6880 exit_intr_info = vmx_get_intr_info(&vmx->vcpu);
6881 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
6882 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
6883
6884
6885
6886
6887
6888
6889
6890
6891
6892
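/*
 * If the exit reports "NMI unblocked due to IRET" and no event was being
 * delivered through the IDT (and the vector is not #DF), re-set the
 * NMI-blocking bit in guest interruptibility.  Otherwise cache whether
 * NMIs are currently unmasked so later exits can skip the VMREAD.
 */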
6893 if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
6894 vector != DF_VECTOR && !idtv_info_valid)
6895 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
6896 GUEST_INTR_STATE_NMI);
6897 else
6898 vmx->loaded_vmcs->nmi_known_unmasked =
6899 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
6900 & GUEST_INTR_STATE_NMI);
6901 } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
6902 vmx->loaded_vmcs->vnmi_blocked_time +=
6903 ktime_to_ns(ktime_sub(ktime_get(),
6904 vmx->loaded_vmcs->entry_time));
6905 }
6906
6907 static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
6908 u32 idt_vectoring_info,
6909 int instr_len_field,
6910 int error_code_field)
6911 {
6912 u8 vector;
6913 int type;
6914 bool idtv_info_valid;
6915
6916 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
6917
6918 vcpu->arch.nmi_injected = false;
6919 kvm_clear_exception_queue(vcpu);
6920 kvm_clear_interrupt_queue(vcpu);
6921
6922 if (!idtv_info_valid)
6923 return;
6924
6925 kvm_make_request(KVM_REQ_EVENT, vcpu);
6926
6927 vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
6928 type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
6929
6930 switch (type) {
6931 case INTR_TYPE_NMI_INTR:
6932 vcpu->arch.nmi_injected = true;
6933
6934
6935
6936
6937
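/*
 * An NMI was being delivered when the exit occurred; unmask NMIs so the
 * re-queued NMI can be injected again on the next entry.
 */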
6938 vmx_set_nmi_mask(vcpu, false);
6939 break;
6940 case INTR_TYPE_SOFT_EXCEPTION:
6941 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
6942 fallthrough;
6943 case INTR_TYPE_HARD_EXCEPTION:
6944 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
6945 u32 err = vmcs_read32(error_code_field);
6946 kvm_requeue_exception_e(vcpu, vector, err);
6947 } else
6948 kvm_requeue_exception(vcpu, vector);
6949 break;
6950 case INTR_TYPE_SOFT_INTR:
6951 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
6952 fallthrough;
6953 case INTR_TYPE_EXT_INTR:
6954 kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
6955 break;
6956 default:
6957 break;
6958 }
6959 }
6960
6961 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
6962 {
6963 __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
6964 VM_EXIT_INSTRUCTION_LEN,
6965 IDT_VECTORING_ERROR_CODE);
6966 }
6967
6968 static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
6969 {
6970 __vmx_complete_interrupts(vcpu,
6971 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
6972 VM_ENTRY_INSTRUCTION_LEN,
6973 VM_ENTRY_EXCEPTION_ERROR_CODE);
6974
6975 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
6976 }
6977
6978 static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
6979 {
6980 int i, nr_msrs;
6981 struct perf_guest_switch_msr *msrs;
6982 struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu);
6983
6984 pmu->host_cross_mapped_mask = 0;
6985 if (pmu->pebs_enable & pmu->global_ctrl)
6986 intel_pmu_cross_mapped_check(pmu);
6987
6988
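/*
 * Ask perf which MSRs differ between host and guest and program them via
 * the VMCS atomic switch lists; MSRs whose host and guest values already
 * match are dropped from the lists.
 */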
6989 msrs = perf_guest_get_msrs(&nr_msrs, (void *)pmu);
6990 if (!msrs)
6991 return;
6992
6993 for (i = 0; i < nr_msrs; i++)
6994 if (msrs[i].host == msrs[i].guest)
6995 clear_atomic_switch_msr(vmx, msrs[i].msr);
6996 else
6997 add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
6998 msrs[i].host, false);
6999 }
7000
7001 static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
7002 {
7003 struct vcpu_vmx *vmx = to_vmx(vcpu);
7004 u64 tscl;
7005 u32 delta_tsc;
7006
7007 if (vmx->req_immediate_exit) {
7008 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
7009 vmx->loaded_vmcs->hv_timer_soft_disabled = false;
7010 } else if (vmx->hv_deadline_tsc != -1) {
7011 tscl = rdtsc();
7012 if (vmx->hv_deadline_tsc > tscl)
7013
7014 delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
7015 cpu_preemption_timer_multi);
7016 else
7017 delta_tsc = 0;
7018
7019 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
7020 vmx->loaded_vmcs->hv_timer_soft_disabled = false;
7021 } else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) {
7022 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1);
7023 vmx->loaded_vmcs->hv_timer_soft_disabled = true;
7024 }
7025 }
7026
7027 void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
7028 {
7029 if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) {
7030 vmx->loaded_vmcs->host_state.rsp = host_rsp;
7031 vmcs_writel(HOST_RSP, host_rsp);
7032 }
7033 }
7034
7035 void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
7036 unsigned int flags)
7037 {
7038 u64 hostval = this_cpu_read(x86_spec_ctrl_current);
7039
7040 if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
7041 return;
7042
7043 if (flags & VMX_RUN_SAVE_SPEC_CTRL)
7044 vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
7045
7046
7047
7048
7049
7050
7051
7052
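/*
 * Restore the host's IA32_SPEC_CTRL if the guest's value differs, or
 * unconditionally when X86_FEATURE_KERNEL_IBRS is in use; otherwise the
 * MSR already holds the correct host value.
 */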
7053 if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
7054 vmx->spec_ctrl != hostval)
7055 native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
7056
7057 barrier_nospec();
7058 }
7059
7060 static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
7061 {
7062 switch (to_vmx(vcpu)->exit_reason.basic) {
7063 case EXIT_REASON_MSR_WRITE:
7064 return handle_fastpath_set_msr_irqoff(vcpu);
7065 case EXIT_REASON_PREEMPTION_TIMER:
7066 return handle_fastpath_preemption_timer(vcpu);
7067 default:
7068 return EXIT_FASTPATH_NONE;
7069 }
7070 }
7071
7072 static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
7073 struct vcpu_vmx *vmx,
7074 unsigned long flags)
7075 {
7076 guest_state_enter_irqoff();
7077
7078
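/*
 * The L1D flush also overwrites the CPU buffers, so an explicit MDS
 * buffer clear is only needed when no L1D flush is done; the MMIO
 * stale-data clear is further limited to VMs with an assigned device.
 */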
7079 if (static_branch_unlikely(&vmx_l1d_should_flush))
7080 vmx_l1d_flush(vcpu);
7081 else if (static_branch_unlikely(&mds_user_clear))
7082 mds_clear_cpu_buffers();
7083 else if (static_branch_unlikely(&mmio_stale_data_clear) &&
7084 kvm_arch_has_assigned_device(vcpu->kvm))
7085 mds_clear_cpu_buffers();
7086
7087 vmx_disable_fb_clear(vmx);
7088
7089 if (vcpu->arch.cr2 != native_read_cr2())
7090 native_write_cr2(vcpu->arch.cr2);
7091
7092 vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
7093 flags);
7094
7095 vcpu->arch.cr2 = native_read_cr2();
7096
7097 vmx_enable_fb_clear(vmx);
7098
7099 guest_state_exit_irqoff();
7100 }
7101
7102 static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
7103 {
7104 struct vcpu_vmx *vmx = to_vmx(vcpu);
7105 unsigned long cr3, cr4;
7106
7107
7108 if (unlikely(!enable_vnmi &&
7109 vmx->loaded_vmcs->soft_vnmi_blocked))
7110 vmx->loaded_vmcs->entry_time = ktime_get();
7111
7112
7113
7114
7115
7116
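/*
 * Don't attempt VM-Enter while the guest's state is invalid.  Fake a
 * failed VM-Entry with EXIT_REASON_INVALID_STATE instead so the exit
 * handler keeps emulating until the state becomes valid.
 */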
7117 if (unlikely(vmx->emulation_required)) {
7118 vmx->fail = 0;
7119
7120 vmx->exit_reason.full = EXIT_REASON_INVALID_STATE;
7121 vmx->exit_reason.failed_vmentry = 1;
7122 kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1);
7123 vmx->exit_qualification = ENTRY_FAIL_DEFAULT;
7124 kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2);
7125 vmx->exit_intr_info = 0;
7126 return EXIT_FASTPATH_NONE;
7127 }
7128
7129 trace_kvm_entry(vcpu);
7130
7131 if (vmx->ple_window_dirty) {
7132 vmx->ple_window_dirty = false;
7133 vmcs_write32(PLE_WINDOW, vmx->ple_window);
7134 }
7135
7136
7137
7138
7139
7140 WARN_ON_ONCE(vmx->nested.need_vmcs12_to_shadow_sync);
7141
7142 if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP))
7143 vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
7144 if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP))
7145 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
7146 vcpu->arch.regs_dirty = 0;
7147
7148
7149
7150
7151
7152
7153
7154
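/*
 * Keep HOST_CR3 and HOST_CR4 in the VMCS in sync with the current host
 * values, which can change at runtime (e.g. the PCID in CR3), while
 * skipping the VMWRITE when nothing changed.
 */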
7155 cr3 = __get_current_cr3_fast();
7156 if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
7157 vmcs_writel(HOST_CR3, cr3);
7158 vmx->loaded_vmcs->host_state.cr3 = cr3;
7159 }
7160
7161 cr4 = cr4_read_shadow();
7162 if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
7163 vmcs_writel(HOST_CR4, cr4);
7164 vmx->loaded_vmcs->host_state.cr4 = cr4;
7165 }
7166
7167
7168 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
7169 set_debugreg(vcpu->arch.dr6, 6);
7170
7171
7172
7173
7174
7175
7176 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
7177 vmx_set_interrupt_shadow(vcpu, 0);
7178
7179 kvm_load_guest_xsave_state(vcpu);
7180
7181 pt_guest_enter(vmx);
7182
7183 atomic_switch_perf_msrs(vmx);
7184 if (intel_pmu_lbr_is_enabled(vcpu))
7185 vmx_passthrough_lbr_msrs(vcpu);
7186
7187 if (enable_preemption_timer)
7188 vmx_update_hv_timer(vcpu);
7189
7190 kvm_wait_lapic_expire(vcpu);
7191
7192
7193 vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx));
7194
7195
7196 if (static_branch_unlikely(&enable_evmcs)) {
7197 current_evmcs->hv_clean_fields |=
7198 HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
7199
7200 current_evmcs->hv_vp_id = kvm_hv_get_vpindex(vcpu);
7201 }
7202
7203
7204 if (vmx->host_debugctlmsr)
7205 update_debugctlmsr(vmx->host_debugctlmsr);
7206
7207 #ifndef CONFIG_X86_64
7208
7209
7210
7211
7212
7213
7214
7215
7216 loadsegment(ds, __USER_DS);
7217 loadsegment(es, __USER_DS);
7218 #endif
7219
7220 vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
7221
7222 pt_guest_exit(vmx);
7223
7224 kvm_load_host_xsave_state(vcpu);
7225
7226 if (is_guest_mode(vcpu)) {
7227
7228
7229
7230
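/*
 * Count a successful nested run only when a nested VM-Enter was pending
 * and the entry did not fail; the pending flag is consumed either way.
 */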
7231 if (vmx->nested.nested_run_pending &&
7232 !vmx->exit_reason.failed_vmentry)
7233 ++vcpu->stat.nested_run;
7234
7235 vmx->nested.nested_run_pending = 0;
7236 }
7237
7238 vmx->idt_vectoring_info = 0;
7239
7240 if (unlikely(vmx->fail)) {
7241 vmx->exit_reason.full = 0xdead;
7242 return EXIT_FASTPATH_NONE;
7243 }
7244
7245 vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
7246 if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
7247 kvm_machine_check();
7248
7249 if (likely(!vmx->exit_reason.failed_vmentry))
7250 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
7251
7252 trace_kvm_exit(vcpu, KVM_ISA_VMX);
7253
7254 if (unlikely(vmx->exit_reason.failed_vmentry))
7255 return EXIT_FASTPATH_NONE;
7256
7257 vmx->loaded_vmcs->launched = 1;
7258
7259 vmx_recover_nmi_blocking(vmx);
7260 vmx_complete_interrupts(vmx);
7261
7262 if (is_guest_mode(vcpu))
7263 return EXIT_FASTPATH_NONE;
7264
7265 return vmx_exit_handlers_fastpath(vcpu);
7266 }
7267
7268 static void vmx_vcpu_free(struct kvm_vcpu *vcpu)
7269 {
7270 struct vcpu_vmx *vmx = to_vmx(vcpu);
7271
7272 if (enable_pml)
7273 vmx_destroy_pml_buffer(vmx);
7274 free_vpid(vmx->vpid);
7275 nested_vmx_free_vcpu(vcpu);
7276 free_loaded_vmcs(vmx->loaded_vmcs);
7277 }
7278
7279 static int vmx_vcpu_create(struct kvm_vcpu *vcpu)
7280 {
7281 struct vmx_uret_msr *tsx_ctrl;
7282 struct vcpu_vmx *vmx;
7283 int i, err;
7284
7285 BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0);
7286 vmx = to_vmx(vcpu);
7287
7288 INIT_LIST_HEAD(&vmx->pi_wakeup_list);
7289
7290 err = -ENOMEM;
7291
7292 vmx->vpid = allocate_vpid();
7293
7294
7295
7296
7297
7298
7299
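/*
 * Allocate the PML log page up front so that vCPU creation simply fails
 * if the page can't be allocated, rather than having to handle PML being
 * enabled for only a subset of vCPUs.
 */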
7300 if (enable_pml) {
7301 vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
7302 if (!vmx->pml_pg)
7303 goto free_vpid;
7304 }
7305
7306 for (i = 0; i < kvm_nr_uret_msrs; ++i)
7307 vmx->guest_uret_msrs[i].mask = -1ull;
7308 if (boot_cpu_has(X86_FEATURE_RTM)) {
7309
7310
7311
7312
7313
7314 tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
7315 if (tsx_ctrl)
7316 tsx_ctrl->mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
7317 }
7318
7319 err = alloc_loaded_vmcs(&vmx->vmcs01);
7320 if (err < 0)
7321 goto free_pml;
7322
7323
7324
7325
7326
7327
7328
7329 if (IS_ENABLED(CONFIG_HYPERV) && static_branch_unlikely(&enable_evmcs) &&
7330 (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
7331 struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs;
7332
7333 evmcs->hv_enlightenments_control.msr_bitmap = 1;
7334 }
7335
7336
7337 bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
7338 bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
7339
7340 vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
7341 #ifdef CONFIG_X86_64
7342 vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
7343 vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
7344 vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
7345 #endif
7346 vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
7347 vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
7348 vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
7349 if (kvm_cstate_in_guest(vcpu->kvm)) {
7350 vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R);
7351 vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
7352 vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
7353 vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
7354 }
7355
7356 vmx->loaded_vmcs = &vmx->vmcs01;
7357
7358 if (cpu_need_virtualize_apic_accesses(vcpu)) {
7359 err = alloc_apic_access_page(vcpu->kvm);
7360 if (err)
7361 goto free_vmcs;
7362 }
7363
7364 if (enable_ept && !enable_unrestricted_guest) {
7365 err = init_rmode_identity_map(vcpu->kvm);
7366 if (err)
7367 goto free_vmcs;
7368 }
7369
7370 if (vmx_can_use_ipiv(vcpu))
7371 WRITE_ONCE(to_kvm_vmx(vcpu->kvm)->pid_table[vcpu->vcpu_id],
7372 __pa(&vmx->pi_desc) | PID_TABLE_ENTRY_VALID);
7373
7374 return 0;
7375
7376 free_vmcs:
7377 free_loaded_vmcs(vmx->loaded_vmcs);
7378 free_pml:
7379 vmx_destroy_pml_buffer(vmx);
7380 free_vpid:
7381 free_vpid(vmx->vpid);
7382 return err;
7383 }
7384
7385 #define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
7386 #define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
7387
7388 static int vmx_vm_init(struct kvm *kvm)
7389 {
7390 if (!ple_gap)
7391 kvm->arch.pause_in_guest = true;
7392
7393 if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) {
7394 switch (l1tf_mitigation) {
7395 case L1TF_MITIGATION_OFF:
7396 case L1TF_MITIGATION_FLUSH_NOWARN:
7397
7398 break;
7399 case L1TF_MITIGATION_FLUSH:
7400 case L1TF_MITIGATION_FLUSH_NOSMT:
7401 case L1TF_MITIGATION_FULL:
7402
7403
7404
7405
7406 if (sched_smt_active())
7407 pr_warn_once(L1TF_MSG_SMT);
7408 if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER)
7409 pr_warn_once(L1TF_MSG_L1D);
7410 break;
7411 case L1TF_MITIGATION_FULL_FORCE:
7412
7413 break;
7414 }
7415 }
7416 return 0;
7417 }
7418
7419 static int __init vmx_check_processor_compat(void)
7420 {
7421 struct vmcs_config vmcs_conf;
7422 struct vmx_capability vmx_cap;
7423
7424 if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
7425 !this_cpu_has(X86_FEATURE_VMX)) {
7426 pr_err("kvm: VMX is disabled on CPU %d\n", smp_processor_id());
7427 return -EIO;
7428 }
7429
7430 if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
7431 return -EIO;
7432 if (nested)
7433 nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept);
7434 if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
7435 printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
7436 smp_processor_id());
7437 return -EIO;
7438 }
7439 return 0;
7440 }
7441
7442 static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
7443 {
7444 u8 cache;
7445
7446
7447
7448
7449
7450
7451
7452
7453
7454
7455
7456
7457
7458
7459
7460
7461
7462
7463
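/*
 * Choose the EPT memory type: MMIO is always UC; without non-coherent
 * DMA the guest's MTRRs can be ignored and WB forced with the
 * ignore-PAT bit set; with CR0.CD set, honor the CD/NW quirk; otherwise
 * use the guest's MTRR type for the gfn.
 */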
7464 if (is_mmio)
7465 return MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
7466
7467 if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
7468 return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
7469
7470 if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
7471 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
7472 cache = MTRR_TYPE_WRBACK;
7473 else
7474 cache = MTRR_TYPE_UNCACHABLE;
7475
7476 return (cache << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
7477 }
7478
7479 return kvm_mtrr_get_guest_memory_type(vcpu, gfn) << VMX_EPT_MT_EPTE_SHIFT;
7480 }
7481
7482 static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx, u32 new_ctl)
7483 {
7484
7485
7486
7487
7488
7489
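/*
 * Bits in the mask below keep their current value; only the remaining
 * secondary exec controls are taken from new_ctl, presumably because the
 * masked features are toggled elsewhere at runtime.
 */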
7490 u32 mask =
7491 SECONDARY_EXEC_SHADOW_VMCS |
7492 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
7493 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
7494 SECONDARY_EXEC_DESC;
7495
7496 u32 cur_ctl = secondary_exec_controls_get(vmx);
7497
7498 secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask));
7499 }
7500
7501
7502
7503
7504
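/*
 * Recompute the nested VMX CR0/CR4 fixed-1 MSRs from the guest's CPUID
 * so that L1 only sees as settable the CR4 bits whose underlying
 * features are exposed to it.
 */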
7505 static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
7506 {
7507 struct vcpu_vmx *vmx = to_vmx(vcpu);
7508 struct kvm_cpuid_entry2 *entry;
7509
7510 vmx->nested.msrs.cr0_fixed1 = 0xffffffff;
7511 vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE;
7512
7513 #define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \
7514 if (entry && (entry->_reg & (_cpuid_mask))) \
7515 vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \
7516 } while (0)
7517
7518 entry = kvm_find_cpuid_entry(vcpu, 0x1);
7519 cr4_fixed1_update(X86_CR4_VME, edx, feature_bit(VME));
7520 cr4_fixed1_update(X86_CR4_PVI, edx, feature_bit(VME));
7521 cr4_fixed1_update(X86_CR4_TSD, edx, feature_bit(TSC));
7522 cr4_fixed1_update(X86_CR4_DE, edx, feature_bit(DE));
7523 cr4_fixed1_update(X86_CR4_PSE, edx, feature_bit(PSE));
7524 cr4_fixed1_update(X86_CR4_PAE, edx, feature_bit(PAE));
7525 cr4_fixed1_update(X86_CR4_MCE, edx, feature_bit(MCE));
7526 cr4_fixed1_update(X86_CR4_PGE, edx, feature_bit(PGE));
7527 cr4_fixed1_update(X86_CR4_OSFXSR, edx, feature_bit(FXSR));
7528 cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, feature_bit(XMM));
7529 cr4_fixed1_update(X86_CR4_VMXE, ecx, feature_bit(VMX));
7530 cr4_fixed1_update(X86_CR4_SMXE, ecx, feature_bit(SMX));
7531 cr4_fixed1_update(X86_CR4_PCIDE, ecx, feature_bit(PCID));
7532 cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, feature_bit(XSAVE));
7533
7534 entry = kvm_find_cpuid_entry_index(vcpu, 0x7, 0);
7535 cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, feature_bit(FSGSBASE));
7536 cr4_fixed1_update(X86_CR4_SMEP, ebx, feature_bit(SMEP));
7537 cr4_fixed1_update(X86_CR4_SMAP, ebx, feature_bit(SMAP));
7538 cr4_fixed1_update(X86_CR4_PKE, ecx, feature_bit(PKU));
7539 cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP));
7540 cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57));
7541
7542 #undef cr4_fixed1_update
7543 }
7544
7545 static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
7546 {
7547 struct vcpu_vmx *vmx = to_vmx(vcpu);
7548 struct kvm_cpuid_entry2 *best = NULL;
7549 int i;
7550
7551 for (i = 0; i < PT_CPUID_LEAVES; i++) {
7552 best = kvm_find_cpuid_entry_index(vcpu, 0x14, i);
7553 if (!best)
7554 return;
7555 vmx->pt_desc.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM] = best->eax;
7556 vmx->pt_desc.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM] = best->ebx;
7557 vmx->pt_desc.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM] = best->ecx;
7558 vmx->pt_desc.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM] = best->edx;
7559 }
7560
7561
7562 vmx->pt_desc.num_address_ranges = intel_pt_validate_cap(vmx->pt_desc.caps,
7563 PT_CAP_num_address_ranges);
7564
7565
7566 vmx->pt_desc.ctl_bitmask = ~(RTIT_CTL_TRACEEN | RTIT_CTL_OS |
7567 RTIT_CTL_USR | RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC |
7568 RTIT_CTL_BRANCH_EN);
7569
7570
7571
7572
7573
7574 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_cr3_filtering))
7575 vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_CR3EN;
7576
7577
7578
7579
7580
7581 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc))
7582 vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_CYCLEACC |
7583 RTIT_CTL_CYC_THRESH | RTIT_CTL_PSB_FREQ);
7584
7585
7586
7587
7588 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc))
7589 vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_MTC_EN |
7590 RTIT_CTL_MTC_RANGE);
7591
7592
7593 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_ptwrite))
7594 vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_FUP_ON_PTW |
7595 RTIT_CTL_PTW_EN);
7596
7597
7598 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_power_event_trace))
7599 vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_PWR_EVT_EN;
7600
7601
7602 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_topa_output))
7603 vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_TOPA;
7604
7605
7606 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_output_subsys))
7607 vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_FABRIC_EN;
7608
7609
7610 for (i = 0; i < vmx->pt_desc.num_address_ranges; i++)
7611 vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
7612 }
7613
7614 static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
7615 {
7616 struct vcpu_vmx *vmx = to_vmx(vcpu);
7617
7618
7619 vcpu->arch.xsaves_enabled = false;
7620
7621 vmx_setup_uret_msrs(vmx);
7622
7623 if (cpu_has_secondary_exec_ctrls())
7624 vmcs_set_secondary_exec_control(vmx,
7625 vmx_secondary_exec_control(vmx));
7626
7627 if (nested_vmx_allowed(vcpu))
7628 vmx->msr_ia32_feature_control_valid_bits |=
7629 FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
7630 FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
7631 else
7632 vmx->msr_ia32_feature_control_valid_bits &=
7633 ~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
7634 FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX);
7635
7636 if (nested_vmx_allowed(vcpu))
7637 nested_vmx_cr_fixed1_bits_update(vcpu);
7638
7639 if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
7640 guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT))
7641 update_intel_pt_cfg(vcpu);
7642
7643 if (boot_cpu_has(X86_FEATURE_RTM)) {
7644 struct vmx_uret_msr *msr;
7645 msr = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
7646 if (msr) {
7647 bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM);
7648 vmx_set_guest_uret_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE);
7649 }
7650 }
7651
7652 if (kvm_cpu_cap_has(X86_FEATURE_XFD))
7653 vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
7654 !guest_cpuid_has(vcpu, X86_FEATURE_XFD));
7655
7656
7657 set_cr4_guest_host_mask(vmx);
7658
7659 vmx_write_encls_bitmap(vcpu, NULL);
7660 if (guest_cpuid_has(vcpu, X86_FEATURE_SGX))
7661 vmx->msr_ia32_feature_control_valid_bits |= FEAT_CTL_SGX_ENABLED;
7662 else
7663 vmx->msr_ia32_feature_control_valid_bits &= ~FEAT_CTL_SGX_ENABLED;
7664
7665 if (guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC))
7666 vmx->msr_ia32_feature_control_valid_bits |=
7667 FEAT_CTL_SGX_LC_ENABLED;
7668 else
7669 vmx->msr_ia32_feature_control_valid_bits &=
7670 ~FEAT_CTL_SGX_LC_ENABLED;
7671
7672
7673 vmx_update_exception_bitmap(vcpu);
7674 }
7675
7676 static __init void vmx_set_cpu_caps(void)
7677 {
7678 kvm_set_cpu_caps();
7679
7680
7681 if (nested)
7682 kvm_cpu_cap_set(X86_FEATURE_VMX);
7683
7684
7685 if (kvm_mpx_supported())
7686 kvm_cpu_cap_check_and_set(X86_FEATURE_MPX);
7687 if (!cpu_has_vmx_invpcid())
7688 kvm_cpu_cap_clear(X86_FEATURE_INVPCID);
7689 if (vmx_pt_mode_is_host_guest())
7690 kvm_cpu_cap_check_and_set(X86_FEATURE_INTEL_PT);
7691 if (vmx_pebs_supported()) {
7692 kvm_cpu_cap_check_and_set(X86_FEATURE_DS);
7693 kvm_cpu_cap_check_and_set(X86_FEATURE_DTES64);
7694 }
7695
7696 if (!enable_pmu)
7697 kvm_cpu_cap_clear(X86_FEATURE_PDCM);
7698
7699 if (!enable_sgx) {
7700 kvm_cpu_cap_clear(X86_FEATURE_SGX);
7701 kvm_cpu_cap_clear(X86_FEATURE_SGX_LC);
7702 kvm_cpu_cap_clear(X86_FEATURE_SGX1);
7703 kvm_cpu_cap_clear(X86_FEATURE_SGX2);
7704 }
7705
7706 if (vmx_umip_emulated())
7707 kvm_cpu_cap_set(X86_FEATURE_UMIP);
7708
7709
7710 kvm_caps.supported_xss = 0;
7711 if (!cpu_has_vmx_xsaves())
7712 kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
7713
7714
7715 if (!cpu_has_vmx_rdtscp()) {
7716 kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
7717 kvm_cpu_cap_clear(X86_FEATURE_RDPID);
7718 }
7719
7720 if (cpu_has_vmx_waitpkg())
7721 kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
7722 }
7723
7724 static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
7725 {
7726 to_vmx(vcpu)->req_immediate_exit = true;
7727 }
7728
7729 static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
7730 struct x86_instruction_info *info)
7731 {
7732 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7733 unsigned short port;
7734 bool intercept;
7735 int size;
7736
7737 if (info->intercept == x86_intercept_in ||
7738 info->intercept == x86_intercept_ins) {
7739 port = info->src_val;
7740 size = info->dst_bytes;
7741 } else {
7742 port = info->dst_val;
7743 size = info->src_bytes;
7744 }
7745
7746
7747
7748
7749
7750
7751
7752
7753 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
7754 intercept = nested_cpu_has(vmcs12,
7755 CPU_BASED_UNCOND_IO_EXITING);
7756 else
7757 intercept = nested_vmx_check_io_bitmaps(vcpu, port, size);
7758
7759
7760 return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
7761 }
7762
7763 static int vmx_check_intercept(struct kvm_vcpu *vcpu,
7764 struct x86_instruction_info *info,
7765 enum x86_intercept_stage stage,
7766 struct x86_exception *exception)
7767 {
7768 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7769
7770 switch (info->intercept) {
7771
7772
7773
7774
7775
7776 case x86_intercept_rdpid:
7777 if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) {
7778 exception->vector = UD_VECTOR;
7779 exception->error_code_valid = false;
7780 return X86EMUL_PROPAGATE_FAULT;
7781 }
7782 break;
7783
7784 case x86_intercept_in:
7785 case x86_intercept_ins:
7786 case x86_intercept_out:
7787 case x86_intercept_outs:
7788 return vmx_check_intercept_io(vcpu, info);
7789
7790 case x86_intercept_lgdt:
7791 case x86_intercept_lidt:
7792 case x86_intercept_lldt:
7793 case x86_intercept_ltr:
7794 case x86_intercept_sgdt:
7795 case x86_intercept_sidt:
7796 case x86_intercept_sldt:
7797 case x86_intercept_str:
7798 if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC))
7799 return X86EMUL_CONTINUE;
7800
7801
7802 break;
7803
7804
7805 default:
7806 break;
7807 }
7808
7809 return X86EMUL_UNHANDLEABLE;
7810 }
7811
7812 #ifdef CONFIG_X86_64
7813
7814 static inline int u64_shl_div_u64(u64 a, unsigned int shift,
7815 u64 divisor, u64 *result)
7816 {
7817 u64 low = a << shift, high = a >> (64 - shift);
7818
7819
7820 if (high >= divisor)
7821 return 1;
7822
7823
7824 asm("divq %2\n\t" : "=a" (low), "=d" (high) :
7825 "rm" (divisor), "0" (low), "1" (high));
7826 *result = low;
7827
7828 return 0;
7829 }
7830
7831 static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
7832 bool *expired)
7833 {
7834 struct vcpu_vmx *vmx;
7835 u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
7836 struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
7837
7838 vmx = to_vmx(vcpu);
7839 tscl = rdtsc();
7840 guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
7841 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
7842 lapic_timer_advance_cycles = nsec_to_cycles(vcpu,
7843 ktimer->timer_advance_ns);
7844
7845 if (delta_tsc > lapic_timer_advance_cycles)
7846 delta_tsc -= lapic_timer_advance_cycles;
7847 else
7848 delta_tsc = 0;
7849
7850
7851 if (vcpu->arch.l1_tsc_scaling_ratio != kvm_caps.default_tsc_scaling_ratio &&
7852 delta_tsc && u64_shl_div_u64(delta_tsc,
7853 kvm_caps.tsc_scaling_ratio_frac_bits,
7854 vcpu->arch.l1_tsc_scaling_ratio, &delta_tsc))
7855 return -ERANGE;
7856
7857
7858
7859
7860
7861
7862
7863 if (delta_tsc >> (cpu_preemption_timer_multi + 32))
7864 return -ERANGE;
7865
7866 vmx->hv_deadline_tsc = tscl + delta_tsc;
7867 *expired = !delta_tsc;
7868 return 0;
7869 }
7870
7871 static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
7872 {
7873 to_vmx(vcpu)->hv_deadline_tsc = -1;
7874 }
7875 #endif
7876
7877 static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
7878 {
7879 if (!kvm_pause_in_guest(vcpu->kvm))
7880 shrink_ple_window(vcpu);
7881 }
7882
7883 void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
7884 {
7885 struct vcpu_vmx *vmx = to_vmx(vcpu);
7886
7887 if (is_guest_mode(vcpu)) {
7888 vmx->nested.update_vmcs01_cpu_dirty_logging = true;
7889 return;
7890 }
7891
7892
7893
7894
7895
7896
7897 if (vcpu->kvm->arch.cpu_dirty_logging_count)
7898 secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_ENABLE_PML);
7899 else
7900 secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML);
7901 }
7902
7903 static void vmx_setup_mce(struct kvm_vcpu *vcpu)
7904 {
7905 if (vcpu->arch.mcg_cap & MCG_LMCE_P)
7906 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
7907 FEAT_CTL_LMCE_ENABLED;
7908 else
7909 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
7910 ~FEAT_CTL_LMCE_ENABLED;
7911 }
7912
7913 static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
7914 {
7915
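/* An SMI must not be injected while a nested VM-Enter is pending. */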
7916 if (to_vmx(vcpu)->nested.nested_run_pending)
7917 return -EBUSY;
7918 return !is_smm(vcpu);
7919 }
7920
7921 static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
7922 {
7923 struct vcpu_vmx *vmx = to_vmx(vcpu);
7924
7925
7926
7927
7928
7929
7930
7931
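/*
 * Stash whether the vCPU was in guest mode and whether VMXON was active,
 * and force an exit from L2, presumably because the emulated SMM
 * state-save area does not capture any of this VMX state.
 */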
7932 vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
7933 if (vmx->nested.smm.guest_mode)
7934 nested_vmx_vmexit(vcpu, -1, 0, 0);
7935
7936 vmx->nested.smm.vmxon = vmx->nested.vmxon;
7937 vmx->nested.vmxon = false;
7938 vmx_clear_hlt(vcpu);
7939 return 0;
7940 }
7941
7942 static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
7943 {
7944 struct vcpu_vmx *vmx = to_vmx(vcpu);
7945 int ret;
7946
7947 if (vmx->nested.smm.vmxon) {
7948 vmx->nested.vmxon = true;
7949 vmx->nested.smm.vmxon = false;
7950 }
7951
7952 if (vmx->nested.smm.guest_mode) {
7953 ret = nested_vmx_enter_non_root_mode(vcpu, false);
7954 if (ret)
7955 return ret;
7956
7957 vmx->nested.nested_run_pending = 1;
7958 vmx->nested.smm.guest_mode = false;
7959 }
7960 return 0;
7961 }
7962
7963 static void vmx_enable_smi_window(struct kvm_vcpu *vcpu)
7964 {
7965
7966 }
7967
7968 static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
7969 {
7970 return to_vmx(vcpu)->nested.vmxon && !is_guest_mode(vcpu);
7971 }
7972
7973 static void vmx_migrate_timers(struct kvm_vcpu *vcpu)
7974 {
7975 if (is_guest_mode(vcpu)) {
7976 struct hrtimer *timer = &to_vmx(vcpu)->nested.preemption_timer;
7977
7978 if (hrtimer_try_to_cancel(timer) == 1)
7979 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
7980 }
7981 }
7982
7983 static void vmx_hardware_unsetup(void)
7984 {
7985 kvm_set_posted_intr_wakeup_handler(NULL);
7986
7987 if (nested)
7988 nested_vmx_hardware_unsetup();
7989
7990 free_kvm_area();
7991 }
7992
7993 static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
7994 {
7995 ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
7996 BIT(APICV_INHIBIT_REASON_ABSENT) |
7997 BIT(APICV_INHIBIT_REASON_HYPERV) |
7998 BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
7999 BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
8000 BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
8001
8002 return supported & BIT(reason);
8003 }
8004
8005 static void vmx_vm_destroy(struct kvm *kvm)
8006 {
8007 struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
8008
8009 free_pages((unsigned long)kvm_vmx->pid_table, vmx_get_pid_table_order(kvm));
8010 }
8011
8012 static struct kvm_x86_ops vmx_x86_ops __initdata = {
8013 .name = "kvm_intel",
8014
8015 .hardware_unsetup = vmx_hardware_unsetup,
8016
8017 .hardware_enable = vmx_hardware_enable,
8018 .hardware_disable = vmx_hardware_disable,
8019 .has_emulated_msr = vmx_has_emulated_msr,
8020
8021 .vm_size = sizeof(struct kvm_vmx),
8022 .vm_init = vmx_vm_init,
8023 .vm_destroy = vmx_vm_destroy,
8024
8025 .vcpu_precreate = vmx_vcpu_precreate,
8026 .vcpu_create = vmx_vcpu_create,
8027 .vcpu_free = vmx_vcpu_free,
8028 .vcpu_reset = vmx_vcpu_reset,
8029
8030 .prepare_switch_to_guest = vmx_prepare_switch_to_guest,
8031 .vcpu_load = vmx_vcpu_load,
8032 .vcpu_put = vmx_vcpu_put,
8033
8034 .update_exception_bitmap = vmx_update_exception_bitmap,
8035 .get_msr_feature = vmx_get_msr_feature,
8036 .get_msr = vmx_get_msr,
8037 .set_msr = vmx_set_msr,
8038 .get_segment_base = vmx_get_segment_base,
8039 .get_segment = vmx_get_segment,
8040 .set_segment = vmx_set_segment,
8041 .get_cpl = vmx_get_cpl,
8042 .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
8043 .set_cr0 = vmx_set_cr0,
8044 .is_valid_cr4 = vmx_is_valid_cr4,
8045 .set_cr4 = vmx_set_cr4,
8046 .set_efer = vmx_set_efer,
8047 .get_idt = vmx_get_idt,
8048 .set_idt = vmx_set_idt,
8049 .get_gdt = vmx_get_gdt,
8050 .set_gdt = vmx_set_gdt,
8051 .set_dr7 = vmx_set_dr7,
8052 .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
8053 .cache_reg = vmx_cache_reg,
8054 .get_rflags = vmx_get_rflags,
8055 .set_rflags = vmx_set_rflags,
8056 .get_if_flag = vmx_get_if_flag,
8057
8058 .flush_tlb_all = vmx_flush_tlb_all,
8059 .flush_tlb_current = vmx_flush_tlb_current,
8060 .flush_tlb_gva = vmx_flush_tlb_gva,
8061 .flush_tlb_guest = vmx_flush_tlb_guest,
8062
8063 .vcpu_pre_run = vmx_vcpu_pre_run,
8064 .vcpu_run = vmx_vcpu_run,
8065 .handle_exit = vmx_handle_exit,
8066 .skip_emulated_instruction = vmx_skip_emulated_instruction,
8067 .update_emulated_instruction = vmx_update_emulated_instruction,
8068 .set_interrupt_shadow = vmx_set_interrupt_shadow,
8069 .get_interrupt_shadow = vmx_get_interrupt_shadow,
8070 .patch_hypercall = vmx_patch_hypercall,
8071 .inject_irq = vmx_inject_irq,
8072 .inject_nmi = vmx_inject_nmi,
8073 .queue_exception = vmx_queue_exception,
8074 .cancel_injection = vmx_cancel_injection,
8075 .interrupt_allowed = vmx_interrupt_allowed,
8076 .nmi_allowed = vmx_nmi_allowed,
8077 .get_nmi_mask = vmx_get_nmi_mask,
8078 .set_nmi_mask = vmx_set_nmi_mask,
8079 .enable_nmi_window = vmx_enable_nmi_window,
8080 .enable_irq_window = vmx_enable_irq_window,
8081 .update_cr8_intercept = vmx_update_cr8_intercept,
8082 .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
8083 .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
8084 .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
8085 .load_eoi_exitmap = vmx_load_eoi_exitmap,
8086 .apicv_post_state_restore = vmx_apicv_post_state_restore,
8087 .check_apicv_inhibit_reasons = vmx_check_apicv_inhibit_reasons,
8088 .hwapic_irr_update = vmx_hwapic_irr_update,
8089 .hwapic_isr_update = vmx_hwapic_isr_update,
8090 .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
8091 .sync_pir_to_irr = vmx_sync_pir_to_irr,
8092 .deliver_interrupt = vmx_deliver_interrupt,
8093 .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
8094
8095 .set_tss_addr = vmx_set_tss_addr,
8096 .set_identity_map_addr = vmx_set_identity_map_addr,
8097 .get_mt_mask = vmx_get_mt_mask,
8098
8099 .get_exit_info = vmx_get_exit_info,
8100
8101 .vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid,
8102
8103 .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
8104
8105 .get_l2_tsc_offset = vmx_get_l2_tsc_offset,
8106 .get_l2_tsc_multiplier = vmx_get_l2_tsc_multiplier,
8107 .write_tsc_offset = vmx_write_tsc_offset,
8108 .write_tsc_multiplier = vmx_write_tsc_multiplier,
8109
8110 .load_mmu_pgd = vmx_load_mmu_pgd,
8111
8112 .check_intercept = vmx_check_intercept,
8113 .handle_exit_irqoff = vmx_handle_exit_irqoff,
8114
8115 .request_immediate_exit = vmx_request_immediate_exit,
8116
8117 .sched_in = vmx_sched_in,
8118
8119 .cpu_dirty_log_size = PML_ENTITY_NUM,
8120 .update_cpu_dirty_logging = vmx_update_cpu_dirty_logging,
8121
8122 .nested_ops = &vmx_nested_ops,
8123
8124 .pi_update_irte = vmx_pi_update_irte,
8125 .pi_start_assignment = vmx_pi_start_assignment,
8126
8127 #ifdef CONFIG_X86_64
8128 .set_hv_timer = vmx_set_hv_timer,
8129 .cancel_hv_timer = vmx_cancel_hv_timer,
8130 #endif
8131
8132 .setup_mce = vmx_setup_mce,
8133
8134 .smi_allowed = vmx_smi_allowed,
8135 .enter_smm = vmx_enter_smm,
8136 .leave_smm = vmx_leave_smm,
8137 .enable_smi_window = vmx_enable_smi_window,
8138
8139 .can_emulate_instruction = vmx_can_emulate_instruction,
8140 .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
8141 .migrate_timers = vmx_migrate_timers,
8142
8143 .msr_filter_changed = vmx_msr_filter_changed,
8144 .complete_emulated_msr = kvm_complete_insn_gp,
8145
8146 .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
8147 };
8148
8149 static unsigned int vmx_handle_intel_pt_intr(void)
8150 {
8151 struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
8152
8153
8154 if (!vcpu || !kvm_handling_nmi_from_guest(vcpu))
8155 return 0;
8156
8157 kvm_make_request(KVM_REQ_PMI, vcpu);
8158 __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
8159 (unsigned long *)&vcpu->arch.pmu.global_status);
8160 return 1;
8161 }
8162
8163 static __init void vmx_setup_user_return_msrs(void)
8164 {
8165
8166
8167
8168
8169
8170
8171
8172
8173
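/*
 * These MSRs keep their guest values across VM-Exits and are only
 * restored to the host values via the user-return notifier, i.e. when
 * the CPU returns to userspace, rather than on every exit.
 */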
8174 const u32 vmx_uret_msrs_list[] = {
8175 #ifdef CONFIG_X86_64
8176 MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
8177 #endif
8178 MSR_EFER, MSR_TSC_AUX, MSR_STAR,
8179 MSR_IA32_TSX_CTRL,
8180 };
8181 int i;
8182
8183 BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
8184
8185 for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
8186 kvm_add_user_return_msr(vmx_uret_msrs_list[i]);
8187 }
8188
8189 static void __init vmx_setup_me_spte_mask(void)
8190 {
8191 u64 me_mask = 0;
8192
8193
8194
8195
8196
8197
8198
8199
8200
8201
8202
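/*
 * On MKTME/TDX capable systems boot_cpu_data.x86_phys_bits excludes the
 * KeyID bits while the MAXPHYADDR reported via kvm_get_shadow_phys_bits()
 * includes them; treat the bits in between as reserved in SPTEs.  No bit
 * is ever forced on for VMX, hence the 0 for the first argument below.
 */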
8203 if (boot_cpu_data.x86_phys_bits != kvm_get_shadow_phys_bits())
8204 me_mask = rsvd_bits(boot_cpu_data.x86_phys_bits,
8205 kvm_get_shadow_phys_bits() - 1);
8206
8207
8208
8209
8210
8211 kvm_mmu_set_me_spte_mask(0, me_mask);
8212 }
8213
8214 static struct kvm_x86_init_ops vmx_init_ops __initdata;
8215
8216 static __init int hardware_setup(void)
8217 {
8218 unsigned long host_bndcfgs;
8219 struct desc_ptr dt;
8220 int r;
8221
8222 store_idt(&dt);
8223 host_idt_base = dt.address;
8224
8225 vmx_setup_user_return_msrs();
8226
8227 if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
8228 return -EIO;
8229
8230 if (boot_cpu_has(X86_FEATURE_NX))
8231 kvm_enable_efer_bits(EFER_NX);
8232
8233 if (boot_cpu_has(X86_FEATURE_MPX)) {
8234 rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
8235 WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
8236 }
8237
8238 if (!cpu_has_vmx_mpx())
8239 kvm_caps.supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS |
8240 XFEATURE_MASK_BNDCSR);
8241
8242 if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
8243 !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
8244 enable_vpid = 0;
8245
8246 if (!cpu_has_vmx_ept() ||
8247 !cpu_has_vmx_ept_4levels() ||
8248 !cpu_has_vmx_ept_mt_wb() ||
8249 !cpu_has_vmx_invept_global())
8250 enable_ept = 0;
8251
8252
8253 if (!enable_ept && !boot_cpu_has(X86_FEATURE_NX)) {
8254 pr_err_ratelimited("kvm: NX (Execute Disable) not supported\n");
8255 return -EOPNOTSUPP;
8256 }
8257
8258 if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
8259 enable_ept_ad_bits = 0;
8260
8261 if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
8262 enable_unrestricted_guest = 0;
8263
8264 if (!cpu_has_vmx_flexpriority())
8265 flexpriority_enabled = 0;
8266
8267 if (!cpu_has_virtual_nmis())
8268 enable_vnmi = 0;
8269
8270
8271
8272
8273
8274
8275 if (!flexpriority_enabled)
8276 vmx_x86_ops.set_apic_access_page_addr = NULL;
8277
8278 if (!cpu_has_vmx_tpr_shadow())
8279 vmx_x86_ops.update_cr8_intercept = NULL;
8280
8281 #if IS_ENABLED(CONFIG_HYPERV)
8282 if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
8283 && enable_ept) {
8284 vmx_x86_ops.tlb_remote_flush = hv_remote_flush_tlb;
8285 vmx_x86_ops.tlb_remote_flush_with_range =
8286 hv_remote_flush_tlb_with_range;
8287 }
8288 #endif
8289
8290 if (!cpu_has_vmx_ple()) {
8291 ple_gap = 0;
8292 ple_window = 0;
8293 ple_window_grow = 0;
8294 ple_window_max = 0;
8295 ple_window_shrink = 0;
8296 }
8297
8298 if (!cpu_has_vmx_apicv())
8299 enable_apicv = 0;
8300 if (!enable_apicv)
8301 vmx_x86_ops.sync_pir_to_irr = NULL;
8302
8303 if (!enable_apicv || !cpu_has_vmx_ipiv())
8304 enable_ipiv = false;
8305
8306 if (cpu_has_vmx_tsc_scaling())
8307 kvm_caps.has_tsc_control = true;
8308
8309 kvm_caps.max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
8310 kvm_caps.tsc_scaling_ratio_frac_bits = 48;
8311 kvm_caps.has_bus_lock_exit = cpu_has_vmx_bus_lock_detection();
8312 kvm_caps.has_notify_vmexit = cpu_has_notify_vmexit();
8313
8314 set_bit(0, vmx_vpid_bitmap);
8315
8316 if (enable_ept)
8317 kvm_mmu_set_ept_masks(enable_ept_ad_bits,
8318 cpu_has_vmx_ept_execute_only());
8319
8320
8321
8322
8323
8324 vmx_setup_me_spte_mask();
8325
8326 kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(),
8327 ept_caps_to_lpage_level(vmx_capability.ept));
8328
8329
8330
8331
8332
8333 if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
8334 enable_pml = 0;
8335
8336 if (!enable_pml)
8337 vmx_x86_ops.cpu_dirty_log_size = 0;
8338
8339 if (!cpu_has_vmx_preemption_timer())
8340 enable_preemption_timer = false;
8341
8342 if (enable_preemption_timer) {
8343 u64 use_timer_freq = 5000ULL * 1000 * 1000;
8344 u64 vmx_msr;
8345
8346 rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
8347 cpu_preemption_timer_multi =
8348 vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
8349
8350 if (tsc_khz)
8351 use_timer_freq = (u64)tsc_khz * 1000;
8352 use_timer_freq >>= cpu_preemption_timer_multi;
8353
8354
8355
8356
8357
8358
8359 if (use_timer_freq > 0xffffffffu / 10)
8360 enable_preemption_timer = false;
8361 }
8362
8363 if (!enable_preemption_timer) {
8364 vmx_x86_ops.set_hv_timer = NULL;
8365 vmx_x86_ops.cancel_hv_timer = NULL;
8366 vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit;
8367 }
8368
8369 kvm_caps.supported_mce_cap |= MCG_LMCE_P;
8370 kvm_caps.supported_mce_cap |= MCG_CMCI_P;
8371
8372 if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST)
8373 return -EINVAL;
8374 if (!enable_ept || !enable_pmu || !cpu_has_vmx_intel_pt())
8375 pt_mode = PT_MODE_SYSTEM;
8376 if (pt_mode == PT_MODE_HOST_GUEST)
8377 vmx_init_ops.handle_intel_pt_intr = vmx_handle_intel_pt_intr;
8378 else
8379 vmx_init_ops.handle_intel_pt_intr = NULL;
8380
8381 setup_default_sgx_lepubkeyhash();
8382
8383 if (nested) {
8384 nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
8385 vmx_capability.ept);
8386
8387 r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
8388 if (r)
8389 return r;
8390 }
8391
8392 vmx_set_cpu_caps();
8393
8394 r = alloc_kvm_area();
8395 if (r && nested)
8396 nested_vmx_hardware_unsetup();
8397
8398 kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
8399
8400 return r;
8401 }
8402
8403 static struct kvm_x86_init_ops vmx_init_ops __initdata = {
8404 .cpu_has_kvm_support = cpu_has_kvm_support,
8405 .disabled_by_bios = vmx_disabled_by_bios,
8406 .check_processor_compatibility = vmx_check_processor_compat,
8407 .hardware_setup = hardware_setup,
8408 .handle_intel_pt_intr = NULL,
8409
8410 .runtime_ops = &vmx_x86_ops,
8411 .pmu_ops = &intel_pmu_ops,
8412 };
8413
8414 static void vmx_cleanup_l1d_flush(void)
8415 {
8416 if (vmx_l1d_flush_pages) {
8417 free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
8418 vmx_l1d_flush_pages = NULL;
8419 }
8420
8421 l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
8422 }
8423
8424 static void vmx_exit(void)
8425 {
8426 #ifdef CONFIG_KEXEC_CORE
8427 RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
8428 synchronize_rcu();
8429 #endif
8430
8431 kvm_exit();
8432
8433 #if IS_ENABLED(CONFIG_HYPERV)
8434 if (static_branch_unlikely(&enable_evmcs)) {
8435 int cpu;
8436 struct hv_vp_assist_page *vp_ap;
8437
8438
8439
8440
8441
8442 for_each_online_cpu(cpu) {
8443 vp_ap = hv_get_vp_assist_page(cpu);
8444
8445 if (!vp_ap)
8446 continue;
8447
8448 vp_ap->nested_control.features.directhypercall = 0;
8449 vp_ap->current_nested_vmcs = 0;
8450 vp_ap->enlighten_vmentry = 0;
8451 }
8452
8453 static_branch_disable(&enable_evmcs);
8454 }
8455 #endif
8456 vmx_cleanup_l1d_flush();
8457
8458 allow_smaller_maxphyaddr = false;
8459 }
8460 module_exit(vmx_exit);
8461
8462 static int __init vmx_init(void)
8463 {
8464 int r, cpu;
8465
8466 #if IS_ENABLED(CONFIG_HYPERV)
8467
8468
8469
8470
8471
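/*
 * Use Hyper-V's Enlightened VMCS only when KVM itself runs on Hyper-V,
 * the hypervisor recommends it, it offers a compatible eVMCS version,
 * and every online CPU has a VP assist page; otherwise stick with the
 * regular VMCS.
 */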
8472 if (enlightened_vmcs &&
8473 ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
8474 (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
8475 KVM_EVMCS_VERSION) {
8476
8477
8478 for_each_online_cpu(cpu) {
8479 if (!hv_get_vp_assist_page(cpu)) {
8480 enlightened_vmcs = false;
8481 break;
8482 }
8483 }
8484
8485 if (enlightened_vmcs) {
8486 pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
8487 static_branch_enable(&enable_evmcs);
8488 }
8489
8490 if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
8491 vmx_x86_ops.enable_direct_tlbflush
8492 = hv_enable_direct_tlbflush;
8493
8494 } else {
8495 enlightened_vmcs = false;
8496 }
8497 #endif
8498
8499 r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
8500 __alignof__(struct vcpu_vmx), THIS_MODULE);
8501 if (r)
8502 return r;
8503
8504
8505
8506
8507
8508
8509
8510
8511 r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
8512 if (r) {
8513 vmx_exit();
8514 return r;
8515 }
8516
8517 vmx_setup_fb_clear_ctrl();
8518
8519 for_each_possible_cpu(cpu) {
8520 INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
8521
8522 pi_init_cpu(cpu);
8523 }
8524
8525 #ifdef CONFIG_KEXEC_CORE
8526 rcu_assign_pointer(crash_vmclear_loaded_vmcss,
8527 crash_vmclear_local_loaded_vmcss);
8528 #endif
8529 vmx_check_vmcs12_offsets();
8530
8531
8532
8533
8534
8535
8536 if (!enable_ept)
8537 allow_smaller_maxphyaddr = true;
8538
8539 return 0;
8540 }
8541 module_init(vmx_init);