// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * derived from drivers/kvm/kvm_main.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2008 Qumranet, Inc.
 * Copyright IBM Corporation, 2008
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Amit Shah    <amit.shah@qumranet.com>
 *   Ben-Ami Yassour <benami@il.ibm.com>
 */

0019 #include <linux/kvm_host.h>
0020 #include "irq.h"
0021 #include "ioapic.h"
0022 #include "mmu.h"
0023 #include "i8254.h"
0024 #include "tss.h"
0025 #include "kvm_cache_regs.h"
0026 #include "kvm_emulate.h"
0027 #include "x86.h"
0028 #include "cpuid.h"
0029 #include "pmu.h"
0030 #include "hyperv.h"
0031 #include "lapic.h"
0032 #include "xen.h"
0033
0034 #include <linux/clocksource.h>
0035 #include <linux/interrupt.h>
0036 #include <linux/kvm.h>
0037 #include <linux/fs.h>
0038 #include <linux/vmalloc.h>
0039 #include <linux/export.h>
0040 #include <linux/moduleparam.h>
0041 #include <linux/mman.h>
0042 #include <linux/highmem.h>
0043 #include <linux/iommu.h>
0044 #include <linux/cpufreq.h>
0045 #include <linux/user-return-notifier.h>
0046 #include <linux/srcu.h>
0047 #include <linux/slab.h>
0048 #include <linux/perf_event.h>
0049 #include <linux/uaccess.h>
0050 #include <linux/hash.h>
0051 #include <linux/pci.h>
0052 #include <linux/timekeeper_internal.h>
0053 #include <linux/pvclock_gtod.h>
0054 #include <linux/kvm_irqfd.h>
0055 #include <linux/irqbypass.h>
0056 #include <linux/sched/stat.h>
0057 #include <linux/sched/isolation.h>
0058 #include <linux/mem_encrypt.h>
0059 #include <linux/entry-kvm.h>
0060 #include <linux/suspend.h>
0061
0062 #include <trace/events/kvm.h>
0063
0064 #include <asm/debugreg.h>
0065 #include <asm/msr.h>
0066 #include <asm/desc.h>
0067 #include <asm/mce.h>
0068 #include <asm/pkru.h>
0069 #include <linux/kernel_stat.h>
0070 #include <asm/fpu/api.h>
0071 #include <asm/fpu/xcr.h>
0072 #include <asm/fpu/xstate.h>
0073 #include <asm/pvclock.h>
0074 #include <asm/div64.h>
0075 #include <asm/irq_remapping.h>
0076 #include <asm/mshyperv.h>
0077 #include <asm/hypervisor.h>
0078 #include <asm/tlbflush.h>
0079 #include <asm/intel_pt.h>
0080 #include <asm/emulate_prefix.h>
0081 #include <asm/sgx.h>
0082 #include <clocksource/hyperv_timer.h>
0083
0084 #define CREATE_TRACE_POINTS
0085 #include "trace.h"
0086
0087 #define MAX_IO_MSRS 256
0088 #define KVM_MAX_MCE_BANKS 32
0089
0090 struct kvm_caps kvm_caps __read_mostly = {
0091 .supported_mce_cap = MCG_CTL_P | MCG_SER_P,
0092 };
0093 EXPORT_SYMBOL_GPL(kvm_caps);
0094
0095 #define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))
0096
0097 #define emul_to_vcpu(ctxt) \
0098 ((struct kvm_vcpu *)(ctxt)->vcpu)
0099
/*
 * EFER defaults: SCE is always allowed because SYSCALL/SYSRET are emulated
 * by KVM; LME and LMA are additionally allowed on 64-bit hosts.
 */
0104 #ifdef CONFIG_X86_64
static u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
0107 #else
0108 static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
0109 #endif
0110
0111 static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
0112
0113 #define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE)
0114
0115 #define KVM_CAP_PMU_VALID_MASK KVM_PMU_CAP_DISABLE
0116
0117 #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
0118 KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
0119
0120 static void update_cr8_intercept(struct kvm_vcpu *vcpu);
0121 static void process_nmi(struct kvm_vcpu *vcpu);
0122 static void process_smi(struct kvm_vcpu *vcpu);
0123 static void enter_smm(struct kvm_vcpu *vcpu);
0124 static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
0125 static void store_regs(struct kvm_vcpu *vcpu);
0126 static int sync_regs(struct kvm_vcpu *vcpu);
0127 static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu);
0128
0129 static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
0130 static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
0131
0132 struct kvm_x86_ops kvm_x86_ops __read_mostly;
0133
0134 #define KVM_X86_OP(func) \
0135 DEFINE_STATIC_CALL_NULL(kvm_x86_##func, \
0136 *(((struct kvm_x86_ops *)0)->func));
0137 #define KVM_X86_OP_OPTIONAL KVM_X86_OP
0138 #define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP
0139 #include <asm/kvm-x86-ops.h>
0140 EXPORT_STATIC_CALL_GPL(kvm_x86_get_cs_db_l_bits);
0141 EXPORT_STATIC_CALL_GPL(kvm_x86_cache_reg);
0142
0143 static bool __read_mostly ignore_msrs = 0;
0144 module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
0145
0146 bool __read_mostly report_ignored_msrs = true;
0147 module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
0148 EXPORT_SYMBOL_GPL(report_ignored_msrs);
0149
0150 unsigned int min_timer_period_us = 200;
0151 module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
0152
0153 static bool __read_mostly kvmclock_periodic_sync = true;
0154 module_param(kvmclock_periodic_sync, bool, S_IRUGO);
0155
0156
0157 static u32 __read_mostly tsc_tolerance_ppm = 250;
0158 module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
0159
/*
 * Local APIC timer advance (tscdeadline mode only), in nanoseconds.
 * -1 selects automatic tuning; 0 disables the advancement entirely.
 */
0166 static int __read_mostly lapic_timer_advance_ns = -1;
0167 module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);
0168
0169 static bool __read_mostly vector_hashing = true;
0170 module_param(vector_hashing, bool, S_IRUGO);
0171
0172 bool __read_mostly enable_vmware_backdoor = false;
0173 module_param(enable_vmware_backdoor, bool, S_IRUGO);
0174 EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
0175
0176 static bool __read_mostly force_emulation_prefix = false;
0177 module_param(force_emulation_prefix, bool, S_IRUGO);
0178
0179 int __read_mostly pi_inject_timer = -1;
0180 module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
0181
0182
0183 bool __read_mostly enable_pmu = true;
0184 EXPORT_SYMBOL_GPL(enable_pmu);
0185 module_param(enable_pmu, bool, 0444);
0186
0187 bool __read_mostly eager_page_split = true;
0188 module_param(eager_page_split, bool, 0644);
0189
/*
 * MSRs that are only consumed on return to userspace (e.g. the SYSCALL MSRs
 * and TSC_AUX) can keep the guest's value while the kernel runs; they are
 * restored lazily through a user-return notifier.
 */
0195 #define KVM_MAX_NR_USER_RETURN_MSRS 16
0196
0197 struct kvm_user_return_msrs {
0198 struct user_return_notifier urn;
0199 bool registered;
0200 struct kvm_user_return_msr_values {
0201 u64 host;
0202 u64 curr;
0203 } values[KVM_MAX_NR_USER_RETURN_MSRS];
0204 };
0205
0206 u32 __read_mostly kvm_nr_uret_msrs;
0207 EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs);
0208 static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];
0209 static struct kvm_user_return_msrs __percpu *user_return_msrs;
0210
0211 #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
0212 | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
0213 | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
0214 | XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)
0215
0216 u64 __read_mostly host_efer;
0217 EXPORT_SYMBOL_GPL(host_efer);
0218
0219 bool __read_mostly allow_smaller_maxphyaddr = 0;
0220 EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
0221
0222 bool __read_mostly enable_apicv = true;
0223 EXPORT_SYMBOL_GPL(enable_apicv);
0224
0225 u64 __read_mostly host_xss;
0226 EXPORT_SYMBOL_GPL(host_xss);
0227
0228 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
0229 KVM_GENERIC_VM_STATS(),
0230 STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
0231 STATS_DESC_COUNTER(VM, mmu_pte_write),
0232 STATS_DESC_COUNTER(VM, mmu_pde_zapped),
0233 STATS_DESC_COUNTER(VM, mmu_flooded),
0234 STATS_DESC_COUNTER(VM, mmu_recycled),
0235 STATS_DESC_COUNTER(VM, mmu_cache_miss),
0236 STATS_DESC_ICOUNTER(VM, mmu_unsync),
0237 STATS_DESC_ICOUNTER(VM, pages_4k),
0238 STATS_DESC_ICOUNTER(VM, pages_2m),
0239 STATS_DESC_ICOUNTER(VM, pages_1g),
0240 STATS_DESC_ICOUNTER(VM, nx_lpage_splits),
0241 STATS_DESC_PCOUNTER(VM, max_mmu_rmap_size),
0242 STATS_DESC_PCOUNTER(VM, max_mmu_page_hash_collisions)
0243 };
0244
0245 const struct kvm_stats_header kvm_vm_stats_header = {
0246 .name_size = KVM_STATS_NAME_SIZE,
0247 .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
0248 .id_offset = sizeof(struct kvm_stats_header),
0249 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
0250 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
0251 sizeof(kvm_vm_stats_desc),
0252 };
0253
0254 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
0255 KVM_GENERIC_VCPU_STATS(),
0256 STATS_DESC_COUNTER(VCPU, pf_taken),
0257 STATS_DESC_COUNTER(VCPU, pf_fixed),
0258 STATS_DESC_COUNTER(VCPU, pf_emulate),
0259 STATS_DESC_COUNTER(VCPU, pf_spurious),
0260 STATS_DESC_COUNTER(VCPU, pf_fast),
0261 STATS_DESC_COUNTER(VCPU, pf_mmio_spte_created),
0262 STATS_DESC_COUNTER(VCPU, pf_guest),
0263 STATS_DESC_COUNTER(VCPU, tlb_flush),
0264 STATS_DESC_COUNTER(VCPU, invlpg),
0265 STATS_DESC_COUNTER(VCPU, exits),
0266 STATS_DESC_COUNTER(VCPU, io_exits),
0267 STATS_DESC_COUNTER(VCPU, mmio_exits),
0268 STATS_DESC_COUNTER(VCPU, signal_exits),
0269 STATS_DESC_COUNTER(VCPU, irq_window_exits),
0270 STATS_DESC_COUNTER(VCPU, nmi_window_exits),
0271 STATS_DESC_COUNTER(VCPU, l1d_flush),
0272 STATS_DESC_COUNTER(VCPU, halt_exits),
0273 STATS_DESC_COUNTER(VCPU, request_irq_exits),
0274 STATS_DESC_COUNTER(VCPU, irq_exits),
0275 STATS_DESC_COUNTER(VCPU, host_state_reload),
0276 STATS_DESC_COUNTER(VCPU, fpu_reload),
0277 STATS_DESC_COUNTER(VCPU, insn_emulation),
0278 STATS_DESC_COUNTER(VCPU, insn_emulation_fail),
0279 STATS_DESC_COUNTER(VCPU, hypercalls),
0280 STATS_DESC_COUNTER(VCPU, irq_injections),
0281 STATS_DESC_COUNTER(VCPU, nmi_injections),
0282 STATS_DESC_COUNTER(VCPU, req_event),
0283 STATS_DESC_COUNTER(VCPU, nested_run),
0284 STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
0285 STATS_DESC_COUNTER(VCPU, directed_yield_successful),
0286 STATS_DESC_COUNTER(VCPU, preemption_reported),
0287 STATS_DESC_COUNTER(VCPU, preemption_other),
0288 STATS_DESC_IBOOLEAN(VCPU, guest_mode),
0289 STATS_DESC_COUNTER(VCPU, notify_window_exits),
0290 };
0291
0292 const struct kvm_stats_header kvm_vcpu_stats_header = {
0293 .name_size = KVM_STATS_NAME_SIZE,
0294 .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
0295 .id_offset = sizeof(struct kvm_stats_header),
0296 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
0297 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
0298 sizeof(kvm_vcpu_stats_desc),
0299 };
0300
0301 u64 __read_mostly host_xcr0;
0302
0303 static struct kmem_cache *x86_emulator_cache;
0304
/*
 * Called when KVM encounters an MSR access it does not handle: returns true
 * (optionally logging the access) if it should be silently ignored per the
 * ignore_msrs module parameter, false if it should be treated as an error.
 */
0309 static bool kvm_msr_ignored_check(u32 msr, u64 data, bool write)
0310 {
0311 const char *op = write ? "wrmsr" : "rdmsr";
0312
0313 if (ignore_msrs) {
0314 if (report_ignored_msrs)
0315 kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n",
0316 op, msr, data);
0317
0318 return true;
0319 } else {
0320 kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n",
0321 op, msr, data);
0322 return false;
0323 }
0324 }
0325
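/*
 * Slab cache for emulator contexts.  Only the region from 'src' onward is
 * whitelisted for usercopy; the earlier fields never reach userspace.
 */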
0326 static struct kmem_cache *kvm_alloc_emulator_cache(void)
0327 {
0328 unsigned int useroffset = offsetof(struct x86_emulate_ctxt, src);
0329 unsigned int size = sizeof(struct x86_emulate_ctxt);
0330
0331 return kmem_cache_create_usercopy("x86_emulator", size,
0332 __alignof__(struct x86_emulate_ctxt),
0333 SLAB_ACCOUNT, useroffset,
0334 size - useroffset, NULL);
0335 }
0336
0337 static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
0338
0339 static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
0340 {
0341 int i;
0342 for (i = 0; i < ASYNC_PF_PER_VCPU; i++)
0343 vcpu->arch.apf.gfns[i] = ~0;
0344 }
0345
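/*
 * User-return notifier callback: restore the host values of any user-return
 * MSRs that still hold guest values, then unregister the notifier.
 */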
0346 static void kvm_on_user_return(struct user_return_notifier *urn)
0347 {
0348 unsigned slot;
0349 struct kvm_user_return_msrs *msrs
0350 = container_of(urn, struct kvm_user_return_msrs, urn);
0351 struct kvm_user_return_msr_values *values;
0352 unsigned long flags;
0353
	/*
	 * Disable interrupts while unregistering: this path can also be
	 * reached, and race with itself, via kvm_arch_hardware_disable().
	 */
0358 local_irq_save(flags);
0359 if (msrs->registered) {
0360 msrs->registered = false;
0361 user_return_notifier_unregister(urn);
0362 }
0363 local_irq_restore(flags);
0364 for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) {
0365 values = &msrs->values[slot];
0366 if (values->host != values->curr) {
0367 wrmsrl(kvm_uret_msrs_list[slot], values->host);
0368 values->curr = values->host;
0369 }
0370 }
0371 }
0372
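/*
 * Probe whether an MSR actually exists on the host by reading it and writing
 * the same value back, with preemption disabled so both accesses hit the
 * same CPU.
 */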
0373 static int kvm_probe_user_return_msr(u32 msr)
0374 {
0375 u64 val;
0376 int ret;
0377
0378 preempt_disable();
0379 ret = rdmsrl_safe(msr, &val);
0380 if (ret)
0381 goto out;
0382 ret = wrmsrl_safe(msr, val);
0383 out:
0384 preempt_enable();
0385 return ret;
0386 }
0387
0388 int kvm_add_user_return_msr(u32 msr)
0389 {
0390 BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS);
0391
0392 if (kvm_probe_user_return_msr(msr))
0393 return -1;
0394
0395 kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr;
0396 return kvm_nr_uret_msrs++;
0397 }
0398 EXPORT_SYMBOL_GPL(kvm_add_user_return_msr);
0399
0400 int kvm_find_user_return_msr(u32 msr)
0401 {
0402 int i;
0403
0404 for (i = 0; i < kvm_nr_uret_msrs; ++i) {
0405 if (kvm_uret_msrs_list[i] == msr)
0406 return i;
0407 }
0408 return -1;
0409 }
0410 EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);
0411
0412 static void kvm_user_return_msr_cpu_online(void)
0413 {
0414 unsigned int cpu = smp_processor_id();
0415 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
0416 u64 value;
0417 int i;
0418
0419 for (i = 0; i < kvm_nr_uret_msrs; ++i) {
0420 rdmsrl_safe(kvm_uret_msrs_list[i], &value);
0421 msrs->values[i].host = value;
0422 msrs->values[i].curr = value;
0423 }
0424 }
0425
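/*
 * Load a guest value (masked by @mask) into user-return MSR @slot on the
 * current CPU, registering the user-return notifier on first use so the host
 * value is restored before returning to userspace.
 */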
0426 int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
0427 {
0428 unsigned int cpu = smp_processor_id();
0429 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
0430 int err;
0431
0432 value = (value & mask) | (msrs->values[slot].host & ~mask);
0433 if (value == msrs->values[slot].curr)
0434 return 0;
0435 err = wrmsrl_safe(kvm_uret_msrs_list[slot], value);
0436 if (err)
0437 return 1;
0438
0439 msrs->values[slot].curr = value;
0440 if (!msrs->registered) {
0441 msrs->urn.on_user_return = kvm_on_user_return;
0442 user_return_notifier_register(&msrs->urn);
0443 msrs->registered = true;
0444 }
0445 return 0;
0446 }
0447 EXPORT_SYMBOL_GPL(kvm_set_user_return_msr);
0448
0449 static void drop_user_return_notifiers(void)
0450 {
0451 unsigned int cpu = smp_processor_id();
0452 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
0453
0454 if (msrs->registered)
0455 kvm_on_user_return(&msrs->urn);
0456 }
0457
0458 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
0459 {
0460 return vcpu->arch.apic_base;
0461 }
0462 EXPORT_SYMBOL_GPL(kvm_get_apic_base);
0463
0464 enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
0465 {
0466 return kvm_apic_mode(kvm_get_apic_base(vcpu));
0467 }
0468 EXPORT_SYMBOL_GPL(kvm_get_apic_mode);
0469
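/*
 * Validate and set the APIC base MSR.  Reserved bits and invalid LAPIC modes
 * are rejected, and mode transitions that are architecturally illegal (e.g.
 * x2APIC -> xAPIC without passing through "disabled") are refused unless the
 * write is host initiated.
 */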
0470 int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
0471 {
0472 enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
0473 enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
0474 u64 reserved_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu) | 0x2ff |
0475 (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
0476
0477 if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
0478 return 1;
0479 if (!msr_info->host_initiated) {
0480 if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
0481 return 1;
0482 if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
0483 return 1;
0484 }
0485
0486 kvm_lapic_set_base(vcpu, msr_info->data);
0487 kvm_recalculate_apic_map(vcpu->kvm);
0488 return 0;
0489 }
0490 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
0491
/*
 * Handle a fault on a hardware virtualization (VMX or SVM) instruction.
 * Such faults are expected only while KVM is being torn down for a reboot,
 * in which case the fault is simply ignored; otherwise something is badly
 * broken and we want the BUG.
 */
0499 noinstr void kvm_spurious_fault(void)
0500 {
	/* Fault while not rebooting.  We want the trace. */
0502 BUG_ON(!kvm_rebooting);
0503 }
0504 EXPORT_SYMBOL_GPL(kvm_spurious_fault);
0505
0506 #define EXCPT_BENIGN 0
0507 #define EXCPT_CONTRIBUTORY 1
0508 #define EXCPT_PF 2
0509
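/*
 * Classify an exception vector as benign, contributory, or page fault;
 * kvm_multiple_exception() uses this to decide when two queued exceptions
 * must be merged into a double fault.
 */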
0510 static int exception_class(int vector)
0511 {
0512 switch (vector) {
0513 case PF_VECTOR:
0514 return EXCPT_PF;
0515 case DE_VECTOR:
0516 case TS_VECTOR:
0517 case NP_VECTOR:
0518 case SS_VECTOR:
0519 case GP_VECTOR:
0520 return EXCPT_CONTRIBUTORY;
0521 default:
0522 break;
0523 }
0524 return EXCPT_BENIGN;
0525 }
0526
0527 #define EXCPT_FAULT 0
0528 #define EXCPT_TRAP 1
0529 #define EXCPT_ABORT 2
0530 #define EXCPT_INTERRUPT 3
0531
0532 static int exception_type(int vector)
0533 {
0534 unsigned int mask;
0535
0536 if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
0537 return EXCPT_INTERRUPT;
0538
0539 mask = 1 << vector;
0540
0541
0542 if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
0543 return EXCPT_TRAP;
0544
0545 if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
0546 return EXCPT_ABORT;
0547
0548
0549 return EXCPT_FAULT;
0550 }
0551
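/*
 * Transfer a queued exception's payload into architectural state at delivery
 * time: a #DB payload is merged into DR6, a #PF payload becomes CR2.
 */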
0552 void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
0553 {
0554 unsigned nr = vcpu->arch.exception.nr;
0555 bool has_payload = vcpu->arch.exception.has_payload;
0556 unsigned long payload = vcpu->arch.exception.payload;
0557
0558 if (!has_payload)
0559 return;
0560
0561 switch (nr) {
0562 case DB_VECTOR:
		/*
		 * Per the SDM, certain debug exceptions may clear only bits
		 * 0-3 of DR6; the remaining contents are never cleared by the
		 * CPU.  Clear just the trap bits before merging the payload.
		 */
0568 vcpu->arch.dr6 &= ~DR_TRAP_BITS;
0569

		/*
		 * The #DB payload uses the "pending debug exceptions" format,
		 * in which the DR6_ACTIVE_LOW bits have inverted polarity
		 * relative to DR6 itself.  The OR-then-XOR sequence below
		 * converts the payload into DR6 form while merging it in.
		 */
0585 vcpu->arch.dr6 |= DR6_ACTIVE_LOW;
0586 vcpu->arch.dr6 |= payload;
0587 vcpu->arch.dr6 ^= payload & DR6_ACTIVE_LOW;
0588

		/*
		 * Bit 12 (enabled breakpoint) is defined in the "pending
		 * debug exceptions" field under VMX, but it is reserved and
		 * must be zero in DR6, so clear it explicitly.
		 */
0595 vcpu->arch.dr6 &= ~BIT(12);
0596 break;
0597 case PF_VECTOR:
0598 vcpu->arch.cr2 = payload;
0599 break;
0600 }
0601
0602 vcpu->arch.exception.has_payload = false;
0603 vcpu->arch.exception.payload = 0;
0604 }
0605 EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
0606
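/*
 * Core exception queueing.  Records a pending or injected exception; if a
 * second exception arrives while one is already queued, it is either merged
 * into a double fault (contributory + contributory, or #PF followed by a
 * non-benign exception) or, if a #DF is already queued, escalated to a
 * triple-fault (shutdown) request.
 */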
0607 static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
0608 unsigned nr, bool has_error, u32 error_code,
0609 bool has_payload, unsigned long payload, bool reinject)
0610 {
0611 u32 prev_nr;
0612 int class1, class2;
0613
0614 kvm_make_request(KVM_REQ_EVENT, vcpu);
0615
0616 if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
0617 queue:
0618 if (reinject) {
			/*
			 * On VM-Entry, an exception can be pending if and
			 * only if event injection was blocked by
			 * nested_run_pending.  In that case, however,
			 * vcpu_enter_guest() requests an immediate exit, and
			 * the guest shouldn't proceed far enough to need
			 * reinjection.
			 */
0627 WARN_ON_ONCE(vcpu->arch.exception.pending);
0628 vcpu->arch.exception.injected = true;
0629 if (WARN_ON_ONCE(has_payload)) {
				/*
				 * A reinjected event has already been
				 * delivered, so its payload (e.g. the DR6 or
				 * CR2 update) was consumed; drop it.
				 */
0634 has_payload = false;
0635 payload = 0;
0636 }
0637 } else {
0638 vcpu->arch.exception.pending = true;
0639 vcpu->arch.exception.injected = false;
0640 }
0641 vcpu->arch.exception.has_error_code = has_error;
0642 vcpu->arch.exception.nr = nr;
0643 vcpu->arch.exception.error_code = error_code;
0644 vcpu->arch.exception.has_payload = has_payload;
0645 vcpu->arch.exception.payload = payload;
0646 if (!is_guest_mode(vcpu))
0647 kvm_deliver_exception_payload(vcpu);
0648 return;
0649 }
0650
0651
0652 prev_nr = vcpu->arch.exception.nr;
0653 if (prev_nr == DF_VECTOR) {
0654
0655 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
0656 return;
0657 }
0658 class1 = exception_class(prev_nr);
0659 class2 = exception_class(nr);
0660 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
0661 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
		/*
		 * Two contributory exceptions, or a page fault followed by a
		 * non-benign exception: per the SDM, promote the second
		 * exception to a double fault.
		 */
0667 vcpu->arch.exception.pending = true;
0668 vcpu->arch.exception.injected = false;
0669 vcpu->arch.exception.has_error_code = true;
0670 vcpu->arch.exception.nr = DF_VECTOR;
0671 vcpu->arch.exception.error_code = 0;
0672 vcpu->arch.exception.has_payload = false;
0673 vcpu->arch.exception.payload = 0;
0674 } else
		/*
		 * Replace the previous exception with the new one in the
		 * hope that instruction re-execution will regenerate the
		 * lost exception.
		 */
0678 goto queue;
0679 }
0680
0681 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
0682 {
0683 kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
0684 }
0685 EXPORT_SYMBOL_GPL(kvm_queue_exception);
0686
0687 void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
0688 {
0689 kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true);
0690 }
0691 EXPORT_SYMBOL_GPL(kvm_requeue_exception);
0692
0693 void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
0694 unsigned long payload)
0695 {
0696 kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
0697 }
0698 EXPORT_SYMBOL_GPL(kvm_queue_exception_p);
0699
0700 static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
0701 u32 error_code, unsigned long payload)
0702 {
0703 kvm_multiple_exception(vcpu, nr, true, error_code,
0704 true, payload, false);
0705 }
0706
0707 int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
0708 {
0709 if (err)
0710 kvm_inject_gp(vcpu, 0);
0711 else
0712 return kvm_skip_emulated_instruction(vcpu);
0713
0714 return 1;
0715 }
0716 EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
0717
0718 static int complete_emulated_insn_gp(struct kvm_vcpu *vcpu, int err)
0719 {
0720 if (err) {
0721 kvm_inject_gp(vcpu, 0);
0722 return 1;
0723 }
0724
0725 return kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE | EMULTYPE_SKIP |
0726 EMULTYPE_COMPLETE_USER_EXIT);
0727 }
0728
0729 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
0730 {
0731 ++vcpu->stat.pf_guest;
0732 vcpu->arch.exception.nested_apf =
0733 is_guest_mode(vcpu) && fault->async_page_fault;
0734 if (vcpu->arch.exception.nested_apf) {
0735 vcpu->arch.apf.nested_apf_token = fault->address;
0736 kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
0737 } else {
0738 kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
0739 fault->address);
0740 }
0741 }
0742 EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
0743
0744
0745 bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
0746 struct x86_exception *fault)
0747 {
0748 struct kvm_mmu *fault_mmu;
0749 WARN_ON_ONCE(fault->vector != PF_VECTOR);
0750
0751 fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu :
0752 vcpu->arch.walk_mmu;
0753
	/*
	 * Invalidate any TLB/paging-structure entry for the faulting address
	 * so the emulated fault behaves like a real one; reserved-bit and
	 * not-present faults have nothing to invalidate.
	 */
0758 if ((fault->error_code & PFERR_PRESENT_MASK) &&
0759 !(fault->error_code & PFERR_RSVD_MASK))
0760 kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
0761 fault_mmu->root.hpa);
0762
	/*
	 * Give vendor code (via nested_ops->handle_page_fault_workaround) a
	 * chance to handle this emulated #PF itself, e.g. by reflecting it to
	 * L1, when an exception is already being injected while the vCPU is
	 * in guest mode.
	 */
0777 if (unlikely(vcpu->arch.exception.injected && is_guest_mode(vcpu)) &&
0778 kvm_x86_ops.nested_ops->handle_page_fault_workaround(vcpu, fault))
0779 return true;
0780
0781 fault_mmu->inject_page_fault(vcpu, fault);
0782 return false;
0783 }
0784 EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
0785
0786 void kvm_inject_nmi(struct kvm_vcpu *vcpu)
0787 {
0788 atomic_inc(&vcpu->arch.nmi_queued);
0789 kvm_make_request(KVM_REQ_NMI, vcpu);
0790 }
0791 EXPORT_SYMBOL_GPL(kvm_inject_nmi);
0792
0793 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
0794 {
0795 kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false);
0796 }
0797 EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
0798
0799 void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
0800 {
0801 kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true);
0802 }
0803 EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
0804
/*
 * Check that the current privilege level is at most @required_cpl; if not,
 * queue a #GP and return false.
 */
0809 bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
0810 {
0811 if (static_call(kvm_x86_get_cpl)(vcpu) <= required_cpl)
0812 return true;
0813 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
0814 return false;
0815 }
0816 EXPORT_SYMBOL_GPL(kvm_require_cpl);
0817
0818 bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
0819 {
0820 if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
0821 return true;
0822
0823 kvm_queue_exception(vcpu, UD_VECTOR);
0824 return false;
0825 }
0826 EXPORT_SYMBOL_GPL(kvm_require_dr);
0827
0828 static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
0829 {
0830 return vcpu->arch.reserved_gpa_bits | rsvd_bits(5, 8) | rsvd_bits(1, 2);
0831 }
0832
/*
 * Load the PAE PDPTEs referenced by CR3.  Returns 1 if they are all valid,
 * 0 otherwise.
 */
0836 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
0837 {
0838 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
0839 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
0840 gpa_t real_gpa;
0841 int i;
0842 int ret;
0843 u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
0844
	/*
	 * If the MMU is nested, CR3 holds an L2 GPA and needs to be
	 * translated to an L1 GPA before the PDPTEs can be read.
	 */
0849 real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(pdpt_gfn),
0850 PFERR_USER_MASK | PFERR_WRITE_MASK, NULL);
0851 if (real_gpa == INVALID_GPA)
0852 return 0;
0853
0854
0855 ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(real_gpa), pdpte,
0856 cr3 & GENMASK(11, 5), sizeof(pdpte));
0857 if (ret < 0)
0858 return 0;
0859
0860 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
0861 if ((pdpte[i] & PT_PRESENT_MASK) &&
0862 (pdpte[i] & pdptr_rsvd_bits(vcpu))) {
0863 return 0;
0864 }
0865 }
0866
	/*
	 * If the PDPTEs changed while using shadow paging, the existing
	 * roots are stale and must be freed before switching to the new ones.
	 */
0871 if (!tdp_enabled && memcmp(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs)))
0872 kvm_mmu_free_roots(vcpu->kvm, mmu, KVM_MMU_ROOT_CURRENT);
0873
0874 memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
0875 kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
0876 kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);
0877 vcpu->arch.pdptrs_from_userspace = false;
0878
0879 return 1;
0880 }
0881 EXPORT_SYMBOL_GPL(load_pdptrs);
0882
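/*
 * Apply the vendor-independent side effects of a CR0 change: reset async-#PF
 * state and flush the guest TLB when paging is toggled, reset the MMU when
 * role bits change, and zap mappings when CR0.CD is toggled with noncoherent
 * DMA attached (unless the CD/NW quirk disables that).
 */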
0883 void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
0884 {
0885 if ((cr0 ^ old_cr0) & X86_CR0_PG) {
0886 kvm_clear_async_pf_completion_queue(vcpu);
0887 kvm_async_pf_hash_reset(vcpu);
0888
		/*
		 * Clearing CR0.PG is defined to flush the TLB from the
		 * guest's perspective, so request a guest TLB flush when
		 * paging is disabled.
		 */
0893 if (!(cr0 & X86_CR0_PG))
0894 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
0895 }
0896
0897 if ((cr0 ^ old_cr0) & KVM_MMU_CR0_ROLE_BITS)
0898 kvm_mmu_reset_context(vcpu);
0899
0900 if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
0901 kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
0902 !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
0903 kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
0904 }
0905 EXPORT_SYMBOL_GPL(kvm_post_set_cr0);
0906
0907 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
0908 {
0909 unsigned long old_cr0 = kvm_read_cr0(vcpu);
0910
0911 cr0 |= X86_CR0_ET;
0912
0913 #ifdef CONFIG_X86_64
0914 if (cr0 & 0xffffffff00000000UL)
0915 return 1;
0916 #endif
0917
0918 cr0 &= ~CR0_RESERVED_BITS;
0919
0920 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
0921 return 1;
0922
0923 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
0924 return 1;
0925
0926 #ifdef CONFIG_X86_64
0927 if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
0928 (cr0 & X86_CR0_PG)) {
0929 int cs_db, cs_l;
0930
0931 if (!is_pae(vcpu))
0932 return 1;
0933 static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
0934 if (cs_l)
0935 return 1;
0936 }
0937 #endif
0938 if (!(vcpu->arch.efer & EFER_LME) && (cr0 & X86_CR0_PG) &&
0939 is_pae(vcpu) && ((cr0 ^ old_cr0) & X86_CR0_PDPTR_BITS) &&
0940 !load_pdptrs(vcpu, kvm_read_cr3(vcpu)))
0941 return 1;
0942
0943 if (!(cr0 & X86_CR0_PG) &&
0944 (is_64_bit_mode(vcpu) || kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)))
0945 return 1;
0946
0947 static_call(kvm_x86_set_cr0)(vcpu, cr0);
0948
0949 kvm_post_set_cr0(vcpu, old_cr0, cr0);
0950
0951 return 0;
0952 }
0953 EXPORT_SYMBOL_GPL(kvm_set_cr0);
0954
0955 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
0956 {
0957 (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
0958 }
0959 EXPORT_SYMBOL_GPL(kvm_lmsw);
0960
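/*
 * Switch extended-state context to the guest's values before entering the
 * guest: XCR0, IA32_XSS (when XSAVES is in use) and PKRU are loaded only when
 * they differ from the host's, and nothing is touched for protected guests.
 */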
0961 void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
0962 {
0963 if (vcpu->arch.guest_state_protected)
0964 return;
0965
0966 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
0967
0968 if (vcpu->arch.xcr0 != host_xcr0)
0969 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
0970
0971 if (vcpu->arch.xsaves_enabled &&
0972 vcpu->arch.ia32_xss != host_xss)
0973 wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
0974 }
0975
0976 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
0977 if (static_cpu_has(X86_FEATURE_PKU) &&
0978 vcpu->arch.pkru != vcpu->arch.host_pkru &&
0979 ((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
0980 kvm_read_cr4_bits(vcpu, X86_CR4_PKE)))
0981 write_pkru(vcpu->arch.pkru);
0982 #endif
0983 }
0984 EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
0985
0986 void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
0987 {
0988 if (vcpu->arch.guest_state_protected)
0989 return;
0990
0991 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
0992 if (static_cpu_has(X86_FEATURE_PKU) &&
0993 ((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
0994 kvm_read_cr4_bits(vcpu, X86_CR4_PKE))) {
0995 vcpu->arch.pkru = rdpkru();
0996 if (vcpu->arch.pkru != vcpu->arch.host_pkru)
0997 write_pkru(vcpu->arch.host_pkru);
0998 }
0999 #endif
1000
1001 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
1002
1003 if (vcpu->arch.xcr0 != host_xcr0)
1004 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
1005
1006 if (vcpu->arch.xsaves_enabled &&
1007 vcpu->arch.ia32_xss != host_xss)
1008 wrmsrl(MSR_IA32_XSS, host_xss);
1009 }
1010
1011 }
1012 EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
1013
1014 #ifdef CONFIG_X86_64
1015 static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu)
1016 {
1017 return vcpu->arch.guest_supported_xcr0 & XFEATURE_MASK_USER_DYNAMIC;
1018 }
1019 #endif
1020
1021 static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
1022 {
1023 u64 xcr0 = xcr;
1024 u64 old_xcr0 = vcpu->arch.xcr0;
1025 u64 valid_bits;
1026
1027
1028 if (index != XCR_XFEATURE_ENABLED_MASK)
1029 return 1;
1030 if (!(xcr0 & XFEATURE_MASK_FP))
1031 return 1;
1032 if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
1033 return 1;
1034
	/*
	 * Don't allow the guest to enable xfeatures that KVM can't save.
	 * XFEATURE_MASK_FP is always legal because xcr0 bit 0 must be set
	 * even if the vCPU model doesn't advertise XSAVE.
	 */
1040 valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
1041 if (xcr0 & ~valid_bits)
1042 return 1;
1043
1044 if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
1045 (!(xcr0 & XFEATURE_MASK_BNDCSR)))
1046 return 1;
1047
1048 if (xcr0 & XFEATURE_MASK_AVX512) {
1049 if (!(xcr0 & XFEATURE_MASK_YMM))
1050 return 1;
1051 if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
1052 return 1;
1053 }
1054
1055 if ((xcr0 & XFEATURE_MASK_XTILE) &&
1056 ((xcr0 & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE))
1057 return 1;
1058
1059 vcpu->arch.xcr0 = xcr0;
1060
1061 if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
1062 kvm_update_cpuid_runtime(vcpu);
1063 return 0;
1064 }
1065
1066 int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
1067 {
1068
1069 if (static_call(kvm_x86_get_cpl)(vcpu) != 0 ||
1070 __kvm_set_xcr(vcpu, kvm_rcx_read(vcpu), kvm_read_edx_eax(vcpu))) {
1071 kvm_inject_gp(vcpu, 0);
1072 return 1;
1073 }
1074
1075 return kvm_skip_emulated_instruction(vcpu);
1076 }
1077 EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
1078
1079 bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1080 {
1081 if (cr4 & cr4_reserved_bits)
1082 return false;
1083
1084 if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
1085 return false;
1086
1087 return true;
1088 }
1089 EXPORT_SYMBOL_GPL(__kvm_is_valid_cr4);
1090
1091 static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1092 {
1093 return __kvm_is_valid_cr4(vcpu, cr4) &&
1094 static_call(kvm_x86_is_valid_cr4)(vcpu, cr4);
1095 }
1096
1097 void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4)
1098 {
1099 if ((cr4 ^ old_cr4) & KVM_MMU_CR4_ROLE_BITS)
1100 kvm_mmu_reset_context(vcpu);
1101
	/*
	 * Enabling CR4.PCIDE while using shadow paging invalidates the
	 * assumptions behind the cached MMU roots, so unload the MMU and let
	 * it rebuild from scratch.
	 */
1110 if (!tdp_enabled &&
1111 (cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE))
1112 kvm_mmu_unload(vcpu);
1113
	/*
	 * Toggling CR4.PGE, or clearing CR4.PCIDE, flushes all TLB entries
	 * including global translations, so a full guest TLB flush is
	 * required.
	 */
1122 if (((cr4 ^ old_cr4) & X86_CR4_PGE) ||
1123 (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
1124 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
1125
	/*
	 * Toggling CR4.PAE, or enabling CR4.SMEP, only affects the current
	 * context, so flushing the current PCID is sufficient.
	 */
1132 else if (((cr4 ^ old_cr4) & X86_CR4_PAE) ||
1133 ((cr4 & X86_CR4_SMEP) && !(old_cr4 & X86_CR4_SMEP)))
1134 kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
1135
1136 }
1137 EXPORT_SYMBOL_GPL(kvm_post_set_cr4);
1138
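/*
 * Emulate a guest write to CR4: reject invalid values (reserved bits,
 * clearing PAE or toggling LA57 in long mode, illegal PCIDE enables), reload
 * the PDPTEs when required, then let vendor code update hardware and apply
 * the common side effects in kvm_post_set_cr4().
 */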
1139 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1140 {
1141 unsigned long old_cr4 = kvm_read_cr4(vcpu);
1142
1143 if (!kvm_is_valid_cr4(vcpu, cr4))
1144 return 1;
1145
1146 if (is_long_mode(vcpu)) {
1147 if (!(cr4 & X86_CR4_PAE))
1148 return 1;
1149 if ((cr4 ^ old_cr4) & X86_CR4_LA57)
1150 return 1;
1151 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
1152 && ((cr4 ^ old_cr4) & X86_CR4_PDPTR_BITS)
1153 && !load_pdptrs(vcpu, kvm_read_cr3(vcpu)))
1154 return 1;
1155
1156 if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
1157 if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
1158 return 1;
1159
		/* PCIDE can only be set when CR3[11:0] == 0 and in long mode. */
1161 if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
1162 return 1;
1163 }
1164
1165 static_call(kvm_x86_set_cr4)(vcpu, cr4);
1166
1167 kvm_post_set_cr4(vcpu, old_cr4, cr4);
1168
1169 return 0;
1170 }
1171 EXPORT_SYMBOL_GPL(kvm_set_cr4);
1172
1173 static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid)
1174 {
1175 struct kvm_mmu *mmu = vcpu->arch.mmu;
1176 unsigned long roots_to_free = 0;
1177 int i;
1178
	/*
	 * MOV CR3 and INVPCID are architecturally defined to flush TLB
	 * entries for the targeted PCID.  With TDP enabled KVM does not
	 * track per-PCID shadow roots, so simply forward the request as a
	 * guest TLB flush.
	 */
1186 if (unlikely(tdp_enabled)) {
1187 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
1188 return;
1189 }
1190
	/*
	 * If the invalidated PCID is the currently active one, the shadow
	 * page tables must be synced and the current context flushed.
	 */
1196 if (kvm_get_active_pcid(vcpu) == pcid) {
1197 kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
1198 kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
1199 }
1200
	/*
	 * With CR4.PCIDE clear there are no cached previous roots keyed by
	 * PCID to invalidate, so the loop below can be skipped.
	 */
1206 if (!kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
1207 return;
1208
1209 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
1210 if (kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd) == pcid)
1211 roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
1212
1213 kvm_mmu_free_roots(vcpu->kvm, mmu, roots_to_free);
1214 }
1215
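/*
 * Emulate a guest write to CR3: validate the new value, reload the PDPTEs
 * for PAE paging, switch to (or create) the matching MMU root, and apply the
 * PCID/TLB-flush semantics of the load.
 */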
1216 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1217 {
1218 bool skip_tlb_flush = false;
1219 unsigned long pcid = 0;
1220 #ifdef CONFIG_X86_64
1221 bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
1222
1223 if (pcid_enabled) {
1224 skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
1225 cr3 &= ~X86_CR3_PCID_NOFLUSH;
1226 pcid = cr3 & X86_CR3_PCID_MASK;
1227 }
1228 #endif
1229
1230
1231 if (cr3 == kvm_read_cr3(vcpu) && !is_pae_paging(vcpu))
1232 goto handle_tlb_flush;
1233
	/*
	 * Do not condition the GPA check on long mode: this helper is also
	 * used to stuff CR3 during emulation (e.g. RSM), where the current
	 * vCPU mode may not match the mode being loaded.
	 */
1239 if (kvm_vcpu_is_illegal_gpa(vcpu, cr3))
1240 return 1;
1241
1242 if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, cr3))
1243 return 1;
1244
1245 if (cr3 != kvm_read_cr3(vcpu))
1246 kvm_mmu_new_pgd(vcpu, cr3);
1247
1248 vcpu->arch.cr3 = cr3;
1249 kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
1250
1251
1252 handle_tlb_flush:
	/*
	 * A CR3 load without the NOFLUSH bit flushes all TLB entries for the
	 * loaded PCID.  With PCIDs disabled the PCID is effectively 0 and
	 * the NOFLUSH bit cannot be set, so the flush is unconditional.
	 */
1260 if (!skip_tlb_flush)
1261 kvm_invalidate_pcid(vcpu, pcid);
1262
1263 return 0;
1264 }
1265 EXPORT_SYMBOL_GPL(kvm_set_cr3);
1266
1267 int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
1268 {
1269 if (cr8 & CR8_RESERVED_BITS)
1270 return 1;
1271 if (lapic_in_kernel(vcpu))
1272 kvm_lapic_set_tpr(vcpu, cr8);
1273 else
1274 vcpu->arch.cr8 = cr8;
1275 return 0;
1276 }
1277 EXPORT_SYMBOL_GPL(kvm_set_cr8);
1278
1279 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
1280 {
1281 if (lapic_in_kernel(vcpu))
1282 return kvm_lapic_get_cr8(vcpu);
1283 else
1284 return vcpu->arch.cr8;
1285 }
1286 EXPORT_SYMBOL_GPL(kvm_get_cr8);
1287
1288 static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
1289 {
1290 int i;
1291
1292 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
1293 for (i = 0; i < KVM_NR_DB_REGS; i++)
1294 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
1295 }
1296 }
1297
1298 void kvm_update_dr7(struct kvm_vcpu *vcpu)
1299 {
1300 unsigned long dr7;
1301
1302 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1303 dr7 = vcpu->arch.guest_debug_dr7;
1304 else
1305 dr7 = vcpu->arch.dr7;
1306 static_call(kvm_x86_set_dr7)(vcpu, dr7);
1307 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
1308 if (dr7 & DR7_BP_EN_MASK)
1309 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
1310 }
1311 EXPORT_SYMBOL_GPL(kvm_update_dr7);
1312
1313 static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
1314 {
1315 u64 fixed = DR6_FIXED_1;
1316
1317 if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
1318 fixed |= DR6_RTM;
1319
1320 if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
1321 fixed |= DR6_BUS_LOCK;
1322 return fixed;
1323 }
1324
1325 int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
1326 {
1327 size_t size = ARRAY_SIZE(vcpu->arch.db);
1328
1329 switch (dr) {
1330 case 0 ... 3:
1331 vcpu->arch.db[array_index_nospec(dr, size)] = val;
1332 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1333 vcpu->arch.eff_db[dr] = val;
1334 break;
1335 case 4:
1336 case 6:
1337 if (!kvm_dr6_valid(val))
1338 return 1;
1339 vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
1340 break;
1341 case 5:
1342 default:
1343 if (!kvm_dr7_valid(val))
1344 return 1;
1345 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
1346 kvm_update_dr7(vcpu);
1347 break;
1348 }
1349
1350 return 0;
1351 }
1352 EXPORT_SYMBOL_GPL(kvm_set_dr);
1353
1354 void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
1355 {
1356 size_t size = ARRAY_SIZE(vcpu->arch.db);
1357
1358 switch (dr) {
1359 case 0 ... 3:
1360 *val = vcpu->arch.db[array_index_nospec(dr, size)];
1361 break;
1362 case 4:
1363 case 6:
1364 *val = vcpu->arch.dr6;
1365 break;
1366 case 5:
1367 default:
1368 *val = vcpu->arch.dr7;
1369 break;
1370 }
1371 }
1372 EXPORT_SYMBOL_GPL(kvm_get_dr);
1373
1374 int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
1375 {
1376 u32 ecx = kvm_rcx_read(vcpu);
1377 u64 data;
1378
1379 if (kvm_pmu_rdpmc(vcpu, ecx, &data)) {
1380 kvm_inject_gp(vcpu, 0);
1381 return 1;
1382 }
1383
1384 kvm_rax_write(vcpu, (u32)data);
1385 kvm_rdx_write(vcpu, data >> 32);
1386 return kvm_skip_emulated_instruction(vcpu);
1387 }
1388 EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
1389
/*
 * MSRs exposed to userspace via KVM_GET_MSR_INDEX_LIST for save/restore.
 * These MSRs are backed by hardware, so the list declared here is filtered
 * at runtime down to what the host CPU and the vendor module actually
 * support before it is reported to userspace.
 */
1402 static const u32 msrs_to_save_all[] = {
1403 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
1404 MSR_STAR,
1405 #ifdef CONFIG_X86_64
1406 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
1407 #endif
1408 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
1409 MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
1410 MSR_IA32_SPEC_CTRL,
1411 MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
1412 MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
1413 MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
1414 MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
1415 MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
1416 MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
1417 MSR_IA32_UMWAIT_CONTROL,
1418
1419 MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
1420 MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
1421 MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
1422 MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
1423 MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
1424 MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
1425 MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
1426 MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
1427 MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
1428 MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
1429 MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
1430 MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
1431 MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
1432 MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
1433 MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
1434 MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
1435 MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
1436 MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
1437 MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
1438 MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
1439 MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
1440 MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
1441 MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
1442
1443 MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
1444 MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
1445 MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
1446 MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
1447 MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
1448 MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
1449 MSR_IA32_XFD, MSR_IA32_XFD_ERR,
1450 };
1451
1452 static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
1453 static unsigned num_msrs_to_save;
1454
1455 static const u32 emulated_msrs_all[] = {
1456 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
1457 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
1458 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
1459 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
1460 HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
1461 HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
1462 HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
1463 HV_X64_MSR_RESET,
1464 HV_X64_MSR_VP_INDEX,
1465 HV_X64_MSR_VP_RUNTIME,
1466 HV_X64_MSR_SCONTROL,
1467 HV_X64_MSR_STIMER0_CONFIG,
1468 HV_X64_MSR_VP_ASSIST_PAGE,
1469 HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
1470 HV_X64_MSR_TSC_EMULATION_STATUS,
1471 HV_X64_MSR_SYNDBG_OPTIONS,
1472 HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS,
1473 HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER,
1474 HV_X64_MSR_SYNDBG_PENDING_BUFFER,
1475
1476 MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
1477 MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,
1478
1479 MSR_IA32_TSC_ADJUST,
1480 MSR_IA32_TSC_DEADLINE,
1481 MSR_IA32_ARCH_CAPABILITIES,
1482 MSR_IA32_PERF_CAPABILITIES,
1483 MSR_IA32_MISC_ENABLE,
1484 MSR_IA32_MCG_STATUS,
1485 MSR_IA32_MCG_CTL,
1486 MSR_IA32_MCG_EXT_CTL,
1487 MSR_IA32_SMBASE,
1488 MSR_SMI_COUNT,
1489 MSR_PLATFORM_INFO,
1490 MSR_MISC_FEATURES_ENABLES,
1491 MSR_AMD64_VIRT_SPEC_CTRL,
1492 MSR_AMD64_TSC_RATIO,
1493 MSR_IA32_POWER_CTL,
1494 MSR_IA32_UCODE_REV,
1495
1496
1497
1498
1499
1500
1501
1502
1503 MSR_IA32_VMX_BASIC,
1504 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1505 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1506 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1507 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1508 MSR_IA32_VMX_MISC,
1509 MSR_IA32_VMX_CR0_FIXED0,
1510 MSR_IA32_VMX_CR4_FIXED0,
1511 MSR_IA32_VMX_VMCS_ENUM,
1512 MSR_IA32_VMX_PROCBASED_CTLS2,
1513 MSR_IA32_VMX_EPT_VPID_CAP,
1514 MSR_IA32_VMX_VMFUNC,
1515
1516 MSR_K7_HWCR,
1517 MSR_KVM_POLL_CONTROL,
1518 };
1519
1520 static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
1521 static unsigned num_emulated_msrs;
1522
/*
 * List of MSR-based features: read-only MSRs that describe CPU/VMM
 * capabilities and can be queried by userspace even without a VM, via
 * KVM_GET_MSRS on the system (/dev/kvm) file descriptor.
 */
1527 static const u32 msr_based_features_all[] = {
1528 MSR_IA32_VMX_BASIC,
1529 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1530 MSR_IA32_VMX_PINBASED_CTLS,
1531 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1532 MSR_IA32_VMX_PROCBASED_CTLS,
1533 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1534 MSR_IA32_VMX_EXIT_CTLS,
1535 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1536 MSR_IA32_VMX_ENTRY_CTLS,
1537 MSR_IA32_VMX_MISC,
1538 MSR_IA32_VMX_CR0_FIXED0,
1539 MSR_IA32_VMX_CR0_FIXED1,
1540 MSR_IA32_VMX_CR4_FIXED0,
1541 MSR_IA32_VMX_CR4_FIXED1,
1542 MSR_IA32_VMX_VMCS_ENUM,
1543 MSR_IA32_VMX_PROCBASED_CTLS2,
1544 MSR_IA32_VMX_EPT_VPID_CAP,
1545 MSR_IA32_VMX_VMFUNC,
1546
1547 MSR_F10H_DECFG,
1548 MSR_IA32_UCODE_REV,
1549 MSR_IA32_ARCH_CAPABILITIES,
1550 MSR_IA32_PERF_CAPABILITIES,
1551 };
1552
1553 static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
1554 static unsigned int num_msr_based_features;
1555
/*
 * Bits of MSR_IA32_ARCH_CAPABILITIES that KVM knows how to virtualize and is
 * willing to advertise to the guest.  Unknown host bits are masked off so a
 * guest never sees a capability KVM cannot honor.
 */
1567 #define KVM_SUPPORTED_ARCH_CAP \
1568 (ARCH_CAP_RDCL_NO | ARCH_CAP_IBRS_ALL | ARCH_CAP_RSBA | \
1569 ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
1570 ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
1571 ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
1572 ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO)
1573
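/*
 * Build the value of MSR_IA32_ARCH_CAPABILITIES reported to the guest: the
 * supported subset of the host's bits, plus bits KVM can synthesize because
 * its own mitigations (e.g. L1D flushing on VM-Entry) make them safe.
 */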
1574 static u64 kvm_get_arch_capabilities(void)
1575 {
1576 u64 data = 0;
1577
1578 if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
1579 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
1580 data &= KVM_SUPPORTED_ARCH_CAP;
1581 }
1582
1583
1584
1585
1586
1587
1588
1589 data |= ARCH_CAP_PSCHANGE_MC_NO;
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600 if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
1601 data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
1602
1603 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
1604 data |= ARCH_CAP_RDCL_NO;
1605 if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
1606 data |= ARCH_CAP_SSB_NO;
1607 if (!boot_cpu_has_bug(X86_BUG_MDS))
1608 data |= ARCH_CAP_MDS_NO;
1609
1610 if (!boot_cpu_has(X86_FEATURE_RTM)) {
1611
1612
1613
1614
1615
1616
1617
1618 data &= ~ARCH_CAP_TAA_NO;
1619 } else if (!boot_cpu_has_bug(X86_BUG_TAA)) {
1620 data |= ARCH_CAP_TAA_NO;
1621 } else {
1622
1623
1624
1625
1626
1627 }
1628
1629 return data;
1630 }
1631
1632 static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
1633 {
1634 switch (msr->index) {
1635 case MSR_IA32_ARCH_CAPABILITIES:
1636 msr->data = kvm_get_arch_capabilities();
1637 break;
1638 case MSR_IA32_UCODE_REV:
1639 rdmsrl_safe(msr->index, &msr->data);
1640 break;
1641 default:
1642 return static_call(kvm_x86_get_msr_feature)(msr);
1643 }
1644 return 0;
1645 }
1646
1647 static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1648 {
1649 struct kvm_msr_entry msr;
1650 int r;
1651
1652 msr.index = index;
1653 r = kvm_get_msr_feature(&msr);
1654
1655 if (r == KVM_MSR_RET_INVALID) {
1656
1657 *data = 0;
1658 if (kvm_msr_ignored_check(index, 0, false))
1659 r = 0;
1660 }
1661
1662 if (r)
1663 return r;
1664
1665 *data = msr.data;
1666
1667 return 0;
1668 }
1669
1670 static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1671 {
1672 if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
1673 return false;
1674
1675 if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
1676 return false;
1677
1678 if (efer & (EFER_LME | EFER_LMA) &&
1679 !guest_cpuid_has(vcpu, X86_FEATURE_LM))
1680 return false;
1681
1682 if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX))
1683 return false;
1684
1685 return true;
1686
1687 }
1688 bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1689 {
1690 if (efer & efer_reserved_bits)
1691 return false;
1692
1693 return __kvm_valid_efer(vcpu, efer);
1694 }
1695 EXPORT_SYMBOL_GPL(kvm_valid_efer);
1696
1697 static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1698 {
1699 u64 old_efer = vcpu->arch.efer;
1700 u64 efer = msr_info->data;
1701 int r;
1702
1703 if (efer & efer_reserved_bits)
1704 return 1;
1705
1706 if (!msr_info->host_initiated) {
1707 if (!__kvm_valid_efer(vcpu, efer))
1708 return 1;
1709
1710 if (is_paging(vcpu) &&
1711 (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
1712 return 1;
1713 }
1714
1715 efer &= ~EFER_LMA;
1716 efer |= vcpu->arch.efer & EFER_LMA;
1717
1718 r = static_call(kvm_x86_set_efer)(vcpu, efer);
1719 if (r) {
1720 WARN_ON(r > 0);
1721 return r;
1722 }
1723
1724 if ((efer ^ old_efer) & KVM_MMU_EFER_ROLE_BITS)
1725 kvm_mmu_reset_context(vcpu);
1726
1727 return 0;
1728 }
1729
1730 void kvm_enable_efer_bits(u64 mask)
1731 {
1732 efer_reserved_bits &= ~mask;
1733 }
1734 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
1735
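/*
 * Consult the userspace-installed MSR filter to decide whether the guest may
 * perform a read or write (@type) of MSR @index.  x2APIC MSRs are never
 * filtered, and the default policy applies when no range matches.
 */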
1736 bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
1737 {
1738 struct kvm_x86_msr_filter *msr_filter;
1739 struct msr_bitmap_range *ranges;
1740 struct kvm *kvm = vcpu->kvm;
1741 bool allowed;
1742 int idx;
1743 u32 i;
1744
	/* x2APIC MSRs (0x800 - 0x8ff) are never subject to MSR filtering. */
1746 if (index >= 0x800 && index <= 0x8ff)
1747 return true;
1748
1749 idx = srcu_read_lock(&kvm->srcu);
1750
1751 msr_filter = srcu_dereference(kvm->arch.msr_filter, &kvm->srcu);
1752 if (!msr_filter) {
1753 allowed = true;
1754 goto out;
1755 }
1756
1757 allowed = msr_filter->default_allow;
1758 ranges = msr_filter->ranges;
1759
1760 for (i = 0; i < msr_filter->count; i++) {
1761 u32 start = ranges[i].base;
1762 u32 end = start + ranges[i].nmsrs;
1763 u32 flags = ranges[i].flags;
1764 unsigned long *bitmap = ranges[i].bitmap;
1765
1766 if ((index >= start) && (index < end) && (flags & type)) {
1767 allowed = !!test_bit(index - start, bitmap);
1768 break;
1769 }
1770 }
1771
1772 out:
1773 srcu_read_unlock(&kvm->srcu, idx);
1774
1775 return allowed;
1776 }
1777 EXPORT_SYMBOL_GPL(kvm_msr_allowed);
1778
/*
 * Write @data into the MSR specified by @index.  Select MSR-specific fault
 * checks are bypassed if @host_initiated is %true.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
1785 static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
1786 bool host_initiated)
1787 {
1788 struct msr_data msr;
1789
1790 switch (index) {
1791 case MSR_FS_BASE:
1792 case MSR_GS_BASE:
1793 case MSR_KERNEL_GS_BASE:
1794 case MSR_CSTAR:
1795 case MSR_LSTAR:
1796 if (is_noncanonical_address(data, vcpu))
1797 return 1;
1798 break;
1799 case MSR_IA32_SYSENTER_EIP:
1800 case MSR_IA32_SYSENTER_ESP:
		/*
		 * Unlike the *_BASE MSRs above, a non-canonical SYSENTER
		 * value is not rejected; the stored address is instead
		 * canonicalized for the vCPU's virtual-address width, which
		 * approximates how hardware truncates the value it keeps.
		 */
1813 data = __canonical_address(data, vcpu_virt_addr_bits(vcpu));
1814 break;
1815 case MSR_TSC_AUX:
1816 if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
1817 return 1;
1818
1819 if (!host_initiated &&
1820 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
1821 !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
1822 return 1;
1823
		/*
		 * Per Intel's SDM, bits 63:32 of TSC_AUX are reserved and a
		 * WRMSR with any of them set faults.  AMD permits the write
		 * but only 32 bits are implemented, so truncate in that case.
		 */
1833 if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
1834 return 1;
1835
1836 data = (u32)data;
1837 break;
1838 }
1839
1840 msr.data = data;
1841 msr.index = index;
1842 msr.host_initiated = host_initiated;
1843
1844 return static_call(kvm_x86_set_msr)(vcpu, &msr);
1845 }
1846
1847 static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
1848 u32 index, u64 data, bool host_initiated)
1849 {
1850 int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
1851
1852 if (ret == KVM_MSR_RET_INVALID)
1853 if (kvm_msr_ignored_check(index, data, true))
1854 ret = 0;
1855
1856 return ret;
1857 }
1858
/*
 * Read the MSR specified by @index into @data.  Select MSR-specific fault
 * checks are bypassed if @host_initiated is %true.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
1865 int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
1866 bool host_initiated)
1867 {
1868 struct msr_data msr;
1869 int ret;
1870
1871 switch (index) {
1872 case MSR_TSC_AUX:
1873 if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
1874 return 1;
1875
1876 if (!host_initiated &&
1877 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
1878 !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
1879 return 1;
1880 break;
1881 }
1882
1883 msr.index = index;
1884 msr.host_initiated = host_initiated;
1885
1886 ret = static_call(kvm_x86_get_msr)(vcpu, &msr);
1887 if (!ret)
1888 *data = msr.data;
1889 return ret;
1890 }
1891
1892 static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
1893 u32 index, u64 *data, bool host_initiated)
1894 {
1895 int ret = __kvm_get_msr(vcpu, index, data, host_initiated);
1896
1897 if (ret == KVM_MSR_RET_INVALID) {
1898
1899 *data = 0;
1900 if (kvm_msr_ignored_check(index, 0, false))
1901 ret = 0;
1902 }
1903
1904 return ret;
1905 }
1906
1907 static int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data)
1908 {
1909 if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
1910 return KVM_MSR_RET_FILTERED;
1911 return kvm_get_msr_ignored_check(vcpu, index, data, false);
1912 }
1913
1914 static int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data)
1915 {
1916 if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
1917 return KVM_MSR_RET_FILTERED;
1918 return kvm_set_msr_ignored_check(vcpu, index, data, false);
1919 }
1920
1921 int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
1922 {
1923 return kvm_get_msr_ignored_check(vcpu, index, data, false);
1924 }
1925 EXPORT_SYMBOL_GPL(kvm_get_msr);
1926
1927 int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
1928 {
1929 return kvm_set_msr_ignored_check(vcpu, index, data, false);
1930 }
1931 EXPORT_SYMBOL_GPL(kvm_set_msr);
1932
1933 static void complete_userspace_rdmsr(struct kvm_vcpu *vcpu)
1934 {
1935 if (!vcpu->run->msr.error) {
1936 kvm_rax_write(vcpu, (u32)vcpu->run->msr.data);
1937 kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32);
1938 }
1939 }
1940
1941 static int complete_emulated_msr_access(struct kvm_vcpu *vcpu)
1942 {
1943 return complete_emulated_insn_gp(vcpu, vcpu->run->msr.error);
1944 }
1945
1946 static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
1947 {
1948 complete_userspace_rdmsr(vcpu);
1949 return complete_emulated_msr_access(vcpu);
1950 }
1951
1952 static int complete_fast_msr_access(struct kvm_vcpu *vcpu)
1953 {
1954 return static_call(kvm_x86_complete_emulated_msr)(vcpu, vcpu->run->msr.error);
1955 }
1956
1957 static int complete_fast_rdmsr(struct kvm_vcpu *vcpu)
1958 {
1959 complete_userspace_rdmsr(vcpu);
1960 return complete_fast_msr_access(vcpu);
1961 }
1962
1963 static u64 kvm_msr_reason(int r)
1964 {
1965 switch (r) {
1966 case KVM_MSR_RET_INVALID:
1967 return KVM_MSR_EXIT_REASON_UNKNOWN;
1968 case KVM_MSR_RET_FILTERED:
1969 return KVM_MSR_EXIT_REASON_FILTER;
1970 default:
1971 return KVM_MSR_EXIT_REASON_INVAL;
1972 }
1973 }
1974
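/*
 * Defer an unhandled or filtered MSR access to userspace, if userspace opted
 * in for this class of failure via KVM_CAP_X86_USER_SPACE_MSR.  Returns 1 if
 * an exit to userspace was set up, 0 if the access should be handled in-kernel.
 */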
1975 static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
1976 u32 exit_reason, u64 data,
1977 int (*completion)(struct kvm_vcpu *vcpu),
1978 int r)
1979 {
1980 u64 msr_reason = kvm_msr_reason(r);
1981
1982
1983 if (!(vcpu->kvm->arch.user_space_msr_mask & msr_reason))
1984 return 0;
1985
1986 vcpu->run->exit_reason = exit_reason;
1987 vcpu->run->msr.error = 0;
1988 memset(vcpu->run->msr.pad, 0, sizeof(vcpu->run->msr.pad));
1989 vcpu->run->msr.reason = msr_reason;
1990 vcpu->run->msr.index = index;
1991 vcpu->run->msr.data = data;
1992 vcpu->arch.complete_userspace_io = completion;
1993
1994 return 1;
1995 }
1996
1997 int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
1998 {
1999 u32 ecx = kvm_rcx_read(vcpu);
2000 u64 data;
2001 int r;
2002
2003 r = kvm_get_msr_with_filter(vcpu, ecx, &data);
2004
2005 if (!r) {
2006 trace_kvm_msr_read(ecx, data);
2007
2008 kvm_rax_write(vcpu, data & -1u);
2009 kvm_rdx_write(vcpu, (data >> 32) & -1u);
2010 } else {
		/* MSR read failed?  See if we should ask user space. */
2012 if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_RDMSR, 0,
2013 complete_fast_rdmsr, r))
2014 return 0;
2015 trace_kvm_msr_read_ex(ecx);
2016 }
2017
2018 return static_call(kvm_x86_complete_emulated_msr)(vcpu, r);
2019 }
2020 EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
2021
2022 int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
2023 {
2024 u32 ecx = kvm_rcx_read(vcpu);
2025 u64 data = kvm_read_edx_eax(vcpu);
2026 int r;
2027
2028 r = kvm_set_msr_with_filter(vcpu, ecx, data);
2029
2030 if (!r) {
2031 trace_kvm_msr_write(ecx, data);
2032 } else {
		/* MSR write failed?  See if we should ask user space. */
2034 if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_WRMSR, data,
2035 complete_fast_msr_access, r))
2036 return 0;
		/* Signal all other negative errors to userspace. */
2038 if (r < 0)
2039 return r;
2040 trace_kvm_msr_write_ex(ecx, data);
2041 }
2042
2043 return static_call(kvm_x86_complete_emulated_msr)(vcpu, r);
2044 }
2045 EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
2046
2047 int kvm_emulate_as_nop(struct kvm_vcpu *vcpu)
2048 {
2049 return kvm_skip_emulated_instruction(vcpu);
2050 }
2051 EXPORT_SYMBOL_GPL(kvm_emulate_as_nop);
2052
2053 int kvm_emulate_invd(struct kvm_vcpu *vcpu)
2054 {
	/* Treat an INVD instruction as a NOP and just skip it. */
2056 return kvm_emulate_as_nop(vcpu);
2057 }
2058 EXPORT_SYMBOL_GPL(kvm_emulate_invd);
2059
2060 int kvm_handle_invalid_op(struct kvm_vcpu *vcpu)
2061 {
2062 kvm_queue_exception(vcpu, UD_VECTOR);
2063 return 1;
2064 }
2065 EXPORT_SYMBOL_GPL(kvm_handle_invalid_op);
2066
2067
2068 static int kvm_emulate_monitor_mwait(struct kvm_vcpu *vcpu, const char *insn)
2069 {
2070 if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS) &&
2071 !guest_cpuid_has(vcpu, X86_FEATURE_MWAIT))
2072 return kvm_handle_invalid_op(vcpu);
2073
2074 pr_warn_once("kvm: %s instruction emulated as NOP!\n", insn);
2075 return kvm_emulate_as_nop(vcpu);
2076 }
2077 int kvm_emulate_mwait(struct kvm_vcpu *vcpu)
2078 {
2079 return kvm_emulate_monitor_mwait(vcpu, "MWAIT");
2080 }
2081 EXPORT_SYMBOL_GPL(kvm_emulate_mwait);
2082
2083 int kvm_emulate_monitor(struct kvm_vcpu *vcpu)
2084 {
2085 return kvm_emulate_monitor_mwait(vcpu, "MONITOR");
2086 }
2087 EXPORT_SYMBOL_GPL(kvm_emulate_monitor);
2088
2089 static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
2090 {
2091 xfer_to_guest_mode_prepare();
2092 return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
2093 xfer_to_guest_mode_work_pending();
2094 }
2095
/*
 * Fast path for frequent, performance-sensitive WRMSR emulation, i.e. the
 * sending of IPIs.  Handling the x2APIC ICR write early in the VM-Exit flow,
 * with interrupts still disabled, avoids most of the exit overhead.  Only
 * simple fixed-mode, physical-destination, non-broadcast IPIs are handled
 * here; anything else falls back to the full emulation path.
 */
2103 static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
2104 {
2105 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic))
2106 return 1;
2107
2108 if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) &&
2109 ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
2110 ((data & APIC_MODE_MASK) == APIC_DM_FIXED) &&
2111 ((u32)(data >> 32) != X2APIC_BROADCAST))
2112 return kvm_x2apic_icr_write(vcpu->arch.apic, data);
2113
2114 return 1;
2115 }
2116
2117 static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data)
2118 {
2119 if (!kvm_can_use_hv_timer(vcpu))
2120 return 1;
2121
2122 kvm_set_lapic_tscdeadline_msr(vcpu, data);
2123 return 0;
2124 }
2125
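/*
 * Dispatch performance-critical WRMSRs (x2APIC ICR writes and TSC-deadline
 * arming) directly in the VM-Exit fastpath with IRQs disabled, and tell the
 * caller whether it may re-enter the guest immediately.
 */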
2126 fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
2127 {
2128 u32 msr = kvm_rcx_read(vcpu);
2129 u64 data;
2130 fastpath_t ret = EXIT_FASTPATH_NONE;
2131
2132 switch (msr) {
2133 case APIC_BASE_MSR + (APIC_ICR >> 4):
2134 data = kvm_read_edx_eax(vcpu);
2135 if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) {
2136 kvm_skip_emulated_instruction(vcpu);
2137 ret = EXIT_FASTPATH_EXIT_HANDLED;
2138 }
2139 break;
2140 case MSR_IA32_TSC_DEADLINE:
2141 data = kvm_read_edx_eax(vcpu);
2142 if (!handle_fastpath_set_tscdeadline(vcpu, data)) {
2143 kvm_skip_emulated_instruction(vcpu);
2144 ret = EXIT_FASTPATH_REENTER_GUEST;
2145 }
2146 break;
2147 default:
2148 break;
2149 }
2150
2151 if (ret != EXIT_FASTPATH_NONE)
2152 trace_kvm_msr_write(msr, data);
2153
2154 return ret;
2155 }
2156 EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
2157
/*
 * Adapt kvm_get_msr()/kvm_set_msr() to the calling convention used by
 * msr_io(), i.e. the KVM_GET_MSRS/KVM_SET_MSRS ioctls.
 */
2161 static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
2162 {
2163 return kvm_get_msr_ignored_check(vcpu, index, data, true);
2164 }
2165
2166 static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
2167 {
2168 return kvm_set_msr_ignored_check(vcpu, index, *data, true);
2169 }
2170
2171 #ifdef CONFIG_X86_64
2172 struct pvclock_clock {
2173 int vclock_mode;
2174 u64 cycle_last;
2175 u64 mask;
2176 u32 mult;
2177 u32 shift;
2178 u64 base_cycles;
2179 u64 offset;
2180 };
2181
2182 struct pvclock_gtod_data {
2183 seqcount_t seq;
2184
2185 struct pvclock_clock clock;
2186 struct pvclock_clock raw_clock;
2187
2188 ktime_t offs_boot;
2189 u64 wall_time_sec;
2190 };
2191
2192 static struct pvclock_gtod_data pvclock_gtod_data;
2193
2194 static void update_pvclock_gtod(struct timekeeper *tk)
2195 {
2196 struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
2197
2198 write_seqcount_begin(&vdata->seq);
2199
2200
2201 vdata->clock.vclock_mode = tk->tkr_mono.clock->vdso_clock_mode;
2202 vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
2203 vdata->clock.mask = tk->tkr_mono.mask;
2204 vdata->clock.mult = tk->tkr_mono.mult;
2205 vdata->clock.shift = tk->tkr_mono.shift;
2206 vdata->clock.base_cycles = tk->tkr_mono.xtime_nsec;
2207 vdata->clock.offset = tk->tkr_mono.base;
2208
2209 vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->vdso_clock_mode;
2210 vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last;
2211 vdata->raw_clock.mask = tk->tkr_raw.mask;
2212 vdata->raw_clock.mult = tk->tkr_raw.mult;
2213 vdata->raw_clock.shift = tk->tkr_raw.shift;
2214 vdata->raw_clock.base_cycles = tk->tkr_raw.xtime_nsec;
2215 vdata->raw_clock.offset = tk->tkr_raw.base;
2216
2217 vdata->wall_time_sec = tk->xtime_sec;
2218
2219 vdata->offs_boot = tk->offs_boot;
2220
2221 write_seqcount_end(&vdata->seq);
2222 }
2223
2224 static s64 get_kvmclock_base_ns(void)
2225 {
2226
2227 return ktime_to_ns(ktime_add(ktime_get_raw(), pvclock_gtod_data.offs_boot));
2228 }
2229 #else
2230 static s64 get_kvmclock_base_ns(void)
2231 {
2232
2233 return ktime_get_boottime_ns();
2234 }
2235 #endif
2236
2237 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs)
2238 {
2239 int version;
2240 int r;
2241 struct pvclock_wall_clock wc;
2242 u32 wc_sec_hi;
2243 u64 wall_nsec;
2244
2245 if (!wall_clock)
2246 return;
2247
2248 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
2249 if (r)
2250 return;
2251
2252 if (version & 1)
2253 ++version;
2254
2255 ++version;
2256
2257 if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
2258 return;
2259
2260
2261
2262
2263
2264
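	/*
	 * The guest computes current wall clock time by adding its kvmclock
	 * (system time) reading to the wall clock value published here, so
	 * the stored value is real time minus the current kvmclock time.
	 */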
2265 wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
2266
2267 wc.nsec = do_div(wall_nsec, 1000000000);
2268 wc.sec = (u32)wall_nsec;
2269 wc.version = version;
2270
2271 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
2272
2273 if (sec_hi_ofs) {
2274 wc_sec_hi = wall_nsec >> 32;
2275 kvm_write_guest(kvm, wall_clock + sec_hi_ofs,
2276 &wc_sec_hi, sizeof(wc_sec_hi));
2277 }
2278
2279 version++;
2280 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
2281 }
2282
2283 static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
2284 bool old_msr, bool host_initiated)
2285 {
2286 struct kvm_arch *ka = &vcpu->kvm->arch;
2287
2288 if (vcpu->vcpu_id == 0 && !host_initiated) {
2289 if (ka->boot_vcpu_runs_old_kvmclock != old_msr)
2290 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2291
2292 ka->boot_vcpu_runs_old_kvmclock = old_msr;
2293 }
2294
2295 vcpu->arch.time = system_time;
2296 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2297
2298
2299 if (system_time & 1) {
2300 kvm_gfn_to_pfn_cache_init(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
2301 KVM_HOST_USES_PFN, system_time & ~1ULL,
2302 sizeof(struct pvclock_vcpu_time_info));
2303 } else {
2304 kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
2305 }
2306
2307 return;
2308 }
2309
2310 static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
2311 {
2312 do_shl32_div32(dividend, divisor);
2313 return dividend;
2314 }
2315
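/*
 * Compute a (shift, multiplier) pair in the pvclock fixed-point format:
 * a delta in base_hz ticks is converted to scaled_hz units by first
 * shifting it by pshift (left if positive, right if negative) and then
 * multiplying by pmultiplier as a 32.32 fixed-point fraction, i.e.
 * result = ((delta << pshift) * pmultiplier) >> 32.
 * Illustrative example (not taken from the source): for a 2.5 GHz TSC
 * (base_hz) scaled to nanoseconds (scaled_hz = NSEC_PER_SEC) this yields
 * pshift = -1 and pmultiplier ~= 0.8 * 2^32, so ns ~= delta * 0.4.
 */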
2316 static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
2317 s8 *pshift, u32 *pmultiplier)
2318 {
2319 uint64_t scaled64;
2320 int32_t shift = 0;
2321 uint64_t tps64;
2322 uint32_t tps32;
2323
2324 tps64 = base_hz;
2325 scaled64 = scaled_hz;
2326 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
2327 tps64 >>= 1;
2328 shift--;
2329 }
2330
2331 tps32 = (uint32_t)tps64;
2332 while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
2333 if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
2334 scaled64 >>= 1;
2335 else
2336 tps32 <<= 1;
2337 shift++;
2338 }
2339
2340 *pshift = shift;
2341 *pmultiplier = div_frac(scaled64, tps32);
2342 }
2343
2344 #ifdef CONFIG_X86_64
2345 static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
2346 #endif
2347
2348 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
2349 static unsigned long max_tsc_khz;
2350
2351 static u32 adjust_tsc_khz(u32 khz, s32 ppm)
2352 {
2353 u64 v = (u64)khz * (1000000 + ppm);
2354 do_div(v, 1000000);
2355 return v;
2356 }
2357
2358 static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier);
2359
2360 static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
2361 {
2362 u64 ratio;
2363
2364
2365 if (!scale) {
2366 kvm_vcpu_write_tsc_multiplier(vcpu, kvm_caps.default_tsc_scaling_ratio);
2367 return 0;
2368 }
2369
2370
2371 if (!kvm_caps.has_tsc_control) {
2372 if (user_tsc_khz > tsc_khz) {
2373 vcpu->arch.tsc_catchup = 1;
2374 vcpu->arch.tsc_always_catchup = 1;
2375 return 0;
2376 } else {
2377 pr_warn_ratelimited("user requested TSC rate below hardware speed\n");
2378 return -1;
2379 }
2380 }
2381
2382
2383 ratio = mul_u64_u32_div(1ULL << kvm_caps.tsc_scaling_ratio_frac_bits,
2384 user_tsc_khz, tsc_khz);
2385
2386 if (ratio == 0 || ratio >= kvm_caps.max_tsc_scaling_ratio) {
2387 pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
2388 user_tsc_khz);
2389 return -1;
2390 }
2391
2392 kvm_vcpu_write_tsc_multiplier(vcpu, ratio);
2393 return 0;
2394 }
2395
2396 static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
2397 {
2398 u32 thresh_lo, thresh_hi;
2399 int use_scaling = 0;
2400
2401
2402 if (user_tsc_khz == 0) {
2403
2404 kvm_vcpu_write_tsc_multiplier(vcpu, kvm_caps.default_tsc_scaling_ratio);
2405 return -1;
2406 }
2407
2408
2409 kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
2410 &vcpu->arch.virtual_tsc_shift,
2411 &vcpu->arch.virtual_tsc_mult);
2412 vcpu->arch.virtual_tsc_khz = user_tsc_khz;
2413
2414
2415
2416
2417
2418
2419
2420 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
2421 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
2422 if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
2423 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi);
2424 use_scaling = 1;
2425 }
2426 return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
2427 }
2428
2429 static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
2430 {
2431 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
2432 vcpu->arch.virtual_tsc_mult,
2433 vcpu->arch.virtual_tsc_shift);
2434 tsc += vcpu->arch.this_tsc_write;
2435 return tsc;
2436 }
2437
2438 #ifdef CONFIG_X86_64
2439 static inline int gtod_is_based_on_tsc(int mode)
2440 {
2441 return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
2442 }
2443 #endif
2444
2445 static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
2446 {
2447 #ifdef CONFIG_X86_64
2448 bool vcpus_matched;
2449 struct kvm_arch *ka = &vcpu->kvm->arch;
2450 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2451
2452 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
2453 atomic_read(&vcpu->kvm->online_vcpus));
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463 if (ka->use_master_clock ||
2464 (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
2465 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2466
2467 trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
2468 atomic_read(&vcpu->kvm->online_vcpus),
2469 ka->use_master_clock, gtod->clock.vclock_mode);
2470 #endif
2471 }
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
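/*
 * Scale a raw TSC value by a fixed-point ratio: the low
 * kvm_caps.tsc_scaling_ratio_frac_bits bits of "ratio" are the fractional
 * part and the remaining high bits the integral part, so the guest TSC is
 * (tsc * ratio) >> frac_bits.  The default ratio encodes 1.0 and is
 * skipped entirely in kvm_scale_tsc().
 */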
2483 static inline u64 __scale_tsc(u64 ratio, u64 tsc)
2484 {
2485 return mul_u64_u64_shr(tsc, ratio, kvm_caps.tsc_scaling_ratio_frac_bits);
2486 }
2487
2488 u64 kvm_scale_tsc(u64 tsc, u64 ratio)
2489 {
2490 u64 _tsc = tsc;
2491
2492 if (ratio != kvm_caps.default_tsc_scaling_ratio)
2493 _tsc = __scale_tsc(ratio, tsc);
2494
2495 return _tsc;
2496 }
2497 EXPORT_SYMBOL_GPL(kvm_scale_tsc);
2498
2499 static u64 kvm_compute_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
2500 {
2501 u64 tsc;
2502
2503 tsc = kvm_scale_tsc(rdtsc(), vcpu->arch.l1_tsc_scaling_ratio);
2504
2505 return target_tsc - tsc;
2506 }
2507
2508 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
2509 {
2510 return vcpu->arch.l1_tsc_offset +
2511 kvm_scale_tsc(host_tsc, vcpu->arch.l1_tsc_scaling_ratio);
2512 }
2513 EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
2514
2515 u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier)
2516 {
2517 u64 nested_offset;
2518
2519 if (l2_multiplier == kvm_caps.default_tsc_scaling_ratio)
2520 nested_offset = l1_offset;
2521 else
2522 nested_offset = mul_s64_u64_shr((s64) l1_offset, l2_multiplier,
2523 kvm_caps.tsc_scaling_ratio_frac_bits);
2524
2525 nested_offset += l2_offset;
2526 return nested_offset;
2527 }
2528 EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_offset);
2529
2530 u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier)
2531 {
2532 if (l2_multiplier != kvm_caps.default_tsc_scaling_ratio)
2533 return mul_u64_u64_shr(l1_multiplier, l2_multiplier,
2534 kvm_caps.tsc_scaling_ratio_frac_bits);
2535
2536 return l1_multiplier;
2537 }
2538 EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_multiplier);
2539
2540 static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
2541 {
2542 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
2543 vcpu->arch.l1_tsc_offset,
2544 l1_offset);
2545
2546 vcpu->arch.l1_tsc_offset = l1_offset;
2547
2548
2549
2550
2551
2552
2553 if (is_guest_mode(vcpu))
2554 vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
2555 l1_offset,
2556 static_call(kvm_x86_get_l2_tsc_offset)(vcpu),
2557 static_call(kvm_x86_get_l2_tsc_multiplier)(vcpu));
2558 else
2559 vcpu->arch.tsc_offset = l1_offset;
2560
2561 static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
2562 }
2563
2564 static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier)
2565 {
2566 vcpu->arch.l1_tsc_scaling_ratio = l1_multiplier;
2567
2568
2569 if (is_guest_mode(vcpu))
2570 vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier(
2571 l1_multiplier,
2572 static_call(kvm_x86_get_l2_tsc_multiplier)(vcpu));
2573 else
2574 vcpu->arch.tsc_scaling_ratio = l1_multiplier;
2575
2576 if (kvm_caps.has_tsc_control)
2577 static_call(kvm_x86_write_tsc_multiplier)(
2578 vcpu, vcpu->arch.tsc_scaling_ratio);
2579 }
2580
2581 static inline bool kvm_check_tsc_unstable(void)
2582 {
2583 #ifdef CONFIG_X86_64
2584
2585
2586
2587
2588 if (pvclock_gtod_data.clock.vclock_mode == VDSO_CLOCKMODE_HVCLOCK)
2589 return false;
2590 #endif
2591 return check_tsc_unstable();
2592 }
2593
2594
2595
2596
2597
2598
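/*
 * Record the most recent guest TSC write and propagate the new L1 offset.
 * Writes that are deemed "matched" with the previous write join the current
 * TSC generation so the masterclock logic can detect when all vCPUs run
 * with an identical TSC; an unmatched write starts a new generation.
 * Caller must hold kvm->arch.tsc_write_lock.
 */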
2599 static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc,
2600 u64 ns, bool matched)
2601 {
2602 struct kvm *kvm = vcpu->kvm;
2603
2604 lockdep_assert_held(&kvm->arch.tsc_write_lock);
2605
2606
2607
2608
2609
2610 kvm->arch.last_tsc_nsec = ns;
2611 kvm->arch.last_tsc_write = tsc;
2612 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
2613 kvm->arch.last_tsc_offset = offset;
2614
2615 vcpu->arch.last_guest_tsc = tsc;
2616
2617 kvm_vcpu_write_tsc_offset(vcpu, offset);
2618
2619 if (!matched) {
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629 kvm->arch.cur_tsc_generation++;
2630 kvm->arch.cur_tsc_nsec = ns;
2631 kvm->arch.cur_tsc_write = tsc;
2632 kvm->arch.cur_tsc_offset = offset;
2633 kvm->arch.nr_vcpus_matched_tsc = 0;
2634 } else if (vcpu->arch.this_tsc_generation != kvm->arch.cur_tsc_generation) {
2635 kvm->arch.nr_vcpus_matched_tsc++;
2636 }
2637
2638
2639 vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
2640 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
2641 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
2642
2643 kvm_track_tsc_matching(vcpu);
2644 }
2645
2646 static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
2647 {
2648 struct kvm *kvm = vcpu->kvm;
2649 u64 offset, ns, elapsed;
2650 unsigned long flags;
2651 bool matched = false;
2652 bool synchronizing = false;
2653
2654 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
2655 offset = kvm_compute_l1_tsc_offset(vcpu, data);
2656 ns = get_kvmclock_base_ns();
2657 elapsed = ns - kvm->arch.last_tsc_nsec;
2658
2659 if (vcpu->arch.virtual_tsc_khz) {
2660 if (data == 0) {
2661
2662
2663
2664
2665
2666 synchronizing = true;
2667 } else {
2668 u64 tsc_exp = kvm->arch.last_tsc_write +
2669 nsec_to_cycles(vcpu, elapsed);
2670 u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
2671
2672
2673
2674
2675
2676 synchronizing = data < tsc_exp + tsc_hz &&
2677 data + tsc_hz > tsc_exp;
2678 }
2679 }
2680
2681
2682
2683
2684
2685
2686
2687 if (synchronizing &&
2688 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
2689 if (!kvm_check_tsc_unstable()) {
2690 offset = kvm->arch.cur_tsc_offset;
2691 } else {
2692 u64 delta = nsec_to_cycles(vcpu, elapsed);
2693 data += delta;
2694 offset = kvm_compute_l1_tsc_offset(vcpu, data);
2695 }
2696 matched = true;
2697 }
2698
2699 __kvm_synchronize_tsc(vcpu, offset, data, ns, matched);
2700 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
2701 }
2702
2703 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
2704 s64 adjustment)
2705 {
2706 u64 tsc_offset = vcpu->arch.l1_tsc_offset;
2707 kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
2708 }
2709
2710 static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
2711 {
2712 if (vcpu->arch.l1_tsc_scaling_ratio != kvm_caps.default_tsc_scaling_ratio)
2713 WARN_ON(adjustment < 0);
2714 adjustment = kvm_scale_tsc((u64) adjustment,
2715 vcpu->arch.l1_tsc_scaling_ratio);
2716 adjust_tsc_offset_guest(vcpu, adjustment);
2717 }
2718
2719 #ifdef CONFIG_X86_64
2720
2721 static u64 read_tsc(void)
2722 {
2723 u64 ret = (u64)rdtsc_ordered();
2724 u64 last = pvclock_gtod_data.clock.cycle_last;
2725
2726 if (likely(ret >= last))
2727 return ret;
2728
2729
2730
2731
2732
2733
2734
2735
2736
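	/*
	 * The TSC appears to have gone backwards relative to the last value
	 * recorded by the timekeeper, which can happen when TSCs are not
	 * perfectly synchronized across CPUs.  Return the last known value
	 * to keep the clock monotonic; the empty asm presumably discourages
	 * the compiler from turning the branch above into a cmov.
	 */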
2737 asm volatile ("");
2738 return last;
2739 }
2740
2741 static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
2742 int *mode)
2743 {
2744 long v;
2745 u64 tsc_pg_val;
2746
2747 switch (clock->vclock_mode) {
2748 case VDSO_CLOCKMODE_HVCLOCK:
2749 tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
2750 tsc_timestamp);
2751 if (tsc_pg_val != U64_MAX) {
2752
2753 *mode = VDSO_CLOCKMODE_HVCLOCK;
2754 v = (tsc_pg_val - clock->cycle_last) &
2755 clock->mask;
2756 } else {
2757
2758 *mode = VDSO_CLOCKMODE_NONE;
2759 }
2760 break;
2761 case VDSO_CLOCKMODE_TSC:
2762 *mode = VDSO_CLOCKMODE_TSC;
2763 *tsc_timestamp = read_tsc();
2764 v = (*tsc_timestamp - clock->cycle_last) &
2765 clock->mask;
2766 break;
2767 default:
2768 *mode = VDSO_CLOCKMODE_NONE;
2769 }
2770
2771 if (*mode == VDSO_CLOCKMODE_NONE)
2772 *tsc_timestamp = v = 0;
2773
2774 return v * clock->mult;
2775 }
2776
2777 static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
2778 {
2779 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2780 unsigned long seq;
2781 int mode;
2782 u64 ns;
2783
2784 do {
2785 seq = read_seqcount_begin(&gtod->seq);
2786 ns = gtod->raw_clock.base_cycles;
2787 ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode);
2788 ns >>= gtod->raw_clock.shift;
2789 ns += ktime_to_ns(ktime_add(gtod->raw_clock.offset, gtod->offs_boot));
2790 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
2791 *t = ns;
2792
2793 return mode;
2794 }
2795
2796 static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
2797 {
2798 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2799 unsigned long seq;
2800 int mode;
2801 u64 ns;
2802
2803 do {
2804 seq = read_seqcount_begin(&gtod->seq);
2805 ts->tv_sec = gtod->wall_time_sec;
2806 ns = gtod->clock.base_cycles;
2807 ns += vgettsc(&gtod->clock, tsc_timestamp, &mode);
2808 ns >>= gtod->clock.shift;
2809 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
2810
2811 ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
2812 ts->tv_nsec = ns;
2813
2814 return mode;
2815 }
2816
2817
2818 static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
2819 {
2820
2821 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
2822 return false;
2823
2824 return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns,
2825 tsc_timestamp));
2826 }
2827
2828
2829 static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
2830 u64 *tsc_timestamp)
2831 {
2832
2833 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
2834 return false;
2835
2836 return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp));
2837 }
2838 #endif
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
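/*
 * Decide whether this VM can use a "master clock": a single (TSC, kernel_ns)
 * reference pair shared by all vCPUs.  This is only safe when the host
 * clocksource is TSC-based, all vCPUs' TSC writes have matched (same
 * generation), no backwards TSC has been observed, and the boot vCPU is not
 * using the legacy kvmclock MSR.  With a master clock, every vCPU derives
 * kvmclock from the same snapshot, keeping the guest's view of time
 * consistent across vCPUs.
 */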
2881 static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
2882 {
2883 #ifdef CONFIG_X86_64
2884 struct kvm_arch *ka = &kvm->arch;
2885 int vclock_mode;
2886 bool host_tsc_clocksource, vcpus_matched;
2887
2888 lockdep_assert_held(&kvm->arch.tsc_write_lock);
2889 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
2890 atomic_read(&kvm->online_vcpus));
2891
2892
2893
2894
2895
2896 host_tsc_clocksource = kvm_get_time_and_clockread(
2897 &ka->master_kernel_ns,
2898 &ka->master_cycle_now);
2899
2900 ka->use_master_clock = host_tsc_clocksource && vcpus_matched
2901 && !ka->backwards_tsc_observed
2902 && !ka->boot_vcpu_runs_old_kvmclock;
2903
2904 if (ka->use_master_clock)
2905 atomic_set(&kvm_guest_has_master_clock, 1);
2906
2907 vclock_mode = pvclock_gtod_data.clock.vclock_mode;
2908 trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
2909 vcpus_matched);
2910 #endif
2911 }
2912
2913 static void kvm_make_mclock_inprogress_request(struct kvm *kvm)
2914 {
2915 kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
2916 }
2917
2918 static void __kvm_start_pvclock_update(struct kvm *kvm)
2919 {
2920 raw_spin_lock_irq(&kvm->arch.tsc_write_lock);
2921 write_seqcount_begin(&kvm->arch.pvclock_sc);
2922 }
2923
2924 static void kvm_start_pvclock_update(struct kvm *kvm)
2925 {
2926 kvm_make_mclock_inprogress_request(kvm);
2927
2928
2929 __kvm_start_pvclock_update(kvm);
2930 }
2931
2932 static void kvm_end_pvclock_update(struct kvm *kvm)
2933 {
2934 struct kvm_arch *ka = &kvm->arch;
2935 struct kvm_vcpu *vcpu;
2936 unsigned long i;
2937
2938 write_seqcount_end(&ka->pvclock_sc);
2939 raw_spin_unlock_irq(&ka->tsc_write_lock);
2940 kvm_for_each_vcpu(i, vcpu, kvm)
2941 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2942
2943
2944 kvm_for_each_vcpu(i, vcpu, kvm)
2945 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
2946 }
2947
2948 static void kvm_update_masterclock(struct kvm *kvm)
2949 {
2950 kvm_hv_request_tsc_page_update(kvm);
2951 kvm_start_pvclock_update(kvm);
2952 pvclock_update_vm_gtod_copy(kvm);
2953 kvm_end_pvclock_update(kvm);
2954 }
2955
2956
2957 static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
2958 {
2959 struct kvm_arch *ka = &kvm->arch;
2960 struct pvclock_vcpu_time_info hv_clock;
2961
2962
2963 get_cpu();
2964
2965 data->flags = 0;
2966 if (ka->use_master_clock && __this_cpu_read(cpu_tsc_khz)) {
2967 #ifdef CONFIG_X86_64
2968 struct timespec64 ts;
2969
2970 if (kvm_get_walltime_and_clockread(&ts, &data->host_tsc)) {
2971 data->realtime = ts.tv_nsec + NSEC_PER_SEC * ts.tv_sec;
2972 data->flags |= KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC;
2973 } else
2974 #endif
2975 data->host_tsc = rdtsc();
2976
2977 data->flags |= KVM_CLOCK_TSC_STABLE;
2978 hv_clock.tsc_timestamp = ka->master_cycle_now;
2979 hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
2980 kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
2981 &hv_clock.tsc_shift,
2982 &hv_clock.tsc_to_system_mul);
2983 data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc);
2984 } else {
2985 data->clock = get_kvmclock_base_ns() + ka->kvmclock_offset;
2986 }
2987
2988 put_cpu();
2989 }
2990
2991 static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
2992 {
2993 struct kvm_arch *ka = &kvm->arch;
2994 unsigned seq;
2995
2996 do {
2997 seq = read_seqcount_begin(&ka->pvclock_sc);
2998 __get_kvmclock(kvm, data);
2999 } while (read_seqcount_retry(&ka->pvclock_sc, seq));
3000 }
3001
3002 u64 get_kvmclock_ns(struct kvm *kvm)
3003 {
3004 struct kvm_clock_data data;
3005
3006 get_kvmclock(kvm, &data);
3007 return data.clock;
3008 }
3009
3010 static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
3011 struct gfn_to_pfn_cache *gpc,
3012 unsigned int offset)
3013 {
3014 struct kvm_vcpu_arch *vcpu = &v->arch;
3015 struct pvclock_vcpu_time_info *guest_hv_clock;
3016 unsigned long flags;
3017
3018 read_lock_irqsave(&gpc->lock, flags);
3019 while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
3020 offset + sizeof(*guest_hv_clock))) {
3021 read_unlock_irqrestore(&gpc->lock, flags);
3022
3023 if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
3024 offset + sizeof(*guest_hv_clock)))
3025 return;
3026
3027 read_lock_irqsave(&gpc->lock, flags);
3028 }
3029
3030 guest_hv_clock = (void *)(gpc->khva + offset);
3031
3032
3033
3034
3035
3036
3037
3038
3039 guest_hv_clock->version = vcpu->hv_clock.version = (guest_hv_clock->version + 1) | 1;
3040 smp_wmb();
3041
3042
3043 vcpu->hv_clock.flags |= (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
3044
3045 if (vcpu->pvclock_set_guest_stopped_request) {
3046 vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
3047 vcpu->pvclock_set_guest_stopped_request = false;
3048 }
3049
3050 memcpy(guest_hv_clock, &vcpu->hv_clock, sizeof(*guest_hv_clock));
3051 smp_wmb();
3052
3053 guest_hv_clock->version = ++vcpu->hv_clock.version;
3054
3055 mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
3056 read_unlock_irqrestore(&gpc->lock, flags);
3057
3058 trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
3059 }
3060
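/*
 * Refresh this vCPU's kvmclock: sample the time reference (the VM-wide
 * master clock if active, otherwise the local TSC and raw boot clock),
 * rescale it for the guest's virtual TSC frequency, and publish the result
 * to the guest's pvclock areas (kvmclock, Xen, Hyper-V TSC page).
 */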
3061 static int kvm_guest_time_update(struct kvm_vcpu *v)
3062 {
3063 unsigned long flags, tgt_tsc_khz;
3064 unsigned seq;
3065 struct kvm_vcpu_arch *vcpu = &v->arch;
3066 struct kvm_arch *ka = &v->kvm->arch;
3067 s64 kernel_ns;
3068 u64 tsc_timestamp, host_tsc;
3069 u8 pvclock_flags;
3070 bool use_master_clock;
3071
3072 kernel_ns = 0;
3073 host_tsc = 0;
3074
3075
3076
3077
3078
3079 do {
3080 seq = read_seqcount_begin(&ka->pvclock_sc);
3081 use_master_clock = ka->use_master_clock;
3082 if (use_master_clock) {
3083 host_tsc = ka->master_cycle_now;
3084 kernel_ns = ka->master_kernel_ns;
3085 }
3086 } while (read_seqcount_retry(&ka->pvclock_sc, seq));
3087
3088
3089 local_irq_save(flags);
3090 tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
3091 if (unlikely(tgt_tsc_khz == 0)) {
3092 local_irq_restore(flags);
3093 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
3094 return 1;
3095 }
3096 if (!use_master_clock) {
3097 host_tsc = rdtsc();
3098 kernel_ns = get_kvmclock_base_ns();
3099 }
3100
3101 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
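	/*
	 * In TSC catchup mode the guest TSC is emulated at a rate the
	 * hardware cannot provide, so if the computed guest TSC for this
	 * moment is ahead of what the scaled hardware TSC reports, bump
	 * the offset forward so the guest never sees time go backwards.
	 */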
3113 if (vcpu->tsc_catchup) {
3114 u64 tsc = compute_guest_tsc(v, kernel_ns);
3115 if (tsc > tsc_timestamp) {
3116 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
3117 tsc_timestamp = tsc;
3118 }
3119 }
3120
3121 local_irq_restore(flags);
3122
3123
3124
3125 if (kvm_caps.has_tsc_control)
3126 tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz,
3127 v->arch.l1_tsc_scaling_ratio);
3128
3129 if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
3130 kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
3131 &vcpu->hv_clock.tsc_shift,
3132 &vcpu->hv_clock.tsc_to_system_mul);
3133 vcpu->hw_tsc_khz = tgt_tsc_khz;
3134 }
3135
3136 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
3137 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
3138 vcpu->last_guest_tsc = tsc_timestamp;
3139
3140
3141 pvclock_flags = 0;
3142 if (use_master_clock)
3143 pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
3144
3145 vcpu->hv_clock.flags = pvclock_flags;
3146
3147 if (vcpu->pv_time.active)
3148 kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0);
3149 if (vcpu->xen.vcpu_info_cache.active)
3150 kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,
3151 offsetof(struct compat_vcpu_info, time));
3152 if (vcpu->xen.vcpu_time_info_cache.active)
3153 kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0);
3154 kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
3155 return 0;
3156 }
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
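/*
 * A kvmclock update for one vCPU (e.g. after migrating to another physical
 * CPU) is followed, after a short delay, by a clock update request for all
 * vCPUs so that their system_timestamp values do not drift apart.  The
 * delay rate-limits these global updates on VMs with many vCPUs.
 */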
3172 #define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
3173
3174 static void kvmclock_update_fn(struct work_struct *work)
3175 {
3176 unsigned long i;
3177 struct delayed_work *dwork = to_delayed_work(work);
3178 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
3179 kvmclock_update_work);
3180 struct kvm *kvm = container_of(ka, struct kvm, arch);
3181 struct kvm_vcpu *vcpu;
3182
3183 kvm_for_each_vcpu(i, vcpu, kvm) {
3184 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3185 kvm_vcpu_kick(vcpu);
3186 }
3187 }
3188
3189 static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
3190 {
3191 struct kvm *kvm = v->kvm;
3192
3193 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
3194 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
3195 KVMCLOCK_UPDATE_DELAY);
3196 }
3197
3198 #define KVMCLOCK_SYNC_PERIOD (300 * HZ)
3199
3200 static void kvmclock_sync_fn(struct work_struct *work)
3201 {
3202 struct delayed_work *dwork = to_delayed_work(work);
3203 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
3204 kvmclock_sync_work);
3205 struct kvm *kvm = container_of(ka, struct kvm, arch);
3206
3207 if (!kvmclock_periodic_sync)
3208 return;
3209
3210 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
3211 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
3212 KVMCLOCK_SYNC_PERIOD);
3213 }
3214
3215
3216 static bool is_mci_control_msr(u32 msr)
3217 {
3218 return (msr & 3) == 0;
3219 }
3220 static bool is_mci_status_msr(u32 msr)
3221 {
3222 return (msr & 3) == 1;
3223 }
3224
3225
3226
3227
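/*
 * On AMD/Hygon CPUs, HWCR[McStatusWrEn] (bit 18) controls whether software
 * writes of a non-zero value to the MCi_STATUS MSRs are allowed; mirror
 * that behaviour for the guest.
 */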
3228 static bool can_set_mci_status(struct kvm_vcpu *vcpu)
3229 {
3230
3231 if (guest_cpuid_is_amd_or_hygon(vcpu))
3232 return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
3233
3234 return false;
3235 }
3236
3237 static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3238 {
3239 u64 mcg_cap = vcpu->arch.mcg_cap;
3240 unsigned bank_num = mcg_cap & 0xff;
3241 u32 msr = msr_info->index;
3242 u64 data = msr_info->data;
3243 u32 offset, last_msr;
3244
3245 switch (msr) {
3246 case MSR_IA32_MCG_STATUS:
3247 vcpu->arch.mcg_status = data;
3248 break;
3249 case MSR_IA32_MCG_CTL:
3250 if (!(mcg_cap & MCG_CTL_P) &&
3251 (data || !msr_info->host_initiated))
3252 return 1;
3253 if (data != 0 && data != ~(u64)0)
3254 return 1;
3255 vcpu->arch.mcg_ctl = data;
3256 break;
3257 case MSR_IA32_MC0_CTL2 ... MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) - 1:
3258 last_msr = MSR_IA32_MCx_CTL2(bank_num) - 1;
3259 if (msr > last_msr)
3260 return 1;
3261
3262 if (!(mcg_cap & MCG_CMCI_P) && (data || !msr_info->host_initiated))
3263 return 1;
3264
3265 if (data & ~(MCI_CTL2_CMCI_EN | MCI_CTL2_CMCI_THRESHOLD_MASK))
3266 return 1;
3267 offset = array_index_nospec(msr - MSR_IA32_MC0_CTL2,
3268 last_msr + 1 - MSR_IA32_MC0_CTL2);
3269 vcpu->arch.mci_ctl2_banks[offset] = data;
3270 break;
3271 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3272 last_msr = MSR_IA32_MCx_CTL(bank_num) - 1;
3273 if (msr > last_msr)
3274 return 1;
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286 if (is_mci_control_msr(msr) &&
3287 data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
3288 return 1;
3289
3290
3291
3292
3293
3294
3295 if (!msr_info->host_initiated && is_mci_status_msr(msr) &&
3296 data != 0 && !can_set_mci_status(vcpu))
3297 return 1;
3298
3299 offset = array_index_nospec(msr - MSR_IA32_MC0_CTL,
3300 last_msr + 1 - MSR_IA32_MC0_CTL);
3301 vcpu->arch.mce_banks[offset] = data;
3302 break;
3303 default:
3304 return 1;
3305 }
3306 return 0;
3307 }
3308
3309 static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
3310 {
3311 u64 mask = KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;
3312
3313 return (vcpu->arch.apf.msr_en_val & mask) == mask;
3314 }
3315
3316 static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
3317 {
3318 gpa_t gpa = data & ~0x3f;
3319
3320
3321 if (data & 0x30)
3322 return 1;
3323
3324 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_VMEXIT) &&
3325 (data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT))
3326 return 1;
3327
3328 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT) &&
3329 (data & KVM_ASYNC_PF_DELIVERY_AS_INT))
3330 return 1;
3331
3332 if (!lapic_in_kernel(vcpu))
3333 return data ? 1 : 0;
3334
3335 vcpu->arch.apf.msr_en_val = data;
3336
3337 if (!kvm_pv_async_pf_enabled(vcpu)) {
3338 kvm_clear_async_pf_completion_queue(vcpu);
3339 kvm_async_pf_hash_reset(vcpu);
3340 return 0;
3341 }
3342
3343 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
3344 sizeof(u64)))
3345 return 1;
3346
3347 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
3348 vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
3349
3350 kvm_async_pf_wakeup_all(vcpu);
3351
3352 return 0;
3353 }
3354
3355 static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
3356 {
3357
3358 if (data >> 8)
3359 return 1;
3360
3361 if (!lapic_in_kernel(vcpu))
3362 return 1;
3363
3364 vcpu->arch.apf.msr_int_val = data;
3365
3366 vcpu->arch.apf.vec = data & KVM_ASYNC_PF_VEC_MASK;
3367
3368 return 0;
3369 }
3370
3371 static void kvmclock_reset(struct kvm_vcpu *vcpu)
3372 {
3373 kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
3374 vcpu->arch.time = 0;
3375 }
3376
3377 static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
3378 {
3379 ++vcpu->stat.tlb_flush;
3380 static_call(kvm_x86_flush_tlb_all)(vcpu);
3381 }
3382
3383 static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
3384 {
3385 ++vcpu->stat.tlb_flush;
3386
3387 if (!tdp_enabled) {
3388
3389
3390
3391
3392
3393
3394 kvm_mmu_sync_roots(vcpu);
3395 kvm_mmu_sync_prev_roots(vcpu);
3396 }
3397
3398 static_call(kvm_x86_flush_tlb_guest)(vcpu);
3399 }
3400
3401
3402 static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
3403 {
3404 ++vcpu->stat.tlb_flush;
3405 static_call(kvm_x86_flush_tlb_current)(vcpu);
3406 }
3407
3408
3409
3410
3411
3412
3413
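/*
 * Service "local" TLB flush requests, i.e. flushes that target the current
 * MMU context ("current" and "guest" flushes) rather than all contexts.
 * These also need to be handled before nested VM-Enter/VM-Exit, not just in
 * the main vcpu_enter_guest() path.
 */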
3414 void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
3415 {
3416 if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
3417 kvm_vcpu_flush_tlb_current(vcpu);
3418
3419 if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
3420 kvm_vcpu_flush_tlb_guest(vcpu);
3421 }
3422 EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests);
3423
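/*
 * Publish accumulated steal time (host run_delay) to the guest's
 * kvm_steal_time area, using the version field as a seqlock, and honour a
 * KVM_VCPU_FLUSH_TLB request the guest may have left in the preempted field
 * while the vCPU was scheduled out.
 */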
3424 static void record_steal_time(struct kvm_vcpu *vcpu)
3425 {
3426 struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
3427 struct kvm_steal_time __user *st;
3428 struct kvm_memslots *slots;
3429 gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
3430 u64 steal;
3431 u32 version;
3432
3433 if (kvm_xen_msr_enabled(vcpu->kvm)) {
3434 kvm_xen_runstate_set_running(vcpu);
3435 return;
3436 }
3437
3438 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
3439 return;
3440
3441 if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm))
3442 return;
3443
3444 slots = kvm_memslots(vcpu->kvm);
3445
3446 if (unlikely(slots->generation != ghc->generation ||
3447 gpa != ghc->gpa ||
3448 kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
3449
3450 BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
3451
3452 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st)) ||
3453 kvm_is_error_hva(ghc->hva) || !ghc->memslot)
3454 return;
3455 }
3456
3457 st = (struct kvm_steal_time __user *)ghc->hva;
3458
3459
3460
3461
3462 if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
3463 u8 st_preempted = 0;
3464 int err = -EFAULT;
3465
3466 if (!user_access_begin(st, sizeof(*st)))
3467 return;
3468
3469 asm volatile("1: xchgb %0, %2\n"
3470 "xor %1, %1\n"
3471 "2:\n"
3472 _ASM_EXTABLE_UA(1b, 2b)
3473 : "+q" (st_preempted),
3474 "+&r" (err),
3475 "+m" (st->preempted));
3476 if (err)
3477 goto out;
3478
3479 user_access_end();
3480
3481 vcpu->arch.st.preempted = 0;
3482
3483 trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
3484 st_preempted & KVM_VCPU_FLUSH_TLB);
3485 if (st_preempted & KVM_VCPU_FLUSH_TLB)
3486 kvm_vcpu_flush_tlb_guest(vcpu);
3487
3488 if (!user_access_begin(st, sizeof(*st)))
3489 goto dirty;
3490 } else {
3491 if (!user_access_begin(st, sizeof(*st)))
3492 return;
3493
3494 unsafe_put_user(0, &st->preempted, out);
3495 vcpu->arch.st.preempted = 0;
3496 }
3497
3498 unsafe_get_user(version, &st->version, out);
3499 if (version & 1)
3500 version += 1;
3501
3502 version += 1;
3503 unsafe_put_user(version, &st->version, out);
3504
3505 smp_wmb();
3506
3507 unsafe_get_user(steal, &st->steal, out);
3508 steal += current->sched_info.run_delay -
3509 vcpu->arch.st.last_steal;
3510 vcpu->arch.st.last_steal = current->sched_info.run_delay;
3511 unsafe_put_user(steal, &st->steal, out);
3512
3513 version += 1;
3514 unsafe_put_user(version, &st->version, out);
3515
3516 out:
3517 user_access_end();
3518 dirty:
3519 mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
3520 }
3521
3522 int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3523 {
3524 bool pr = false;
3525 u32 msr = msr_info->index;
3526 u64 data = msr_info->data;
3527
3528 if (msr && msr == vcpu->kvm->arch.xen_hvm_config.msr)
3529 return kvm_xen_write_hypercall_page(vcpu, data);
3530
3531 switch (msr) {
3532 case MSR_AMD64_NB_CFG:
3533 case MSR_IA32_UCODE_WRITE:
3534 case MSR_VM_HSAVE_PA:
3535 case MSR_AMD64_PATCH_LOADER:
3536 case MSR_AMD64_BU_CFG2:
3537 case MSR_AMD64_DC_CFG:
3538 case MSR_F15H_EX_CFG:
3539 break;
3540
3541 case MSR_IA32_UCODE_REV:
3542 if (msr_info->host_initiated)
3543 vcpu->arch.microcode_version = data;
3544 break;
3545 case MSR_IA32_ARCH_CAPABILITIES:
3546 if (!msr_info->host_initiated)
3547 return 1;
3548 vcpu->arch.arch_capabilities = data;
3549 break;
3550 case MSR_IA32_PERF_CAPABILITIES: {
3551 struct kvm_msr_entry msr_ent = {.index = msr, .data = 0};
3552
3553 if (!msr_info->host_initiated)
3554 return 1;
3555 if (kvm_get_msr_feature(&msr_ent))
3556 return 1;
3557 if (data & ~msr_ent.data)
3558 return 1;
3559
3560 vcpu->arch.perf_capabilities = data;
3561 kvm_pmu_refresh(vcpu);
3562 return 0;
3563 }
3564 case MSR_EFER:
3565 return set_efer(vcpu, msr_info);
3566 case MSR_K7_HWCR:
3567 data &= ~(u64)0x40;
3568 data &= ~(u64)0x100;
3569 data &= ~(u64)0x8;
3570
3571
3572 if (data == BIT_ULL(18)) {
3573 vcpu->arch.msr_hwcr = data;
3574 } else if (data != 0) {
3575 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
3576 data);
3577 return 1;
3578 }
3579 break;
3580 case MSR_FAM10H_MMIO_CONF_BASE:
3581 if (data != 0) {
3582 vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
3583 "0x%llx\n", data);
3584 return 1;
3585 }
3586 break;
3587 case 0x200 ... MSR_IA32_MC0_CTL2 - 1:
3588 case MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) ... 0x2ff:
3589 return kvm_mtrr_set_msr(vcpu, msr, data);
3590 case MSR_IA32_APICBASE:
3591 return kvm_set_apic_base(vcpu, msr_info);
3592 case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
3593 return kvm_x2apic_msr_write(vcpu, msr, data);
3594 case MSR_IA32_TSC_DEADLINE:
3595 kvm_set_lapic_tscdeadline_msr(vcpu, data);
3596 break;
3597 case MSR_IA32_TSC_ADJUST:
3598 if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
3599 if (!msr_info->host_initiated) {
3600 s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
3601 adjust_tsc_offset_guest(vcpu, adj);
3602
3603
3604
3605 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3606 }
3607 vcpu->arch.ia32_tsc_adjust_msr = data;
3608 }
3609 break;
3610 case MSR_IA32_MISC_ENABLE: {
3611 u64 old_val = vcpu->arch.ia32_misc_enable_msr;
3612
3613 if (!msr_info->host_initiated) {
3614
3615 if ((old_val ^ data) & MSR_IA32_MISC_ENABLE_PMU_RO_MASK)
3616 return 1;
3617
3618
3619 data = data & ~MSR_IA32_MISC_ENABLE_EMON;
3620 data |= old_val & MSR_IA32_MISC_ENABLE_EMON;
3621 }
3622
3623 if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
3624 ((old_val ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
3625 if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
3626 return 1;
3627 vcpu->arch.ia32_misc_enable_msr = data;
3628 kvm_update_cpuid_runtime(vcpu);
3629 } else {
3630 vcpu->arch.ia32_misc_enable_msr = data;
3631 }
3632 break;
3633 }
3634 case MSR_IA32_SMBASE:
3635 if (!msr_info->host_initiated)
3636 return 1;
3637 vcpu->arch.smbase = data;
3638 break;
3639 case MSR_IA32_POWER_CTL:
3640 vcpu->arch.msr_ia32_power_ctl = data;
3641 break;
3642 case MSR_IA32_TSC:
3643 if (msr_info->host_initiated) {
3644 kvm_synchronize_tsc(vcpu, data);
3645 } else {
3646 u64 adj = kvm_compute_l1_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset;
3647 adjust_tsc_offset_guest(vcpu, adj);
3648 vcpu->arch.ia32_tsc_adjust_msr += adj;
3649 }
3650 break;
3651 case MSR_IA32_XSS:
3652 if (!msr_info->host_initiated &&
3653 !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
3654 return 1;
3655
3656
3657
3658
3659
3660 if (data & ~kvm_caps.supported_xss)
3661 return 1;
3662 vcpu->arch.ia32_xss = data;
3663 kvm_update_cpuid_runtime(vcpu);
3664 break;
3665 case MSR_SMI_COUNT:
3666 if (!msr_info->host_initiated)
3667 return 1;
3668 vcpu->arch.smi_count = data;
3669 break;
3670 case MSR_KVM_WALL_CLOCK_NEW:
3671 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3672 return 1;
3673
3674 vcpu->kvm->arch.wall_clock = data;
3675 kvm_write_wall_clock(vcpu->kvm, data, 0);
3676 break;
3677 case MSR_KVM_WALL_CLOCK:
3678 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3679 return 1;
3680
3681 vcpu->kvm->arch.wall_clock = data;
3682 kvm_write_wall_clock(vcpu->kvm, data, 0);
3683 break;
3684 case MSR_KVM_SYSTEM_TIME_NEW:
3685 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3686 return 1;
3687
3688 kvm_write_system_time(vcpu, data, false, msr_info->host_initiated);
3689 break;
3690 case MSR_KVM_SYSTEM_TIME:
3691 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3692 return 1;
3693
3694 kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
3695 break;
3696 case MSR_KVM_ASYNC_PF_EN:
3697 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
3698 return 1;
3699
3700 if (kvm_pv_enable_async_pf(vcpu, data))
3701 return 1;
3702 break;
3703 case MSR_KVM_ASYNC_PF_INT:
3704 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3705 return 1;
3706
3707 if (kvm_pv_enable_async_pf_int(vcpu, data))
3708 return 1;
3709 break;
3710 case MSR_KVM_ASYNC_PF_ACK:
3711 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3712 return 1;
3713 if (data & 0x1) {
3714 vcpu->arch.apf.pageready_pending = false;
3715 kvm_check_async_pf_completion(vcpu);
3716 }
3717 break;
3718 case MSR_KVM_STEAL_TIME:
3719 if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
3720 return 1;
3721
3722 if (unlikely(!sched_info_on()))
3723 return 1;
3724
3725 if (data & KVM_STEAL_RESERVED_MASK)
3726 return 1;
3727
3728 vcpu->arch.st.msr_val = data;
3729
3730 if (!(data & KVM_MSR_ENABLED))
3731 break;
3732
3733 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
3734
3735 break;
3736 case MSR_KVM_PV_EOI_EN:
3737 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
3738 return 1;
3739
3740 if (kvm_lapic_set_pv_eoi(vcpu, data, sizeof(u8)))
3741 return 1;
3742 break;
3743
3744 case MSR_KVM_POLL_CONTROL:
3745 if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
3746 return 1;
3747
3748
3749 if (data & (-1ULL << 1))
3750 return 1;
3751
3752 vcpu->arch.msr_kvm_poll_control = data;
3753 break;
3754
3755 case MSR_IA32_MCG_CTL:
3756 case MSR_IA32_MCG_STATUS:
3757 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3758 case MSR_IA32_MC0_CTL2 ... MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) - 1:
3759 return set_msr_mce(vcpu, msr_info);
3760
3761 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
3762 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
3763 pr = true;
3764 fallthrough;
3765 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
3766 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
3767 if (kvm_pmu_is_valid_msr(vcpu, msr))
3768 return kvm_pmu_set_msr(vcpu, msr_info);
3769
3770 if (pr || data != 0)
3771 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
3772 "0x%x data 0x%llx\n", msr, data);
3773 break;
3774 case MSR_K7_CLK_CTL:
3775
3776
3777
3778
3779
3780
3781
3782
3783 break;
3784 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
3785 case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
3786 case HV_X64_MSR_SYNDBG_OPTIONS:
3787 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
3788 case HV_X64_MSR_CRASH_CTL:
3789 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
3790 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
3791 case HV_X64_MSR_TSC_EMULATION_CONTROL:
3792 case HV_X64_MSR_TSC_EMULATION_STATUS:
3793 return kvm_hv_set_msr_common(vcpu, msr, data,
3794 msr_info->host_initiated);
3795 case MSR_IA32_BBL_CR_CTL3:
3796
3797
3798
3799 if (report_ignored_msrs)
3800 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
3801 msr, data);
3802 break;
3803 case MSR_AMD64_OSVW_ID_LENGTH:
3804 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3805 return 1;
3806 vcpu->arch.osvw.length = data;
3807 break;
3808 case MSR_AMD64_OSVW_STATUS:
3809 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3810 return 1;
3811 vcpu->arch.osvw.status = data;
3812 break;
3813 case MSR_PLATFORM_INFO:
3814 if (!msr_info->host_initiated ||
3815 (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
3816 cpuid_fault_enabled(vcpu)))
3817 return 1;
3818 vcpu->arch.msr_platform_info = data;
3819 break;
3820 case MSR_MISC_FEATURES_ENABLES:
3821 if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
3822 (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3823 !supports_cpuid_fault(vcpu)))
3824 return 1;
3825 vcpu->arch.msr_misc_features_enables = data;
3826 break;
3827 #ifdef CONFIG_X86_64
3828 case MSR_IA32_XFD:
3829 if (!msr_info->host_initiated &&
3830 !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
3831 return 1;
3832
3833 if (data & ~kvm_guest_supported_xfd(vcpu))
3834 return 1;
3835
3836 fpu_update_guest_xfd(&vcpu->arch.guest_fpu, data);
3837 break;
3838 case MSR_IA32_XFD_ERR:
3839 if (!msr_info->host_initiated &&
3840 !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
3841 return 1;
3842
3843 if (data & ~kvm_guest_supported_xfd(vcpu))
3844 return 1;
3845
3846 vcpu->arch.guest_fpu.xfd_err = data;
3847 break;
3848 #endif
3849 case MSR_IA32_PEBS_ENABLE:
3850 case MSR_IA32_DS_AREA:
3851 case MSR_PEBS_DATA_CFG:
3852 case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
3853 if (kvm_pmu_is_valid_msr(vcpu, msr))
3854 return kvm_pmu_set_msr(vcpu, msr_info);
3855
3856
3857
3858
3859 return !msr_info->host_initiated || data;
3860 default:
3861 if (kvm_pmu_is_valid_msr(vcpu, msr))
3862 return kvm_pmu_set_msr(vcpu, msr_info);
3863 return KVM_MSR_RET_INVALID;
3864 }
3865 return 0;
3866 }
3867 EXPORT_SYMBOL_GPL(kvm_set_msr_common);
3868
3869 static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
3870 {
3871 u64 data;
3872 u64 mcg_cap = vcpu->arch.mcg_cap;
3873 unsigned bank_num = mcg_cap & 0xff;
3874 u32 offset, last_msr;
3875
3876 switch (msr) {
3877 case MSR_IA32_P5_MC_ADDR:
3878 case MSR_IA32_P5_MC_TYPE:
3879 data = 0;
3880 break;
3881 case MSR_IA32_MCG_CAP:
3882 data = vcpu->arch.mcg_cap;
3883 break;
3884 case MSR_IA32_MCG_CTL:
3885 if (!(mcg_cap & MCG_CTL_P) && !host)
3886 return 1;
3887 data = vcpu->arch.mcg_ctl;
3888 break;
3889 case MSR_IA32_MCG_STATUS:
3890 data = vcpu->arch.mcg_status;
3891 break;
3892 case MSR_IA32_MC0_CTL2 ... MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) - 1:
3893 last_msr = MSR_IA32_MCx_CTL2(bank_num) - 1;
3894 if (msr > last_msr)
3895 return 1;
3896
3897 if (!(mcg_cap & MCG_CMCI_P) && !host)
3898 return 1;
3899 offset = array_index_nospec(msr - MSR_IA32_MC0_CTL2,
3900 last_msr + 1 - MSR_IA32_MC0_CTL2);
3901 data = vcpu->arch.mci_ctl2_banks[offset];
3902 break;
3903 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3904 last_msr = MSR_IA32_MCx_CTL(bank_num) - 1;
3905 if (msr > last_msr)
3906 return 1;
3907
3908 offset = array_index_nospec(msr - MSR_IA32_MC0_CTL,
3909 last_msr + 1 - MSR_IA32_MC0_CTL);
3910 data = vcpu->arch.mce_banks[offset];
3911 break;
3912 default:
3913 return 1;
3914 }
3915 *pdata = data;
3916 return 0;
3917 }
3918
3919 int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3920 {
3921 switch (msr_info->index) {
3922 case MSR_IA32_PLATFORM_ID:
3923 case MSR_IA32_EBL_CR_POWERON:
3924 case MSR_IA32_LASTBRANCHFROMIP:
3925 case MSR_IA32_LASTBRANCHTOIP:
3926 case MSR_IA32_LASTINTFROMIP:
3927 case MSR_IA32_LASTINTTOIP:
3928 case MSR_AMD64_SYSCFG:
3929 case MSR_K8_TSEG_ADDR:
3930 case MSR_K8_TSEG_MASK:
3931 case MSR_VM_HSAVE_PA:
3932 case MSR_K8_INT_PENDING_MSG:
3933 case MSR_AMD64_NB_CFG:
3934 case MSR_FAM10H_MMIO_CONF_BASE:
3935 case MSR_AMD64_BU_CFG2:
3936 case MSR_IA32_PERF_CTL:
3937 case MSR_AMD64_DC_CFG:
3938 case MSR_F15H_EX_CFG:
3939
3940
3941
3942
3943
3944
3945 case MSR_RAPL_POWER_UNIT:
3946 case MSR_PP0_ENERGY_STATUS:
3947 case MSR_PP1_ENERGY_STATUS:
3948 case MSR_PKG_ENERGY_STATUS:
3949 case MSR_DRAM_ENERGY_STATUS:
3950 msr_info->data = 0;
3951 break;
3952 case MSR_IA32_PEBS_ENABLE:
3953 case MSR_IA32_DS_AREA:
3954 case MSR_PEBS_DATA_CFG:
3955 case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
3956 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3957 return kvm_pmu_get_msr(vcpu, msr_info);
3958
3959
3960
3961
3962 if (!msr_info->host_initiated)
3963 return 1;
3964 msr_info->data = 0;
3965 break;
3966 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
3967 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
3968 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
3969 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
3970 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3971 return kvm_pmu_get_msr(vcpu, msr_info);
3972 msr_info->data = 0;
3973 break;
3974 case MSR_IA32_UCODE_REV:
3975 msr_info->data = vcpu->arch.microcode_version;
3976 break;
3977 case MSR_IA32_ARCH_CAPABILITIES:
3978 if (!msr_info->host_initiated &&
3979 !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
3980 return 1;
3981 msr_info->data = vcpu->arch.arch_capabilities;
3982 break;
3983 case MSR_IA32_PERF_CAPABILITIES:
3984 if (!msr_info->host_initiated &&
3985 !guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
3986 return 1;
3987 msr_info->data = vcpu->arch.perf_capabilities;
3988 break;
3989 case MSR_IA32_POWER_CTL:
3990 msr_info->data = vcpu->arch.msr_ia32_power_ctl;
3991 break;
3992 case MSR_IA32_TSC: {
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002 u64 offset, ratio;
4003
4004 if (msr_info->host_initiated) {
4005 offset = vcpu->arch.l1_tsc_offset;
4006 ratio = vcpu->arch.l1_tsc_scaling_ratio;
4007 } else {
4008 offset = vcpu->arch.tsc_offset;
4009 ratio = vcpu->arch.tsc_scaling_ratio;
4010 }
4011
4012 msr_info->data = kvm_scale_tsc(rdtsc(), ratio) + offset;
4013 break;
4014 }
4015 case MSR_MTRRcap:
4016 case 0x200 ... MSR_IA32_MC0_CTL2 - 1:
4017 case MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) ... 0x2ff:
4018 return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
4019 case 0xcd:
4020 msr_info->data = 3;
4021 break;
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033 case MSR_EBC_FREQUENCY_ID:
4034 msr_info->data = 1 << 24;
4035 break;
4036 case MSR_IA32_APICBASE:
4037 msr_info->data = kvm_get_apic_base(vcpu);
4038 break;
4039 case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
4040 return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
4041 case MSR_IA32_TSC_DEADLINE:
4042 msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
4043 break;
4044 case MSR_IA32_TSC_ADJUST:
4045 msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
4046 break;
4047 case MSR_IA32_MISC_ENABLE:
4048 msr_info->data = vcpu->arch.ia32_misc_enable_msr;
4049 break;
4050 case MSR_IA32_SMBASE:
4051 if (!msr_info->host_initiated)
4052 return 1;
4053 msr_info->data = vcpu->arch.smbase;
4054 break;
4055 case MSR_SMI_COUNT:
4056 msr_info->data = vcpu->arch.smi_count;
4057 break;
4058 case MSR_IA32_PERF_STATUS:
4059
4060 msr_info->data = 1000ULL;
4061
4062 msr_info->data |= (((uint64_t)4ULL) << 40);
4063 break;
4064 case MSR_EFER:
4065 msr_info->data = vcpu->arch.efer;
4066 break;
4067 case MSR_KVM_WALL_CLOCK:
4068 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
4069 return 1;
4070
4071 msr_info->data = vcpu->kvm->arch.wall_clock;
4072 break;
4073 case MSR_KVM_WALL_CLOCK_NEW:
4074 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
4075 return 1;
4076
4077 msr_info->data = vcpu->kvm->arch.wall_clock;
4078 break;
4079 case MSR_KVM_SYSTEM_TIME:
4080 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
4081 return 1;
4082
4083 msr_info->data = vcpu->arch.time;
4084 break;
4085 case MSR_KVM_SYSTEM_TIME_NEW:
4086 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
4087 return 1;
4088
4089 msr_info->data = vcpu->arch.time;
4090 break;
4091 case MSR_KVM_ASYNC_PF_EN:
4092 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
4093 return 1;
4094
4095 msr_info->data = vcpu->arch.apf.msr_en_val;
4096 break;
4097 case MSR_KVM_ASYNC_PF_INT:
4098 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
4099 return 1;
4100
4101 msr_info->data = vcpu->arch.apf.msr_int_val;
4102 break;
4103 case MSR_KVM_ASYNC_PF_ACK:
4104 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
4105 return 1;
4106
4107 msr_info->data = 0;
4108 break;
4109 case MSR_KVM_STEAL_TIME:
4110 if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
4111 return 1;
4112
4113 msr_info->data = vcpu->arch.st.msr_val;
4114 break;
4115 case MSR_KVM_PV_EOI_EN:
4116 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
4117 return 1;
4118
4119 msr_info->data = vcpu->arch.pv_eoi.msr_val;
4120 break;
4121 case MSR_KVM_POLL_CONTROL:
4122 if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
4123 return 1;
4124
4125 msr_info->data = vcpu->arch.msr_kvm_poll_control;
4126 break;
4127 case MSR_IA32_P5_MC_ADDR:
4128 case MSR_IA32_P5_MC_TYPE:
4129 case MSR_IA32_MCG_CAP:
4130 case MSR_IA32_MCG_CTL:
4131 case MSR_IA32_MCG_STATUS:
4132 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
4133 case MSR_IA32_MC0_CTL2 ... MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) - 1:
4134 return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
4135 msr_info->host_initiated);
4136 case MSR_IA32_XSS:
4137 if (!msr_info->host_initiated &&
4138 !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
4139 return 1;
4140 msr_info->data = vcpu->arch.ia32_xss;
4141 break;
4142 case MSR_K7_CLK_CTL:
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152 msr_info->data = 0x20000000;
4153 break;
4154 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
4155 case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
4156 case HV_X64_MSR_SYNDBG_OPTIONS:
4157 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
4158 case HV_X64_MSR_CRASH_CTL:
4159 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
4160 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
4161 case HV_X64_MSR_TSC_EMULATION_CONTROL:
4162 case HV_X64_MSR_TSC_EMULATION_STATUS:
4163 return kvm_hv_get_msr_common(vcpu,
4164 msr_info->index, &msr_info->data,
4165 msr_info->host_initiated);
4166 case MSR_IA32_BBL_CR_CTL3:
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177 msr_info->data = 0xbe702111;
4178 break;
4179 case MSR_AMD64_OSVW_ID_LENGTH:
4180 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
4181 return 1;
4182 msr_info->data = vcpu->arch.osvw.length;
4183 break;
4184 case MSR_AMD64_OSVW_STATUS:
4185 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
4186 return 1;
4187 msr_info->data = vcpu->arch.osvw.status;
4188 break;
4189 case MSR_PLATFORM_INFO:
4190 if (!msr_info->host_initiated &&
4191 !vcpu->kvm->arch.guest_can_read_msr_platform_info)
4192 return 1;
4193 msr_info->data = vcpu->arch.msr_platform_info;
4194 break;
4195 case MSR_MISC_FEATURES_ENABLES:
4196 msr_info->data = vcpu->arch.msr_misc_features_enables;
4197 break;
4198 case MSR_K7_HWCR:
4199 msr_info->data = vcpu->arch.msr_hwcr;
4200 break;
4201 #ifdef CONFIG_X86_64
4202 case MSR_IA32_XFD:
4203 if (!msr_info->host_initiated &&
4204 !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
4205 return 1;
4206
4207 msr_info->data = vcpu->arch.guest_fpu.fpstate->xfd;
4208 break;
4209 case MSR_IA32_XFD_ERR:
4210 if (!msr_info->host_initiated &&
4211 !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
4212 return 1;
4213
4214 msr_info->data = vcpu->arch.guest_fpu.xfd_err;
4215 break;
4216 #endif
4217 default:
4218 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
4219 return kvm_pmu_get_msr(vcpu, msr_info);
4220 return KVM_MSR_RET_INVALID;
4221 }
4222 return 0;
4223 }
4224 EXPORT_SYMBOL_GPL(kvm_get_msr_common);
4225
4226
4227
4228
4229
4230
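/*
 * Read or write a batch of MSRs.  All parameters are kernel addresses.
 * Returns the number of MSRs processed successfully; processing stops at
 * the first failing entry.
 */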
4231 static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
4232 struct kvm_msr_entry *entries,
4233 int (*do_msr)(struct kvm_vcpu *vcpu,
4234 unsigned index, u64 *data))
4235 {
4236 int i;
4237
4238 for (i = 0; i < msrs->nmsrs; ++i)
4239 if (do_msr(vcpu, entries[i].index, &entries[i].data))
4240 break;
4241
4242 return i;
4243 }
4244
4245
4246
4247
4248
4249
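/*
 * Read or write a batch of MSRs on behalf of userspace.  Parameters are
 * user addresses; the entry array is copied in, processed via __msr_io(),
 * and copied back out only for reads (writeback != 0).
 */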
4250 static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
4251 int (*do_msr)(struct kvm_vcpu *vcpu,
4252 unsigned index, u64 *data),
4253 int writeback)
4254 {
4255 struct kvm_msrs msrs;
4256 struct kvm_msr_entry *entries;
4257 int r, n;
4258 unsigned size;
4259
4260 r = -EFAULT;
4261 if (copy_from_user(&msrs, user_msrs, sizeof(msrs)))
4262 goto out;
4263
4264 r = -E2BIG;
4265 if (msrs.nmsrs >= MAX_IO_MSRS)
4266 goto out;
4267
4268 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
4269 entries = memdup_user(user_msrs->entries, size);
4270 if (IS_ERR(entries)) {
4271 r = PTR_ERR(entries);
4272 goto out;
4273 }
4274
4275 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
4276 if (r < 0)
4277 goto out_free;
4278
4279 r = -EFAULT;
4280 if (writeback && copy_to_user(user_msrs->entries, entries, size))
4281 goto out_free;
4282
4283 r = n;
4284
4285 out_free:
4286 kfree(entries);
4287 out:
4288 return r;
4289 }
4290
4291 static inline bool kvm_can_mwait_in_guest(void)
4292 {
4293 return boot_cpu_has(X86_FEATURE_MWAIT) &&
4294 !boot_cpu_has_bug(X86_BUG_MONITOR) &&
4295 boot_cpu_has(X86_FEATURE_ARAT);
4296 }
4297
4298 static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu,
4299 struct kvm_cpuid2 __user *cpuid_arg)
4300 {
4301 struct kvm_cpuid2 cpuid;
4302 int r;
4303
4304 r = -EFAULT;
4305 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4306 return r;
4307
4308 r = kvm_get_hv_cpuid(vcpu, &cpuid, cpuid_arg->entries);
4309 if (r)
4310 return r;
4311
4312 r = -EFAULT;
4313 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4314 return r;
4315
4316 return 0;
4317 }
4318
4319 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
4320 {
4321 int r = 0;
4322
4323 switch (ext) {
4324 case KVM_CAP_IRQCHIP:
4325 case KVM_CAP_HLT:
4326 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
4327 case KVM_CAP_SET_TSS_ADDR:
4328 case KVM_CAP_EXT_CPUID:
4329 case KVM_CAP_EXT_EMUL_CPUID:
4330 case KVM_CAP_CLOCKSOURCE:
4331 case KVM_CAP_PIT:
4332 case KVM_CAP_NOP_IO_DELAY:
4333 case KVM_CAP_MP_STATE:
4334 case KVM_CAP_SYNC_MMU:
4335 case KVM_CAP_USER_NMI:
4336 case KVM_CAP_REINJECT_CONTROL:
4337 case KVM_CAP_IRQ_INJECT_STATUS:
4338 case KVM_CAP_IOEVENTFD:
4339 case KVM_CAP_IOEVENTFD_NO_LENGTH:
4340 case KVM_CAP_PIT2:
4341 case KVM_CAP_PIT_STATE2:
4342 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
4343 case KVM_CAP_VCPU_EVENTS:
4344 case KVM_CAP_HYPERV:
4345 case KVM_CAP_HYPERV_VAPIC:
4346 case KVM_CAP_HYPERV_SPIN:
4347 case KVM_CAP_HYPERV_SYNIC:
4348 case KVM_CAP_HYPERV_SYNIC2:
4349 case KVM_CAP_HYPERV_VP_INDEX:
4350 case KVM_CAP_HYPERV_EVENTFD:
4351 case KVM_CAP_HYPERV_TLBFLUSH:
4352 case KVM_CAP_HYPERV_SEND_IPI:
4353 case KVM_CAP_HYPERV_CPUID:
4354 case KVM_CAP_HYPERV_ENFORCE_CPUID:
4355 case KVM_CAP_SYS_HYPERV_CPUID:
4356 case KVM_CAP_PCI_SEGMENT:
4357 case KVM_CAP_DEBUGREGS:
4358 case KVM_CAP_X86_ROBUST_SINGLESTEP:
4359 case KVM_CAP_XSAVE:
4360 case KVM_CAP_ASYNC_PF:
4361 case KVM_CAP_ASYNC_PF_INT:
4362 case KVM_CAP_GET_TSC_KHZ:
4363 case KVM_CAP_KVMCLOCK_CTRL:
4364 case KVM_CAP_READONLY_MEM:
4365 case KVM_CAP_HYPERV_TIME:
4366 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
4367 case KVM_CAP_TSC_DEADLINE_TIMER:
4368 case KVM_CAP_DISABLE_QUIRKS:
4369 case KVM_CAP_SET_BOOT_CPU_ID:
4370 case KVM_CAP_SPLIT_IRQCHIP:
4371 case KVM_CAP_IMMEDIATE_EXIT:
4372 case KVM_CAP_PMU_EVENT_FILTER:
4373 case KVM_CAP_GET_MSR_FEATURES:
4374 case KVM_CAP_MSR_PLATFORM_INFO:
4375 case KVM_CAP_EXCEPTION_PAYLOAD:
4376 case KVM_CAP_X86_TRIPLE_FAULT_EVENT:
4377 case KVM_CAP_SET_GUEST_DEBUG:
4378 case KVM_CAP_LAST_CPU:
4379 case KVM_CAP_X86_USER_SPACE_MSR:
4380 case KVM_CAP_X86_MSR_FILTER:
4381 case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
4382 #ifdef CONFIG_X86_SGX_KVM
4383 case KVM_CAP_SGX_ATTRIBUTE:
4384 #endif
4385 case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
4386 case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
4387 case KVM_CAP_SREGS2:
4388 case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
4389 case KVM_CAP_VCPU_ATTRIBUTES:
4390 case KVM_CAP_SYS_ATTRIBUTES:
4391 case KVM_CAP_VAPIC:
4392 case KVM_CAP_ENABLE_CAP:
4393 case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES:
4394 r = 1;
4395 break;
4396 case KVM_CAP_EXIT_HYPERCALL:
4397 r = KVM_EXIT_HYPERCALL_VALID_MASK;
4398 break;
4399 case KVM_CAP_SET_GUEST_DEBUG2:
4400 return KVM_GUESTDBG_VALID_MASK;
4401 #ifdef CONFIG_KVM_XEN
4402 case KVM_CAP_XEN_HVM:
4403 r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
4404 KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
4405 KVM_XEN_HVM_CONFIG_SHARED_INFO |
4406 KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL |
4407 KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
4408 if (sched_info_on())
4409 r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
4410 break;
4411 #endif
4412 case KVM_CAP_SYNC_REGS:
4413 r = KVM_SYNC_X86_VALID_FIELDS;
4414 break;
4415 case KVM_CAP_ADJUST_CLOCK:
4416 r = KVM_CLOCK_VALID_FLAGS;
4417 break;
4418 case KVM_CAP_X86_DISABLE_EXITS:
4419 r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
4420 KVM_X86_DISABLE_EXITS_CSTATE;
4421 if (kvm_can_mwait_in_guest())
4422 r |= KVM_X86_DISABLE_EXITS_MWAIT;
4423 break;
4424 case KVM_CAP_X86_SMM:
4425 /*
4426  * SMBASE is usually relocated above 1M on modern chipsets, and SMM
4427  * handlers might indeed rely on 4G segment limits, so do not report
4428  * SMM to be available if real mode is emulated via vm86 mode.  Still,
4429  * do not go to great lengths to avoid userspace's usage of the
4430  * feature, because it is a false positive rather than a false
4431  * negative.
4432  */
4433 r = static_call(kvm_x86_has_emulated_msr)(kvm, MSR_IA32_SMBASE);
4434 break;
4435 case KVM_CAP_NR_VCPUS:
4436 r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
4437 break;
4438 case KVM_CAP_MAX_VCPUS:
4439 r = KVM_MAX_VCPUS;
4440 break;
4441 case KVM_CAP_MAX_VCPU_ID:
4442 r = KVM_MAX_VCPU_IDS;
4443 break;
4444 case KVM_CAP_PV_MMU:
4445 r = 0;
4446 break;
4447 case KVM_CAP_MCE:
4448 r = KVM_MAX_MCE_BANKS;
4449 break;
4450 case KVM_CAP_XCRS:
4451 r = boot_cpu_has(X86_FEATURE_XSAVE);
4452 break;
4453 case KVM_CAP_TSC_CONTROL:
4454 case KVM_CAP_VM_TSC_CONTROL:
4455 r = kvm_caps.has_tsc_control;
4456 break;
4457 case KVM_CAP_X2APIC_API:
4458 r = KVM_X2APIC_API_VALID_FLAGS;
4459 break;
4460 case KVM_CAP_NESTED_STATE:
4461 r = kvm_x86_ops.nested_ops->get_state ?
4462 kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0;
4463 break;
4464 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
4465 r = kvm_x86_ops.enable_direct_tlbflush != NULL;
4466 break;
4467 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
4468 r = kvm_x86_ops.nested_ops->enable_evmcs != NULL;
4469 break;
4470 case KVM_CAP_SMALLER_MAXPHYADDR:
4471 r = (int) allow_smaller_maxphyaddr;
4472 break;
4473 case KVM_CAP_STEAL_TIME:
4474 r = sched_info_on();
4475 break;
4476 case KVM_CAP_X86_BUS_LOCK_EXIT:
4477 if (kvm_caps.has_bus_lock_exit)
4478 r = KVM_BUS_LOCK_DETECTION_OFF |
4479 KVM_BUS_LOCK_DETECTION_EXIT;
4480 else
4481 r = 0;
4482 break;
4483 case KVM_CAP_XSAVE2: {
4484 u64 guest_perm = xstate_get_guest_group_perm();
4485
4486 r = xstate_required_size(kvm_caps.supported_xcr0 & guest_perm, false);
4487 if (r < sizeof(struct kvm_xsave))
4488 r = sizeof(struct kvm_xsave);
4489 break;
4490 }
4491 case KVM_CAP_PMU_CAPABILITY:
4492 r = enable_pmu ? KVM_CAP_PMU_VALID_MASK : 0;
4493 break;
4494 case KVM_CAP_DISABLE_QUIRKS2:
4495 r = KVM_X86_VALID_QUIRKS;
4496 break;
4497 case KVM_CAP_X86_NOTIFY_VMEXIT:
4498 r = kvm_caps.has_notify_vmexit;
4499 break;
4500 default:
4501 break;
4502 }
4503 return r;
4504 }
4505
4506 static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr)
4507 {
4508 void __user *uaddr = (void __user*)(unsigned long)attr->addr;
4509
4510 if ((u64)(unsigned long)uaddr != attr->addr)
4511 return ERR_PTR_USR(-EFAULT);
4512 return uaddr;
4513 }
4514
4515 static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr)
4516 {
4517 u64 __user *uaddr = kvm_get_attr_addr(attr);
4518
4519 if (attr->group)
4520 return -ENXIO;
4521
4522 if (IS_ERR(uaddr))
4523 return PTR_ERR(uaddr);
4524
4525 switch (attr->attr) {
4526 case KVM_X86_XCOMP_GUEST_SUPP:
4527 if (put_user(kvm_caps.supported_xcr0, uaddr))
4528 return -EFAULT;
4529 return 0;
4530 default:
4531 return -ENXIO;
4533 }
4534 }
4535
4536 static int kvm_x86_dev_has_attr(struct kvm_device_attr *attr)
4537 {
4538 if (attr->group)
4539 return -ENXIO;
4540
4541 switch (attr->attr) {
4542 case KVM_X86_XCOMP_GUEST_SUPP:
4543 return 0;
4544 default:
4545 return -ENXIO;
4546 }
4547 }
4548
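/*
 * Arch-specific ioctls on /dev/kvm itself (no VM or vCPU context): MSR and
 * CPUID enumeration, supported MCE capabilities and system-wide device
 * attributes.
 */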
4549 long kvm_arch_dev_ioctl(struct file *filp,
4550 unsigned int ioctl, unsigned long arg)
4551 {
4552 void __user *argp = (void __user *)arg;
4553 long r;
4554
4555 switch (ioctl) {
4556 case KVM_GET_MSR_INDEX_LIST: {
4557 struct kvm_msr_list __user *user_msr_list = argp;
4558 struct kvm_msr_list msr_list;
4559 unsigned n;
4560
4561 r = -EFAULT;
4562 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
4563 goto out;
4564 n = msr_list.nmsrs;
4565 msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
4566 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
4567 goto out;
4568 r = -E2BIG;
4569 if (n < msr_list.nmsrs)
4570 goto out;
4571 r = -EFAULT;
4572 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
4573 num_msrs_to_save * sizeof(u32)))
4574 goto out;
4575 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
4576 &emulated_msrs,
4577 num_emulated_msrs * sizeof(u32)))
4578 goto out;
4579 r = 0;
4580 break;
4581 }
4582 case KVM_GET_SUPPORTED_CPUID:
4583 case KVM_GET_EMULATED_CPUID: {
4584 struct kvm_cpuid2 __user *cpuid_arg = argp;
4585 struct kvm_cpuid2 cpuid;
4586
4587 r = -EFAULT;
4588 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4589 goto out;
4590
4591 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
4592 ioctl);
4593 if (r)
4594 goto out;
4595
4596 r = -EFAULT;
4597 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4598 goto out;
4599 r = 0;
4600 break;
4601 }
4602 case KVM_X86_GET_MCE_CAP_SUPPORTED:
4603 r = -EFAULT;
4604 if (copy_to_user(argp, &kvm_caps.supported_mce_cap,
4605 sizeof(kvm_caps.supported_mce_cap)))
4606 goto out;
4607 r = 0;
4608 break;
4609 case KVM_GET_MSR_FEATURE_INDEX_LIST: {
4610 struct kvm_msr_list __user *user_msr_list = argp;
4611 struct kvm_msr_list msr_list;
4612 unsigned int n;
4613
4614 r = -EFAULT;
4615 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
4616 goto out;
4617 n = msr_list.nmsrs;
4618 msr_list.nmsrs = num_msr_based_features;
4619 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
4620 goto out;
4621 r = -E2BIG;
4622 if (n < msr_list.nmsrs)
4623 goto out;
4624 r = -EFAULT;
4625 if (copy_to_user(user_msr_list->indices, &msr_based_features,
4626 num_msr_based_features * sizeof(u32)))
4627 goto out;
4628 r = 0;
4629 break;
4630 }
4631 case KVM_GET_MSRS:
4632 r = msr_io(NULL, argp, do_get_msr_feature, 1);
4633 break;
4634 case KVM_GET_SUPPORTED_HV_CPUID:
4635 r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp);
4636 break;
4637 case KVM_GET_DEVICE_ATTR: {
4638 struct kvm_device_attr attr;
4639 r = -EFAULT;
4640 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
4641 break;
4642 r = kvm_x86_dev_get_attr(&attr);
4643 break;
4644 }
4645 case KVM_HAS_DEVICE_ATTR: {
4646 struct kvm_device_attr attr;
4647 r = -EFAULT;
4648 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
4649 break;
4650 r = kvm_x86_dev_has_attr(&attr);
4651 break;
4652 }
4653 default:
4654 r = -EINVAL;
4655 break;
4656 }
4657 out:
4658 return r;
4659 }
4660
4661 static void wbinvd_ipi(void *garbage)
4662 {
4663 wbinvd();
4664 }
4665
4666 static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
4667 {
4668 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
4669 }
4670
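/*
 * Called when a vCPU is (re)scheduled onto a physical CPU: emulate WBINVD
 * if non-coherent DMA requires it, hand off to vendor code, and compensate
 * for TSC differences between the old and the new CPU.
 */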
4671 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
4672 {
4673 /* Address WBINVD may be executed by guest */
4674 if (need_emulate_wbinvd(vcpu)) {
4675 if (static_call(kvm_x86_has_wbinvd_exit)())
4676 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
4677 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
4678 smp_call_function_single(vcpu->cpu,
4679 wbinvd_ipi, NULL, 1);
4680 }
4681
4682 static_call(kvm_x86_vcpu_load)(vcpu, cpu);
4683
4684 /* Save host pkru register if supported */
4685 vcpu->arch.host_pkru = read_pkru();
4686
4687 /* Apply any externally detected TSC adjustments (due to suspend) */
4688 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
4689 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
4690 vcpu->arch.tsc_offset_adjustment = 0;
4691 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
4692 }
4693
4694 if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
4695 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
4696 rdtsc() - vcpu->arch.last_host_tsc;
4697 if (tsc_delta < 0)
4698 mark_tsc_unstable("KVM discovered backwards TSC");
4699
4700 if (kvm_check_tsc_unstable()) {
4701 u64 offset = kvm_compute_l1_tsc_offset(vcpu,
4702 vcpu->arch.last_guest_tsc);
4703 kvm_vcpu_write_tsc_offset(vcpu, offset);
4704 vcpu->arch.tsc_catchup = 1;
4705 }
4706
4707 if (kvm_lapic_hv_timer_in_use(vcpu))
4708 kvm_lapic_restart_hv_timer(vcpu);
4709
4710 /*
4711  * On a host with synchronized TSC, there is no need to update
4712  * kvmclock on vcpu->cpu migration.
4713  */
4714 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
4715 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
4716 if (vcpu->cpu != cpu)
4717 kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
4718 vcpu->cpu = cpu;
4719 }
4720
4721 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
4722 }
4723
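/*
 * Record KVM_VCPU_PREEMPTED in the guest's steal-time area so the guest
 * scheduler can tell that this vCPU has lost its physical CPU.
 */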
4724 static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
4725 {
4726 struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
4727 struct kvm_steal_time __user *st;
4728 struct kvm_memslots *slots;
4729 static const u8 preempted = KVM_VCPU_PREEMPTED;
4730 gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
4731
4732 /*
4733  * The vCPU can be marked preempted if and only if the VM-Exit was on
4734  * an instruction boundary and will not trigger guest emulation of any
4735  * kind (see vcpu_run).  Vendor specific code sets (conservatively)
4736  * at_instruction_boundary to flag exits that happened on such a
4737  * boundary.
4738  */
4739 if (!vcpu->arch.at_instruction_boundary) {
4740 vcpu->stat.preemption_other++;
4741 return;
4742 }
4743
4744 vcpu->stat.preemption_reported++;
4745 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
4746 return;
4747
4748 if (vcpu->arch.st.preempted)
4749 return;
4750
4751 /* This happens on process exit */
4752 if (unlikely(current->mm != vcpu->kvm->mm))
4753 return;
4754
4755 slots = kvm_memslots(vcpu->kvm);
4756
4757 if (unlikely(slots->generation != ghc->generation ||
4758 gpa != ghc->gpa ||
4759 kvm_is_error_hva(ghc->hva) || !ghc->memslot))
4760 return;
4761
4762 st = (struct kvm_steal_time __user *)ghc->hva;
4763 BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));
4764
4765 if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
4766 vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
4767
4768 mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
4769 }
4770
4771 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
4772 {
4773 int idx;
4774
4775 if (vcpu->preempted) {
4776 if (!vcpu->arch.guest_state_protected)
4777 vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
4778
4779 /*
4780  * Take the srcu lock as memslots will be accessed to check the gfn
4781  * cache generation against the memslots generation.
4782  */
4783 idx = srcu_read_lock(&vcpu->kvm->srcu);
4784 if (kvm_xen_msr_enabled(vcpu->kvm))
4785 kvm_xen_runstate_set_preempted(vcpu);
4786 else
4787 kvm_steal_time_set_preempted(vcpu);
4788 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4789 }
4790
4791 static_call(kvm_x86_vcpu_put)(vcpu);
4792 vcpu->arch.last_host_tsc = rdtsc();
4793 }
4794
4795 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
4796 struct kvm_lapic_state *s)
4797 {
4798 static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
4799
4800 return kvm_apic_get_state(vcpu, s);
4801 }
4802
4803 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
4804 struct kvm_lapic_state *s)
4805 {
4806 int r;
4807
4808 r = kvm_apic_set_state(vcpu, s);
4809 if (r)
4810 return r;
4811 update_cr8_intercept(vcpu);
4812
4813 return 0;
4814 }
4815
4816 static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
4817 {
4818 /*
4819  * We can accept userspace's request for interrupt injection
4820  * as long as we have a place to store the interrupt number.
4821  * The actual injection will happen when the CPU is able to
4822  * deliver the interrupt.
4823  */
4824 if (kvm_cpu_has_extint(vcpu))
4825 return false;
4826
4827 /* With an in-kernel LAPIC, only accept the interrupt if the LAPIC will take PIC (ExtINT) interrupts. */
4828 return (!lapic_in_kernel(vcpu) ||
4829 kvm_apic_accept_pic_intr(vcpu));
4830 }
4831
4832 static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
4833 {
4834 /*
4835  * Do not cause an interrupt window exit if an exception is pending
4836  * or an event needs reinjection; userspace might want to inject the
4837  * interrupt manually using KVM_SET_REGS or KVM_SET_SREGS.  For that
4838  * to work, the vCPU must be at an instruction boundary with no
4839  * events half-injected.
4840  */
4841 return (kvm_arch_interrupt_allowed(vcpu) &&
4842 kvm_cpu_accept_dm_intr(vcpu) &&
4843 !kvm_event_needs_reinjection(vcpu) &&
4844 !vcpu->arch.exception.pending);
4845 }
4846
4847 static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
4848 struct kvm_interrupt *irq)
4849 {
4850 if (irq->irq >= KVM_NR_INTERRUPTS)
4851 return -EINVAL;
4852
4853 if (!irqchip_in_kernel(vcpu->kvm)) {
4854 kvm_queue_interrupt(vcpu, irq->irq, false);
4855 kvm_make_request(KVM_REQ_EVENT, vcpu);
4856 return 0;
4857 }
4858 /*
4859  * With an in-kernel LAPIC, this path is only used to inject ExtINT,
4860  * so fail if the 8259 PIC is also in the kernel.
4861  */
4862 
4863 if (pic_in_kernel(vcpu->kvm))
4864 return -ENXIO;
4865
4866 if (vcpu->arch.pending_external_vector != -1)
4867 return -EEXIST;
4868
4869 vcpu->arch.pending_external_vector = irq->irq;
4870 kvm_make_request(KVM_REQ_EVENT, vcpu);
4871 return 0;
4872 }
4873
4874 static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
4875 {
4876 kvm_inject_nmi(vcpu);
4877
4878 return 0;
4879 }
4880
4881 static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
4882 {
4883 kvm_make_request(KVM_REQ_SMI, vcpu);
4884
4885 return 0;
4886 }
4887
4888 static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
4889 struct kvm_tpr_access_ctl *tac)
4890 {
4891 if (tac->flags)
4892 return -EINVAL;
4893 vcpu->arch.tpr_access_reporting = !!tac->enabled;
4894 return 0;
4895 }
4896
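/*
 * KVM_X86_SETUP_MCE: validate the requested MCG_CAP against what KVM
 * supports and initialize the vCPU's machine-check bank state.
 */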
4897 static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
4898 u64 mcg_cap)
4899 {
4900 int r;
4901 unsigned bank_num = mcg_cap & 0xff, bank;
4902
4903 r = -EINVAL;
4904 if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
4905 goto out;
4906 if (mcg_cap & ~(kvm_caps.supported_mce_cap | 0xff | 0xff0000))
4907 goto out;
4908 r = 0;
4909 vcpu->arch.mcg_cap = mcg_cap;
4910 /* Init IA32_MCG_CTL to all 1s */
4911 if (mcg_cap & MCG_CTL_P)
4912 vcpu->arch.mcg_ctl = ~(u64)0;
4913 /* Init IA32_MCi_CTL to all 1s and, when CMCI is supported, IA32_MCi_CTL2 to 0 */
4914 for (bank = 0; bank < bank_num; bank++) {
4915 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
4916 if (mcg_cap & MCG_CMCI_P)
4917 vcpu->arch.mci_ctl2_banks[bank] = 0;
4918 }
4919
4920 kvm_apic_after_set_mcg_cap(vcpu);
4921
4922 static_call(kvm_x86_setup_mce)(vcpu);
4923 out:
4924 return r;
4925 }
4926
4927 /*
4928  * Validate this is an UCNA (uncorrectable no action) error by checking the
4929  * MCG_STATUS and MCi_STATUS registers:
4930  * - none of the bits for Machine Check Exceptions are set
4931  * - both the VAL (valid) and UC (uncorrectable) bits are set
4932  * MCI_STATUS_PCC - Processor Context Corrupted
4933  * MCI_STATUS_S - Signaled as a Machine Check Exception
4934  * MCI_STATUS_AR - Software recoverable Action Required
4935  */
4936 static bool is_ucna(struct kvm_x86_mce *mce)
4937 {
4938 return !mce->mcg_status &&
4939 !(mce->status & (MCI_STATUS_PCC | MCI_STATUS_S | MCI_STATUS_AR)) &&
4940 (mce->status & MCI_STATUS_VAL) &&
4941 (mce->status & MCI_STATUS_UC);
4942 }
4943
4944 static int kvm_vcpu_x86_set_ucna(struct kvm_vcpu *vcpu, struct kvm_x86_mce *mce, u64* banks)
4945 {
4946 u64 mcg_cap = vcpu->arch.mcg_cap;
4947
4948 banks[1] = mce->status;
4949 banks[2] = mce->addr;
4950 banks[3] = mce->misc;
4951 vcpu->arch.mcg_status = mce->mcg_status;
4952
4953 if (!(mcg_cap & MCG_CMCI_P) ||
4954 !(vcpu->arch.mci_ctl2_banks[mce->bank] & MCI_CTL2_CMCI_EN))
4955 return 0;
4956
4957 if (lapic_in_kernel(vcpu))
4958 kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTCMCI);
4959
4960 return 0;
4961 }
4962
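/*
 * KVM_X86_SET_MCE: inject a machine-check event.  UCNA errors are only
 * logged (optionally raising a CMCI); other errors are delivered as #MC,
 * or escalated to a triple fault if the guest cannot take the exception.
 */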
4963 static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
4964 struct kvm_x86_mce *mce)
4965 {
4966 u64 mcg_cap = vcpu->arch.mcg_cap;
4967 unsigned bank_num = mcg_cap & 0xff;
4968 u64 *banks = vcpu->arch.mce_banks;
4969
4970 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
4971 return -EINVAL;
4972
4973 banks += array_index_nospec(4 * mce->bank, 4 * bank_num);
4974
4975 if (is_ucna(mce))
4976 return kvm_vcpu_x86_set_ucna(vcpu, mce, banks);
4977
4978 /*
4979  * If IA32_MCG_CTL is not all 1s, the uncorrected error reporting
4980  * is disabled.
4981  */
4982 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
4983 vcpu->arch.mcg_ctl != ~(u64)0)
4984 return 0;
4985
4986 /*
4987  * If IA32_MCi_CTL is not all 1s, uncorrected error reporting is disabled for the bank.
4988  */
4989 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
4990 return 0;
4991 if (mce->status & MCI_STATUS_UC) {
4992 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
4993 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
4994 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
4995 return 0;
4996 }
4997 if (banks[1] & MCI_STATUS_VAL)
4998 mce->status |= MCI_STATUS_OVER;
4999 banks[2] = mce->addr;
5000 banks[3] = mce->misc;
5001 vcpu->arch.mcg_status = mce->mcg_status;
5002 banks[1] = mce->status;
5003 kvm_queue_exception(vcpu, MC_VECTOR);
5004 } else if (!(banks[1] & MCI_STATUS_VAL)
5005 || !(banks[1] & MCI_STATUS_UC)) {
5006 if (banks[1] & MCI_STATUS_VAL)
5007 mce->status |= MCI_STATUS_OVER;
5008 banks[2] = mce->addr;
5009 banks[3] = mce->misc;
5010 banks[1] = mce->status;
5011 } else
5012 banks[1] |= MCI_STATUS_OVER;
5013 return 0;
5014 }
5015
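/*
 * KVM_GET_VCPU_EVENTS: snapshot pending/injected exceptions, interrupts,
 * NMIs, SMIs and any triple-fault request for userspace, e.g. for live
 * migration.
 */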
5016 static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
5017 struct kvm_vcpu_events *events)
5018 {
5019 process_nmi(vcpu);
5020
5021 if (kvm_check_request(KVM_REQ_SMI, vcpu))
5022 process_smi(vcpu);
5023
5024 /*
5025  * In guest mode, payload delivery should be deferred so that the L1
5026  * hypervisor can intercept #PF before CR2 is modified (or intercept
5027  * #DB before DR6 is modified under nVMX).  Unless the per-VM
5028  * capability KVM_CAP_EXCEPTION_PAYLOAD is set, the payload cannot be
5029  * carried across a userspace round trip (old userspace does not know
5030  * about payloads), so deliver it into the architectural state
5031  * (CR2/DR6) now.  This keeps KVM_GET_VCPU_EVENTS self-consistent at
5032  * the cost of possibly losing an L1 interception opportunity, e.g.
5033  * during live migration of a nested guest.
5034  */
5035 if (!vcpu->kvm->arch.exception_payload_enabled &&
5036 vcpu->arch.exception.pending && vcpu->arch.exception.has_payload)
5037 kvm_deliver_exception_payload(vcpu);
5038 /*
5039  * The API doesn't provide the instruction length for software
5040  * exceptions, so don't report them.  As long as the guest RIP
5041  * isn't advanced, we should expect to encounter the exception
5042  * again.
5043  */
5044
5045 if (kvm_exception_is_soft(vcpu->arch.exception.nr)) {
5046 events->exception.injected = 0;
5047 events->exception.pending = 0;
5048 } else {
5049 events->exception.injected = vcpu->arch.exception.injected;
5050 events->exception.pending = vcpu->arch.exception.pending;
5051 /*
5052  * For ABI compatibility, deliberately conflate
5053  * pending and injected exceptions when
5054  * KVM_CAP_EXCEPTION_PAYLOAD isn't enabled.
5055  */
5056 if (!vcpu->kvm->arch.exception_payload_enabled)
5057 events->exception.injected |=
5058 vcpu->arch.exception.pending;
5059 }
5060 events->exception.nr = vcpu->arch.exception.nr;
5061 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
5062 events->exception.error_code = vcpu->arch.exception.error_code;
5063 events->exception_has_payload = vcpu->arch.exception.has_payload;
5064 events->exception_payload = vcpu->arch.exception.payload;
5065
5066 events->interrupt.injected =
5067 vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
5068 events->interrupt.nr = vcpu->arch.interrupt.nr;
5069 events->interrupt.soft = 0;
5070 events->interrupt.shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
5071
5072 events->nmi.injected = vcpu->arch.nmi_injected;
5073 events->nmi.pending = vcpu->arch.nmi_pending != 0;
5074 events->nmi.masked = static_call(kvm_x86_get_nmi_mask)(vcpu);
5075 events->nmi.pad = 0;
5076
5077 events->sipi_vector = 0;
5078
5079 events->smi.smm = is_smm(vcpu);
5080 events->smi.pending = vcpu->arch.smi_pending;
5081 events->smi.smm_inside_nmi =
5082 !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
5083 events->smi.latched_init = kvm_lapic_latched_init(vcpu);
5084
5085 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
5086 | KVM_VCPUEVENT_VALID_SHADOW
5087 | KVM_VCPUEVENT_VALID_SMM);
5088 if (vcpu->kvm->arch.exception_payload_enabled)
5089 events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
5090 if (vcpu->kvm->arch.triple_fault_event) {
5091 events->triple_fault.pending = kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu);
5092 events->flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
5093 }
5094
5095 memset(&events->reserved, 0, sizeof(events->reserved));
5096 }
5097
5098 static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm);
5099
5100 static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
5101 struct kvm_vcpu_events *events)
5102 {
5103 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
5104 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
5105 | KVM_VCPUEVENT_VALID_SHADOW
5106 | KVM_VCPUEVENT_VALID_SMM
5107 | KVM_VCPUEVENT_VALID_PAYLOAD
5108 | KVM_VCPUEVENT_VALID_TRIPLE_FAULT))
5109 return -EINVAL;
5110
5111 if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
5112 if (!vcpu->kvm->arch.exception_payload_enabled)
5113 return -EINVAL;
5114 if (events->exception.pending)
5115 events->exception.injected = 0;
5116 else
5117 events->exception_has_payload = 0;
5118 } else {
5119 events->exception.pending = 0;
5120 events->exception_has_payload = 0;
5121 }
5122
5123 if ((events->exception.injected || events->exception.pending) &&
5124 (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
5125 return -EINVAL;
5126
5127 /* INITs are latched while in SMM */
5128 if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
5129 (events->smi.smm || events->smi.pending) &&
5130 vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
5131 return -EINVAL;
5132
5133 process_nmi(vcpu);
5134 vcpu->arch.exception.injected = events->exception.injected;
5135 vcpu->arch.exception.pending = events->exception.pending;
5136 vcpu->arch.exception.nr = events->exception.nr;
5137 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
5138 vcpu->arch.exception.error_code = events->exception.error_code;
5139 vcpu->arch.exception.has_payload = events->exception_has_payload;
5140 vcpu->arch.exception.payload = events->exception_payload;
5141
5142 vcpu->arch.interrupt.injected = events->interrupt.injected;
5143 vcpu->arch.interrupt.nr = events->interrupt.nr;
5144 vcpu->arch.interrupt.soft = events->interrupt.soft;
5145 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
5146 static_call(kvm_x86_set_interrupt_shadow)(vcpu,
5147 events->interrupt.shadow);
5148
5149 vcpu->arch.nmi_injected = events->nmi.injected;
5150 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
5151 vcpu->arch.nmi_pending = events->nmi.pending;
5152 static_call(kvm_x86_set_nmi_mask)(vcpu, events->nmi.masked);
5153
5154 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
5155 lapic_in_kernel(vcpu))
5156 vcpu->arch.apic->sipi_vector = events->sipi_vector;
5157
5158 if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
5159 if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
5160 kvm_x86_ops.nested_ops->leave_nested(vcpu);
5161 kvm_smm_changed(vcpu, events->smi.smm);
5162 }
5163
5164 vcpu->arch.smi_pending = events->smi.pending;
5165
5166 if (events->smi.smm) {
5167 if (events->smi.smm_inside_nmi)
5168 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
5169 else
5170 vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
5171 }
5172
5173 if (lapic_in_kernel(vcpu)) {
5174 if (events->smi.latched_init)
5175 set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
5176 else
5177 clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
5178 }
5179 }
5180
5181 if (events->flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT) {
5182 if (!vcpu->kvm->arch.triple_fault_event)
5183 return -EINVAL;
5184 if (events->triple_fault.pending)
5185 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
5186 else
5187 kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu);
5188 }
5189
5190 kvm_make_request(KVM_REQ_EVENT, vcpu);
5191
5192 return 0;
5193 }
5194
5195 static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
5196 struct kvm_debugregs *dbgregs)
5197 {
5198 unsigned long val;
5199
5200 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
5201 kvm_get_dr(vcpu, 6, &val);
5202 dbgregs->dr6 = val;
5203 dbgregs->dr7 = vcpu->arch.dr7;
5204 dbgregs->flags = 0;
5205 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
5206 }
5207
5208 static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
5209 struct kvm_debugregs *dbgregs)
5210 {
5211 if (dbgregs->flags)
5212 return -EINVAL;
5213
5214 if (!kvm_dr6_valid(dbgregs->dr6))
5215 return -EINVAL;
5216 if (!kvm_dr7_valid(dbgregs->dr7))
5217 return -EINVAL;
5218
5219 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
5220 kvm_update_dr0123(vcpu);
5221 vcpu->arch.dr6 = dbgregs->dr6;
5222 vcpu->arch.dr7 = dbgregs->dr7;
5223 kvm_update_dr7(vcpu);
5224
5225 return 0;
5226 }
5227
5228 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
5229 struct kvm_xsave *guest_xsave)
5230 {
5231 if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
5232 return;
5233
5234 fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
5235 guest_xsave->region,
5236 sizeof(guest_xsave->region),
5237 vcpu->arch.pkru);
5238 }
5239
5240 static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
5241 u8 *state, unsigned int size)
5242 {
5243 if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
5244 return;
5245
5246 fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
5247 state, size, vcpu->arch.pkru);
5248 }
5249
5250 static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
5251 struct kvm_xsave *guest_xsave)
5252 {
5253 if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
5254 return 0;
5255
5256 return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu,
5257 guest_xsave->region,
5258 kvm_caps.supported_xcr0,
5259 &vcpu->arch.pkru);
5260 }
5261
5262 static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
5263 struct kvm_xcrs *guest_xcrs)
5264 {
5265 if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
5266 guest_xcrs->nr_xcrs = 0;
5267 return;
5268 }
5269
5270 guest_xcrs->nr_xcrs = 1;
5271 guest_xcrs->flags = 0;
5272 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
5273 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
5274 }
5275
5276 static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
5277 struct kvm_xcrs *guest_xcrs)
5278 {
5279 int i, r = 0;
5280
5281 if (!boot_cpu_has(X86_FEATURE_XSAVE))
5282 return -EINVAL;
5283
5284 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
5285 return -EINVAL;
5286
5287 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
5288 /* Only support XCR0 currently */
5289 if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
5290 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
5291 guest_xcrs->xcrs[i].value);
5292 break;
5293 }
5294 if (r)
5295 r = -EINVAL;
5296 return r;
5297 }
5298
5299 /*
5300  * kvm_set_guest_paused() indicates to the guest kernel that it has been
5301  * stopped by the hypervisor.  This function will be called from the host
5302  * only.  -EINVAL is returned if the guest has not registered a pvclock
5303  * page, since there is then no PVCLOCK_GUEST_STOPPED flag to set.
5304  */
5305 static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
5306 {
5307 if (!vcpu->arch.pv_time.active)
5308 return -EINVAL;
5309 vcpu->arch.pvclock_set_guest_stopped_request = true;
5310 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
5311 return 0;
5312 }
5313
5314 static int kvm_arch_tsc_has_attr(struct kvm_vcpu *vcpu,
5315 struct kvm_device_attr *attr)
5316 {
5317 int r;
5318
5319 switch (attr->attr) {
5320 case KVM_VCPU_TSC_OFFSET:
5321 r = 0;
5322 break;
5323 default:
5324 r = -ENXIO;
5325 }
5326
5327 return r;
5328 }
5329
5330 static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu,
5331 struct kvm_device_attr *attr)
5332 {
5333 u64 __user *uaddr = kvm_get_attr_addr(attr);
5334 int r;
5335
5336 if (IS_ERR(uaddr))
5337 return PTR_ERR(uaddr);
5338
5339 switch (attr->attr) {
5340 case KVM_VCPU_TSC_OFFSET:
5341 r = -EFAULT;
5342 if (put_user(vcpu->arch.l1_tsc_offset, uaddr))
5343 break;
5344 r = 0;
5345 break;
5346 default:
5347 r = -ENXIO;
5348 }
5349
5350 return r;
5351 }
5352
5353 static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu,
5354 struct kvm_device_attr *attr)
5355 {
5356 u64 __user *uaddr = kvm_get_attr_addr(attr);
5357 struct kvm *kvm = vcpu->kvm;
5358 int r;
5359
5360 if (IS_ERR(uaddr))
5361 return PTR_ERR(uaddr);
5362
5363 switch (attr->attr) {
5364 case KVM_VCPU_TSC_OFFSET: {
5365 u64 offset, tsc, ns;
5366 unsigned long flags;
5367 bool matched;
5368
5369 r = -EFAULT;
5370 if (get_user(offset, uaddr))
5371 break;
5372
5373 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
5374
5375 matched = (vcpu->arch.virtual_tsc_khz &&
5376 kvm->arch.last_tsc_khz == vcpu->arch.virtual_tsc_khz &&
5377 kvm->arch.last_tsc_offset == offset);
5378
5379 tsc = kvm_scale_tsc(rdtsc(), vcpu->arch.l1_tsc_scaling_ratio) + offset;
5380 ns = get_kvmclock_base_ns();
5381
5382 __kvm_synchronize_tsc(vcpu, offset, tsc, ns, matched);
5383 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
5384
5385 r = 0;
5386 break;
5387 }
5388 default:
5389 r = -ENXIO;
5390 }
5391
5392 return r;
5393 }
5394
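/*
 * Dispatcher for the per-vCPU device-attribute ioctls; only the
 * KVM_VCPU_TSC_CTRL group (TSC offset) is currently implemented.
 */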
5395 static int kvm_vcpu_ioctl_device_attr(struct kvm_vcpu *vcpu,
5396 unsigned int ioctl,
5397 void __user *argp)
5398 {
5399 struct kvm_device_attr attr;
5400 int r;
5401
5402 if (copy_from_user(&attr, argp, sizeof(attr)))
5403 return -EFAULT;
5404
5405 if (attr.group != KVM_VCPU_TSC_CTRL)
5406 return -ENXIO;
5407
5408 switch (ioctl) {
5409 case KVM_HAS_DEVICE_ATTR:
5410 r = kvm_arch_tsc_has_attr(vcpu, &attr);
5411 break;
5412 case KVM_GET_DEVICE_ATTR:
5413 r = kvm_arch_tsc_get_attr(vcpu, &attr);
5414 break;
5415 case KVM_SET_DEVICE_ATTR:
5416 r = kvm_arch_tsc_set_attr(vcpu, &attr);
5417 break;
5418 }
5419
5420 return r;
5421 }
5422
5423 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
5424 struct kvm_enable_cap *cap)
5425 {
5426 int r;
5427 uint16_t vmcs_version;
5428 void __user *user_ptr;
5429
5430 if (cap->flags)
5431 return -EINVAL;
5432
5433 switch (cap->cap) {
5434 case KVM_CAP_HYPERV_SYNIC2:
5435 if (cap->args[0])
5436 return -EINVAL;
5437 fallthrough;
5438
5439 case KVM_CAP_HYPERV_SYNIC:
5440 if (!irqchip_in_kernel(vcpu->kvm))
5441 return -EINVAL;
5442 return kvm_hv_activate_synic(vcpu, cap->cap ==
5443 KVM_CAP_HYPERV_SYNIC2);
5444 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
5445 if (!kvm_x86_ops.nested_ops->enable_evmcs)
5446 return -ENOTTY;
5447 r = kvm_x86_ops.nested_ops->enable_evmcs(vcpu, &vmcs_version);
5448 if (!r) {
5449 user_ptr = (void __user *)(uintptr_t)cap->args[0];
5450 if (copy_to_user(user_ptr, &vmcs_version,
5451 sizeof(vmcs_version)))
5452 r = -EFAULT;
5453 }
5454 return r;
5455 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
5456 if (!kvm_x86_ops.enable_direct_tlbflush)
5457 return -ENOTTY;
5458
5459 return static_call(kvm_x86_enable_direct_tlbflush)(vcpu);
5460
5461 case KVM_CAP_HYPERV_ENFORCE_CPUID:
5462 return kvm_hv_set_enforce_cpuid(vcpu, cap->args[0]);
5463
5464 case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
5465 vcpu->arch.pv_cpuid.enforce = cap->args[0];
5466 if (vcpu->arch.pv_cpuid.enforce)
5467 kvm_update_pv_runtime(vcpu);
5468
5469 return 0;
5470 default:
5471 return -EINVAL;
5472 }
5473 }
5474
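/*
 * Main per-vCPU ioctl dispatcher.  The vCPU is loaded for the duration of
 * the call; any buffer allocated through the union 'u' is freed at 'out'.
 */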
5475 long kvm_arch_vcpu_ioctl(struct file *filp,
5476 unsigned int ioctl, unsigned long arg)
5477 {
5478 struct kvm_vcpu *vcpu = filp->private_data;
5479 void __user *argp = (void __user *)arg;
5480 int r;
5481 union {
5482 struct kvm_sregs2 *sregs2;
5483 struct kvm_lapic_state *lapic;
5484 struct kvm_xsave *xsave;
5485 struct kvm_xcrs *xcrs;
5486 void *buffer;
5487 } u;
5488
5489 vcpu_load(vcpu);
5490
5491 u.buffer = NULL;
5492 switch (ioctl) {
5493 case KVM_GET_LAPIC: {
5494 r = -EINVAL;
5495 if (!lapic_in_kernel(vcpu))
5496 goto out;
5497 u.lapic = kzalloc(sizeof(struct kvm_lapic_state),
5498 GFP_KERNEL_ACCOUNT);
5499
5500 r = -ENOMEM;
5501 if (!u.lapic)
5502 goto out;
5503 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
5504 if (r)
5505 goto out;
5506 r = -EFAULT;
5507 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
5508 goto out;
5509 r = 0;
5510 break;
5511 }
5512 case KVM_SET_LAPIC: {
5513 r = -EINVAL;
5514 if (!lapic_in_kernel(vcpu))
5515 goto out;
5516 u.lapic = memdup_user(argp, sizeof(*u.lapic));
5517 if (IS_ERR(u.lapic)) {
5518 r = PTR_ERR(u.lapic);
5519 goto out_nofree;
5520 }
5521
5522 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
5523 break;
5524 }
5525 case KVM_INTERRUPT: {
5526 struct kvm_interrupt irq;
5527
5528 r = -EFAULT;
5529 if (copy_from_user(&irq, argp, sizeof(irq)))
5530 goto out;
5531 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
5532 break;
5533 }
5534 case KVM_NMI: {
5535 r = kvm_vcpu_ioctl_nmi(vcpu);
5536 break;
5537 }
5538 case KVM_SMI: {
5539 r = kvm_vcpu_ioctl_smi(vcpu);
5540 break;
5541 }
5542 case KVM_SET_CPUID: {
5543 struct kvm_cpuid __user *cpuid_arg = argp;
5544 struct kvm_cpuid cpuid;
5545
5546 r = -EFAULT;
5547 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
5548 goto out;
5549 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
5550 break;
5551 }
5552 case KVM_SET_CPUID2: {
5553 struct kvm_cpuid2 __user *cpuid_arg = argp;
5554 struct kvm_cpuid2 cpuid;
5555
5556 r = -EFAULT;
5557 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
5558 goto out;
5559 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
5560 cpuid_arg->entries);
5561 break;
5562 }
5563 case KVM_GET_CPUID2: {
5564 struct kvm_cpuid2 __user *cpuid_arg = argp;
5565 struct kvm_cpuid2 cpuid;
5566
5567 r = -EFAULT;
5568 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
5569 goto out;
5570 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
5571 cpuid_arg->entries);
5572 if (r)
5573 goto out;
5574 r = -EFAULT;
5575 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
5576 goto out;
5577 r = 0;
5578 break;
5579 }
5580 case KVM_GET_MSRS: {
5581 int idx = srcu_read_lock(&vcpu->kvm->srcu);
5582 r = msr_io(vcpu, argp, do_get_msr, 1);
5583 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5584 break;
5585 }
5586 case KVM_SET_MSRS: {
5587 int idx = srcu_read_lock(&vcpu->kvm->srcu);
5588 r = msr_io(vcpu, argp, do_set_msr, 0);
5589 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5590 break;
5591 }
5592 case KVM_TPR_ACCESS_REPORTING: {
5593 struct kvm_tpr_access_ctl tac;
5594
5595 r = -EFAULT;
5596 if (copy_from_user(&tac, argp, sizeof(tac)))
5597 goto out;
5598 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
5599 if (r)
5600 goto out;
5601 r = -EFAULT;
5602 if (copy_to_user(argp, &tac, sizeof(tac)))
5603 goto out;
5604 r = 0;
5605 break;
5606 }
5607 case KVM_SET_VAPIC_ADDR: {
5608 struct kvm_vapic_addr va;
5609 int idx;
5610
5611 r = -EINVAL;
5612 if (!lapic_in_kernel(vcpu))
5613 goto out;
5614 r = -EFAULT;
5615 if (copy_from_user(&va, argp, sizeof(va)))
5616 goto out;
5617 idx = srcu_read_lock(&vcpu->kvm->srcu);
5618 r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
5619 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5620 break;
5621 }
5622 case KVM_X86_SETUP_MCE: {
5623 u64 mcg_cap;
5624
5625 r = -EFAULT;
5626 if (copy_from_user(&mcg_cap, argp, sizeof(mcg_cap)))
5627 goto out;
5628 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
5629 break;
5630 }
5631 case KVM_X86_SET_MCE: {
5632 struct kvm_x86_mce mce;
5633
5634 r = -EFAULT;
5635 if (copy_from_user(&mce, argp, sizeof(mce)))
5636 goto out;
5637 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
5638 break;
5639 }
5640 case KVM_GET_VCPU_EVENTS: {
5641 struct kvm_vcpu_events events;
5642
5643 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
5644
5645 r = -EFAULT;
5646 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
5647 break;
5648 r = 0;
5649 break;
5650 }
5651 case KVM_SET_VCPU_EVENTS: {
5652 struct kvm_vcpu_events events;
5653
5654 r = -EFAULT;
5655 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
5656 break;
5657
5658 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
5659 break;
5660 }
5661 case KVM_GET_DEBUGREGS: {
5662 struct kvm_debugregs dbgregs;
5663
5664 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
5665
5666 r = -EFAULT;
5667 if (copy_to_user(argp, &dbgregs,
5668 sizeof(struct kvm_debugregs)))
5669 break;
5670 r = 0;
5671 break;
5672 }
5673 case KVM_SET_DEBUGREGS: {
5674 struct kvm_debugregs dbgregs;
5675
5676 r = -EFAULT;
5677 if (copy_from_user(&dbgregs, argp,
5678 sizeof(struct kvm_debugregs)))
5679 break;
5680
5681 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
5682 break;
5683 }
5684 case KVM_GET_XSAVE: {
5685 r = -EINVAL;
5686 if (vcpu->arch.guest_fpu.uabi_size > sizeof(struct kvm_xsave))
5687 break;
5688
5689 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
5690 r = -ENOMEM;
5691 if (!u.xsave)
5692 break;
5693
5694 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
5695
5696 r = -EFAULT;
5697 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
5698 break;
5699 r = 0;
5700 break;
5701 }
5702 case KVM_SET_XSAVE: {
5703 int size = vcpu->arch.guest_fpu.uabi_size;
5704
5705 u.xsave = memdup_user(argp, size);
5706 if (IS_ERR(u.xsave)) {
5707 r = PTR_ERR(u.xsave);
5708 goto out_nofree;
5709 }
5710
5711 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
5712 break;
5713 }
5714
5715 case KVM_GET_XSAVE2: {
5716 int size = vcpu->arch.guest_fpu.uabi_size;
5717
5718 u.xsave = kzalloc(size, GFP_KERNEL_ACCOUNT);
5719 r = -ENOMEM;
5720 if (!u.xsave)
5721 break;
5722
5723 kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size);
5724
5725 r = -EFAULT;
5726 if (copy_to_user(argp, u.xsave, size))
5727 break;
5728
5729 r = 0;
5730 break;
5731 }
5732
5733 case KVM_GET_XCRS: {
5734 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
5735 r = -ENOMEM;
5736 if (!u.xcrs)
5737 break;
5738
5739 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
5740
5741 r = -EFAULT;
5742 if (copy_to_user(argp, u.xcrs,
5743 sizeof(struct kvm_xcrs)))
5744 break;
5745 r = 0;
5746 break;
5747 }
5748 case KVM_SET_XCRS: {
5749 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
5750 if (IS_ERR(u.xcrs)) {
5751 r = PTR_ERR(u.xcrs);
5752 goto out_nofree;
5753 }
5754
5755 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
5756 break;
5757 }
5758 case KVM_SET_TSC_KHZ: {
5759 u32 user_tsc_khz;
5760
5761 r = -EINVAL;
5762 user_tsc_khz = (u32)arg;
5763
5764 if (kvm_caps.has_tsc_control &&
5765 user_tsc_khz >= kvm_caps.max_guest_tsc_khz)
5766 goto out;
5767
5768 if (user_tsc_khz == 0)
5769 user_tsc_khz = tsc_khz;
5770
5771 if (!kvm_set_tsc_khz(vcpu, user_tsc_khz))
5772 r = 0;
5773
5774 goto out;
5775 }
5776 case KVM_GET_TSC_KHZ: {
5777 r = vcpu->arch.virtual_tsc_khz;
5778 goto out;
5779 }
5780 case KVM_KVMCLOCK_CTRL: {
5781 r = kvm_set_guest_paused(vcpu);
5782 goto out;
5783 }
5784 case KVM_ENABLE_CAP: {
5785 struct kvm_enable_cap cap;
5786
5787 r = -EFAULT;
5788 if (copy_from_user(&cap, argp, sizeof(cap)))
5789 goto out;
5790 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5791 break;
5792 }
5793 case KVM_GET_NESTED_STATE: {
5794 struct kvm_nested_state __user *user_kvm_nested_state = argp;
5795 u32 user_data_size;
5796
5797 r = -EINVAL;
5798 if (!kvm_x86_ops.nested_ops->get_state)
5799 break;
5800
5801 BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
5802 r = -EFAULT;
5803 if (get_user(user_data_size, &user_kvm_nested_state->size))
5804 break;
5805
5806 r = kvm_x86_ops.nested_ops->get_state(vcpu, user_kvm_nested_state,
5807 user_data_size);
5808 if (r < 0)
5809 break;
5810
5811 if (r > user_data_size) {
5812 if (put_user(r, &user_kvm_nested_state->size))
5813 r = -EFAULT;
5814 else
5815 r = -E2BIG;
5816 break;
5817 }
5818
5819 r = 0;
5820 break;
5821 }
5822 case KVM_SET_NESTED_STATE: {
5823 struct kvm_nested_state __user *user_kvm_nested_state = argp;
5824 struct kvm_nested_state kvm_state;
5825 int idx;
5826
5827 r = -EINVAL;
5828 if (!kvm_x86_ops.nested_ops->set_state)
5829 break;
5830
5831 r = -EFAULT;
5832 if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
5833 break;
5834
5835 r = -EINVAL;
5836 if (kvm_state.size < sizeof(kvm_state))
5837 break;
5838
5839 if (kvm_state.flags &
5840 ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
5841 | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING
5842 | KVM_STATE_NESTED_GIF_SET))
5843 break;
5844
5845 /* nested_run_pending implies guest_mode. */
5846 if ((kvm_state.flags & KVM_STATE_NESTED_RUN_PENDING)
5847 && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE))
5848 break;
5849
5850 idx = srcu_read_lock(&vcpu->kvm->srcu);
5851 r = kvm_x86_ops.nested_ops->set_state(vcpu, user_kvm_nested_state, &kvm_state);
5852 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5853 break;
5854 }
5855 case KVM_GET_SUPPORTED_HV_CPUID:
5856 r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
5857 break;
5858 #ifdef CONFIG_KVM_XEN
5859 case KVM_XEN_VCPU_GET_ATTR: {
5860 struct kvm_xen_vcpu_attr xva;
5861
5862 r = -EFAULT;
5863 if (copy_from_user(&xva, argp, sizeof(xva)))
5864 goto out;
5865 r = kvm_xen_vcpu_get_attr(vcpu, &xva);
5866 if (!r && copy_to_user(argp, &xva, sizeof(xva)))
5867 r = -EFAULT;
5868 break;
5869 }
5870 case KVM_XEN_VCPU_SET_ATTR: {
5871 struct kvm_xen_vcpu_attr xva;
5872
5873 r = -EFAULT;
5874 if (copy_from_user(&xva, argp, sizeof(xva)))
5875 goto out;
5876 r = kvm_xen_vcpu_set_attr(vcpu, &xva);
5877 break;
5878 }
5879 #endif
5880 case KVM_GET_SREGS2: {
5881 u.sregs2 = kzalloc(sizeof(struct kvm_sregs2), GFP_KERNEL);
5882 r = -ENOMEM;
5883 if (!u.sregs2)
5884 goto out;
5885 __get_sregs2(vcpu, u.sregs2);
5886 r = -EFAULT;
5887 if (copy_to_user(argp, u.sregs2, sizeof(struct kvm_sregs2)))
5888 goto out;
5889 r = 0;
5890 break;
5891 }
5892 case KVM_SET_SREGS2: {
5893 u.sregs2 = memdup_user(argp, sizeof(struct kvm_sregs2));
5894 if (IS_ERR(u.sregs2)) {
5895 r = PTR_ERR(u.sregs2);
5896 u.sregs2 = NULL;
5897 goto out;
5898 }
5899 r = __set_sregs2(vcpu, u.sregs2);
5900 break;
5901 }
5902 case KVM_HAS_DEVICE_ATTR:
5903 case KVM_GET_DEVICE_ATTR:
5904 case KVM_SET_DEVICE_ATTR:
5905 r = kvm_vcpu_ioctl_device_attr(vcpu, ioctl, argp);
5906 break;
5907 default:
5908 r = -EINVAL;
5909 }
5910 out:
5911 kfree(u.buffer);
5912 out_nofree:
5913 vcpu_put(vcpu);
5914 return r;
5915 }
5916
5917 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5918 {
5919 return VM_FAULT_SIGBUS;
5920 }
5921
5922 static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
5923 {
5924 int ret;
5925
5926 if (addr > (unsigned int)(-3 * PAGE_SIZE))
5927 return -EINVAL;
5928 ret = static_call(kvm_x86_set_tss_addr)(kvm, addr);
5929 return ret;
5930 }
5931
5932 static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
5933 u64 ident_addr)
5934 {
5935 return static_call(kvm_x86_set_identity_map_addr)(kvm, ident_addr);
5936 }
5937
5938 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
5939 unsigned long kvm_nr_mmu_pages)
5940 {
5941 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
5942 return -EINVAL;
5943
5944 mutex_lock(&kvm->slots_lock);
5945
5946 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
5947 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
5948
5949 mutex_unlock(&kvm->slots_lock);
5950 return 0;
5951 }
5952
5953 static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
5954 {
5955 return kvm->arch.n_max_mmu_pages;
5956 }
5957
5958 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
5959 {
5960 struct kvm_pic *pic = kvm->arch.vpic;
5961 int r;
5962
5963 r = 0;
5964 switch (chip->chip_id) {
5965 case KVM_IRQCHIP_PIC_MASTER:
5966 memcpy(&chip->chip.pic, &pic->pics[0],
5967 sizeof(struct kvm_pic_state));
5968 break;
5969 case KVM_IRQCHIP_PIC_SLAVE:
5970 memcpy(&chip->chip.pic, &pic->pics[1],
5971 sizeof(struct kvm_pic_state));
5972 break;
5973 case KVM_IRQCHIP_IOAPIC:
5974 kvm_get_ioapic(kvm, &chip->chip.ioapic);
5975 break;
5976 default:
5977 r = -EINVAL;
5978 break;
5979 }
5980 return r;
5981 }
5982
5983 static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
5984 {
5985 struct kvm_pic *pic = kvm->arch.vpic;
5986 int r;
5987
5988 r = 0;
5989 switch (chip->chip_id) {
5990 case KVM_IRQCHIP_PIC_MASTER:
5991 spin_lock(&pic->lock);
5992 memcpy(&pic->pics[0], &chip->chip.pic,
5993 sizeof(struct kvm_pic_state));
5994 spin_unlock(&pic->lock);
5995 break;
5996 case KVM_IRQCHIP_PIC_SLAVE:
5997 spin_lock(&pic->lock);
5998 memcpy(&pic->pics[1], &chip->chip.pic,
5999 sizeof(struct kvm_pic_state));
6000 spin_unlock(&pic->lock);
6001 break;
6002 case KVM_IRQCHIP_IOAPIC:
6003 kvm_set_ioapic(kvm, &chip->chip.ioapic);
6004 break;
6005 default:
6006 r = -EINVAL;
6007 break;
6008 }
6009 kvm_pic_update_irq(pic);
6010 return r;
6011 }
6012
6013 static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
6014 {
6015 struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
6016
6017 BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
6018
6019 mutex_lock(&kps->lock);
6020 memcpy(ps, &kps->channels, sizeof(*ps));
6021 mutex_unlock(&kps->lock);
6022 return 0;
6023 }
6024
6025 static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
6026 {
6027 int i;
6028 struct kvm_pit *pit = kvm->arch.vpit;
6029
6030 mutex_lock(&pit->pit_state.lock);
6031 memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
6032 for (i = 0; i < 3; i++)
6033 kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
6034 mutex_unlock(&pit->pit_state.lock);
6035 return 0;
6036 }
6037
6038 static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
6039 {
6040 mutex_lock(&kvm->arch.vpit->pit_state.lock);
6041 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
6042 sizeof(ps->channels));
6043 ps->flags = kvm->arch.vpit->pit_state.flags;
6044 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
6045 memset(&ps->reserved, 0, sizeof(ps->reserved));
6046 return 0;
6047 }
6048
6049 static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
6050 {
6051 int start = 0;
6052 int i;
6053 u32 prev_legacy, cur_legacy;
6054 struct kvm_pit *pit = kvm->arch.vpit;
6055
6056 mutex_lock(&pit->pit_state.lock);
6057 prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
6058 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
6059 if (!prev_legacy && cur_legacy)
6060 start = 1;
6061 memcpy(&pit->pit_state.channels, &ps->channels,
6062 sizeof(pit->pit_state.channels));
6063 pit->pit_state.flags = ps->flags;
6064 for (i = 0; i < 3; i++)
6065 kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
6066 start && i == 0);
6067 mutex_unlock(&pit->pit_state.lock);
6068 return 0;
6069 }
6070
6071 static int kvm_vm_ioctl_reinject(struct kvm *kvm,
6072 struct kvm_reinject_control *control)
6073 {
6074 struct kvm_pit *pit = kvm->arch.vpit;
6075
6076 /*
6077  * pit->pit_state.lock is overloaded to prevent userspace from getting an
6078  * inconsistent state after running multiple KVM_REINJECT_CONTROL ioctls in parallel.
6079  */
6080 mutex_lock(&pit->pit_state.lock);
6081 kvm_pit_set_reinject(pit, control->pit_reinject);
6082 mutex_unlock(&pit->pit_state.lock);
6083
6084 return 0;
6085 }
6086
6087 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
6088 {
6089 /*
6090  * Flush all CPUs' dirty log buffers to the dirty_bitmap.  Called
6091  * before reporting dirty_bitmap to userspace.  KVM flushes the buffers
6092  * on all VM-Exits, thus we only need to kick running vCPUs to force a
6093  * VM-Exit.
6094  */
6095 
6096 struct kvm_vcpu *vcpu;
6097 unsigned long i;
6098
6099 kvm_for_each_vcpu(i, vcpu, kvm)
6100 kvm_vcpu_kick(vcpu);
6101 }
6102
6103 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
6104 bool line_status)
6105 {
6106 if (!irqchip_in_kernel(kvm))
6107 return -ENXIO;
6108
6109 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
6110 irq_event->irq, irq_event->level,
6111 line_status);
6112 return 0;
6113 }
6114
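/*
 * KVM_ENABLE_CAP at VM scope: most cases simply latch an opt-in flag in
 * struct kvm_arch; several may only be enabled before any vCPU is created.
 */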
6115 int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
6116 struct kvm_enable_cap *cap)
6117 {
6118 int r;
6119
6120 if (cap->flags)
6121 return -EINVAL;
6122
6123 switch (cap->cap) {
6124 case KVM_CAP_DISABLE_QUIRKS2:
6125 r = -EINVAL;
6126 if (cap->args[0] & ~KVM_X86_VALID_QUIRKS)
6127 break;
6128 fallthrough;
6129 case KVM_CAP_DISABLE_QUIRKS:
6130 kvm->arch.disabled_quirks = cap->args[0];
6131 r = 0;
6132 break;
6133 case KVM_CAP_SPLIT_IRQCHIP: {
6134 mutex_lock(&kvm->lock);
6135 r = -EINVAL;
6136 if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
6137 goto split_irqchip_unlock;
6138 r = -EEXIST;
6139 if (irqchip_in_kernel(kvm))
6140 goto split_irqchip_unlock;
6141 if (kvm->created_vcpus)
6142 goto split_irqchip_unlock;
6143 r = kvm_setup_empty_irq_routing(kvm);
6144 if (r)
6145 goto split_irqchip_unlock;
6146 /* Pairs with irqchip_in_kernel. */
6147 smp_wmb();
6148 kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
6149 kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
6150 kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_ABSENT);
6151 r = 0;
6152 split_irqchip_unlock:
6153 mutex_unlock(&kvm->lock);
6154 break;
6155 }
6156 case KVM_CAP_X2APIC_API:
6157 r = -EINVAL;
6158 if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
6159 break;
6160
6161 if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
6162 kvm->arch.x2apic_format = true;
6163 if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
6164 kvm->arch.x2apic_broadcast_quirk_disabled = true;
6165
6166 r = 0;
6167 break;
6168 case KVM_CAP_X86_DISABLE_EXITS:
6169 r = -EINVAL;
6170 if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
6171 break;
6172
6173 if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
6174 kvm_can_mwait_in_guest())
6175 kvm->arch.mwait_in_guest = true;
6176 if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
6177 kvm->arch.hlt_in_guest = true;
6178 if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
6179 kvm->arch.pause_in_guest = true;
6180 if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
6181 kvm->arch.cstate_in_guest = true;
6182 r = 0;
6183 break;
6184 case KVM_CAP_MSR_PLATFORM_INFO:
6185 kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
6186 r = 0;
6187 break;
6188 case KVM_CAP_EXCEPTION_PAYLOAD:
6189 kvm->arch.exception_payload_enabled = cap->args[0];
6190 r = 0;
6191 break;
6192 case KVM_CAP_X86_TRIPLE_FAULT_EVENT:
6193 kvm->arch.triple_fault_event = cap->args[0];
6194 r = 0;
6195 break;
6196 case KVM_CAP_X86_USER_SPACE_MSR:
6197 r = -EINVAL;
6198 if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL |
6199 KVM_MSR_EXIT_REASON_UNKNOWN |
6200 KVM_MSR_EXIT_REASON_FILTER))
6201 break;
6202 kvm->arch.user_space_msr_mask = cap->args[0];
6203 r = 0;
6204 break;
6205 case KVM_CAP_X86_BUS_LOCK_EXIT:
6206 r = -EINVAL;
6207 if (cap->args[0] & ~KVM_BUS_LOCK_DETECTION_VALID_MODE)
6208 break;
6209
6210 if ((cap->args[0] & KVM_BUS_LOCK_DETECTION_OFF) &&
6211 (cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT))
6212 break;
6213
6214 if (kvm_caps.has_bus_lock_exit &&
6215 cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT)
6216 kvm->arch.bus_lock_detection_enabled = true;
6217 r = 0;
6218 break;
6219 #ifdef CONFIG_X86_SGX_KVM
6220 case KVM_CAP_SGX_ATTRIBUTE: {
6221 unsigned long allowed_attributes = 0;
6222
6223 r = sgx_set_attribute(&allowed_attributes, cap->args[0]);
6224 if (r)
6225 break;
6226
6227 /* KVM only supports the PROVISIONKEY privileged attribute. */
6228 if ((allowed_attributes & SGX_ATTR_PROVISIONKEY) &&
6229 !(allowed_attributes & ~SGX_ATTR_PROVISIONKEY))
6230 kvm->arch.sgx_provisioning_allowed = true;
6231 else
6232 r = -EINVAL;
6233 break;
6234 }
6235 #endif
6236 case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
6237 r = -EINVAL;
6238 if (!kvm_x86_ops.vm_copy_enc_context_from)
6239 break;
6240
6241 r = static_call(kvm_x86_vm_copy_enc_context_from)(kvm, cap->args[0]);
6242 break;
6243 case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
6244 r = -EINVAL;
6245 if (!kvm_x86_ops.vm_move_enc_context_from)
6246 break;
6247
6248 r = static_call(kvm_x86_vm_move_enc_context_from)(kvm, cap->args[0]);
6249 break;
6250 case KVM_CAP_EXIT_HYPERCALL:
6251 if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) {
6252 r = -EINVAL;
6253 break;
6254 }
6255 kvm->arch.hypercall_exit_enabled = cap->args[0];
6256 r = 0;
6257 break;
6258 case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
6259 r = -EINVAL;
6260 if (cap->args[0] & ~1)
6261 break;
6262 kvm->arch.exit_on_emulation_error = cap->args[0];
6263 r = 0;
6264 break;
6265 case KVM_CAP_PMU_CAPABILITY:
6266 r = -EINVAL;
6267 if (!enable_pmu || (cap->args[0] & ~KVM_CAP_PMU_VALID_MASK))
6268 break;
6269
6270 mutex_lock(&kvm->lock);
6271 if (!kvm->created_vcpus) {
6272 kvm->arch.enable_pmu = !(cap->args[0] & KVM_PMU_CAP_DISABLE);
6273 r = 0;
6274 }
6275 mutex_unlock(&kvm->lock);
6276 break;
6277 case KVM_CAP_MAX_VCPU_ID:
6278 r = -EINVAL;
6279 if (cap->args[0] > KVM_MAX_VCPU_IDS)
6280 break;
6281
6282 mutex_lock(&kvm->lock);
6283 if (kvm->arch.max_vcpu_ids == cap->args[0]) {
6284 r = 0;
6285 } else if (!kvm->arch.max_vcpu_ids) {
6286 kvm->arch.max_vcpu_ids = cap->args[0];
6287 r = 0;
6288 }
6289 mutex_unlock(&kvm->lock);
6290 break;
6291 case KVM_CAP_X86_NOTIFY_VMEXIT:
6292 r = -EINVAL;
6293 if ((u32)cap->args[0] & ~KVM_X86_NOTIFY_VMEXIT_VALID_BITS)
6294 break;
6295 if (!kvm_caps.has_notify_vmexit)
6296 break;
6297 if (!((u32)cap->args[0] & KVM_X86_NOTIFY_VMEXIT_ENABLED))
6298 break;
6299 mutex_lock(&kvm->lock);
6300 if (!kvm->created_vcpus) {
6301 kvm->arch.notify_window = cap->args[0] >> 32;
6302 kvm->arch.notify_vmexit_flags = (u32)cap->args[0];
6303 r = 0;
6304 }
6305 mutex_unlock(&kvm->lock);
6306 break;
6307 case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES:
6308 r = -EINVAL;
6309 /*
6310  * Since the risk of disabling NX hugepages is a guest crashing the
6311  * system, ensure the userspace process has permission to reboot the
6312  * system.
6313  *
6314  * Note that unlike the reboot() syscall, the process must have this
6315  * capability in the root namespace because exposing /dev/kvm into a
6316  * container does not limit the scope of the iTLB multihit bug to that
6317  * container, i.e. the capability effectively applies to the whole
6318  * system, not just the calling process/container.
6319  */
6320 
6321 if (!capable(CAP_SYS_BOOT)) {
6322 r = -EPERM;
6323 break;
6324 }
6325
6326 if (cap->args[0])
6327 break;
6328
6329 mutex_lock(&kvm->lock);
6330 if (!kvm->created_vcpus) {
6331 kvm->arch.disable_nx_huge_pages = true;
6332 r = 0;
6333 }
6334 mutex_unlock(&kvm->lock);
6335 break;
6336 default:
6337 r = -EINVAL;
6338 break;
6339 }
6340 return r;
6341 }
6342
6343 static struct kvm_x86_msr_filter *kvm_alloc_msr_filter(bool default_allow)
6344 {
6345 struct kvm_x86_msr_filter *msr_filter;
6346
6347 msr_filter = kzalloc(sizeof(*msr_filter), GFP_KERNEL_ACCOUNT);
6348 if (!msr_filter)
6349 return NULL;
6350
6351 msr_filter->default_allow = default_allow;
6352 return msr_filter;
6353 }
6354
6355 static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
6356 {
6357 u32 i;
6358
6359 if (!msr_filter)
6360 return;
6361
6362 for (i = 0; i < msr_filter->count; i++)
6363 kfree(msr_filter->ranges[i].bitmap);
6364
6365 kfree(msr_filter);
6366 }
6367
6368 static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
6369 struct kvm_msr_filter_range *user_range)
6370 {
6371 unsigned long *bitmap = NULL;
6372 size_t bitmap_size;
6373
6374 if (!user_range->nmsrs)
6375 return 0;
6376
6377 if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE))
6378 return -EINVAL;
6379
6380 if (!user_range->flags)
6381 return -EINVAL;
6382
6383 bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long);
6384 if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE)
6385 return -EINVAL;
6386
6387 bitmap = memdup_user((__user u8*)user_range->bitmap, bitmap_size);
6388 if (IS_ERR(bitmap))
6389 return PTR_ERR(bitmap);
6390
6391 msr_filter->ranges[msr_filter->count] = (struct msr_bitmap_range) {
6392 .flags = user_range->flags,
6393 .base = user_range->base,
6394 .nmsrs = user_range->nmsrs,
6395 .bitmap = bitmap,
6396 };
6397
6398 msr_filter->count++;
6399 return 0;
6400 }
6401
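/*
 * KVM_X86_SET_MSR_FILTER: build a new MSR filter from the userspace ranges,
 * publish it under SRCU, and notify all vCPUs so the vendor module can
 * recompute its MSR intercept bitmaps.
 */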
6402 static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
6403 {
6404 struct kvm_msr_filter __user *user_msr_filter = argp;
6405 struct kvm_x86_msr_filter *new_filter, *old_filter;
6406 struct kvm_msr_filter filter;
6407 bool default_allow;
6408 bool empty = true;
6409 int r = 0;
6410 u32 i;
6411
6412 if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
6413 return -EFAULT;
6414
6415 if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
6416 return -EINVAL;
6417
6418 for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
6419 empty &= !filter.ranges[i].nmsrs;
6420
6421 default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY);
6422 if (empty && !default_allow)
6423 return -EINVAL;
6424
6425 new_filter = kvm_alloc_msr_filter(default_allow);
6426 if (!new_filter)
6427 return -ENOMEM;
6428
6429 for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
6430 r = kvm_add_msr_filter(new_filter, &filter.ranges[i]);
6431 if (r) {
6432 kvm_free_msr_filter(new_filter);
6433 return r;
6434 }
6435 }
6436
6437 mutex_lock(&kvm->lock);
6438
6439 /* The update is serialized by kvm->lock; readers access the filter under SRCU. */
6440 old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1);
6441
6442 rcu_assign_pointer(kvm->arch.msr_filter, new_filter);
6443 synchronize_srcu(&kvm->srcu);
6444
6445 kvm_free_msr_filter(old_filter);
6446
6447 kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
6448 mutex_unlock(&kvm->lock);
6449
6450 return 0;
6451 }
6452
6453 #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
6454 static int kvm_arch_suspend_notifier(struct kvm *kvm)
6455 {
6456 struct kvm_vcpu *vcpu;
6457 unsigned long i;
6458 int ret = 0;
6459
6460 mutex_lock(&kvm->lock);
6461 kvm_for_each_vcpu(i, vcpu, kvm) {
6462 if (!vcpu->arch.pv_time.active)
6463 continue;
6464
6465 ret = kvm_set_guest_paused(vcpu);
6466 if (ret) {
6467 kvm_err("Failed to pause guest VCPU%d: %d\n",
6468 vcpu->vcpu_id, ret);
6469 break;
6470 }
6471 }
6472 mutex_unlock(&kvm->lock);
6473
6474 return ret ? NOTIFY_BAD : NOTIFY_DONE;
6475 }
6476
6477 int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state)
6478 {
6479 switch (state) {
6480 case PM_HIBERNATION_PREPARE:
6481 case PM_SUSPEND_PREPARE:
6482 return kvm_arch_suspend_notifier(kvm);
6483 }
6484
6485 return NOTIFY_DONE;
6486 }
6487 #endif
6488
6489 static int kvm_vm_ioctl_get_clock(struct kvm *kvm, void __user *argp)
6490 {
6491 struct kvm_clock_data data = { 0 };
6492
6493 get_kvmclock(kvm, &data);
6494 if (copy_to_user(argp, &data, sizeof(data)))
6495 return -EFAULT;
6496
6497 return 0;
6498 }
6499
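/*
 * KVM_SET_CLOCK: set the VM-wide kvmclock value, stored as an offset from
 * the host's raw clock (or the masterclock when it is in use).
 */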
6500 static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp)
6501 {
6502 struct kvm_arch *ka = &kvm->arch;
6503 struct kvm_clock_data data;
6504 u64 now_raw_ns;
6505
6506 if (copy_from_user(&data, argp, sizeof(data)))
6507 return -EFAULT;
6508
6509 /*
6510  * Only KVM_CLOCK_REALTIME is used, but allow passing the
6511  * result of KVM_GET_CLOCK back to KVM_SET_CLOCK.
6512  */
6513 if (data.flags & ~KVM_CLOCK_VALID_FLAGS)
6514 return -EINVAL;
6515
6516 kvm_hv_request_tsc_page_update(kvm);
6517 kvm_start_pvclock_update(kvm);
6518 pvclock_update_vm_gtod_copy(kvm);
6519
6520 /*
6521  * This pairs with kvm_guest_time_update(): when masterclock is
6522  * in use, we use master_kernel_ns + kvmclock_offset to set
6523  * unsigned 'system_time' so if we use get_kvmclock_ns() (which
6524  * is slightly ahead) here we risk going negative on unsigned
6525  * 'system_time' when 'data.clock' is very small.
6526  */
6527 if (data.flags & KVM_CLOCK_REALTIME) {
6528 u64 now_real_ns = ktime_get_real_ns();
6529
6530 /*
6531  * Avoid stepping the kvmclock backwards.
6532  */
6533 if (now_real_ns > data.realtime)
6534 data.clock += now_real_ns - data.realtime;
6535 }
6536
6537 if (ka->use_master_clock)
6538 now_raw_ns = ka->master_kernel_ns;
6539 else
6540 now_raw_ns = get_kvmclock_base_ns();
6541 ka->kvmclock_offset = data.clock - now_raw_ns;
6542 kvm_end_pvclock_update(kvm);
6543 return 0;
6544 }
6545
6546 long kvm_arch_vm_ioctl(struct file *filp,
6547 unsigned int ioctl, unsigned long arg)
6548 {
6549 struct kvm *kvm = filp->private_data;
6550 void __user *argp = (void __user *)arg;
6551 int r = -ENOTTY;
6552
6553 /*
6554  * This union makes it completely explicit to gcc-3.x that these
6555  * variables' stack usage should be combined, not added together.
6556  */
6557 union {
6558 struct kvm_pit_state ps;
6559 struct kvm_pit_state2 ps2;
6560 struct kvm_pit_config pit_config;
6561 } u;
6562
6563 switch (ioctl) {
6564 case KVM_SET_TSS_ADDR:
6565 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
6566 break;
6567 case KVM_SET_IDENTITY_MAP_ADDR: {
6568 u64 ident_addr;
6569
6570 mutex_lock(&kvm->lock);
6571 r = -EINVAL;
6572 if (kvm->created_vcpus)
6573 goto set_identity_unlock;
6574 r = -EFAULT;
6575 if (copy_from_user(&ident_addr, argp, sizeof(ident_addr)))
6576 goto set_identity_unlock;
6577 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
6578 set_identity_unlock:
6579 mutex_unlock(&kvm->lock);
6580 break;
6581 }
6582 case KVM_SET_NR_MMU_PAGES:
6583 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
6584 break;
6585 case KVM_GET_NR_MMU_PAGES:
6586 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
6587 break;
6588 case KVM_CREATE_IRQCHIP: {
6589 mutex_lock(&kvm->lock);
6590
6591 r = -EEXIST;
6592 if (irqchip_in_kernel(kvm))
6593 goto create_irqchip_unlock;
6594
6595 r = -EINVAL;
6596 if (kvm->created_vcpus)
6597 goto create_irqchip_unlock;
6598
6599 r = kvm_pic_init(kvm);
6600 if (r)
6601 goto create_irqchip_unlock;
6602
6603 r = kvm_ioapic_init(kvm);
6604 if (r) {
6605 kvm_pic_destroy(kvm);
6606 goto create_irqchip_unlock;
6607 }
6608
6609 r = kvm_setup_default_irq_routing(kvm);
6610 if (r) {
6611 kvm_ioapic_destroy(kvm);
6612 kvm_pic_destroy(kvm);
6613 goto create_irqchip_unlock;
6614 }
6615
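/*
 * Ensure the PIC/IOAPIC state and default routing configured above
 * are visible before irqchip_mode switches to KVM_IRQCHIP_KERNEL.
 */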
6616 smp_wmb();
6617 kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
6618 kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_ABSENT);
6619 create_irqchip_unlock:
6620 mutex_unlock(&kvm->lock);
6621 break;
6622 }
6623 case KVM_CREATE_PIT:
6624 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
6625 goto create_pit;
6626 case KVM_CREATE_PIT2:
6627 r = -EFAULT;
6628 if (copy_from_user(&u.pit_config, argp,
6629 sizeof(struct kvm_pit_config)))
6630 goto out;
6631 create_pit:
6632 mutex_lock(&kvm->lock);
6633 r = -EEXIST;
6634 if (kvm->arch.vpit)
6635 goto create_pit_unlock;
6636 r = -ENOMEM;
6637 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
6638 if (kvm->arch.vpit)
6639 r = 0;
6640 create_pit_unlock:
6641 mutex_unlock(&kvm->lock);
6642 break;
6643 case KVM_GET_IRQCHIP: {
6644
6645 struct kvm_irqchip *chip;
6646
6647 chip = memdup_user(argp, sizeof(*chip));
6648 if (IS_ERR(chip)) {
6649 r = PTR_ERR(chip);
6650 goto out;
6651 }
6652
6653 r = -ENXIO;
6654 if (!irqchip_kernel(kvm))
6655 goto get_irqchip_out;
6656 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
6657 if (r)
6658 goto get_irqchip_out;
6659 r = -EFAULT;
6660 if (copy_to_user(argp, chip, sizeof(*chip)))
6661 goto get_irqchip_out;
6662 r = 0;
6663 get_irqchip_out:
6664 kfree(chip);
6665 break;
6666 }
6667 case KVM_SET_IRQCHIP: {
6668
6669 struct kvm_irqchip *chip;
6670
6671 chip = memdup_user(argp, sizeof(*chip));
6672 if (IS_ERR(chip)) {
6673 r = PTR_ERR(chip);
6674 goto out;
6675 }
6676
6677 r = -ENXIO;
6678 if (!irqchip_kernel(kvm))
6679 goto set_irqchip_out;
6680 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
6681 set_irqchip_out:
6682 kfree(chip);
6683 break;
6684 }
6685 case KVM_GET_PIT: {
6686 r = -EFAULT;
6687 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
6688 goto out;
6689 r = -ENXIO;
6690 if (!kvm->arch.vpit)
6691 goto out;
6692 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
6693 if (r)
6694 goto out;
6695 r = -EFAULT;
6696 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
6697 goto out;
6698 r = 0;
6699 break;
6700 }
6701 case KVM_SET_PIT: {
6702 r = -EFAULT;
6703 if (copy_from_user(&u.ps, argp, sizeof(u.ps)))
6704 goto out;
6705 mutex_lock(&kvm->lock);
6706 r = -ENXIO;
6707 if (!kvm->arch.vpit)
6708 goto set_pit_out;
6709 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
6710 set_pit_out:
6711 mutex_unlock(&kvm->lock);
6712 break;
6713 }
6714 case KVM_GET_PIT2: {
6715 r = -ENXIO;
6716 if (!kvm->arch.vpit)
6717 goto out;
6718 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
6719 if (r)
6720 goto out;
6721 r = -EFAULT;
6722 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
6723 goto out;
6724 r = 0;
6725 break;
6726 }
6727 case KVM_SET_PIT2: {
6728 r = -EFAULT;
6729 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
6730 goto out;
6731 mutex_lock(&kvm->lock);
6732 r = -ENXIO;
6733 if (!kvm->arch.vpit)
6734 goto set_pit2_out;
6735 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
6736 set_pit2_out:
6737 mutex_unlock(&kvm->lock);
6738 break;
6739 }
6740 case KVM_REINJECT_CONTROL: {
6741 struct kvm_reinject_control control;
6742 r = -EFAULT;
6743 if (copy_from_user(&control, argp, sizeof(control)))
6744 goto out;
6745 r = -ENXIO;
6746 if (!kvm->arch.vpit)
6747 goto out;
6748 r = kvm_vm_ioctl_reinject(kvm, &control);
6749 break;
6750 }
6751 case KVM_SET_BOOT_CPU_ID:
6752 r = 0;
6753 mutex_lock(&kvm->lock);
6754 if (kvm->created_vcpus)
6755 r = -EBUSY;
6756 else
6757 kvm->arch.bsp_vcpu_id = arg;
6758 mutex_unlock(&kvm->lock);
6759 break;
6760 #ifdef CONFIG_KVM_XEN
6761 case KVM_XEN_HVM_CONFIG: {
6762 struct kvm_xen_hvm_config xhc;
6763 r = -EFAULT;
6764 if (copy_from_user(&xhc, argp, sizeof(xhc)))
6765 goto out;
6766 r = kvm_xen_hvm_config(kvm, &xhc);
6767 break;
6768 }
6769 case KVM_XEN_HVM_GET_ATTR: {
6770 struct kvm_xen_hvm_attr xha;
6771
6772 r = -EFAULT;
6773 if (copy_from_user(&xha, argp, sizeof(xha)))
6774 goto out;
6775 r = kvm_xen_hvm_get_attr(kvm, &xha);
6776 if (!r && copy_to_user(argp, &xha, sizeof(xha)))
6777 r = -EFAULT;
6778 break;
6779 }
6780 case KVM_XEN_HVM_SET_ATTR: {
6781 struct kvm_xen_hvm_attr xha;
6782
6783 r = -EFAULT;
6784 if (copy_from_user(&xha, argp, sizeof(xha)))
6785 goto out;
6786 r = kvm_xen_hvm_set_attr(kvm, &xha);
6787 break;
6788 }
6789 case KVM_XEN_HVM_EVTCHN_SEND: {
6790 struct kvm_irq_routing_xen_evtchn uxe;
6791
6792 r = -EFAULT;
6793 if (copy_from_user(&uxe, argp, sizeof(uxe)))
6794 goto out;
6795 r = kvm_xen_hvm_evtchn_send(kvm, &uxe);
6796 break;
6797 }
6798 #endif
6799 case KVM_SET_CLOCK:
6800 r = kvm_vm_ioctl_set_clock(kvm, argp);
6801 break;
6802 case KVM_GET_CLOCK:
6803 r = kvm_vm_ioctl_get_clock(kvm, argp);
6804 break;
6805 case KVM_SET_TSC_KHZ: {
6806 u32 user_tsc_khz;
6807
6808 r = -EINVAL;
6809 user_tsc_khz = (u32)arg;
6810
6811 if (kvm_caps.has_tsc_control &&
6812 user_tsc_khz >= kvm_caps.max_guest_tsc_khz)
6813 goto out;
6814
6815 if (user_tsc_khz == 0)
6816 user_tsc_khz = tsc_khz;
6817
6818 WRITE_ONCE(kvm->arch.default_tsc_khz, user_tsc_khz);
6819 r = 0;
6820
6821 goto out;
6822 }
6823 case KVM_GET_TSC_KHZ: {
6824 r = READ_ONCE(kvm->arch.default_tsc_khz);
6825 goto out;
6826 }
6827 case KVM_MEMORY_ENCRYPT_OP: {
6828 r = -ENOTTY;
6829 if (!kvm_x86_ops.mem_enc_ioctl)
6830 goto out;
6831
6832 r = static_call(kvm_x86_mem_enc_ioctl)(kvm, argp);
6833 break;
6834 }
6835 case KVM_MEMORY_ENCRYPT_REG_REGION: {
6836 struct kvm_enc_region region;
6837
6838 r = -EFAULT;
6839 if (copy_from_user(&region, argp, sizeof(region)))
6840 goto out;
6841
6842 r = -ENOTTY;
6843 if (!kvm_x86_ops.mem_enc_register_region)
6844 goto out;
6845
6846 r = static_call(kvm_x86_mem_enc_register_region)(kvm, &region);
6847 break;
6848 }
6849 case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
6850 struct kvm_enc_region region;
6851
6852 r = -EFAULT;
6853 if (copy_from_user(&region, argp, sizeof(region)))
6854 goto out;
6855
6856 r = -ENOTTY;
6857 if (!kvm_x86_ops.mem_enc_unregister_region)
6858 goto out;
6859
6860 r = static_call(kvm_x86_mem_enc_unregister_region)(kvm, &region);
6861 break;
6862 }
6863 case KVM_HYPERV_EVENTFD: {
6864 struct kvm_hyperv_eventfd hvevfd;
6865
6866 r = -EFAULT;
6867 if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
6868 goto out;
6869 r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
6870 break;
6871 }
6872 case KVM_SET_PMU_EVENT_FILTER:
6873 r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
6874 break;
6875 case KVM_X86_SET_MSR_FILTER:
6876 r = kvm_vm_ioctl_set_msr_filter(kvm, argp);
6877 break;
6878 default:
6879 r = -ENOTTY;
6880 }
6881 out:
6882 return r;
6883 }
6884
6885 static void kvm_init_msr_list(void)
6886 {
6887 u32 dummy[2];
6888 unsigned i;
6889
6890 BUILD_BUG_ON_MSG(KVM_PMC_MAX_FIXED != 3,
6891 "Please update the fixed PMCs in msrs_to_saved_all[]");
6892
6893 num_msrs_to_save = 0;
6894 num_emulated_msrs = 0;
6895 num_msr_based_features = 0;
6896
6897 for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
6898 if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
6899 continue;
6900
6901
6902
6903
6904
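/*
 * Host support alone is not sufficient for some MSRs: skip those
 * whose corresponding feature KVM does not expose to guests.
 */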
6905 switch (msrs_to_save_all[i]) {
6906 case MSR_IA32_BNDCFGS:
6907 if (!kvm_mpx_supported())
6908 continue;
6909 break;
6910 case MSR_TSC_AUX:
6911 if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP) &&
6912 !kvm_cpu_cap_has(X86_FEATURE_RDPID))
6913 continue;
6914 break;
6915 case MSR_IA32_UMWAIT_CONTROL:
6916 if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG))
6917 continue;
6918 break;
6919 case MSR_IA32_RTIT_CTL:
6920 case MSR_IA32_RTIT_STATUS:
6921 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT))
6922 continue;
6923 break;
6924 case MSR_IA32_RTIT_CR3_MATCH:
6925 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
6926 !intel_pt_validate_hw_cap(PT_CAP_cr3_filtering))
6927 continue;
6928 break;
6929 case MSR_IA32_RTIT_OUTPUT_BASE:
6930 case MSR_IA32_RTIT_OUTPUT_MASK:
6931 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
6932 (!intel_pt_validate_hw_cap(PT_CAP_topa_output) &&
6933 !intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
6934 continue;
6935 break;
6936 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
6937 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
6938 msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
6939 intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
6940 continue;
6941 break;
6942 case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
6943 if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
6944 min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
6945 continue;
6946 break;
6947 case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
6948 if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
6949 min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
6950 continue;
6951 break;
6952 case MSR_IA32_XFD:
6953 case MSR_IA32_XFD_ERR:
6954 if (!kvm_cpu_cap_has(X86_FEATURE_XFD))
6955 continue;
6956 break;
6957 default:
6958 break;
6959 }
6960
6961 msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
6962 }
6963
6964 for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
6965 if (!static_call(kvm_x86_has_emulated_msr)(NULL, emulated_msrs_all[i]))
6966 continue;
6967
6968 emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
6969 }
6970
6971 for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
6972 struct kvm_msr_entry msr;
6973
6974 msr.index = msr_based_features_all[i];
6975 if (kvm_get_msr_feature(&msr))
6976 continue;
6977
6978 msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
6979 }
6980 }
6981
6982 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
6983 const void *v)
6984 {
6985 int handled = 0;
6986 int n;
6987
6988 do {
6989 n = min(len, 8);
6990 if (!(lapic_in_kernel(vcpu) &&
6991 !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
6992 && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
6993 break;
6994 handled += n;
6995 addr += n;
6996 len -= n;
6997 v += n;
6998 } while (len);
6999
7000 return handled;
7001 }
7002
7003 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
7004 {
7005 int handled = 0;
7006 int n;
7007
7008 do {
7009 n = min(len, 8);
7010 if (!(lapic_in_kernel(vcpu) &&
7011 !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
7012 addr, n, v))
7013 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
7014 break;
7015 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
7016 handled += n;
7017 addr += n;
7018 len -= n;
7019 v += n;
7020 } while (len);
7021
7022 return handled;
7023 }
7024
7025 static void kvm_set_segment(struct kvm_vcpu *vcpu,
7026 struct kvm_segment *var, int seg)
7027 {
7028 static_call(kvm_x86_set_segment)(vcpu, var, seg);
7029 }
7030
7031 void kvm_get_segment(struct kvm_vcpu *vcpu,
7032 struct kvm_segment *var, int seg)
7033 {
7034 static_call(kvm_x86_get_segment)(vcpu, var, seg);
7035 }
7036
7037 gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u64 access,
7038 struct x86_exception *exception)
7039 {
7040 struct kvm_mmu *mmu = vcpu->arch.mmu;
7041 gpa_t t_gpa;
7042
7043 BUG_ON(!mmu_is_nested(vcpu));
7044
7045
7046 access |= PFERR_USER_MASK;
7047 t_gpa = mmu->gva_to_gpa(vcpu, mmu, gpa, access, exception);
7048
7049 return t_gpa;
7050 }
7051
7052 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
7053 struct x86_exception *exception)
7054 {
7055 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
7056
7057 u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
7058 return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
7059 }
7060 EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read);
7061
7062 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
7063 struct x86_exception *exception)
7064 {
7065 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
7066
7067 u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
7068 access |= PFERR_FETCH_MASK;
7069 return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
7070 }
7071
7072 gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
7073 struct x86_exception *exception)
7074 {
7075 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
7076
7077 u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
7078 access |= PFERR_WRITE_MASK;
7079 return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
7080 }
7081 EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write);
7082
7083
7084 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
7085 struct x86_exception *exception)
7086 {
7087 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
7088
7089 return mmu->gva_to_gpa(vcpu, mmu, gva, 0, exception);
7090 }
7091
7092 static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
7093 struct kvm_vcpu *vcpu, u64 access,
7094 struct x86_exception *exception)
7095 {
7096 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
7097 void *data = val;
7098 int r = X86EMUL_CONTINUE;
7099
7100 while (bytes) {
7101 gpa_t gpa = mmu->gva_to_gpa(vcpu, mmu, addr, access, exception);
7102 unsigned offset = addr & (PAGE_SIZE-1);
7103 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
7104 int ret;
7105
7106 if (gpa == INVALID_GPA)
7107 return X86EMUL_PROPAGATE_FAULT;
7108 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data,
7109 offset, toread);
7110 if (ret < 0) {
7111 r = X86EMUL_IO_NEEDED;
7112 goto out;
7113 }
7114
7115 bytes -= toread;
7116 data += toread;
7117 addr += toread;
7118 }
7119 out:
7120 return r;
7121 }
7122
7123
7124 static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
7125 gva_t addr, void *val, unsigned int bytes,
7126 struct x86_exception *exception)
7127 {
7128 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7129 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
7130 u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
7131 unsigned offset;
7132 int ret;
7133
7134
7135 gpa_t gpa = mmu->gva_to_gpa(vcpu, mmu, addr, access|PFERR_FETCH_MASK,
7136 exception);
7137 if (unlikely(gpa == INVALID_GPA))
7138 return X86EMUL_PROPAGATE_FAULT;
7139
7140 offset = addr & (PAGE_SIZE-1);
7141 if (WARN_ON(offset + bytes > PAGE_SIZE))
7142 bytes = (unsigned)PAGE_SIZE - offset;
7143 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val,
7144 offset, bytes);
7145 if (unlikely(ret < 0))
7146 return X86EMUL_IO_NEEDED;
7147
7148 return X86EMUL_CONTINUE;
7149 }
7150
7151 int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
7152 gva_t addr, void *val, unsigned int bytes,
7153 struct x86_exception *exception)
7154 {
7155 u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
7156
7157
7158
7159
7160
7161
7162
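/*
 * Zero the exception struct up front so that callers which go on to
 * inject it never see stale stack contents when the helper returns
 * without filling it in.
 */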
7163 memset(exception, 0, sizeof(*exception));
7164 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
7165 exception);
7166 }
7167 EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
7168
7169 static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
7170 gva_t addr, void *val, unsigned int bytes,
7171 struct x86_exception *exception, bool system)
7172 {
7173 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7174 u64 access = 0;
7175
7176 if (system)
7177 access |= PFERR_IMPLICIT_ACCESS;
7178 else if (static_call(kvm_x86_get_cpl)(vcpu) == 3)
7179 access |= PFERR_USER_MASK;
7180
7181 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
7182 }
7183
7184 static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
7185 unsigned long addr, void *val, unsigned int bytes)
7186 {
7187 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7188 int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
7189
7190 return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
7191 }
7192
7193 static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
7194 struct kvm_vcpu *vcpu, u64 access,
7195 struct x86_exception *exception)
7196 {
7197 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
7198 void *data = val;
7199 int r = X86EMUL_CONTINUE;
7200
7201 while (bytes) {
7202 gpa_t gpa = mmu->gva_to_gpa(vcpu, mmu, addr, access, exception);
7203 unsigned offset = addr & (PAGE_SIZE-1);
7204 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
7205 int ret;
7206
7207 if (gpa == INVALID_GPA)
7208 return X86EMUL_PROPAGATE_FAULT;
7209 ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite);
7210 if (ret < 0) {
7211 r = X86EMUL_IO_NEEDED;
7212 goto out;
7213 }
7214
7215 bytes -= towrite;
7216 data += towrite;
7217 addr += towrite;
7218 }
7219 out:
7220 return r;
7221 }
7222
7223 static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
7224 unsigned int bytes, struct x86_exception *exception,
7225 bool system)
7226 {
7227 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7228 u64 access = PFERR_WRITE_MASK;
7229
7230 if (system)
7231 access |= PFERR_IMPLICIT_ACCESS;
7232 else if (static_call(kvm_x86_get_cpl)(vcpu) == 3)
7233 access |= PFERR_USER_MASK;
7234
7235 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
7236 access, exception);
7237 }
7238
7239 int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
7240 unsigned int bytes, struct x86_exception *exception)
7241 {
7242
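/*
 * Writes through this path can touch a lot of guest memory; flag the
 * vCPU so the next VM-entry flushes the L1 data cache (L1TF
 * mitigation).
 */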
7243 vcpu->arch.l1tf_flush_l1d = true;
7244
7245 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
7246 PFERR_WRITE_MASK, exception);
7247 }
7248 EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
7249
7250 static int kvm_can_emulate_insn(struct kvm_vcpu *vcpu, int emul_type,
7251 void *insn, int insn_len)
7252 {
7253 return static_call(kvm_x86_can_emulate_instruction)(vcpu, emul_type,
7254 insn, insn_len);
7255 }
7256
7257 int handle_ud(struct kvm_vcpu *vcpu)
7258 {
7259 static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
7260 int emul_type = EMULTYPE_TRAP_UD;
7261 char sig[5];
7262 struct x86_exception e;
7263
7264 if (unlikely(!kvm_can_emulate_insn(vcpu, emul_type, NULL, 0)))
7265 return 1;
7266
7267 if (force_emulation_prefix &&
7268 kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
7269 sig, sizeof(sig), &e) == 0 &&
7270 memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) {
7271 kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
7272 emul_type = EMULTYPE_TRAP_UD_FORCED;
7273 }
7274
7275 return kvm_emulate_instruction(vcpu, emul_type);
7276 }
7277 EXPORT_SYMBOL_GPL(handle_ud);
7278
7279 static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
7280 gpa_t gpa, bool write)
7281 {
7282
7283 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
7284 return 1;
7285
7286 if (vcpu_match_mmio_gpa(vcpu, gpa)) {
7287 trace_vcpu_match_mmio(gva, gpa, write, true);
7288 return 1;
7289 }
7290
7291 return 0;
7292 }
7293
7294 static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
7295 gpa_t *gpa, struct x86_exception *exception,
7296 bool write)
7297 {
7298 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
7299 u64 access = ((static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0)
7300 | (write ? PFERR_WRITE_MASK : 0);
7301
7302
7303
7304
7305
7306
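/*
 * Fast path: if the gva matches the cached MMIO translation and the
 * cached access bits still permit this access, reuse the cached gfn
 * instead of walking the guest page tables again.
 */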
7307 if (vcpu_match_mmio_gva(vcpu, gva) && (!is_paging(vcpu) ||
7308 !permission_fault(vcpu, vcpu->arch.walk_mmu,
7309 vcpu->arch.mmio_access, 0, access))) {
7310 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
7311 (gva & (PAGE_SIZE - 1));
7312 trace_vcpu_match_mmio(gva, *gpa, write, false);
7313 return 1;
7314 }
7315
7316 *gpa = mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
7317
7318 if (*gpa == INVALID_GPA)
7319 return -1;
7320
7321 return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
7322 }
7323
7324 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
7325 const void *val, int bytes)
7326 {
7327 int ret;
7328
7329 ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
7330 if (ret < 0)
7331 return 0;
7332 kvm_page_track_write(vcpu, gpa, val, bytes);
7333 return 1;
7334 }
7335
7336 struct read_write_emulator_ops {
7337 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
7338 int bytes);
7339 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
7340 void *val, int bytes);
7341 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
7342 int bytes, void *val);
7343 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
7344 void *val, int bytes);
7345 bool write;
7346 };
7347
7348 static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
7349 {
7350 if (vcpu->mmio_read_completed) {
7351 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
7352 vcpu->mmio_fragments[0].gpa, val);
7353 vcpu->mmio_read_completed = 0;
7354 return 1;
7355 }
7356
7357 return 0;
7358 }
7359
7360 static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
7361 void *val, int bytes)
7362 {
7363 return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes);
7364 }
7365
7366 static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
7367 void *val, int bytes)
7368 {
7369 return emulator_write_phys(vcpu, gpa, val, bytes);
7370 }
7371
7372 static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
7373 {
7374 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
7375 return vcpu_mmio_write(vcpu, gpa, bytes, val);
7376 }
7377
7378 static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
7379 void *val, int bytes)
7380 {
7381 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
7382 return X86EMUL_IO_NEEDED;
7383 }
7384
7385 static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
7386 void *val, int bytes)
7387 {
7388 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
7389
7390 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
7391 return X86EMUL_CONTINUE;
7392 }
7393
7394 static const struct read_write_emulator_ops read_emultor = {
7395 .read_write_prepare = read_prepare,
7396 .read_write_emulate = read_emulate,
7397 .read_write_mmio = vcpu_mmio_read,
7398 .read_write_exit_mmio = read_exit_mmio,
7399 };
7400
7401 static const struct read_write_emulator_ops write_emultor = {
7402 .read_write_emulate = write_emulate,
7403 .read_write_mmio = write_mmio,
7404 .read_write_exit_mmio = write_exit_mmio,
7405 .write = true,
7406 };
7407
7408 static int emulator_read_write_onepage(unsigned long addr, void *val,
7409 unsigned int bytes,
7410 struct x86_exception *exception,
7411 struct kvm_vcpu *vcpu,
7412 const struct read_write_emulator_ops *ops)
7413 {
7414 gpa_t gpa;
7415 int handled, ret;
7416 bool write = ops->write;
7417 struct kvm_mmio_fragment *frag;
7418 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
7419
7420
7421
7422
7423
7424
7425
7426
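/*
 * If the page-fault path already supplied a gpa whose page offset
 * matches this access, reuse it and skip the gva->gpa walk;
 * otherwise translate (and detect MMIO) the slow way.
 */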
7427 if (ctxt->gpa_available && emulator_can_use_gpa(ctxt) &&
7428 (addr & ~PAGE_MASK) == (ctxt->gpa_val & ~PAGE_MASK)) {
7429 gpa = ctxt->gpa_val;
7430 ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
7431 } else {
7432 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
7433 if (ret < 0)
7434 return X86EMUL_PROPAGATE_FAULT;
7435 }
7436
7437 if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
7438 return X86EMUL_CONTINUE;
7439
7440
7441
7442
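/*
 * The access is not backed by ordinary guest RAM: let the in-kernel
 * MMIO handlers take what they can, and queue whatever remains as a
 * fragment to be completed by userspace.
 */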
7443 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
7444 if (handled == bytes)
7445 return X86EMUL_CONTINUE;
7446
7447 gpa += handled;
7448 bytes -= handled;
7449 val += handled;
7450
7451 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
7452 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
7453 frag->gpa = gpa;
7454 frag->data = val;
7455 frag->len = bytes;
7456 return X86EMUL_CONTINUE;
7457 }
7458
7459 static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
7460 unsigned long addr,
7461 void *val, unsigned int bytes,
7462 struct x86_exception *exception,
7463 const struct read_write_emulator_ops *ops)
7464 {
7465 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7466 gpa_t gpa;
7467 int rc;
7468
7469 if (ops->read_write_prepare &&
7470 ops->read_write_prepare(vcpu, val, bytes))
7471 return X86EMUL_CONTINUE;
7472
7473 vcpu->mmio_nr_fragments = 0;
7474
7475
7476 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
7477 int now;
7478
7479 now = -addr & ~PAGE_MASK;
7480 rc = emulator_read_write_onepage(addr, val, now, exception,
7481 vcpu, ops);
7482
7483 if (rc != X86EMUL_CONTINUE)
7484 return rc;
7485 addr += now;
7486 if (ctxt->mode != X86EMUL_MODE_PROT64)
7487 addr = (u32)addr;
7488 val += now;
7489 bytes -= now;
7490 }
7491
7492 rc = emulator_read_write_onepage(addr, val, bytes, exception,
7493 vcpu, ops);
7494 if (rc != X86EMUL_CONTINUE)
7495 return rc;
7496
7497 if (!vcpu->mmio_nr_fragments)
7498 return rc;
7499
7500 gpa = vcpu->mmio_fragments[0].gpa;
7501
7502 vcpu->mmio_needed = 1;
7503 vcpu->mmio_cur_fragment = 0;
7504
7505 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
7506 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
7507 vcpu->run->exit_reason = KVM_EXIT_MMIO;
7508 vcpu->run->mmio.phys_addr = gpa;
7509
7510 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
7511 }
7512
7513 static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
7514 unsigned long addr,
7515 void *val,
7516 unsigned int bytes,
7517 struct x86_exception *exception)
7518 {
7519 return emulator_read_write(ctxt, addr, val, bytes,
7520 exception, &read_emultor);
7521 }
7522
7523 static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
7524 unsigned long addr,
7525 const void *val,
7526 unsigned int bytes,
7527 struct x86_exception *exception)
7528 {
7529 return emulator_read_write(ctxt, addr, (void *)val, bytes,
7530 exception, &write_emultor);
7531 }
7532
7533 #define emulator_try_cmpxchg_user(t, ptr, old, new) \
7534 (__try_cmpxchg_user((t __user *)(ptr), (t *)(old), *(t *)(new), efault ## t))
7535
7536 static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
7537 unsigned long addr,
7538 const void *old,
7539 const void *new,
7540 unsigned int bytes,
7541 struct x86_exception *exception)
7542 {
7543 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7544 u64 page_line_mask;
7545 unsigned long hva;
7546 gpa_t gpa;
7547 int r;
7548
7549
7550 if (bytes > 8 || (bytes & (bytes - 1)))
7551 goto emul_write;
7552
7553 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
7554
7555 if (gpa == INVALID_GPA ||
7556 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
7557 goto emul_write;
7558
7559
7560
7561
7562
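/*
 * Only attempt the atomic cmpxchg when the operand does not cross a
 * cache line (with split-lock detection enabled) or a page boundary;
 * otherwise fall back to emulating the exchange as a plain write.
 */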
7563 if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
7564 page_line_mask = ~(cache_line_size() - 1);
7565 else
7566 page_line_mask = PAGE_MASK;
7567
7568 if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask))
7569 goto emul_write;
7570
7571 hva = kvm_vcpu_gfn_to_hva(vcpu, gpa_to_gfn(gpa));
7572 if (kvm_is_error_hva(hva))
7573 goto emul_write;
7574
7575 hva += offset_in_page(gpa);
7576
7577 switch (bytes) {
7578 case 1:
7579 r = emulator_try_cmpxchg_user(u8, hva, old, new);
7580 break;
7581 case 2:
7582 r = emulator_try_cmpxchg_user(u16, hva, old, new);
7583 break;
7584 case 4:
7585 r = emulator_try_cmpxchg_user(u32, hva, old, new);
7586 break;
7587 case 8:
7588 r = emulator_try_cmpxchg_user(u64, hva, old, new);
7589 break;
7590 default:
7591 BUG();
7592 }
7593
7594 if (r < 0)
7595 return X86EMUL_UNHANDLEABLE;
7596 if (r)
7597 return X86EMUL_CMPXCHG_FAILED;
7598
7599 kvm_page_track_write(vcpu, gpa, new, bytes);
7600
7601 return X86EMUL_CONTINUE;
7602
7603 emul_write:
7604 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
7605
7606 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
7607 }
7608
7609 static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
7610 unsigned short port, void *data,
7611 unsigned int count, bool in)
7612 {
7613 unsigned i;
7614 int r;
7615
7616 WARN_ON_ONCE(vcpu->arch.pio.count);
7617 for (i = 0; i < count; i++) {
7618 if (in)
7619 r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, port, size, data);
7620 else
7621 r = kvm_io_bus_write(vcpu, KVM_PIO_BUS, port, size, data);
7622
7623 if (r) {
7624 if (i == 0)
7625 goto userspace_io;
7626
7627
7628
7629
7630
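/*
 * Some iterations succeeded before the in-kernel device gave up:
 * zero the remainder of an IN buffer and report the transfer as
 * handled rather than splitting it with userspace.
 */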
7631 if (in)
7632 memset(data, 0, size * (count - i));
7633 break;
7634 }
7635
7636 data += size;
7637 }
7638 return 1;
7639
7640 userspace_io:
7641 vcpu->arch.pio.port = port;
7642 vcpu->arch.pio.in = in;
7643 vcpu->arch.pio.count = count;
7644 vcpu->arch.pio.size = size;
7645
7646 if (in)
7647 memset(vcpu->arch.pio_data, 0, size * count);
7648 else
7649 memcpy(vcpu->arch.pio_data, data, size * count);
7650
7651 vcpu->run->exit_reason = KVM_EXIT_IO;
7652 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
7653 vcpu->run->io.size = size;
7654 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
7655 vcpu->run->io.count = count;
7656 vcpu->run->io.port = port;
7657 return 0;
7658 }
7659
7660 static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
7661 unsigned short port, void *val, unsigned int count)
7662 {
7663 int r = emulator_pio_in_out(vcpu, size, port, val, count, true);
7664 if (r)
7665 trace_kvm_pio(KVM_PIO_IN, port, size, count, val);
7666
7667 return r;
7668 }
7669
7670 static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, void *val)
7671 {
7672 int size = vcpu->arch.pio.size;
7673 unsigned int count = vcpu->arch.pio.count;
7674 memcpy(val, vcpu->arch.pio_data, size * count);
7675 trace_kvm_pio(KVM_PIO_IN, vcpu->arch.pio.port, size, count, vcpu->arch.pio_data);
7676 vcpu->arch.pio.count = 0;
7677 }
7678
7679 static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
7680 int size, unsigned short port, void *val,
7681 unsigned int count)
7682 {
7683 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7684 if (vcpu->arch.pio.count) {
7685
7686
7687
7688
7689
7690
7691
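/*
 * Completing a string PIO read that previously exited to userspace:
 * copy the data userspace placed in pio_data and clear the pending
 * count.
 */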
7692 complete_emulator_pio_in(vcpu, val);
7693 return 1;
7694 }
7695
7696 return emulator_pio_in(vcpu, size, port, val, count);
7697 }
7698
7699 static int emulator_pio_out(struct kvm_vcpu *vcpu, int size,
7700 unsigned short port, const void *val,
7701 unsigned int count)
7702 {
7703 trace_kvm_pio(KVM_PIO_OUT, port, size, count, val);
7704 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
7705 }
7706
7707 static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
7708 int size, unsigned short port,
7709 const void *val, unsigned int count)
7710 {
7711 return emulator_pio_out(emul_to_vcpu(ctxt), size, port, val, count);
7712 }
7713
7714 static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
7715 {
7716 return static_call(kvm_x86_get_segment_base)(vcpu, seg);
7717 }
7718
7719 static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
7720 {
7721 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
7722 }
7723
7724 static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
7725 {
7726 if (!need_emulate_wbinvd(vcpu))
7727 return X86EMUL_CONTINUE;
7728
7729 if (static_call(kvm_x86_has_wbinvd_exit)()) {
7730 int cpu = get_cpu();
7731
7732 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
7733 on_each_cpu_mask(vcpu->arch.wbinvd_dirty_mask,
7734 wbinvd_ipi, NULL, 1);
7735 put_cpu();
7736 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
7737 } else
7738 wbinvd();
7739 return X86EMUL_CONTINUE;
7740 }
7741
7742 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
7743 {
7744 kvm_emulate_wbinvd_noskip(vcpu);
7745 return kvm_skip_emulated_instruction(vcpu);
7746 }
7747 EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
7748
7749
7750
7751 static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
7752 {
7753 kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
7754 }
7755
7756 static void emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
7757 unsigned long *dest)
7758 {
7759 kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
7760 }
7761
7762 static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
7763 unsigned long value)
7764 {
7765
7766 return kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
7767 }
7768
7769 static u64 mk_cr_64(u64 curr_cr, u32 new_val)
7770 {
7771 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
7772 }
7773
7774 static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
7775 {
7776 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7777 unsigned long value;
7778
7779 switch (cr) {
7780 case 0:
7781 value = kvm_read_cr0(vcpu);
7782 break;
7783 case 2:
7784 value = vcpu->arch.cr2;
7785 break;
7786 case 3:
7787 value = kvm_read_cr3(vcpu);
7788 break;
7789 case 4:
7790 value = kvm_read_cr4(vcpu);
7791 break;
7792 case 8:
7793 value = kvm_get_cr8(vcpu);
7794 break;
7795 default:
7796 kvm_err("%s: unexpected cr %u\n", __func__, cr);
7797 return 0;
7798 }
7799
7800 return value;
7801 }
7802
7803 static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
7804 {
7805 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7806 int res = 0;
7807
7808 switch (cr) {
7809 case 0:
7810 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
7811 break;
7812 case 2:
7813 vcpu->arch.cr2 = val;
7814 break;
7815 case 3:
7816 res = kvm_set_cr3(vcpu, val);
7817 break;
7818 case 4:
7819 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
7820 break;
7821 case 8:
7822 res = kvm_set_cr8(vcpu, val);
7823 break;
7824 default:
7825 kvm_err("%s: unexpected cr %u\n", __func__, cr);
7826 res = -1;
7827 }
7828
7829 return res;
7830 }
7831
7832 static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
7833 {
7834 return static_call(kvm_x86_get_cpl)(emul_to_vcpu(ctxt));
7835 }
7836
7837 static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
7838 {
7839 static_call(kvm_x86_get_gdt)(emul_to_vcpu(ctxt), dt);
7840 }
7841
7842 static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
7843 {
7844 static_call(kvm_x86_get_idt)(emul_to_vcpu(ctxt), dt);
7845 }
7846
7847 static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
7848 {
7849 static_call(kvm_x86_set_gdt)(emul_to_vcpu(ctxt), dt);
7850 }
7851
7852 static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
7853 {
7854 static_call(kvm_x86_set_idt)(emul_to_vcpu(ctxt), dt);
7855 }
7856
7857 static unsigned long emulator_get_cached_segment_base(
7858 struct x86_emulate_ctxt *ctxt, int seg)
7859 {
7860 return get_segment_base(emul_to_vcpu(ctxt), seg);
7861 }
7862
7863 static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
7864 struct desc_struct *desc, u32 *base3,
7865 int seg)
7866 {
7867 struct kvm_segment var;
7868
7869 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
7870 *selector = var.selector;
7871
7872 if (var.unusable) {
7873 memset(desc, 0, sizeof(*desc));
7874 if (base3)
7875 *base3 = 0;
7876 return false;
7877 }
7878
7879 if (var.g)
7880 var.limit >>= 12;
7881 set_desc_limit(desc, var.limit);
7882 set_desc_base(desc, (unsigned long)var.base);
7883 #ifdef CONFIG_X86_64
7884 if (base3)
7885 *base3 = var.base >> 32;
7886 #endif
7887 desc->type = var.type;
7888 desc->s = var.s;
7889 desc->dpl = var.dpl;
7890 desc->p = var.present;
7891 desc->avl = var.avl;
7892 desc->l = var.l;
7893 desc->d = var.db;
7894 desc->g = var.g;
7895
7896 return true;
7897 }
7898
7899 static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
7900 struct desc_struct *desc, u32 base3,
7901 int seg)
7902 {
7903 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7904 struct kvm_segment var;
7905
7906 var.selector = selector;
7907 var.base = get_desc_base(desc);
7908 #ifdef CONFIG_X86_64
7909 var.base |= ((u64)base3) << 32;
7910 #endif
7911 var.limit = get_desc_limit(desc);
7912 if (desc->g)
7913 var.limit = (var.limit << 12) | 0xfff;
7914 var.type = desc->type;
7915 var.dpl = desc->dpl;
7916 var.db = desc->d;
7917 var.s = desc->s;
7918 var.l = desc->l;
7919 var.g = desc->g;
7920 var.avl = desc->avl;
7921 var.present = desc->p;
7922 var.unusable = !var.present;
7923 var.padding = 0;
7924
7925 kvm_set_segment(vcpu, &var, seg);
7926 return;
7927 }
7928
7929 static int emulator_get_msr_with_filter(struct x86_emulate_ctxt *ctxt,
7930 u32 msr_index, u64 *pdata)
7931 {
7932 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7933 int r;
7934
7935 r = kvm_get_msr_with_filter(vcpu, msr_index, pdata);
7936
7937 if (r && kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_RDMSR, 0,
7938 complete_emulated_rdmsr, r)) {
7939
7940 return X86EMUL_IO_NEEDED;
7941 }
7942
7943 return r;
7944 }
7945
7946 static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt,
7947 u32 msr_index, u64 data)
7948 {
7949 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7950 int r;
7951
7952 r = kvm_set_msr_with_filter(vcpu, msr_index, data);
7953
7954 if (r && kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_WRMSR, data,
7955 complete_emulated_msr_access, r)) {
7956
7957 return X86EMUL_IO_NEEDED;
7958 }
7959
7960 return r;
7961 }
7962
7963 static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
7964 u32 msr_index, u64 *pdata)
7965 {
7966 return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
7967 }
7968
7969 static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
7970 u32 msr_index, u64 data)
7971 {
7972 return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
7973 }
7974
7975 static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
7976 {
7977 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7978
7979 return vcpu->arch.smbase;
7980 }
7981
7982 static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
7983 {
7984 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7985
7986 vcpu->arch.smbase = smbase;
7987 }
7988
7989 static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
7990 u32 pmc)
7991 {
7992 if (kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc))
7993 return 0;
7994 return -EINVAL;
7995 }
7996
7997 static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
7998 u32 pmc, u64 *pdata)
7999 {
8000 return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata);
8001 }
8002
8003 static void emulator_halt(struct x86_emulate_ctxt *ctxt)
8004 {
8005 emul_to_vcpu(ctxt)->arch.halt_request = 1;
8006 }
8007
8008 static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
8009 struct x86_instruction_info *info,
8010 enum x86_intercept_stage stage)
8011 {
8012 return static_call(kvm_x86_check_intercept)(emul_to_vcpu(ctxt), info, stage,
8013 &ctxt->exception);
8014 }
8015
8016 static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
8017 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx,
8018 bool exact_only)
8019 {
8020 return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, exact_only);
8021 }
8022
8023 static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt)
8024 {
8025 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_LM);
8026 }
8027
8028 static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt)
8029 {
8030 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
8031 }
8032
8033 static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt)
8034 {
8035 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR);
8036 }
8037
8038 static bool emulator_guest_has_rdpid(struct x86_emulate_ctxt *ctxt)
8039 {
8040 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_RDPID);
8041 }
8042
8043 static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
8044 {
8045 return kvm_register_read_raw(emul_to_vcpu(ctxt), reg);
8046 }
8047
8048 static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
8049 {
8050 kvm_register_write_raw(emul_to_vcpu(ctxt), reg, val);
8051 }
8052
8053 static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
8054 {
8055 static_call(kvm_x86_set_nmi_mask)(emul_to_vcpu(ctxt), masked);
8056 }
8057
8058 static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
8059 {
8060 return emul_to_vcpu(ctxt)->arch.hflags;
8061 }
8062
8063 static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt)
8064 {
8065 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
8066
8067 kvm_smm_changed(vcpu, false);
8068 }
8069
8070 static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt,
8071 const char *smstate)
8072 {
8073 return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smstate);
8074 }
8075
8076 static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
8077 {
8078 kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt));
8079 }
8080
8081 static int emulator_set_xcr(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr)
8082 {
8083 return __kvm_set_xcr(emul_to_vcpu(ctxt), index, xcr);
8084 }
8085
8086 static void emulator_vm_bugged(struct x86_emulate_ctxt *ctxt)
8087 {
8088 struct kvm *kvm = emul_to_vcpu(ctxt)->kvm;
8089
8090 if (!kvm->vm_bugged)
8091 kvm_vm_bugged(kvm);
8092 }
8093
8094 static const struct x86_emulate_ops emulate_ops = {
8095 .vm_bugged = emulator_vm_bugged,
8096 .read_gpr = emulator_read_gpr,
8097 .write_gpr = emulator_write_gpr,
8098 .read_std = emulator_read_std,
8099 .write_std = emulator_write_std,
8100 .read_phys = kvm_read_guest_phys_system,
8101 .fetch = kvm_fetch_guest_virt,
8102 .read_emulated = emulator_read_emulated,
8103 .write_emulated = emulator_write_emulated,
8104 .cmpxchg_emulated = emulator_cmpxchg_emulated,
8105 .invlpg = emulator_invlpg,
8106 .pio_in_emulated = emulator_pio_in_emulated,
8107 .pio_out_emulated = emulator_pio_out_emulated,
8108 .get_segment = emulator_get_segment,
8109 .set_segment = emulator_set_segment,
8110 .get_cached_segment_base = emulator_get_cached_segment_base,
8111 .get_gdt = emulator_get_gdt,
8112 .get_idt = emulator_get_idt,
8113 .set_gdt = emulator_set_gdt,
8114 .set_idt = emulator_set_idt,
8115 .get_cr = emulator_get_cr,
8116 .set_cr = emulator_set_cr,
8117 .cpl = emulator_get_cpl,
8118 .get_dr = emulator_get_dr,
8119 .set_dr = emulator_set_dr,
8120 .get_smbase = emulator_get_smbase,
8121 .set_smbase = emulator_set_smbase,
8122 .set_msr_with_filter = emulator_set_msr_with_filter,
8123 .get_msr_with_filter = emulator_get_msr_with_filter,
8124 .set_msr = emulator_set_msr,
8125 .get_msr = emulator_get_msr,
8126 .check_pmc = emulator_check_pmc,
8127 .read_pmc = emulator_read_pmc,
8128 .halt = emulator_halt,
8129 .wbinvd = emulator_wbinvd,
8130 .fix_hypercall = emulator_fix_hypercall,
8131 .intercept = emulator_intercept,
8132 .get_cpuid = emulator_get_cpuid,
8133 .guest_has_long_mode = emulator_guest_has_long_mode,
8134 .guest_has_movbe = emulator_guest_has_movbe,
8135 .guest_has_fxsr = emulator_guest_has_fxsr,
8136 .guest_has_rdpid = emulator_guest_has_rdpid,
8137 .set_nmi_mask = emulator_set_nmi_mask,
8138 .get_hflags = emulator_get_hflags,
8139 .exiting_smm = emulator_exiting_smm,
8140 .leave_smm = emulator_leave_smm,
8141 .triple_fault = emulator_triple_fault,
8142 .set_xcr = emulator_set_xcr,
8143 };
8144
8145 static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
8146 {
8147 u32 int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
8148
8149
8150
8151
8152
8153
8154
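/*
 * An interrupt shadow only suppresses events for the one instruction
 * that set it: if a shadow was already active, clear it rather than
 * re-arm it, and re-evaluate pending events once the shadow is gone.
 */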
8155 if (int_shadow & mask)
8156 mask = 0;
8157 if (unlikely(int_shadow || mask)) {
8158 static_call(kvm_x86_set_interrupt_shadow)(vcpu, mask);
8159 if (!mask)
8160 kvm_make_request(KVM_REQ_EVENT, vcpu);
8161 }
8162 }
8163
8164 static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
8165 {
8166 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
8167 if (ctxt->exception.vector == PF_VECTOR)
8168 return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
8169
8170 if (ctxt->exception.error_code_valid)
8171 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
8172 ctxt->exception.error_code);
8173 else
8174 kvm_queue_exception(vcpu, ctxt->exception.vector);
8175 return false;
8176 }
8177
8178 static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu)
8179 {
8180 struct x86_emulate_ctxt *ctxt;
8181
8182 ctxt = kmem_cache_zalloc(x86_emulator_cache, GFP_KERNEL_ACCOUNT);
8183 if (!ctxt) {
8184 pr_err("kvm: failed to allocate vcpu's emulator\n");
8185 return NULL;
8186 }
8187
8188 ctxt->vcpu = vcpu;
8189 ctxt->ops = &emulate_ops;
8190 vcpu->arch.emulate_ctxt = ctxt;
8191
8192 return ctxt;
8193 }
8194
8195 static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
8196 {
8197 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
8198 int cs_db, cs_l;
8199
8200 static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
8201
8202 ctxt->gpa_available = false;
8203 ctxt->eflags = kvm_get_rflags(vcpu);
8204 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
8205
8206 ctxt->eip = kvm_rip_read(vcpu);
8207 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
8208 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
8209 (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
8210 cs_db ? X86EMUL_MODE_PROT32 :
8211 X86EMUL_MODE_PROT16;
8212 BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
8213 BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
8214 BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
8215
8216 ctxt->interruptibility = 0;
8217 ctxt->have_exception = false;
8218 ctxt->exception.vector = -1;
8219 ctxt->perm_ok = false;
8220
8221 init_decode_cache(ctxt);
8222 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
8223 }
8224
8225 void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
8226 {
8227 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
8228 int ret;
8229
8230 init_emulate_ctxt(vcpu);
8231
8232 ctxt->op_bytes = 2;
8233 ctxt->ad_bytes = 2;
8234 ctxt->_eip = ctxt->eip + inc_eip;
8235 ret = emulate_int_real(ctxt, irq);
8236
8237 if (ret != X86EMUL_CONTINUE) {
8238 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
8239 } else {
8240 ctxt->eip = ctxt->_eip;
8241 kvm_rip_write(vcpu, ctxt->eip);
8242 kvm_set_rflags(vcpu, ctxt->eflags);
8243 }
8244 }
8245 EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
8246
8247 static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
8248 u8 ndata, u8 *insn_bytes, u8 insn_size)
8249 {
8250 struct kvm_run *run = vcpu->run;
8251 u64 info[5];
8252 u8 info_start;
8253
8254
8255
8256
8257
8258 memset(&info, 0, sizeof(info));
8259
8260 static_call(kvm_x86_get_exit_info)(vcpu, (u32 *)&info[0], &info[1],
8261 &info[2], (u32 *)&info[3],
8262 (u32 *)&info[4]);
8263
8264 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
8265 run->emulation_failure.suberror = KVM_INTERNAL_ERROR_EMULATION;
8266
8267
8268
8269
8270
8271
8272 if (WARN_ON_ONCE(ndata > 4))
8273 ndata = 4;
8274
8275
8276 info_start = 1;
8277 run->emulation_failure.flags = 0;
8278
8279 if (insn_size) {
8280 BUILD_BUG_ON((sizeof(run->emulation_failure.insn_size) +
8281 sizeof(run->emulation_failure.insn_bytes) != 16));
8282 info_start += 2;
8283 run->emulation_failure.flags |=
8284 KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES;
8285 run->emulation_failure.insn_size = insn_size;
8286 memset(run->emulation_failure.insn_bytes, 0x90,
8287 sizeof(run->emulation_failure.insn_bytes));
8288 memcpy(run->emulation_failure.insn_bytes, insn_bytes, insn_size);
8289 }
8290
8291 memcpy(&run->internal.data[info_start], info, sizeof(info));
8292 memcpy(&run->internal.data[info_start + ARRAY_SIZE(info)], data,
8293 ndata * sizeof(data[0]));
8294
8295 run->emulation_failure.ndata = info_start + ARRAY_SIZE(info) + ndata;
8296 }
8297
8298 static void prepare_emulation_ctxt_failure_exit(struct kvm_vcpu *vcpu)
8299 {
8300 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
8301
8302 prepare_emulation_failure_exit(vcpu, NULL, 0, ctxt->fetch.data,
8303 ctxt->fetch.end - ctxt->fetch.data);
8304 }
8305
8306 void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
8307 u8 ndata)
8308 {
8309 prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0);
8310 }
8311 EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit);
8312
8313 void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
8314 {
8315 __kvm_prepare_emulation_failure_exit(vcpu, NULL, 0);
8316 }
8317 EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit);
8318
8319 static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
8320 {
8321 struct kvm *kvm = vcpu->kvm;
8322
8323 ++vcpu->stat.insn_emulation_fail;
8324 trace_kvm_emulate_insn_failed(vcpu);
8325
8326 if (emulation_type & EMULTYPE_VMWARE_GP) {
8327 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
8328 return 1;
8329 }
8330
8331 if (kvm->arch.exit_on_emulation_error ||
8332 (emulation_type & EMULTYPE_SKIP)) {
8333 prepare_emulation_ctxt_failure_exit(vcpu);
8334 return 0;
8335 }
8336
8337 kvm_queue_exception(vcpu, UD_VECTOR);
8338
8339 if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) {
8340 prepare_emulation_ctxt_failure_exit(vcpu);
8341 return 0;
8342 }
8343
8344 return 1;
8345 }
8346
8347 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
8348 bool write_fault_to_shadow_pgtable,
8349 int emulation_type)
8350 {
8351 gpa_t gpa = cr2_or_gpa;
8352 kvm_pfn_t pfn;
8353
8354 if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
8355 return false;
8356
8357 if (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
8358 WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))
8359 return false;
8360
8361 if (!vcpu->arch.mmu->root_role.direct) {
8362
8363
8364
8365
8366 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
8367
8368
8369
8370
8371
8372 if (gpa == INVALID_GPA)
8373 return true;
8374 }
8375
8376
8377
8378
8379
8380
8381
8382 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
8383
8384
8385
8386
8387
8388 if (is_error_noslot_pfn(pfn))
8389 return false;
8390
8391 kvm_release_pfn_clean(pfn);
8392
8393
8394 if (vcpu->arch.mmu->root_role.direct) {
8395 unsigned int indirect_shadow_pages;
8396
8397 write_lock(&vcpu->kvm->mmu_lock);
8398 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
8399 write_unlock(&vcpu->kvm->mmu_lock);
8400
8401 if (indirect_shadow_pages)
8402 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
8403
8404 return true;
8405 }
8406
8407
8408
8409
8410
8411
8412 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
8413
8414
8415
8416
8417
8418
8419 return !write_fault_to_shadow_pgtable;
8420 }
8421
8422 static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
8423 gpa_t cr2_or_gpa, int emulation_type)
8424 {
8425 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
8426 unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa;
8427
8428 last_retry_eip = vcpu->arch.last_retry_eip;
8429 last_retry_addr = vcpu->arch.last_retry_addr;
8430
8431
8432
8433
8434
8435
8436
8437
8438
8439
8440
8441
8442
8443
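/*
 * Retrying (re-entering the guest) only makes sense for emulation
 * triggered by a page fault: unprotect the target gfn so the access
 * can complete natively.  Don't retry if the instruction itself
 * writes a page table, or if the same RIP/address just failed, to
 * avoid spinning forever.
 */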
8444 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
8445
8446 if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
8447 return false;
8448
8449 if (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
8450 WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))
8451 return false;
8452
8453 if (x86_page_table_writing_insn(ctxt))
8454 return false;
8455
8456 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa)
8457 return false;
8458
8459 vcpu->arch.last_retry_eip = ctxt->eip;
8460 vcpu->arch.last_retry_addr = cr2_or_gpa;
8461
8462 if (!vcpu->arch.mmu->root_role.direct)
8463 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
8464
8465 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
8466
8467 return true;
8468 }
8469
8470 static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
8471 static int complete_emulated_pio(struct kvm_vcpu *vcpu);
8472
8473 static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
8474 {
8475 trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
8476
8477 if (entering_smm) {
8478 vcpu->arch.hflags |= HF_SMM_MASK;
8479 } else {
8480 vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
8481
8482
8483 kvm_make_request(KVM_REQ_EVENT, vcpu);
8484
8485
8486
8487
8488
8489
8490 vcpu->arch.pdptrs_from_userspace = false;
8491 }
8492
8493 kvm_mmu_reset_context(vcpu);
8494 }
8495
8496 static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
8497 unsigned long *db)
8498 {
8499 u32 dr6 = 0;
8500 int i;
8501 u32 enable, rwlen;
8502
8503 enable = dr7;
8504 rwlen = dr7 >> 16;
8505 for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
8506 if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
8507 dr6 |= (1 << i);
8508 return dr6;
8509 }
8510
8511 static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
8512 {
8513 struct kvm_run *kvm_run = vcpu->run;
8514
8515 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
8516 kvm_run->debug.arch.dr6 = DR6_BS | DR6_ACTIVE_LOW;
8517 kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
8518 kvm_run->debug.arch.exception = DB_VECTOR;
8519 kvm_run->exit_reason = KVM_EXIT_DEBUG;
8520 return 0;
8521 }
8522 kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
8523 return 1;
8524 }
8525
8526 int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
8527 {
8528 unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu);
8529 int r;
8530
8531 r = static_call(kvm_x86_skip_emulated_instruction)(vcpu);
8532 if (unlikely(!r))
8533 return 0;
8534
8535 kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_INSTRUCTIONS);
8536
8537
8538
8539
8540
8541
8542
8543
8544
8545 if (unlikely(rflags & X86_EFLAGS_TF))
8546 r = kvm_vcpu_do_singlestep(vcpu);
8547 return r;
8548 }
8549 EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
8550
8551 static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu, int *r)
8552 {
8553 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
8554 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
8555 struct kvm_run *kvm_run = vcpu->run;
8556 unsigned long eip = kvm_get_linear_rip(vcpu);
8557 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
8558 vcpu->arch.guest_debug_dr7,
8559 vcpu->arch.eff_db);
8560
8561 if (dr6 != 0) {
8562 kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW;
8563 kvm_run->debug.arch.pc = eip;
8564 kvm_run->debug.arch.exception = DB_VECTOR;
8565 kvm_run->exit_reason = KVM_EXIT_DEBUG;
8566 *r = 0;
8567 return true;
8568 }
8569 }
8570
8571 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
8572 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
8573 unsigned long eip = kvm_get_linear_rip(vcpu);
8574 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
8575 vcpu->arch.dr7,
8576 vcpu->arch.db);
8577
8578 if (dr6 != 0) {
8579 kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
8580 *r = 1;
8581 return true;
8582 }
8583 }
8584
8585 return false;
8586 }
8587
8588 static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
8589 {
8590 switch (ctxt->opcode_len) {
8591 case 1:
8592 switch (ctxt->b) {
8593 case 0xe4:
8594 case 0xe5:
8595 case 0xec:
8596 case 0xed:
8597 case 0xe6:
8598 case 0xe7:
8599 case 0xee:
8600 case 0xef:
8601 case 0x6c:
8602 case 0x6d:
8603 case 0x6e:
8604 case 0x6f:
8605 return true;
8606 }
8607 break;
8608 case 2:
8609 switch (ctxt->b) {
8610 case 0x33:
8611 return true;
8612 }
8613 break;
8614 }
8615
8616 return false;
8617 }
8618
8619
8620
8621
8622
8623
8624
8625
8626
8627
8628
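/*
 * Decode, but do not execute, the instruction at the current RIP
 * (or the caller-supplied bytes), bumping the emulation statistics;
 * the return value is the decoder's EMULATION_* status.
 */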
8629 int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
8630 void *insn, int insn_len)
8631 {
8632 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
8633 int r;
8634
8635 init_emulate_ctxt(vcpu);
8636
8637 r = x86_decode_insn(ctxt, insn, insn_len, emulation_type);
8638
8639 trace_kvm_emulate_insn_start(vcpu);
8640 ++vcpu->stat.insn_emulation;
8641
8642 return r;
8643 }
8644 EXPORT_SYMBOL_GPL(x86_decode_emulated_instruction);
8645
8646 int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
8647 int emulation_type, void *insn, int insn_len)
8648 {
8649 int r;
8650 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
8651 bool writeback = true;
8652 bool write_fault_to_spt;
8653
8654 if (unlikely(!kvm_can_emulate_insn(vcpu, emulation_type, insn, insn_len)))
8655 return 1;
8656
8657 vcpu->arch.l1tf_flush_l1d = true;
8658
8659
8660
8661
8662
8663 write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
8664 vcpu->arch.write_fault_to_shadow_pgtable = false;
8665
8666 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
8667 kvm_clear_exception_queue(vcpu);
8668
8669
8670
8671
8672
8673
8674 if (!(emulation_type & EMULTYPE_SKIP) &&
8675 kvm_vcpu_check_code_breakpoint(vcpu, &r))
8676 return r;
8677
8678 r = x86_decode_emulated_instruction(vcpu, emulation_type,
8679 insn, insn_len);
8680 if (r != EMULATION_OK) {
8681 if ((emulation_type & EMULTYPE_TRAP_UD) ||
8682 (emulation_type & EMULTYPE_TRAP_UD_FORCED)) {
8683 kvm_queue_exception(vcpu, UD_VECTOR);
8684 return 1;
8685 }
8686 if (reexecute_instruction(vcpu, cr2_or_gpa,
8687 write_fault_to_spt,
8688 emulation_type))
8689 return 1;
8690 if (ctxt->have_exception) {
8691
8692
8693
8694
8695 WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
8696 exception_type(ctxt->exception.vector) == EXCPT_TRAP);
8697 inject_emulated_exception(vcpu);
8698 return 1;
8699 }
8700 return handle_emulation_failure(vcpu, emulation_type);
8701 }
8702 }
8703
8704 if ((emulation_type & EMULTYPE_VMWARE_GP) &&
8705 !is_vmware_backdoor_opcode(ctxt)) {
8706 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
8707 return 1;
8708 }
8709
/*
 * EMULTYPE_SKIP is intended only for use by the vendor callbacks behind
 * kvm_skip_emulated_instruction(): only the instruction length is
 * computed and RIP advanced; the caller is responsible for updating
 * interruptibility state and injecting single-step #DBs.
 */
8716 if (emulation_type & EMULTYPE_SKIP) {
8717 if (ctxt->mode != X86EMUL_MODE_PROT64)
8718 ctxt->eip = (u32)ctxt->_eip;
8719 else
8720 ctxt->eip = ctxt->_eip;
8721
8722 if (emulation_type & EMULTYPE_COMPLETE_USER_EXIT) {
8723 r = 1;
8724 goto writeback;
8725 }
8726
8727 kvm_rip_write(vcpu, ctxt->eip);
8728 if (ctxt->eflags & X86_EFLAGS_RF)
8729 kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
8730 return 1;
8731 }
8732
8733 if (retry_instruction(ctxt, cr2_or_gpa, emulation_type))
8734 return 1;
8735
/*
 * Registers may have been modified outside the emulator, e.g. by the
 * VMware backdoor during I/O; invalidate the emulator's register cache
 * so stale values are re-read from the vCPU.
 */
8738 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
8739 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
8740 emulator_invalidate_register_cache(ctxt);
8741 }
8742
8743 restart:
8744 if (emulation_type & EMULTYPE_PF) {
8745
8746 ctxt->exception.address = cr2_or_gpa;
8747
8748
8749 if (vcpu->arch.mmu->root_role.direct) {
8750 ctxt->gpa_available = true;
8751 ctxt->gpa_val = cr2_or_gpa;
8752 }
8753 } else {
8754
8755 ctxt->exception.address = 0;
8756 }
8757
8758 r = x86_emulate_insn(ctxt);
8759
8760 if (r == EMULATION_INTERCEPTED)
8761 return 1;
8762
8763 if (r == EMULATION_FAILED) {
8764 if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt,
8765 emulation_type))
8766 return 1;
8767
8768 return handle_emulation_failure(vcpu, emulation_type);
8769 }
8770
8771 if (ctxt->have_exception) {
8772 r = 1;
8773 if (inject_emulated_exception(vcpu))
8774 return r;
8775 } else if (vcpu->arch.pio.count) {
8776 if (!vcpu->arch.pio.in) {
8777
8778 vcpu->arch.pio.count = 0;
8779 } else {
8780 writeback = false;
8781 vcpu->arch.complete_userspace_io = complete_emulated_pio;
8782 }
8783 r = 0;
8784 } else if (vcpu->mmio_needed) {
8785 ++vcpu->stat.mmio_exits;
8786
8787 if (!vcpu->mmio_is_write)
8788 writeback = false;
8789 r = 0;
8790 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
8791 } else if (vcpu->arch.complete_userspace_io) {
8792 writeback = false;
8793 r = 0;
8794 } else if (r == EMULATION_RESTART)
8795 goto restart;
8796 else
8797 r = 1;
8798
8799 writeback:
8800 if (writeback) {
8801 unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu);
8802 toggle_interruptibility(vcpu, ctxt->interruptibility);
8803 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
8804 if (!ctxt->have_exception ||
8805 exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
8806 kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_INSTRUCTIONS);
8807 if (ctxt->is_branch)
8808 kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
8809 kvm_rip_write(vcpu, ctxt->eip);
8810 if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
8811 r = kvm_vcpu_do_singlestep(vcpu);
8812 static_call_cond(kvm_x86_update_emulated_instruction)(vcpu);
8813 __kvm_set_rflags(vcpu, ctxt->eflags);
8814 }
8815
/*
 * If emulation turned on EFLAGS.IF, request event reevaluation.  STI's
 * interrupt shadow re-requests KVM_REQ_EVENT when it expires, but POPF
 * has no interrupt shadow, so the check must be done here.
 */
8822 if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
8823 kvm_make_request(KVM_REQ_EVENT, vcpu);
8824 } else
8825 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
8826
8827 return r;
8828 }
8829
8830 int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
8831 {
8832 return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
8833 }
8834 EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
8835
8836 int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
8837 void *insn, int insn_len)
8838 {
8839 return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
8840 }
8841 EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
8842
8843 static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
8844 {
8845 vcpu->arch.pio.count = 0;
8846 return 1;
8847 }
8848
8849 static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
8850 {
8851 vcpu->arch.pio.count = 0;
8852
8853 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip)))
8854 return 1;
8855
8856 return kvm_skip_emulated_instruction(vcpu);
8857 }
8858
8859 static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
8860 unsigned short port)
8861 {
8862 unsigned long val = kvm_rax_read(vcpu);
8863 int ret = emulator_pio_out(vcpu, size, port, &val, 1);
8864
8865 if (ret)
8866 return ret;
8867
/*
 * Workaround for userspace that relies on the old KVM behaviour of RIP
 * being incremented prior to exiting to userspace to handle "OUT 0x7e"
 * (see KVM_X86_QUIRK_OUT_7E_INC_RIP).
 */
8872 if (port == 0x7e &&
8873 kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
8874 vcpu->arch.complete_userspace_io =
8875 complete_fast_pio_out_port_0x7e;
8876 kvm_skip_emulated_instruction(vcpu);
8877 } else {
8878 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
8879 vcpu->arch.complete_userspace_io = complete_fast_pio_out;
8880 }
8881 return 0;
8882 }
8883
8884 static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
8885 {
8886 unsigned long val;
8887
8888
8889 BUG_ON(vcpu->arch.pio.count != 1);
8890
8891 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) {
8892 vcpu->arch.pio.count = 0;
8893 return 1;
8894 }
8895
8896
8897 val = (vcpu->arch.pio.size < 4) ? kvm_rax_read(vcpu) : 0;
8898
8899 complete_emulator_pio_in(vcpu, &val);
8900 kvm_rax_write(vcpu, val);
8901
8902 return kvm_skip_emulated_instruction(vcpu);
8903 }
8904
8905 static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
8906 unsigned short port)
8907 {
8908 unsigned long val;
8909 int ret;
8910
8911
8912 val = (size < 4) ? kvm_rax_read(vcpu) : 0;
8913
8914 ret = emulator_pio_in(vcpu, size, port, &val, 1);
8915 if (ret) {
8916 kvm_rax_write(vcpu, val);
8917 return ret;
8918 }
8919
8920 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
8921 vcpu->arch.complete_userspace_io = complete_fast_pio_in;
8922
8923 return 0;
8924 }
8925
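/*
 * Fast path for single IN/OUT instructions that avoids the full
 * instruction emulator: RAX is read/written directly, and if the I/O
 * must be completed by userspace, complete_fast_pio_{in,out}() finishes
 * the instruction afterwards.
 */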
8926 int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
8927 {
8928 int ret;
8929
8930 if (in)
8931 ret = kvm_fast_pio_in(vcpu, size, port);
8932 else
8933 ret = kvm_fast_pio_out(vcpu, size, port);
8934 return ret && kvm_skip_emulated_instruction(vcpu);
8935 }
8936 EXPORT_SYMBOL_GPL(kvm_fast_pio);
8937
8938 static int kvmclock_cpu_down_prep(unsigned int cpu)
8939 {
8940 __this_cpu_write(cpu_tsc_khz, 0);
8941 return 0;
8942 }
8943
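/*
 * Refresh this CPU's cached TSC frequency (cpu_tsc_khz), either from an
 * in-progress cpufreq transition, from cpufreq directly, or from the
 * calibrated tsc_khz.
 */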
8944 static void tsc_khz_changed(void *data)
8945 {
8946 struct cpufreq_freqs *freq = data;
8947 unsigned long khz = 0;
8948
8949 if (data)
8950 khz = freq->new;
8951 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
8952 khz = cpufreq_quick_get(raw_smp_processor_id());
8953 if (!khz)
8954 khz = tsc_khz;
8955 __this_cpu_write(cpu_tsc_khz, khz);
8956 }
8957
8958 #ifdef CONFIG_X86_64
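/*
 * Hyper-V re-enlightenment callback, invoked when the underlying host
 * changes the TSC frequency (e.g. after live migration): block
 * masterclock updates, stop Hyper-V's TSC emulation, refresh the cached
 * TSC frequency on all CPUs and recompute every VM's masterclock.
 */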
8959 static void kvm_hyperv_tsc_notifier(void)
8960 {
8961 struct kvm *kvm;
8962 int cpu;
8963
8964 mutex_lock(&kvm_lock);
8965 list_for_each_entry(kvm, &vm_list, vm_list)
8966 kvm_make_mclock_inprogress_request(kvm);
8967
8968
8969 hyperv_stop_tsc_emulation();
8970
8971
8972 for_each_present_cpu(cpu)
8973 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
8974 kvm_caps.max_guest_tsc_khz = tsc_khz;
8975
8976 list_for_each_entry(kvm, &vm_list, vm_list) {
8977 __kvm_start_pvclock_update(kvm);
8978 pvclock_update_vm_gtod_copy(kvm);
8979 kvm_end_pvclock_update(kvm);
8980 }
8981
8982 mutex_unlock(&kvm_lock);
8983 }
8984 #endif
8985
8986 static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
8987 {
8988 struct kvm *kvm;
8989 struct kvm_vcpu *vcpu;
8990 int send_ipi = 0;
8991 unsigned long i;
8992
/*
 * Without a constant TSC, the guest-visible TSC rate follows the host
 * CPU frequency.  When cpufreq changes the frequency of a CPU, the
 * cached cpu_tsc_khz must be refreshed on that CPU and every vCPU that
 * last ran there needs a KVM_REQ_CLOCK_UPDATE so its kvmclock parameters
 * are recomputed before the guest next reads time; otherwise guest time
 * would drift at the wrong rate.
 */
9032 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
9033
9034 mutex_lock(&kvm_lock);
9035 list_for_each_entry(kvm, &vm_list, vm_list) {
9036 kvm_for_each_vcpu(i, vcpu, kvm) {
9037 if (vcpu->cpu != cpu)
9038 continue;
9039 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
9040 if (vcpu->cpu != raw_smp_processor_id())
9041 send_ipi = 1;
9042 }
9043 }
9044 mutex_unlock(&kvm_lock);
9045
9046 if (freq->old < freq->new && send_ipi) {
/*
 * The frequency went up.  A vCPU still running on the affected CPU with
 * kvmclock parameters computed for the old, slower frequency risks the
 * guest seeing time go backwards at the next update, so send an IPI to
 * kick that CPU out of guest mode; kvmclock is refreshed on the next
 * guest entry.
 */
9059 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
9060 }
9061 }
9062
9063 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
9064 void *data)
9065 {
9066 struct cpufreq_freqs *freq = data;
9067 int cpu;
9068
9069 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
9070 return 0;
9071 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
9072 return 0;
9073
9074 for_each_cpu(cpu, freq->policy->cpus)
9075 __kvmclock_cpufreq_notifier(freq, cpu);
9076
9077 return 0;
9078 }
9079
9080 static struct notifier_block kvmclock_cpufreq_notifier_block = {
9081 .notifier_call = kvmclock_cpufreq_notifier
9082 };
9083
9084 static int kvmclock_cpu_online(unsigned int cpu)
9085 {
9086 tsc_khz_changed(NULL);
9087 return 0;
9088 }
9089
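/*
 * Record the maximum possible TSC frequency and register the cpufreq
 * and CPU hotplug notifiers that keep the per-CPU TSC frequency cache
 * current.
 */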
9090 static void kvm_timer_init(void)
9091 {
9092 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
9093 max_tsc_khz = tsc_khz;
9094
9095 if (IS_ENABLED(CONFIG_CPU_FREQ)) {
9096 struct cpufreq_policy *policy;
9097 int cpu;
9098
9099 cpu = get_cpu();
9100 policy = cpufreq_cpu_get(cpu);
9101 if (policy) {
9102 if (policy->cpuinfo.max_freq)
9103 max_tsc_khz = policy->cpuinfo.max_freq;
9104 cpufreq_cpu_put(policy);
9105 }
9106 put_cpu();
9107 }
9108 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
9109 CPUFREQ_TRANSITION_NOTIFIER);
9110 }
9111
9112 cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
9113 kvmclock_cpu_online, kvmclock_cpu_down_prep);
9114 }
9115
9116 #ifdef CONFIG_X86_64
9117 static void pvclock_gtod_update_fn(struct work_struct *work)
9118 {
9119 struct kvm *kvm;
9120 struct kvm_vcpu *vcpu;
9121 unsigned long i;
9122
9123 mutex_lock(&kvm_lock);
9124 list_for_each_entry(kvm, &vm_list, vm_list)
9125 kvm_for_each_vcpu(i, vcpu, kvm)
9126 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
9127 atomic_set(&kvm_guest_has_master_clock, 0);
9128 mutex_unlock(&kvm_lock);
9129 }
9130
9131 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
9132
9133
9134
9135
9136
9137
9138 static void pvclock_irq_work_fn(struct irq_work *w)
9139 {
9140 queue_work(system_long_wq, &pvclock_gtod_work);
9141 }
9142
9143 static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);
9144
9145
9146
9147
9148 static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
9149 void *priv)
9150 {
9151 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
9152 struct timekeeper *tk = priv;
9153
9154 update_pvclock_gtod(tk);
9155
9156
9157
9158
9159
9160
9161 if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
9162 atomic_read(&kvm_guest_has_master_clock) != 0)
9163 irq_work_queue(&pvclock_irq_work);
9164 return 0;
9165 }
9166
9167 static struct notifier_block pvclock_gtod_notifier = {
9168 .notifier_call = pvclock_gtod_notify,
9169 };
9170 #endif
9171
9172 int kvm_arch_init(void *opaque)
9173 {
9174 struct kvm_x86_init_ops *ops = opaque;
9175 u64 host_pat;
9176 int r;
9177
9178 if (kvm_x86_ops.hardware_enable) {
9179 pr_err("kvm: already loaded vendor module '%s'\n", kvm_x86_ops.name);
9180 return -EEXIST;
9181 }
9182
9183 if (!ops->cpu_has_kvm_support()) {
9184 pr_err_ratelimited("kvm: no hardware support for '%s'\n",
9185 ops->runtime_ops->name);
9186 return -EOPNOTSUPP;
9187 }
9188 if (ops->disabled_by_bios()) {
9189 pr_err_ratelimited("kvm: support for '%s' disabled by bios\n",
9190 ops->runtime_ops->name);
9191 return -EOPNOTSUPP;
9192 }
9193
/*
 * KVM assumes the host has a usable FPU and FXSAVE/FXRSTOR; refuse to
 * load on hardware that lacks either.
 */
9199 if (!boot_cpu_has(X86_FEATURE_FPU) || !boot_cpu_has(X86_FEATURE_FXSR)) {
9200 printk(KERN_ERR "kvm: inadequate fpu\n");
9201 return -EOPNOTSUPP;
9202 }
9203
9204 if (IS_ENABLED(CONFIG_PREEMPT_RT) && !boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
9205 pr_err("RT requires X86_FEATURE_CONSTANT_TSC\n");
9206 return -EOPNOTSUPP;
9207 }
9208
/*
 * KVM assumes that PAT entry '0' encodes WB memtype and relies on that
 * when constructing SPTE memtypes.  Bail if the host has programmed
 * PAT[0] to anything other than WB.
 */
9216 if (rdmsrl_safe(MSR_IA32_CR_PAT, &host_pat) ||
9217 (host_pat & GENMASK(2, 0)) != 6) {
9218 pr_err("kvm: host PAT[0] is not WB\n");
9219 return -EIO;
9220 }
9221
9222 x86_emulator_cache = kvm_alloc_emulator_cache();
9223 if (!x86_emulator_cache) {
9224 pr_err("kvm: failed to allocate cache for x86 emulator\n");
9225 return -ENOMEM;
9226 }
9227
9228 user_return_msrs = alloc_percpu(struct kvm_user_return_msrs);
9229 if (!user_return_msrs) {
9230 printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
9231 r = -ENOMEM;
9232 goto out_free_x86_emulator_cache;
9233 }
9234 kvm_nr_uret_msrs = 0;
9235
9236 r = kvm_mmu_vendor_module_init();
9237 if (r)
9238 goto out_free_percpu;
9239
9240 kvm_timer_init();
9241
9242 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
9243 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
9244 kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
9245 }
9246
9247 if (pi_inject_timer == -1)
9248 pi_inject_timer = housekeeping_enabled(HK_TYPE_TIMER);
9249 #ifdef CONFIG_X86_64
9250 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
9251
9252 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
9253 set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
9254 #endif
9255
9256 return 0;
9257
9258 out_free_percpu:
9259 free_percpu(user_return_msrs);
9260 out_free_x86_emulator_cache:
9261 kmem_cache_destroy(x86_emulator_cache);
9262 return r;
9263 }
9264
9265 void kvm_arch_exit(void)
9266 {
9267 #ifdef CONFIG_X86_64
9268 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
9269 clear_hv_tscchange_cb();
9270 #endif
9271 kvm_lapic_exit();
9272
9273 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
9274 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
9275 CPUFREQ_TRANSITION_NOTIFIER);
9276 cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
9277 #ifdef CONFIG_X86_64
9278 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
9279 irq_work_sync(&pvclock_irq_work);
9280 cancel_work_sync(&pvclock_gtod_work);
9281 #endif
9282 kvm_x86_ops.hardware_enable = NULL;
9283 kvm_mmu_vendor_module_exit();
9284 free_percpu(user_return_msrs);
9285 kmem_cache_destroy(x86_emulator_cache);
9286 #ifdef CONFIG_KVM_XEN
9287 static_key_deferred_flush(&kvm_xen_enabled);
9288 WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
9289 #endif
9290 }
9291
9292 static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason)
9293 {
/*
 * The vCPU has halted, e.g. executed HLT.  If the local APIC is
 * in-kernel, update mp_state and let the run loop detect the
 * non-runnable state; otherwise exit to userspace, which is responsible
 * for emulating halt state.
 */
9301 ++vcpu->stat.halt_exits;
9302 if (lapic_in_kernel(vcpu)) {
9303 vcpu->arch.mp_state = state;
9304 return 1;
9305 } else {
9306 vcpu->run->exit_reason = reason;
9307 return 0;
9308 }
9309 }
9310
9311 int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu)
9312 {
9313 return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
9314 }
9315 EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip);
9316
9317 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
9318 {
9319 int ret = kvm_skip_emulated_instruction(vcpu);
9320
9321
9322
9323
9324 return kvm_emulate_halt_noskip(vcpu) && ret;
9325 }
9326 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
9327
9328 int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
9329 {
9330 int ret = kvm_skip_emulated_instruction(vcpu);
9331
9332 return __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD,
9333 KVM_EXIT_AP_RESET_HOLD) && ret;
9334 }
9335 EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);
9336
9337 #ifdef CONFIG_X86_64
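/*
 * Handle the KVM_HC_CLOCK_PAIRING hypercall: write a paired wallclock
 * timestamp and guest TSC value to the guest-supplied address so the
 * guest can correlate its TSC with host CLOCK_REALTIME.
 */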
9338 static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
9339 unsigned long clock_type)
9340 {
9341 struct kvm_clock_pairing clock_pairing;
9342 struct timespec64 ts;
9343 u64 cycle;
9344 int ret;
9345
9346 if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
9347 return -KVM_EOPNOTSUPP;
9348
9349
9350
9351
9352
9353 if (vcpu->arch.tsc_always_catchup)
9354 return -KVM_EOPNOTSUPP;
9355
9356 if (!kvm_get_walltime_and_clockread(&ts, &cycle))
9357 return -KVM_EOPNOTSUPP;
9358
9359 clock_pairing.sec = ts.tv_sec;
9360 clock_pairing.nsec = ts.tv_nsec;
9361 clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
9362 clock_pairing.flags = 0;
9363 memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));
9364
9365 ret = 0;
9366 if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
9367 sizeof(struct kvm_clock_pairing)))
9368 ret = -KVM_EFAULT;
9369
9370 return ret;
9371 }
9372 #endif
9373
/*
 * kvm_pv_kick_cpu_op: wake the vCPU identified by @apicid, e.g. the
 * target of a KVM_HC_KICK_CPU hypercall, by sending it a REMRD "IPI".
 */
9379 static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid)
9380 {
9381
9382
9383
9384
9385 struct kvm_lapic_irq lapic_irq = {
9386 .delivery_mode = APIC_DM_REMRD,
9387 .dest_mode = APIC_DEST_PHYSICAL,
9388 .shorthand = APIC_DEST_NOSHORT,
9389 .dest_id = apicid,
9390 };
9391
9392 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
9393 }
9394
9395 bool kvm_apicv_activated(struct kvm *kvm)
9396 {
9397 return (READ_ONCE(kvm->arch.apicv_inhibit_reasons) == 0);
9398 }
9399 EXPORT_SYMBOL_GPL(kvm_apicv_activated);
9400
9401 bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu)
9402 {
9403 ulong vm_reasons = READ_ONCE(vcpu->kvm->arch.apicv_inhibit_reasons);
9404 ulong vcpu_reasons = static_call(kvm_x86_vcpu_get_apicv_inhibit_reasons)(vcpu);
9405
9406 return (vm_reasons | vcpu_reasons) == 0;
9407 }
9408 EXPORT_SYMBOL_GPL(kvm_vcpu_apicv_activated);
9409
9410 static void set_or_clear_apicv_inhibit(unsigned long *inhibits,
9411 enum kvm_apicv_inhibit reason, bool set)
9412 {
9413 if (set)
9414 __set_bit(reason, inhibits);
9415 else
9416 __clear_bit(reason, inhibits);
9417
9418 trace_kvm_apicv_inhibit_changed(reason, set, *inhibits);
9419 }
9420
9421 static void kvm_apicv_init(struct kvm *kvm)
9422 {
9423 unsigned long *inhibits = &kvm->arch.apicv_inhibit_reasons;
9424
9425 init_rwsem(&kvm->arch.apicv_update_lock);
9426
9427 set_or_clear_apicv_inhibit(inhibits, APICV_INHIBIT_REASON_ABSENT, true);
9428
9429 if (!enable_apicv)
9430 set_or_clear_apicv_inhibit(inhibits,
9431 APICV_INHIBIT_REASON_DISABLE, true);
9432 }
9433
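/*
 * Attempt to yield this vCPU's timeslice to the vCPU whose APIC ID is
 * @dest_id, typically the target of a PV kick or PV sched-yield
 * hypercall, provided the target is ready to run.
 */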
9434 static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
9435 {
9436 struct kvm_vcpu *target = NULL;
9437 struct kvm_apic_map *map;
9438
9439 vcpu->stat.directed_yield_attempted++;
9440
9441 if (single_task_running())
9442 goto no_yield;
9443
9444 rcu_read_lock();
9445 map = rcu_dereference(vcpu->kvm->arch.apic_map);
9446
9447 if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
9448 target = map->phys_map[dest_id]->vcpu;
9449
9450 rcu_read_unlock();
9451
9452 if (!target || !READ_ONCE(target->ready))
9453 goto no_yield;
9454
9455
9456 if (vcpu == target)
9457 goto no_yield;
9458
9459 if (kvm_vcpu_yield_to(target) <= 0)
9460 goto no_yield;
9461
9462 vcpu->stat.directed_yield_successful++;
9463
9464 no_yield:
9465 return;
9466 }
9467
9468 static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
9469 {
9470 u64 ret = vcpu->run->hypercall.ret;
9471
9472 if (!is_64_bit_mode(vcpu))
9473 ret = (u32)ret;
9474 kvm_rax_write(vcpu, ret);
9475 ++vcpu->stat.hypercalls;
9476 return kvm_skip_emulated_instruction(vcpu);
9477 }
9478
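/*
 * KVM hypercall ABI: the hypercall number is passed in RAX and up to
 * four arguments in RBX, RCX, RDX and RSI; the return value is written
 * back to RAX.  For 32-bit callers all values are truncated to 32 bits,
 * and any hypercall issued from CPL > 0 fails with -KVM_EPERM.
 */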
9479 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
9480 {
9481 unsigned long nr, a0, a1, a2, a3, ret;
9482 int op_64_bit;
9483
9484 if (kvm_xen_hypercall_enabled(vcpu->kvm))
9485 return kvm_xen_hypercall(vcpu);
9486
9487 if (kvm_hv_hypercall_enabled(vcpu))
9488 return kvm_hv_hypercall(vcpu);
9489
9490 nr = kvm_rax_read(vcpu);
9491 a0 = kvm_rbx_read(vcpu);
9492 a1 = kvm_rcx_read(vcpu);
9493 a2 = kvm_rdx_read(vcpu);
9494 a3 = kvm_rsi_read(vcpu);
9495
9496 trace_kvm_hypercall(nr, a0, a1, a2, a3);
9497
9498 op_64_bit = is_64_bit_hypercall(vcpu);
9499 if (!op_64_bit) {
9500 nr &= 0xFFFFFFFF;
9501 a0 &= 0xFFFFFFFF;
9502 a1 &= 0xFFFFFFFF;
9503 a2 &= 0xFFFFFFFF;
9504 a3 &= 0xFFFFFFFF;
9505 }
9506
9507 if (static_call(kvm_x86_get_cpl)(vcpu) != 0) {
9508 ret = -KVM_EPERM;
9509 goto out;
9510 }
9511
9512 ret = -KVM_ENOSYS;
9513
9514 switch (nr) {
9515 case KVM_HC_VAPIC_POLL_IRQ:
9516 ret = 0;
9517 break;
9518 case KVM_HC_KICK_CPU:
9519 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
9520 break;
9521
9522 kvm_pv_kick_cpu_op(vcpu->kvm, a1);
9523 kvm_sched_yield(vcpu, a1);
9524 ret = 0;
9525 break;
9526 #ifdef CONFIG_X86_64
9527 case KVM_HC_CLOCK_PAIRING:
9528 ret = kvm_pv_clock_pairing(vcpu, a0, a1);
9529 break;
9530 #endif
9531 case KVM_HC_SEND_IPI:
9532 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
9533 break;
9534
9535 ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
9536 break;
9537 case KVM_HC_SCHED_YIELD:
9538 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
9539 break;
9540
9541 kvm_sched_yield(vcpu, a0);
9542 ret = 0;
9543 break;
9544 case KVM_HC_MAP_GPA_RANGE: {
9545 u64 gpa = a0, npages = a1, attrs = a2;
9546
9547 ret = -KVM_ENOSYS;
9548 if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE)))
9549 break;
9550
9551 if (!PAGE_ALIGNED(gpa) || !npages ||
9552 gpa_to_gfn(gpa) + npages <= gpa_to_gfn(gpa)) {
9553 ret = -KVM_EINVAL;
9554 break;
9555 }
9556
9557 vcpu->run->exit_reason = KVM_EXIT_HYPERCALL;
9558 vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE;
9559 vcpu->run->hypercall.args[0] = gpa;
9560 vcpu->run->hypercall.args[1] = npages;
9561 vcpu->run->hypercall.args[2] = attrs;
9562 vcpu->run->hypercall.longmode = op_64_bit;
9563 vcpu->arch.complete_userspace_io = complete_hypercall_exit;
9564 return 0;
9565 }
9566 default:
9567 ret = -KVM_ENOSYS;
9568 break;
9569 }
9570 out:
9571 if (!op_64_bit)
9572 ret = (u32)ret;
9573 kvm_rax_write(vcpu, ret);
9574
9575 ++vcpu->stat.hypercalls;
9576 return kvm_skip_emulated_instruction(vcpu);
9577 }
9578 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
9579
9580 static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
9581 {
9582 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
9583 char instruction[3];
9584 unsigned long rip = kvm_rip_read(vcpu);
9585
/*
 * If KVM_X86_QUIRK_FIX_HYPERCALL_INSN is disabled, don't patch the
 * guest's hypercall instruction; synthesize a #UD and let the guest
 * handle it instead.
 */
9590 if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_FIX_HYPERCALL_INSN)) {
9591 ctxt->exception.error_code_valid = false;
9592 ctxt->exception.vector = UD_VECTOR;
9593 ctxt->have_exception = true;
9594 return X86EMUL_PROPAGATE_FAULT;
9595 }
9596
9597 static_call(kvm_x86_patch_hypercall)(vcpu, instruction);
9598
9599 return emulator_write_emulated(ctxt, rip, instruction, 3,
9600 &ctxt->exception);
9601 }
9602
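/*
 * True if userspace asked for an interrupt window and interrupt
 * injection is handled in userspace (i.e. there is no in-kernel PIC).
 */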
9603 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
9604 {
9605 return vcpu->run->request_interrupt_window &&
9606 likely(!pic_in_kernel(vcpu->kvm));
9607 }
9608
9609
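/* Copy the exit state that userspace consumes on every exit into kvm_run. */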
9610 static void post_kvm_run_save(struct kvm_vcpu *vcpu)
9611 {
9612 struct kvm_run *kvm_run = vcpu->run;
9613
9614 kvm_run->if_flag = static_call(kvm_x86_get_if_flag)(vcpu);
9615 kvm_run->cr8 = kvm_get_cr8(vcpu);
9616 kvm_run->apic_base = kvm_get_apic_base(vcpu);
9617
9618 kvm_run->ready_for_interrupt_injection =
9619 pic_in_kernel(vcpu->kvm) ||
9620 kvm_vcpu_ready_for_interrupt_injection(vcpu);
9621
9622 if (is_smm(vcpu))
9623 kvm_run->flags |= KVM_RUN_X86_SMM;
9624 }
9625
9626 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
9627 {
9628 int max_irr, tpr;
9629
9630 if (!kvm_x86_ops.update_cr8_intercept)
9631 return;
9632
9633 if (!lapic_in_kernel(vcpu))
9634 return;
9635
9636 if (vcpu->arch.apic->apicv_active)
9637 return;
9638
9639 if (!vcpu->arch.apic->vapic_addr)
9640 max_irr = kvm_lapic_find_highest_irr(vcpu);
9641 else
9642 max_irr = -1;
9643
9644 if (max_irr != -1)
9645 max_irr >>= 4;
9646
9647 tpr = kvm_lapic_get_cr8(vcpu);
9648
9649 static_call(kvm_x86_update_cr8_intercept)(vcpu, tpr, max_irr);
9650 }
9651
9652
9653 int kvm_check_nested_events(struct kvm_vcpu *vcpu)
9654 {
9655 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
9656 kvm_x86_ops.nested_ops->triple_fault(vcpu);
9657 return 1;
9658 }
9659
9660 return kvm_x86_ops.nested_ops->check_events(vcpu);
9661 }
9662
9663 static void kvm_inject_exception(struct kvm_vcpu *vcpu)
9664 {
9665 trace_kvm_inj_exception(vcpu->arch.exception.nr,
9666 vcpu->arch.exception.has_error_code,
9667 vcpu->arch.exception.error_code,
9668 vcpu->arch.exception.injected);
9669
9670 if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
9671 vcpu->arch.exception.error_code = false;
9672 static_call(kvm_x86_queue_exception)(vcpu);
9673 }
9674
9675 static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
9676 {
9677 int r;
9678 bool can_inject = true;
9679
9680
9681
9682 if (vcpu->arch.exception.injected) {
9683 kvm_inject_exception(vcpu);
9684 can_inject = false;
9685 }
9686
/*
 * Do not inject an NMI or interrupt if there is a pending exception.
 * Exceptions and interrupts are recognized at instruction boundaries,
 * i.e. they respect NMI/interrupt shadows; a pending fault-like
 * exception means the fault occurred on the *previous* instruction and
 * must be serviced before any new event is recognized.
 */
9700 else if (!vcpu->arch.exception.pending) {
9701 if (vcpu->arch.nmi_injected) {
9702 static_call(kvm_x86_inject_nmi)(vcpu);
9703 can_inject = false;
9704 } else if (vcpu->arch.interrupt.injected) {
9705 static_call(kvm_x86_inject_irq)(vcpu, true);
9706 can_inject = false;
9707 }
9708 }
9709
9710 WARN_ON_ONCE(vcpu->arch.exception.injected &&
9711 vcpu->arch.exception.pending);
9712
/*
 * Process nested events even if a previous event was re-injected above:
 * a pending L1 event that demands a VM-Exit from L2 to L1 may require
 * an immediate exit.
 */
9719 if (is_guest_mode(vcpu)) {
9720 r = kvm_check_nested_events(vcpu);
9721 if (r < 0)
9722 goto out;
9723 }
9724
9725
9726 if (vcpu->arch.exception.pending) {
9727 if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
9728 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
9729 X86_EFLAGS_RF);
9730
9731 if (vcpu->arch.exception.nr == DB_VECTOR) {
9732 kvm_deliver_exception_payload(vcpu);
9733 if (vcpu->arch.dr7 & DR7_GD) {
9734 vcpu->arch.dr7 &= ~DR7_GD;
9735 kvm_update_dr7(vcpu);
9736 }
9737 }
9738
9739 kvm_inject_exception(vcpu);
9740
9741 vcpu->arch.exception.pending = false;
9742 vcpu->arch.exception.injected = true;
9743
9744 can_inject = false;
9745 }
9746
9747
9748 if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ)
9749 return 0;
9750
/*
 * Finally, try to deliver new events (SMI, NMI, IRQ) in priority order.
 * If an event cannot be injected, either because something was just
 * (re)injected or because of architectural conditions, ask the vendor
 * code to open the corresponding window so delivery can be retried;
 * -EBUSY from the *_allowed() hooks is translated into an immediate
 * exit request at the "out" label.
 */
9762 if (vcpu->arch.smi_pending) {
9763 r = can_inject ? static_call(kvm_x86_smi_allowed)(vcpu, true) : -EBUSY;
9764 if (r < 0)
9765 goto out;
9766 if (r) {
9767 vcpu->arch.smi_pending = false;
9768 ++vcpu->arch.smi_count;
9769 enter_smm(vcpu);
9770 can_inject = false;
9771 } else
9772 static_call(kvm_x86_enable_smi_window)(vcpu);
9773 }
9774
9775 if (vcpu->arch.nmi_pending) {
9776 r = can_inject ? static_call(kvm_x86_nmi_allowed)(vcpu, true) : -EBUSY;
9777 if (r < 0)
9778 goto out;
9779 if (r) {
9780 --vcpu->arch.nmi_pending;
9781 vcpu->arch.nmi_injected = true;
9782 static_call(kvm_x86_inject_nmi)(vcpu);
9783 can_inject = false;
9784 WARN_ON(static_call(kvm_x86_nmi_allowed)(vcpu, true) < 0);
9785 }
9786 if (vcpu->arch.nmi_pending)
9787 static_call(kvm_x86_enable_nmi_window)(vcpu);
9788 }
9789
9790 if (kvm_cpu_has_injectable_intr(vcpu)) {
9791 r = can_inject ? static_call(kvm_x86_interrupt_allowed)(vcpu, true) : -EBUSY;
9792 if (r < 0)
9793 goto out;
9794 if (r) {
9795 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
9796 static_call(kvm_x86_inject_irq)(vcpu, false);
9797 WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
9798 }
9799 if (kvm_cpu_has_injectable_intr(vcpu))
9800 static_call(kvm_x86_enable_irq_window)(vcpu);
9801 }
9802
9803 if (is_guest_mode(vcpu) &&
9804 kvm_x86_ops.nested_ops->hv_timer_pending &&
9805 kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
9806 *req_immediate_exit = true;
9807
9808 WARN_ON(vcpu->arch.exception.pending);
9809 return 0;
9810
9811 out:
9812 if (r == -EBUSY) {
9813 *req_immediate_exit = true;
9814 r = 0;
9815 }
9816 return r;
9817 }
9818
9819 static void process_nmi(struct kvm_vcpu *vcpu)
9820 {
9821 unsigned limit = 2;
9822
/*
 * x86 is limited to one NMI in service plus one NMI pending after it.
 * If an NMI is already in flight, allow only one more pending NMI;
 * otherwise allow up to two.
 */
9828 if (static_call(kvm_x86_get_nmi_mask)(vcpu) || vcpu->arch.nmi_injected)
9829 limit = 1;
9830
9831 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
9832 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
9833 kvm_make_request(KVM_REQ_EVENT, vcpu);
9834 }
9835
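/*
 * Pack a segment's attributes into the flags format used by the SMRAM
 * state-save area: type/S/DPL/P in bits 8-15, AVL/L/DB/G in bits 20-23.
 */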
9836 static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
9837 {
9838 u32 flags = 0;
9839 flags |= seg->g << 23;
9840 flags |= seg->db << 22;
9841 flags |= seg->l << 21;
9842 flags |= seg->avl << 20;
9843 flags |= seg->present << 15;
9844 flags |= seg->dpl << 13;
9845 flags |= seg->s << 12;
9846 flags |= seg->type << 8;
9847 return flags;
9848 }
9849
9850 static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
9851 {
9852 struct kvm_segment seg;
9853 int offset;
9854
9855 kvm_get_segment(vcpu, &seg, n);
9856 put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
9857
9858 if (n < 3)
9859 offset = 0x7f84 + n * 12;
9860 else
9861 offset = 0x7f2c + (n - 3) * 12;
9862
9863 put_smstate(u32, buf, offset + 8, seg.base);
9864 put_smstate(u32, buf, offset + 4, seg.limit);
9865 put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
9866 }
9867
9868 #ifdef CONFIG_X86_64
9869 static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
9870 {
9871 struct kvm_segment seg;
9872 int offset;
9873 u16 flags;
9874
9875 kvm_get_segment(vcpu, &seg, n);
9876 offset = 0x7e00 + n * 16;
9877
9878 flags = enter_smm_get_segment_flags(&seg) >> 8;
9879 put_smstate(u16, buf, offset, seg.selector);
9880 put_smstate(u16, buf, offset + 2, flags);
9881 put_smstate(u32, buf, offset + 4, seg.limit);
9882 put_smstate(u64, buf, offset + 8, seg.base);
9883 }
9884 #endif
9885
9886 static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
9887 {
9888 struct desc_ptr dt;
9889 struct kvm_segment seg;
9890 unsigned long val;
9891 int i;
9892
9893 put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
9894 put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
9895 put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
9896 put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
9897
9898 for (i = 0; i < 8; i++)
9899 put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
9900
9901 kvm_get_dr(vcpu, 6, &val);
9902 put_smstate(u32, buf, 0x7fcc, (u32)val);
9903 kvm_get_dr(vcpu, 7, &val);
9904 put_smstate(u32, buf, 0x7fc8, (u32)val);
9905
9906 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
9907 put_smstate(u32, buf, 0x7fc4, seg.selector);
9908 put_smstate(u32, buf, 0x7f64, seg.base);
9909 put_smstate(u32, buf, 0x7f60, seg.limit);
9910 put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
9911
9912 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
9913 put_smstate(u32, buf, 0x7fc0, seg.selector);
9914 put_smstate(u32, buf, 0x7f80, seg.base);
9915 put_smstate(u32, buf, 0x7f7c, seg.limit);
9916 put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
9917
9918 static_call(kvm_x86_get_gdt)(vcpu, &dt);
9919 put_smstate(u32, buf, 0x7f74, dt.address);
9920 put_smstate(u32, buf, 0x7f70, dt.size);
9921
9922 static_call(kvm_x86_get_idt)(vcpu, &dt);
9923 put_smstate(u32, buf, 0x7f58, dt.address);
9924 put_smstate(u32, buf, 0x7f54, dt.size);
9925
9926 for (i = 0; i < 6; i++)
9927 enter_smm_save_seg_32(vcpu, buf, i);
9928
9929 put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
9930
9931
9932 put_smstate(u32, buf, 0x7efc, 0x00020000);
9933 put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
9934 }
9935
9936 #ifdef CONFIG_X86_64
9937 static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
9938 {
9939 struct desc_ptr dt;
9940 struct kvm_segment seg;
9941 unsigned long val;
9942 int i;
9943
9944 for (i = 0; i < 16; i++)
9945 put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
9946
9947 put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
9948 put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
9949
9950 kvm_get_dr(vcpu, 6, &val);
9951 put_smstate(u64, buf, 0x7f68, val);
9952 kvm_get_dr(vcpu, 7, &val);
9953 put_smstate(u64, buf, 0x7f60, val);
9954
9955 put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
9956 put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
9957 put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
9958
9959 put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
9960
9961
9962 put_smstate(u32, buf, 0x7efc, 0x00020064);
9963
9964 put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
9965
9966 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
9967 put_smstate(u16, buf, 0x7e90, seg.selector);
9968 put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
9969 put_smstate(u32, buf, 0x7e94, seg.limit);
9970 put_smstate(u64, buf, 0x7e98, seg.base);
9971
9972 static_call(kvm_x86_get_idt)(vcpu, &dt);
9973 put_smstate(u32, buf, 0x7e84, dt.size);
9974 put_smstate(u64, buf, 0x7e88, dt.address);
9975
9976 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
9977 put_smstate(u16, buf, 0x7e70, seg.selector);
9978 put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
9979 put_smstate(u32, buf, 0x7e74, seg.limit);
9980 put_smstate(u64, buf, 0x7e78, seg.base);
9981
9982 static_call(kvm_x86_get_gdt)(vcpu, &dt);
9983 put_smstate(u32, buf, 0x7e64, dt.size);
9984 put_smstate(u64, buf, 0x7e68, dt.address);
9985
9986 for (i = 0; i < 6; i++)
9987 enter_smm_save_seg_64(vcpu, buf, i);
9988 }
9989 #endif
9990
9991 static void enter_smm(struct kvm_vcpu *vcpu)
9992 {
9993 struct kvm_segment cs, ds;
9994 struct desc_ptr dt;
9995 unsigned long cr0;
9996 char buf[512];
9997
9998 memset(buf, 0, 512);
9999 #ifdef CONFIG_X86_64
10000 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
10001 enter_smm_save_state_64(vcpu, buf);
10002 else
10003 #endif
10004 enter_smm_save_state_32(vcpu, buf);
10005
10006
10007
10008
10009
10010
10011 static_call(kvm_x86_enter_smm)(vcpu, buf);
10012
10013 kvm_smm_changed(vcpu, true);
10014 kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
10015
10016 if (static_call(kvm_x86_get_nmi_mask)(vcpu))
10017 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
10018 else
10019 static_call(kvm_x86_set_nmi_mask)(vcpu, true);
10020
10021 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
10022 kvm_rip_write(vcpu, 0x8000);
10023
10024 cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
10025 static_call(kvm_x86_set_cr0)(vcpu, cr0);
10026 vcpu->arch.cr0 = cr0;
10027
10028 static_call(kvm_x86_set_cr4)(vcpu, 0);
10029
10030
10031 dt.address = dt.size = 0;
10032 static_call(kvm_x86_set_idt)(vcpu, &dt);
10033
10034 kvm_set_dr(vcpu, 7, DR7_FIXED_1);
10035
10036 cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
10037 cs.base = vcpu->arch.smbase;
10038
10039 ds.selector = 0;
10040 ds.base = 0;
10041
10042 cs.limit = ds.limit = 0xffffffff;
10043 cs.type = ds.type = 0x3;
10044 cs.dpl = ds.dpl = 0;
10045 cs.db = ds.db = 0;
10046 cs.s = ds.s = 1;
10047 cs.l = ds.l = 0;
10048 cs.g = ds.g = 1;
10049 cs.avl = ds.avl = 0;
10050 cs.present = ds.present = 1;
10051 cs.unusable = ds.unusable = 0;
10052 cs.padding = ds.padding = 0;
10053
10054 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
10055 kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
10056 kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
10057 kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
10058 kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
10059 kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
10060
10061 #ifdef CONFIG_X86_64
10062 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
10063 static_call(kvm_x86_set_efer)(vcpu, 0);
10064 #endif
10065
10066 kvm_update_cpuid_runtime(vcpu);
10067 kvm_mmu_reset_context(vcpu);
10068 }
10069
10070 static void process_smi(struct kvm_vcpu *vcpu)
10071 {
10072 vcpu->arch.smi_pending = true;
10073 kvm_make_request(KVM_REQ_EVENT, vcpu);
10074 }
10075
10076 void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
10077 unsigned long *vcpu_bitmap)
10078 {
10079 kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC, vcpu_bitmap);
10080 }
10081
10082 void kvm_make_scan_ioapic_request(struct kvm *kvm)
10083 {
10084 kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
10085 }
10086
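/*
 * Recompute whether APIC virtualization should be active for this vCPU
 * from the VM-wide and vCPU-specific inhibit reasons, and propagate the
 * result to the vendor code.
 */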
10087 void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
10088 {
10089 struct kvm_lapic *apic = vcpu->arch.apic;
10090 bool activate;
10091
10092 if (!lapic_in_kernel(vcpu))
10093 return;
10094
10095 down_read(&vcpu->kvm->arch.apicv_update_lock);
10096 preempt_disable();
10097
10098
10099 activate = kvm_vcpu_apicv_activated(vcpu) &&
10100 (kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED);
10101
10102 if (apic->apicv_active == activate)
10103 goto out;
10104
10105 apic->apicv_active = activate;
10106 kvm_apic_update_apicv(vcpu);
10107 static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu);
10108
10109
10110
10111
10112
10113
10114
10115 if (!apic->apicv_active)
10116 kvm_make_request(KVM_REQ_EVENT, vcpu);
10117
10118 out:
10119 preempt_enable();
10120 up_read(&vcpu->kvm->arch.apicv_update_lock);
10121 }
10122 EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
10123
10124 void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
10125 enum kvm_apicv_inhibit reason, bool set)
10126 {
10127 unsigned long old, new;
10128
10129 lockdep_assert_held_write(&kvm->arch.apicv_update_lock);
10130
10131 if (!static_call(kvm_x86_check_apicv_inhibit_reasons)(reason))
10132 return;
10133
10134 old = new = kvm->arch.apicv_inhibit_reasons;
10135
10136 set_or_clear_apicv_inhibit(&new, reason, set);
10137
10138 if (!!old != !!new) {
10139
10140
10141
10142
10143
10144
10145
10146
10147
10148
10149
10150
10151 kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE);
10152 kvm->arch.apicv_inhibit_reasons = new;
10153 if (new) {
10154 unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE);
10155 kvm_zap_gfn_range(kvm, gfn, gfn+1);
10156 }
10157 } else {
10158 kvm->arch.apicv_inhibit_reasons = new;
10159 }
10160 }
10161
10162 void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
10163 enum kvm_apicv_inhibit reason, bool set)
10164 {
10165 if (!enable_apicv)
10166 return;
10167
10168 down_write(&kvm->arch.apicv_update_lock);
10169 __kvm_set_or_clear_apicv_inhibit(kvm, reason, set);
10170 up_write(&kvm->arch.apicv_update_lock);
10171 }
10172 EXPORT_SYMBOL_GPL(kvm_set_or_clear_apicv_inhibit);
10173
10174 static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
10175 {
10176 if (!kvm_apic_present(vcpu))
10177 return;
10178
10179 bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
10180
10181 if (irqchip_split(vcpu->kvm))
10182 kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
10183 else {
10184 static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
10185 if (ioapic_in_kernel(vcpu->kvm))
10186 kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
10187 }
10188
10189 if (is_guest_mode(vcpu))
10190 vcpu->arch.load_eoi_exitmap_pending = true;
10191 else
10192 kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
10193 }
10194
10195 static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
10196 {
10197 u64 eoi_exit_bitmap[4];
10198
10199 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
10200 return;
10201
10202 if (to_hv_vcpu(vcpu)) {
10203 bitmap_or((ulong *)eoi_exit_bitmap,
10204 vcpu->arch.ioapic_handled_vectors,
10205 to_hv_synic(vcpu)->vec_bitmap, 256);
10206 static_call_cond(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
10207 return;
10208 }
10209
10210 static_call_cond(kvm_x86_load_eoi_exitmap)(
10211 vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
10212 }
10213
10214 void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
10215 unsigned long start, unsigned long end)
10216 {
10217 unsigned long apic_address;
10218
10219
10220
10221
10222
10223 apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
10224 if (start <= apic_address && apic_address < end)
10225 kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
10226 }
10227
10228 void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
10229 {
10230 static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm);
10231 }
10232
10233 static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
10234 {
10235 if (!lapic_in_kernel(vcpu))
10236 return;
10237
10238 static_call_cond(kvm_x86_set_apic_access_page_addr)(vcpu);
10239 }
10240
10241 void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
10242 {
10243 smp_send_reschedule(vcpu->cpu);
10244 }
10245 EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
10246
/*
 * Returns 1 to let vcpu_run() continue the guest execution loop without
 * exiting to userspace.  Otherwise, the value is returned to userspace.
 */
10253 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
10254 {
10255 int r;
10256 bool req_int_win =
10257 dm_request_for_irq_injection(vcpu) &&
10258 kvm_cpu_accept_dm_intr(vcpu);
10259 fastpath_t exit_fastpath;
10260
10261 bool req_immediate_exit = false;
10262
10263
10264 if (unlikely(vcpu->kvm->dirty_ring_size &&
10265 kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) {
10266 vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL;
10267 trace_kvm_dirty_ring_exit(vcpu);
10268 r = 0;
10269 goto out;
10270 }
10271
10272 if (kvm_request_pending(vcpu)) {
10273 if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
10274 r = -EIO;
10275 goto out;
10276 }
10277 if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
10278 if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
10279 r = 0;
10280 goto out;
10281 }
10282 }
10283 if (kvm_check_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
10284 kvm_mmu_free_obsolete_roots(vcpu);
10285 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
10286 __kvm_migrate_timers(vcpu);
10287 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
10288 kvm_update_masterclock(vcpu->kvm);
10289 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
10290 kvm_gen_kvmclock_update(vcpu);
10291 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
10292 r = kvm_guest_time_update(vcpu);
10293 if (unlikely(r))
10294 goto out;
10295 }
10296 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
10297 kvm_mmu_sync_roots(vcpu);
10298 if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu))
10299 kvm_mmu_load_pgd(vcpu);
10300 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
10301 kvm_vcpu_flush_tlb_all(vcpu);
10302
10303
10304 kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
10305 }
10306 kvm_service_local_tlb_flush_requests(vcpu);
10307
10308 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
10309 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
10310 r = 0;
10311 goto out;
10312 }
10313 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
10314 if (is_guest_mode(vcpu)) {
10315 kvm_x86_ops.nested_ops->triple_fault(vcpu);
10316 } else {
10317 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
10318 vcpu->mmio_needed = 0;
10319 r = 0;
10320 goto out;
10321 }
10322 }
10323 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
10324
10325 vcpu->arch.apf.halted = true;
10326 r = 1;
10327 goto out;
10328 }
10329 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
10330 record_steal_time(vcpu);
10331 if (kvm_check_request(KVM_REQ_SMI, vcpu))
10332 process_smi(vcpu);
10333 if (kvm_check_request(KVM_REQ_NMI, vcpu))
10334 process_nmi(vcpu);
10335 if (kvm_check_request(KVM_REQ_PMU, vcpu))
10336 kvm_pmu_handle_event(vcpu);
10337 if (kvm_check_request(KVM_REQ_PMI, vcpu))
10338 kvm_pmu_deliver_pmi(vcpu);
10339 if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
10340 BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
10341 if (test_bit(vcpu->arch.pending_ioapic_eoi,
10342 vcpu->arch.ioapic_handled_vectors)) {
10343 vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
10344 vcpu->run->eoi.vector =
10345 vcpu->arch.pending_ioapic_eoi;
10346 r = 0;
10347 goto out;
10348 }
10349 }
10350 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
10351 vcpu_scan_ioapic(vcpu);
10352 if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
10353 vcpu_load_eoi_exitmap(vcpu);
10354 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
10355 kvm_vcpu_reload_apic_access_page(vcpu);
10356 if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
10357 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
10358 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
10359 vcpu->run->system_event.ndata = 0;
10360 r = 0;
10361 goto out;
10362 }
10363 if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
10364 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
10365 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
10366 vcpu->run->system_event.ndata = 0;
10367 r = 0;
10368 goto out;
10369 }
10370 if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
10371 struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
10372
10373 vcpu->run->exit_reason = KVM_EXIT_HYPERV;
10374 vcpu->run->hyperv = hv_vcpu->exit;
10375 r = 0;
10376 goto out;
10377 }
10378
10379
10380
10381
10382
10383
10384 if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
10385 kvm_hv_process_stimers(vcpu);
10386 if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
10387 kvm_vcpu_update_apicv(vcpu);
10388 if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
10389 kvm_check_async_pf_completion(vcpu);
10390 if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
10391 static_call(kvm_x86_msr_filter_changed)(vcpu);
10392
10393 if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
10394 static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
10395 }
10396
10397 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
10398 kvm_xen_has_interrupt(vcpu)) {
10399 ++vcpu->stat.req_event;
10400 r = kvm_apic_accept_events(vcpu);
10401 if (r < 0) {
10402 r = 0;
10403 goto out;
10404 }
10405 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
10406 r = 1;
10407 goto out;
10408 }
10409
10410 r = inject_pending_event(vcpu, &req_immediate_exit);
10411 if (r < 0) {
10412 r = 0;
10413 goto out;
10414 }
10415 if (req_int_win)
10416 static_call(kvm_x86_enable_irq_window)(vcpu);
10417
10418 if (kvm_lapic_enabled(vcpu)) {
10419 update_cr8_intercept(vcpu);
10420 kvm_lapic_sync_to_vapic(vcpu);
10421 }
10422 }
10423
10424 r = kvm_mmu_reload(vcpu);
10425 if (unlikely(r)) {
10426 goto cancel_injection;
10427 }
10428
10429 preempt_disable();
10430
10431 static_call(kvm_x86_prepare_switch_to_guest)(vcpu);
10432
10433
10434
10435
10436
10437
10438 local_irq_disable();
10439
10440
10441 smp_store_release(&vcpu->mode, IN_GUEST_MODE);
10442
10443 kvm_vcpu_srcu_read_unlock(vcpu);
10444
/*
 * The store setting vcpu->mode to IN_GUEST_MODE must be ordered before
 * the checks of pending requests and of the posted-interrupt descriptor
 * below, and before any guest page-table reads; it pairs with barriers
 * on the request, posted-interrupt and remote-TLB-flush sides.
 */
10457 smp_mb__after_srcu_read_unlock();
10458
10459
10460
10461
10462
10463
10464
10465
10466 if (kvm_lapic_enabled(vcpu))
10467 static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
10468
10469 if (kvm_vcpu_exit_request(vcpu)) {
10470 vcpu->mode = OUTSIDE_GUEST_MODE;
10471 smp_wmb();
10472 local_irq_enable();
10473 preempt_enable();
10474 kvm_vcpu_srcu_read_lock(vcpu);
10475 r = 1;
10476 goto cancel_injection;
10477 }
10478
10479 if (req_immediate_exit) {
10480 kvm_make_request(KVM_REQ_EVENT, vcpu);
10481 static_call(kvm_x86_request_immediate_exit)(vcpu);
10482 }
10483
10484 fpregs_assert_state_consistent();
10485 if (test_thread_flag(TIF_NEED_FPU_LOAD))
10486 switch_fpu_return();
10487
10488 if (vcpu->arch.guest_fpu.xfd_err)
10489 wrmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
10490
10491 if (unlikely(vcpu->arch.switch_db_regs)) {
10492 set_debugreg(0, 7);
10493 set_debugreg(vcpu->arch.eff_db[0], 0);
10494 set_debugreg(vcpu->arch.eff_db[1], 1);
10495 set_debugreg(vcpu->arch.eff_db[2], 2);
10496 set_debugreg(vcpu->arch.eff_db[3], 3);
10497 } else if (unlikely(hw_breakpoint_active())) {
10498 set_debugreg(0, 7);
10499 }
10500
10501 guest_timing_enter_irqoff();
10502
10503 for (;;) {
10504
10505
10506
10507
10508
10509
10510 WARN_ON_ONCE((kvm_vcpu_apicv_activated(vcpu) != kvm_vcpu_apicv_active(vcpu)) &&
10511 (kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED));
10512
10513 exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu);
10514 if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
10515 break;
10516
10517 if (kvm_lapic_enabled(vcpu))
10518 static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
10519
10520 if (unlikely(kvm_vcpu_exit_request(vcpu))) {
10521 exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
10522 break;
10523 }
10524 }
10525
10526
10527
10528
10529
10530
10531
10532 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
10533 WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
10534 static_call(kvm_x86_sync_dirty_debug_regs)(vcpu);
10535 kvm_update_dr0123(vcpu);
10536 kvm_update_dr7(vcpu);
10537 }
10538
10539
10540
10541
10542
10543
10544
10545
10546 if (hw_breakpoint_active())
10547 hw_breakpoint_restore();
10548
10549 vcpu->arch.last_vmentry_cpu = vcpu->cpu;
10550 vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
10551
10552 vcpu->mode = OUTSIDE_GUEST_MODE;
10553 smp_wmb();
10554
10555
10556
10557
10558
10559
10560 if (vcpu->arch.xfd_no_write_intercept)
10561 fpu_sync_guest_vmexit_xfd_state();
10562
10563 static_call(kvm_x86_handle_exit_irqoff)(vcpu);
10564
10565 if (vcpu->arch.guest_fpu.xfd_err)
10566 wrmsrl(MSR_IA32_XFD_ERR, 0);
10567
/*
 * Briefly enable IRQs to consume any pending interrupts, including the
 * one that may have triggered the VM-Exit, while the "handling an
 * interrupt from the guest" context is still set for accounting.  The
 * stat.exits increment also serves as the instruction needed after
 * local_irq_enable() to fully unblock interrupts on CPUs with an
 * interrupt shadow.
 */
10575 kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
10576 local_irq_enable();
10577 ++vcpu->stat.exits;
10578 local_irq_disable();
10579 kvm_after_interrupt(vcpu);
10580
10581
10582
10583
10584
10585
10586
10587
10588 guest_timing_exit_irqoff();
10589
10590 local_irq_enable();
10591 preempt_enable();
10592
10593 kvm_vcpu_srcu_read_lock(vcpu);
10594
10595
10596
10597
10598 if (unlikely(prof_on == KVM_PROFILING)) {
10599 unsigned long rip = kvm_rip_read(vcpu);
10600 profile_hit(KVM_PROFILING, (void *)rip);
10601 }
10602
10603 if (unlikely(vcpu->arch.tsc_always_catchup))
10604 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
10605
10606 if (vcpu->arch.apic_attention)
10607 kvm_lapic_sync_from_vapic(vcpu);
10608
10609 r = static_call(kvm_x86_handle_exit)(vcpu, exit_fastpath);
10610 return r;
10611
10612 cancel_injection:
10613 if (req_immediate_exit)
10614 kvm_make_request(KVM_REQ_EVENT, vcpu);
10615 static_call(kvm_x86_cancel_injection)(vcpu);
10616 if (unlikely(vcpu->arch.apic_attention))
10617 kvm_lapic_sync_from_vapic(vcpu);
10618 out:
10619 return r;
10620 }
10621
10622
10623 static inline int vcpu_block(struct kvm_vcpu *vcpu)
10624 {
10625 bool hv_timer;
10626
10627 if (!kvm_arch_vcpu_runnable(vcpu)) {
/*
 * Switch to the software timer before halt-polling/blocking: the
 * guest's APIC timer may be the wake event, and the hypervisor timer
 * only runs while the CPU is in guest mode.
 */
10635 hv_timer = kvm_lapic_hv_timer_in_use(vcpu);
10636 if (hv_timer)
10637 kvm_lapic_switch_to_sw_timer(vcpu);
10638
10639 kvm_vcpu_srcu_read_unlock(vcpu);
10640 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
10641 kvm_vcpu_halt(vcpu);
10642 else
10643 kvm_vcpu_block(vcpu);
10644 kvm_vcpu_srcu_read_lock(vcpu);
10645
10646 if (hv_timer)
10647 kvm_lapic_switch_to_hv_timer(vcpu);
10648
10649 if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
10650 return 1;
10651 }
10652
10653 if (kvm_apic_accept_events(vcpu) < 0)
10654 return 0;
10655 switch(vcpu->arch.mp_state) {
10656 case KVM_MP_STATE_HALTED:
10657 case KVM_MP_STATE_AP_RESET_HOLD:
10658 vcpu->arch.pv.pv_unhalted = false;
10659 vcpu->arch.mp_state =
10660 KVM_MP_STATE_RUNNABLE;
10661 fallthrough;
10662 case KVM_MP_STATE_RUNNABLE:
10663 vcpu->arch.apf.halted = false;
10664 break;
10665 case KVM_MP_STATE_INIT_RECEIVED:
10666 break;
10667 default:
10668 WARN_ON_ONCE(1);
10669 break;
10670 }
10671 return 1;
10672 }
10673
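/*
 * Whether the vCPU can (re)enter the guest: it must be RUNNABLE and not
 * halted waiting on an async page fault.  Nested events are checked
 * first so that a pending nested VM-Exit is processed beforehand.
 */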
10674 static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
10675 {
10676 if (is_guest_mode(vcpu))
10677 kvm_check_nested_events(vcpu);
10678
10679 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
10680 !vcpu->arch.apf.halted);
10681 }
10682
10683
10684 static int vcpu_run(struct kvm_vcpu *vcpu)
10685 {
10686 int r;
10687
10688 vcpu->arch.l1tf_flush_l1d = true;
10689
10690 for (;;) {
10691
10692
10693
10694
10695
10696
10697 vcpu->arch.at_instruction_boundary = false;
10698 if (kvm_vcpu_running(vcpu)) {
10699 r = vcpu_enter_guest(vcpu);
10700 } else {
10701 r = vcpu_block(vcpu);
10702 }
10703
10704 if (r <= 0)
10705 break;
10706
10707 kvm_clear_request(KVM_REQ_UNBLOCK, vcpu);
10708 if (kvm_xen_has_pending_events(vcpu))
10709 kvm_xen_inject_pending_events(vcpu);
10710
10711 if (kvm_cpu_has_pending_timer(vcpu))
10712 kvm_inject_pending_timer_irqs(vcpu);
10713
10714 if (dm_request_for_irq_injection(vcpu) &&
10715 kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
10716 r = 0;
10717 vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
10718 ++vcpu->stat.request_irq_exits;
10719 break;
10720 }
10721
10722 if (__xfer_to_guest_mode_work_pending()) {
10723 kvm_vcpu_srcu_read_unlock(vcpu);
10724 r = xfer_to_guest_mode_handle_work(vcpu);
10725 kvm_vcpu_srcu_read_lock(vcpu);
10726 if (r)
10727 return r;
10728 }
10729 }
10730
10731 return r;
10732 }
10733
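/*
 * Re-enter the emulator, without re-decoding, to finish an instruction
 * whose MMIO or PIO access was completed by userspace.
 */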
10734 static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
10735 {
10736 return kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
10737 }
10738
10739 static int complete_emulated_pio(struct kvm_vcpu *vcpu)
10740 {
10741 BUG_ON(!vcpu->arch.pio.count);
10742
10743 return complete_emulated_io(vcpu);
10744 }
10745
/*
 * An MMIO access that crosses a page boundary is split into fragments,
 * and each fragment is transferred to/from userspace in pieces of at
 * most 8 bytes.  Completion works as a state machine:
 *
 * read:
 *   for each fragment
 *     for each mmio piece in the fragment
 *       write gpa, len
 *       exit to userspace
 *       copy data back
 *   re-execute the instruction
 *
 * write:
 *   for each fragment
 *     for each mmio piece in the fragment
 *       write gpa, len and data
 *       exit to userspace
 */
10764 static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
10765 {
10766 struct kvm_run *run = vcpu->run;
10767 struct kvm_mmio_fragment *frag;
10768 unsigned len;
10769
10770 BUG_ON(!vcpu->mmio_needed);
10771
10772
10773 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
10774 len = min(8u, frag->len);
10775 if (!vcpu->mmio_is_write)
10776 memcpy(frag->data, run->mmio.data, len);
10777
10778 if (frag->len <= 8) {
10779
10780 frag++;
10781 vcpu->mmio_cur_fragment++;
10782 } else {
10783
10784 frag->data += len;
10785 frag->gpa += len;
10786 frag->len -= len;
10787 }
10788
10789 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
10790 vcpu->mmio_needed = 0;
10791
10792
10793 if (vcpu->mmio_is_write)
10794 return 1;
10795 vcpu->mmio_read_completed = 1;
10796 return complete_emulated_io(vcpu);
10797 }
10798
10799 run->exit_reason = KVM_EXIT_MMIO;
10800 run->mmio.phys_addr = frag->gpa;
10801 if (vcpu->mmio_is_write)
10802 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
10803 run->mmio.len = min(8u, frag->len);
10804 run->mmio.is_write = vcpu->mmio_is_write;
10805 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
10806 return 0;
10807 }
10808
10809
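/* Swap the userspace (e.g. QEMU) FPU context for the guest FPU context. */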
10810 static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
10811 {
10812
10813 fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, true);
10814 trace_kvm_fpu(1);
10815 }
10816
10817
10818 static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
10819 {
10820 fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, false);
10821 ++vcpu->stat.fpu_reload;
10822 trace_kvm_fpu(0);
10823 }
10824
10825 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
10826 {
10827 struct kvm_run *kvm_run = vcpu->run;
10828 int r;
10829
10830 vcpu_load(vcpu);
10831 kvm_sigset_activate(vcpu);
10832 kvm_run->flags = 0;
10833 kvm_load_guest_fpu(vcpu);
10834
10835 kvm_vcpu_srcu_read_lock(vcpu);
10836 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
10837 if (kvm_run->immediate_exit) {
10838 r = -EINTR;
10839 goto out;
10840 }
10841
10842
10843
10844
10845 WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
10846
10847 kvm_vcpu_srcu_read_unlock(vcpu);
10848 kvm_vcpu_block(vcpu);
10849 kvm_vcpu_srcu_read_lock(vcpu);
10850
10851 if (kvm_apic_accept_events(vcpu) < 0) {
10852 r = 0;
10853 goto out;
10854 }
10855 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
10856 r = -EAGAIN;
10857 if (signal_pending(current)) {
10858 r = -EINTR;
10859 kvm_run->exit_reason = KVM_EXIT_INTR;
10860 ++vcpu->stat.signal_exits;
10861 }
10862 goto out;
10863 }
10864
10865 if ((kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) ||
10866 (kvm_run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)) {
10867 r = -EINVAL;
10868 goto out;
10869 }
10870
10871 if (kvm_run->kvm_dirty_regs) {
10872 r = sync_regs(vcpu);
10873 if (r != 0)
10874 goto out;
10875 }
10876
10877
10878 if (!lapic_in_kernel(vcpu)) {
10879 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
10880 r = -EINVAL;
10881 goto out;
10882 }
10883 }
10884
10885 if (unlikely(vcpu->arch.complete_userspace_io)) {
10886 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
10887 vcpu->arch.complete_userspace_io = NULL;
10888 r = cui(vcpu);
10889 if (r <= 0)
10890 goto out;
10891 } else {
10892 WARN_ON_ONCE(vcpu->arch.pio.count);
10893 WARN_ON_ONCE(vcpu->mmio_needed);
10894 }
10895
10896 if (kvm_run->immediate_exit) {
10897 r = -EINTR;
10898 goto out;
10899 }
10900
10901 r = static_call(kvm_x86_vcpu_pre_run)(vcpu);
10902 if (r <= 0)
10903 goto out;
10904
10905 r = vcpu_run(vcpu);
10906
10907 out:
10908 kvm_put_guest_fpu(vcpu);
10909 if (kvm_run->kvm_valid_regs)
10910 store_regs(vcpu);
10911 post_kvm_run_save(vcpu);
10912 kvm_vcpu_srcu_read_unlock(vcpu);
10913
10914 kvm_sigset_deactivate(vcpu);
10915 vcpu_put(vcpu);
10916 return r;
10917 }
10918
10919 static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
10920 {
10921 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
/*
 * Userspace called get_regs() in the middle of instruction emulation;
 * write the register state back from the emulation context to the vCPU
 * before reading it out.  Userspace shouldn't normally do this, but
 * handle it anyway.
 */
10929 emulator_writeback_register_cache(vcpu->arch.emulate_ctxt);
10930 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
10931 }
10932 regs->rax = kvm_rax_read(vcpu);
10933 regs->rbx = kvm_rbx_read(vcpu);
10934 regs->rcx = kvm_rcx_read(vcpu);
10935 regs->rdx = kvm_rdx_read(vcpu);
10936 regs->rsi = kvm_rsi_read(vcpu);
10937 regs->rdi = kvm_rdi_read(vcpu);
10938 regs->rsp = kvm_rsp_read(vcpu);
10939 regs->rbp = kvm_rbp_read(vcpu);
10940 #ifdef CONFIG_X86_64
10941 regs->r8 = kvm_r8_read(vcpu);
10942 regs->r9 = kvm_r9_read(vcpu);
10943 regs->r10 = kvm_r10_read(vcpu);
10944 regs->r11 = kvm_r11_read(vcpu);
10945 regs->r12 = kvm_r12_read(vcpu);
10946 regs->r13 = kvm_r13_read(vcpu);
10947 regs->r14 = kvm_r14_read(vcpu);
10948 regs->r15 = kvm_r15_read(vcpu);
10949 #endif
10950
10951 regs->rip = kvm_rip_read(vcpu);
10952 regs->rflags = kvm_get_rflags(vcpu);
10953 }
10954
10955 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
10956 {
10957 vcpu_load(vcpu);
10958 __get_regs(vcpu, regs);
10959 vcpu_put(vcpu);
10960 return 0;
10961 }
10962
10963 static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
10964 {
10965 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
10966 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
10967
10968 kvm_rax_write(vcpu, regs->rax);
10969 kvm_rbx_write(vcpu, regs->rbx);
10970 kvm_rcx_write(vcpu, regs->rcx);
10971 kvm_rdx_write(vcpu, regs->rdx);
10972 kvm_rsi_write(vcpu, regs->rsi);
10973 kvm_rdi_write(vcpu, regs->rdi);
10974 kvm_rsp_write(vcpu, regs->rsp);
10975 kvm_rbp_write(vcpu, regs->rbp);
10976 #ifdef CONFIG_X86_64
10977 kvm_r8_write(vcpu, regs->r8);
10978 kvm_r9_write(vcpu, regs->r9);
10979 kvm_r10_write(vcpu, regs->r10);
10980 kvm_r11_write(vcpu, regs->r11);
10981 kvm_r12_write(vcpu, regs->r12);
10982 kvm_r13_write(vcpu, regs->r13);
10983 kvm_r14_write(vcpu, regs->r14);
10984 kvm_r15_write(vcpu, regs->r15);
10985 #endif
10986
10987 kvm_rip_write(vcpu, regs->rip);
10988 kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
10989
10990 vcpu->arch.exception.pending = false;
10991
10992 kvm_make_request(KVM_REQ_EVENT, vcpu);
10993 }
10994
10995 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
10996 {
10997 vcpu_load(vcpu);
10998 __set_regs(vcpu, regs);
10999 vcpu_put(vcpu);
11000 return 0;
11001 }
11002
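/*
 * Gather the "special" register state (segments, descriptor tables and
 * control registers) into @sregs.  For guests with protected state (e.g.
 * SEV-ES) most of this is inaccessible, so only the registers KVM still
 * tracks itself (CR0, CR4, CR8, EFER, APIC base) are reported.
 */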
11003 static void __get_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
11004 {
11005 struct desc_ptr dt;
11006
11007 if (vcpu->arch.guest_state_protected)
11008 goto skip_protected_regs;
11009
11010 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
11011 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
11012 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
11013 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
11014 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
11015 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
11016
11017 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
11018 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
11019
11020 static_call(kvm_x86_get_idt)(vcpu, &dt);
11021 sregs->idt.limit = dt.size;
11022 sregs->idt.base = dt.address;
11023 static_call(kvm_x86_get_gdt)(vcpu, &dt);
11024 sregs->gdt.limit = dt.size;
11025 sregs->gdt.base = dt.address;
11026
11027 sregs->cr2 = vcpu->arch.cr2;
11028 sregs->cr3 = kvm_read_cr3(vcpu);
11029
11030 skip_protected_regs:
11031 sregs->cr0 = kvm_read_cr0(vcpu);
11032 sregs->cr4 = kvm_read_cr4(vcpu);
11033 sregs->cr8 = kvm_get_cr8(vcpu);
11034 sregs->efer = vcpu->arch.efer;
11035 sregs->apic_base = kvm_get_apic_base(vcpu);
11036 }
11037
11038 static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
11039 {
11040 __get_sregs_common(vcpu, sregs);
11041
11042 if (vcpu->arch.guest_state_protected)
11043 return;
11044
11045 if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft)
11046 set_bit(vcpu->arch.interrupt.nr,
11047 (unsigned long *)sregs->interrupt_bitmap);
11048 }
11049
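/*
 * KVM_GET_SREGS2 additionally reports the guest PDPTRs when the vCPU is
 * using PAE paging, flagged via KVM_SREGS2_FLAGS_PDPTRS_VALID.
 */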
11050 static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
11051 {
11052 int i;
11053
11054 __get_sregs_common(vcpu, (struct kvm_sregs *)sregs2);
11055
11056 if (vcpu->arch.guest_state_protected)
11057 return;
11058
11059 if (is_pae_paging(vcpu)) {
11060 for (i = 0 ; i < 4 ; i++)
11061 sregs2->pdptrs[i] = kvm_pdptr_read(vcpu, i);
11062 sregs2->flags |= KVM_SREGS2_FLAGS_PDPTRS_VALID;
11063 }
11064 }
11065
11066 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
11067 struct kvm_sregs *sregs)
11068 {
11069 vcpu_load(vcpu);
11070 __get_sregs(vcpu, sregs);
11071 vcpu_put(vcpu);
11072 return 0;
11073 }
11074
11075 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
11076 struct kvm_mp_state *mp_state)
11077 {
11078 int r;
11079
11080 vcpu_load(vcpu);
11081 if (kvm_mpx_supported())
11082 kvm_load_guest_fpu(vcpu);
11083
11084 r = kvm_apic_accept_events(vcpu);
11085 if (r < 0)
11086 goto out;
11087 r = 0;
11088
11089 if ((vcpu->arch.mp_state == KVM_MP_STATE_HALTED ||
11090 vcpu->arch.mp_state == KVM_MP_STATE_AP_RESET_HOLD) &&
11091 vcpu->arch.pv.pv_unhalted)
11092 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
11093 else
11094 mp_state->mp_state = vcpu->arch.mp_state;
11095
11096 out:
11097 if (kvm_mpx_supported())
11098 kvm_put_guest_fpu(vcpu);
11099 vcpu_put(vcpu);
11100 return r;
11101 }
11102
11103 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
11104 struct kvm_mp_state *mp_state)
11105 {
11106 int ret = -EINVAL;
11107
11108 vcpu_load(vcpu);
11109
11110 switch (mp_state->mp_state) {
11111 case KVM_MP_STATE_UNINITIALIZED:
11112 case KVM_MP_STATE_HALTED:
11113 case KVM_MP_STATE_AP_RESET_HOLD:
11114 case KVM_MP_STATE_INIT_RECEIVED:
11115 case KVM_MP_STATE_SIPI_RECEIVED:
11116 if (!lapic_in_kernel(vcpu))
11117 goto out;
11118 break;
11119
11120 case KVM_MP_STATE_RUNNABLE:
11121 break;
11122
11123 default:
11124 goto out;
11125 }
11126
11127 /*
11128  * Reject INIT_RECEIVED/SIPI_RECEIVED while INIT is latched (e.g. the
11129  * vCPU is in SMM) or an SMI is pending, as INIT/SIPI can't actually
11130  * be taken in those states.
11131  */
11132 if ((kvm_vcpu_latch_init(vcpu) || vcpu->arch.smi_pending) &&
11133 (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
11134 mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
11135 goto out;
11136
11137 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
11138 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
11139 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
11140 } else
11141 vcpu->arch.mp_state = mp_state->mp_state;
11142 kvm_make_request(KVM_REQ_EVENT, vcpu);
11143
11144 ret = 0;
11145 out:
11146 vcpu_put(vcpu);
11147 return ret;
11148 }
11149
11150 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
11151 int reason, bool has_error_code, u32 error_code)
11152 {
11153 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
11154 int ret;
11155
11156 init_emulate_ctxt(vcpu);
11157
11158 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
11159 has_error_code, error_code);
11160 if (ret) {
11161 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
11162 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
11163 vcpu->run->internal.ndata = 0;
11164 return 0;
11165 }
11166
11167 kvm_rip_write(vcpu, ctxt->eip);
11168 kvm_set_rflags(vcpu, ctxt->eflags);
11169 return 1;
11170 }
11171 EXPORT_SYMBOL_GPL(kvm_task_switch);
11172
11173 static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
11174 {
11175 if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
11176 /*
11177  * When EFER.LME and CR0.PG are set, the processor is in
11178  * 64-bit mode (though maybe in a 32-bit code segment).
11179  * CR4.PAE and EFER.LMA must be set.
11180  */
11181 if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA))
11182 return false;
11183 if (kvm_vcpu_is_illegal_gpa(vcpu, sregs->cr3))
11184 return false;
11185 } else {
11186 /*
11187  * Not in 64-bit mode: EFER.LMA is clear and the code
11188  * segment cannot be 64-bit.
11189  */
11190 if (sregs->efer & EFER_LMA || sregs->cs.l)
11191 return false;
11192 }
11193
11194 return kvm_is_valid_cr4(vcpu, sregs->cr4);
11195 }
11196
11197 static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
11198 int *mmu_reset_needed, bool update_pdptrs)
11199 {
11200 struct msr_data apic_base_msr;
11201 int idx;
11202 struct desc_ptr dt;
11203
11204 if (!kvm_is_valid_sregs(vcpu, sregs))
11205 return -EINVAL;
11206
11207 apic_base_msr.data = sregs->apic_base;
11208 apic_base_msr.host_initiated = true;
11209 if (kvm_set_apic_base(vcpu, &apic_base_msr))
11210 return -EINVAL;
11211
11212 if (vcpu->arch.guest_state_protected)
11213 return 0;
11214
11215 dt.size = sregs->idt.limit;
11216 dt.address = sregs->idt.base;
11217 static_call(kvm_x86_set_idt)(vcpu, &dt);
11218 dt.size = sregs->gdt.limit;
11219 dt.address = sregs->gdt.base;
11220 static_call(kvm_x86_set_gdt)(vcpu, &dt);
11221
11222 vcpu->arch.cr2 = sregs->cr2;
11223 *mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
11224 vcpu->arch.cr3 = sregs->cr3;
11225 kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
11226 static_call_cond(kvm_x86_post_set_cr3)(vcpu, sregs->cr3);
11227
11228 kvm_set_cr8(vcpu, sregs->cr8);
11229
11230 *mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
11231 static_call(kvm_x86_set_efer)(vcpu, sregs->efer);
11232
11233 *mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
11234 static_call(kvm_x86_set_cr0)(vcpu, sregs->cr0);
11235 vcpu->arch.cr0 = sregs->cr0;
11236
11237 *mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
11238 static_call(kvm_x86_set_cr4)(vcpu, sregs->cr4);
11239
11240 if (update_pdptrs) {
11241 idx = srcu_read_lock(&vcpu->kvm->srcu);
11242 if (is_pae_paging(vcpu)) {
11243 load_pdptrs(vcpu, kvm_read_cr3(vcpu));
11244 *mmu_reset_needed = 1;
11245 }
11246 srcu_read_unlock(&vcpu->kvm->srcu, idx);
11247 }
11248
11249 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
11250 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
11251 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
11252 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
11253 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
11254 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
11255
11256 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
11257 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
11258
11259 update_cr8_intercept(vcpu);
11260
11261 /* Older userspace won't unhalt the vcpu on reset. */
11262 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
11263 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
11264 !is_protmode(vcpu))
11265 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
11266
11267 return 0;
11268 }
11269
11270 static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
11271 {
11272 int pending_vec, max_bits;
11273 int mmu_reset_needed = 0;
11274 int ret = __set_sregs_common(vcpu, sregs, &mmu_reset_needed, true);
11275
11276 if (ret)
11277 return ret;
11278
11279 if (mmu_reset_needed)
11280 kvm_mmu_reset_context(vcpu);
11281
11282 max_bits = KVM_NR_INTERRUPTS;
11283 pending_vec = find_first_bit(
11284 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
11285
11286 if (pending_vec < max_bits) {
11287 kvm_queue_interrupt(vcpu, pending_vec, false);
11288 pr_debug("Set back pending irq %d\n", pending_vec);
11289 kvm_make_request(KVM_REQ_EVENT, vcpu);
11290 }
11291 return 0;
11292 }
11293
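/*
 * KVM_SET_SREGS2 variant of __set_sregs(): if userspace supplies PDPTRs,
 * install them directly instead of re-reading them from guest memory and
 * record that they came from userspace (vcpu->arch.pdptrs_from_userspace).
 */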
11294 static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
11295 {
11296 int mmu_reset_needed = 0;
11297 bool valid_pdptrs = sregs2->flags & KVM_SREGS2_FLAGS_PDPTRS_VALID;
11298 bool pae = (sregs2->cr0 & X86_CR0_PG) && (sregs2->cr4 & X86_CR4_PAE) &&
11299 !(sregs2->efer & EFER_LMA);
11300 int i, ret;
11301
11302 if (sregs2->flags & ~KVM_SREGS2_FLAGS_PDPTRS_VALID)
11303 return -EINVAL;
11304
11305 if (valid_pdptrs && (!pae || vcpu->arch.guest_state_protected))
11306 return -EINVAL;
11307
11308 ret = __set_sregs_common(vcpu, (struct kvm_sregs *)sregs2,
11309 &mmu_reset_needed, !valid_pdptrs);
11310 if (ret)
11311 return ret;
11312
11313 if (valid_pdptrs) {
11314 for (i = 0; i < 4 ; i++)
11315 kvm_pdptr_write(vcpu, i, sregs2->pdptrs[i]);
11316
11317 kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
11318 mmu_reset_needed = 1;
11319 vcpu->arch.pdptrs_from_userspace = true;
11320 }
11321 if (mmu_reset_needed)
11322 kvm_mmu_reset_context(vcpu);
11323 return 0;
11324 }
11325
11326 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
11327 struct kvm_sregs *sregs)
11328 {
11329 int ret;
11330
11331 vcpu_load(vcpu);
11332 ret = __set_sregs(vcpu, sregs);
11333 vcpu_put(vcpu);
11334 return ret;
11335 }
11336
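/*
 * APICv cannot be used while any vCPU has KVM_GUESTDBG_BLOCKIRQ set, as
 * blocking interrupt injection requires KVM to intercept interrupts,
 * which APICv bypasses.  Scan all vCPUs and update the inhibit accordingly.
 */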
11337 static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm)
11338 {
11339 bool set = false;
11340 struct kvm_vcpu *vcpu;
11341 unsigned long i;
11342
11343 if (!enable_apicv)
11344 return;
11345
11346 down_write(&kvm->arch.apicv_update_lock);
11347
11348 kvm_for_each_vcpu(i, vcpu, kvm) {
11349 if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ) {
11350 set = true;
11351 break;
11352 }
11353 }
11354 __kvm_set_or_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_BLOCKIRQ, set);
11355 up_write(&kvm->arch.apicv_update_lock);
11356 }
11357
11358 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
11359 struct kvm_guest_debug *dbg)
11360 {
11361 unsigned long rflags;
11362 int i, r;
11363
11364 if (vcpu->arch.guest_state_protected)
11365 return -EINVAL;
11366
11367 vcpu_load(vcpu);
11368
11369 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
11370 r = -EBUSY;
11371 if (vcpu->arch.exception.pending)
11372 goto out;
11373 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
11374 kvm_queue_exception(vcpu, DB_VECTOR);
11375 else
11376 kvm_queue_exception(vcpu, BP_VECTOR);
11377 }
11378
11379 /*
11380  * Read rflags as long as potentially injected trace flags are
11381  * still filtered out.
11382  */
11383 rflags = kvm_get_rflags(vcpu);
11384
11385 vcpu->guest_debug = dbg->control;
11386 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
11387 vcpu->guest_debug = 0;
11388
11389 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
11390 for (i = 0; i < KVM_NR_DB_REGS; ++i)
11391 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
11392 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
11393 } else {
11394 for (i = 0; i < KVM_NR_DB_REGS; i++)
11395 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
11396 }
11397 kvm_update_dr7(vcpu);
11398
11399 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
11400 vcpu->arch.singlestep_rip = kvm_get_linear_rip(vcpu);
11401
11402 /*
11403  * Trigger an rflags update that will inject or remove the trace
11404  * flags.
11405  */
11406 kvm_set_rflags(vcpu, rflags);
11407
11408 static_call(kvm_x86_update_exception_bitmap)(vcpu);
11409
11410 kvm_arch_vcpu_guestdbg_update_apicv_inhibit(vcpu->kvm);
11411
11412 r = 0;
11413
11414 out:
11415 vcpu_put(vcpu);
11416 return r;
11417 }
11418
11419 /*
11420  * Translate a guest virtual address to a guest physical address.
11421  */
11422 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
11423 struct kvm_translation *tr)
11424 {
11425 unsigned long vaddr = tr->linear_address;
11426 gpa_t gpa;
11427 int idx;
11428
11429 vcpu_load(vcpu);
11430
11431 idx = srcu_read_lock(&vcpu->kvm->srcu);
11432 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
11433 srcu_read_unlock(&vcpu->kvm->srcu, idx);
11434 tr->physical_address = gpa;
11435 tr->valid = gpa != INVALID_GPA;
11436 tr->writeable = 1;
11437 tr->usermode = 0;
11438
11439 vcpu_put(vcpu);
11440 return 0;
11441 }
11442
11443 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
11444 {
11445 struct fxregs_state *fxsave;
11446
11447 if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
11448 return 0;
11449
11450 vcpu_load(vcpu);
11451
11452 fxsave = &vcpu->arch.guest_fpu.fpstate->regs.fxsave;
11453 memcpy(fpu->fpr, fxsave->st_space, 128);
11454 fpu->fcw = fxsave->cwd;
11455 fpu->fsw = fxsave->swd;
11456 fpu->ftwx = fxsave->twd;
11457 fpu->last_opcode = fxsave->fop;
11458 fpu->last_ip = fxsave->rip;
11459 fpu->last_dp = fxsave->rdp;
11460 memcpy(fpu->xmm, fxsave->xmm_space, sizeof(fxsave->xmm_space));
11461
11462 vcpu_put(vcpu);
11463 return 0;
11464 }
11465
11466 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
11467 {
11468 struct fxregs_state *fxsave;
11469
11470 if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
11471 return 0;
11472
11473 vcpu_load(vcpu);
11474
11475 fxsave = &vcpu->arch.guest_fpu.fpstate->regs.fxsave;
11476
11477 memcpy(fxsave->st_space, fpu->fpr, 128);
11478 fxsave->cwd = fpu->fcw;
11479 fxsave->swd = fpu->fsw;
11480 fxsave->twd = fpu->ftwx;
11481 fxsave->fop = fpu->last_opcode;
11482 fxsave->rip = fpu->last_ip;
11483 fxsave->rdp = fpu->last_dp;
11484 memcpy(fxsave->xmm_space, fpu->xmm, sizeof(fxsave->xmm_space));
11485
11486 vcpu_put(vcpu);
11487 return 0;
11488 }
11489
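/*
 * Mirror vCPU state into kvm_run->s.regs for the register classes that
 * userspace requested via kvm_valid_regs; the inverse of sync_regs().
 */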
11490 static void store_regs(struct kvm_vcpu *vcpu)
11491 {
11492 BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES);
11493
11494 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS)
11495 __get_regs(vcpu, &vcpu->run->s.regs.regs);
11496
11497 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS)
11498 __get_sregs(vcpu, &vcpu->run->s.regs.sregs);
11499
11500 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS)
11501 kvm_vcpu_ioctl_x86_get_vcpu_events(
11502 vcpu, &vcpu->run->s.regs.events);
11503 }
11504
11505 static int sync_regs(struct kvm_vcpu *vcpu)
11506 {
11507 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
11508 __set_regs(vcpu, &vcpu->run->s.regs.regs);
11509 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
11510 }
11511 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
11512 if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
11513 return -EINVAL;
11514 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
11515 }
11516 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
11517 if (kvm_vcpu_ioctl_x86_set_vcpu_events(
11518 vcpu, &vcpu->run->s.regs.events))
11519 return -EINVAL;
11520 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
11521 }
11522
11523 return 0;
11524 }
11525
11526 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
11527 {
11528 if (kvm_check_tsc_unstable() && kvm->created_vcpus)
11529 pr_warn_once("kvm: SMP vm created on host with unstable TSC; "
11530 "guest TSC will not be reliable\n");
11531
11532 if (!kvm->arch.max_vcpu_ids)
11533 kvm->arch.max_vcpu_ids = KVM_MAX_VCPU_IDS;
11534
11535 if (id >= kvm->arch.max_vcpu_ids)
11536 return -EINVAL;
11537
11538 return static_call(kvm_x86_vcpu_precreate)(kvm);
11539 }
11540
11541 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
11542 {
11543 struct page *page;
11544 int r;
11545
11546 vcpu->arch.last_vmentry_cpu = -1;
11547 vcpu->arch.regs_avail = ~0;
11548 vcpu->arch.regs_dirty = ~0;
11549
11550 if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
11551 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
11552 else
11553 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
11554
11555 r = kvm_mmu_create(vcpu);
11556 if (r < 0)
11557 return r;
11558
11559 if (irqchip_in_kernel(vcpu->kvm)) {
11560 r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
11561 if (r < 0)
11562 goto fail_mmu_destroy;
11563
11564
11565
11566
11567
11568
11569
11570
11571
11572
11573
11574 if (enable_apicv) {
11575 vcpu->arch.apic->apicv_active = true;
11576 kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
11577 }
11578 } else
11579 static_branch_inc(&kvm_has_noapic_vcpu);
11580
11581 r = -ENOMEM;
11582
11583 page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
11584 if (!page)
11585 goto fail_free_lapic;
11586 vcpu->arch.pio_data = page_address(page);
11587
11588 vcpu->arch.mce_banks = kcalloc(KVM_MAX_MCE_BANKS * 4, sizeof(u64),
11589 GFP_KERNEL_ACCOUNT);
11590 vcpu->arch.mci_ctl2_banks = kcalloc(KVM_MAX_MCE_BANKS, sizeof(u64),
11591 GFP_KERNEL_ACCOUNT);
11592 if (!vcpu->arch.mce_banks || !vcpu->arch.mci_ctl2_banks)
11593 goto fail_free_mce_banks;
11594 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
11595
11596 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
11597 GFP_KERNEL_ACCOUNT))
11598 goto fail_free_mce_banks;
11599
11600 if (!alloc_emulate_ctxt(vcpu))
11601 goto free_wbinvd_dirty_mask;
11602
11603 if (!fpu_alloc_guest_fpstate(&vcpu->arch.guest_fpu)) {
11604 pr_err("kvm: failed to allocate vcpu's fpu\n");
11605 goto free_emulate_ctxt;
11606 }
11607
11608 vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
11609 vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
11610
11611 vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
11612
11613 kvm_async_pf_hash_reset(vcpu);
11614 kvm_pmu_init(vcpu);
11615
11616 vcpu->arch.pending_external_vector = -1;
11617 vcpu->arch.preempted_in_kernel = false;
11618
11619 #if IS_ENABLED(CONFIG_HYPERV)
11620 vcpu->arch.hv_root_tdp = INVALID_PAGE;
11621 #endif
11622
11623 r = static_call(kvm_x86_vcpu_create)(vcpu);
11624 if (r)
11625 goto free_guest_fpu;
11626
11627 vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
11628 vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
11629 kvm_xen_init_vcpu(vcpu);
11630 kvm_vcpu_mtrr_init(vcpu);
11631 vcpu_load(vcpu);
11632 kvm_set_tsc_khz(vcpu, vcpu->kvm->arch.default_tsc_khz);
11633 kvm_vcpu_reset(vcpu, false);
11634 kvm_init_mmu(vcpu);
11635 vcpu_put(vcpu);
11636 return 0;
11637
11638 free_guest_fpu:
11639 fpu_free_guest_fpstate(&vcpu->arch.guest_fpu);
11640 free_emulate_ctxt:
11641 kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
11642 free_wbinvd_dirty_mask:
11643 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
11644 fail_free_mce_banks:
11645 kfree(vcpu->arch.mce_banks);
11646 kfree(vcpu->arch.mci_ctl2_banks);
11647 free_page((unsigned long)vcpu->arch.pio_data);
11648 fail_free_lapic:
11649 kvm_free_lapic(vcpu);
11650 fail_mmu_destroy:
11651 kvm_mmu_destroy(vcpu);
11652 return r;
11653 }
11654
11655 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
11656 {
11657 struct kvm *kvm = vcpu->kvm;
11658
11659 if (mutex_lock_killable(&vcpu->mutex))
11660 return;
11661 vcpu_load(vcpu);
11662 kvm_synchronize_tsc(vcpu, 0);
11663 vcpu_put(vcpu);
11664
11665
11666 vcpu->arch.msr_kvm_poll_control = 1;
11667
11668 mutex_unlock(&vcpu->mutex);
11669
11670 if (kvmclock_periodic_sync && vcpu->vcpu_idx == 0)
11671 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
11672 KVMCLOCK_SYNC_PERIOD);
11673 }
11674
11675 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
11676 {
11677 int idx;
11678
11679 kvmclock_reset(vcpu);
11680
11681 static_call(kvm_x86_vcpu_free)(vcpu);
11682
11683 kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
11684 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
11685 fpu_free_guest_fpstate(&vcpu->arch.guest_fpu);
11686
11687 kvm_xen_destroy_vcpu(vcpu);
11688 kvm_hv_vcpu_uninit(vcpu);
11689 kvm_pmu_destroy(vcpu);
11690 kfree(vcpu->arch.mce_banks);
11691 kfree(vcpu->arch.mci_ctl2_banks);
11692 kvm_free_lapic(vcpu);
11693 idx = srcu_read_lock(&vcpu->kvm->srcu);
11694 kvm_mmu_destroy(vcpu);
11695 srcu_read_unlock(&vcpu->kvm->srcu, idx);
11696 free_page((unsigned long)vcpu->arch.pio_data);
11697 kvfree(vcpu->arch.cpuid_entries);
11698 if (!lapic_in_kernel(vcpu))
11699 static_branch_dec(&kvm_has_noapic_vcpu);
11700 }
11701
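/*
 * Emulate CPU RESET (init_event == false, e.g. at vCPU creation) or INIT
 * (init_event == true).  Some state is only cleared on RESET, e.g. SMBASE,
 * the PMU and XCR0/XSS below, while most of the rest is reset for both.
 */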
11702 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
11703 {
11704 struct kvm_cpuid_entry2 *cpuid_0x1;
11705 unsigned long old_cr0 = kvm_read_cr0(vcpu);
11706 unsigned long new_cr0;
11707
11708
11709
11710
11711
11712
11713
11714
11715 WARN_ON_ONCE(!init_event &&
11716 (old_cr0 || kvm_read_cr3(vcpu) || kvm_read_cr4(vcpu)));
11717
11718 kvm_lapic_reset(vcpu, init_event);
11719
11720 vcpu->arch.hflags = 0;
11721
11722 vcpu->arch.smi_pending = 0;
11723 vcpu->arch.smi_count = 0;
11724 atomic_set(&vcpu->arch.nmi_queued, 0);
11725 vcpu->arch.nmi_pending = 0;
11726 vcpu->arch.nmi_injected = false;
11727 kvm_clear_interrupt_queue(vcpu);
11728 kvm_clear_exception_queue(vcpu);
11729
11730 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
11731 kvm_update_dr0123(vcpu);
11732 vcpu->arch.dr6 = DR6_ACTIVE_LOW;
11733 vcpu->arch.dr7 = DR7_FIXED_1;
11734 kvm_update_dr7(vcpu);
11735
11736 vcpu->arch.cr2 = 0;
11737
11738 kvm_make_request(KVM_REQ_EVENT, vcpu);
11739 vcpu->arch.apf.msr_en_val = 0;
11740 vcpu->arch.apf.msr_int_val = 0;
11741 vcpu->arch.st.msr_val = 0;
11742
11743 kvmclock_reset(vcpu);
11744
11745 kvm_clear_async_pf_completion_queue(vcpu);
11746 kvm_async_pf_hash_reset(vcpu);
11747 vcpu->arch.apf.halted = false;
11748
11749 if (vcpu->arch.guest_fpu.fpstate && kvm_mpx_supported()) {
11750 struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
11751
11752
11753
11754
11755
11756 if (init_event)
11757 kvm_put_guest_fpu(vcpu);
11758
11759 fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS);
11760 fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR);
11761
11762 if (init_event)
11763 kvm_load_guest_fpu(vcpu);
11764 }
11765
11766 if (!init_event) {
11767 kvm_pmu_reset(vcpu);
11768 vcpu->arch.smbase = 0x30000;
11769
11770 vcpu->arch.msr_misc_features_enables = 0;
11771 vcpu->arch.ia32_misc_enable_msr = MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL |
11772 MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
11773
11774 __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP);
11775 __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true);
11776 }
11777
11778
11779 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
11780 kvm_register_mark_dirty(vcpu, VCPU_REGS_RSP);
11781
11782
11783
11784
11785
11786
11787
11788
11789 cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1);
11790 kvm_rdx_write(vcpu, cpuid_0x1 ? cpuid_0x1->eax : 0x600);
11791
11792 static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
11793
11794 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
11795 kvm_rip_write(vcpu, 0xfff0);
11796
11797 vcpu->arch.cr3 = 0;
11798 kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
11799
11800
11801
11802
11803
11804
11805 new_cr0 = X86_CR0_ET;
11806 if (init_event)
11807 new_cr0 |= (old_cr0 & (X86_CR0_NW | X86_CR0_CD));
11808 else
11809 new_cr0 |= X86_CR0_NW | X86_CR0_CD;
11810
11811 static_call(kvm_x86_set_cr0)(vcpu, new_cr0);
11812 static_call(kvm_x86_set_cr4)(vcpu, 0);
11813 static_call(kvm_x86_set_efer)(vcpu, 0);
11814 static_call(kvm_x86_update_exception_bitmap)(vcpu);
11815
11816
11817
11818
11819
11820
11821
11822
11823
11824 if (old_cr0 & X86_CR0_PG) {
11825 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
11826 kvm_mmu_reset_context(vcpu);
11827 }
11828
11829
11830
11831
11832
11833
11834
11835
11836
11837
11838 if (init_event)
11839 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
11840 }
11841 EXPORT_SYMBOL_GPL(kvm_vcpu_reset);
11842
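/*
 * Deliver a SIPI: per the SDM the AP starts fetching code at real-mode
 * address "vector << 12", which KVM models as CS.selector = vector << 8,
 * CS.base = vector << 12 and RIP = 0.
 */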
11843 void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
11844 {
11845 struct kvm_segment cs;
11846
11847 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
11848 cs.selector = vector << 8;
11849 cs.base = vector << 12;
11850 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
11851 kvm_rip_write(vcpu, 0);
11852 }
11853 EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector);
11854
11855 int kvm_arch_hardware_enable(void)
11856 {
11857 struct kvm *kvm;
11858 struct kvm_vcpu *vcpu;
11859 unsigned long i;
11860 int ret;
11861 u64 local_tsc;
11862 u64 max_tsc = 0;
11863 bool stable, backwards_tsc = false;
11864
11865 kvm_user_return_msr_cpu_online();
11866 ret = static_call(kvm_x86_hardware_enable)();
11867 if (ret != 0)
11868 return ret;
11869
11870 local_tsc = rdtsc();
11871 stable = !kvm_check_tsc_unstable();
11872 list_for_each_entry(kvm, &vm_list, vm_list) {
11873 kvm_for_each_vcpu(i, vcpu, kvm) {
11874 if (!stable && vcpu->cpu == smp_processor_id())
11875 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
11876 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
11877 backwards_tsc = true;
11878 if (vcpu->arch.last_host_tsc > max_tsc)
11879 max_tsc = vcpu->arch.last_host_tsc;
11880 }
11881 }
11882 }
11883
11884
11885
11886
11887
11888
11889
11890
11891
11892
11893
11894
11895
11896
11897
11898
11899
11900
11901
11902
11903
11904
11905
11906
11907
11908
11909
11910
11911
11912
11913
11914
11915
11916
11917
11918
11919
11920
11921
11922 if (backwards_tsc) {
11923 u64 delta_cyc = max_tsc - local_tsc;
11924 list_for_each_entry(kvm, &vm_list, vm_list) {
11925 kvm->arch.backwards_tsc_observed = true;
11926 kvm_for_each_vcpu(i, vcpu, kvm) {
11927 vcpu->arch.tsc_offset_adjustment += delta_cyc;
11928 vcpu->arch.last_host_tsc = local_tsc;
11929 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
11930 }
11931
11932
11933
11934
11935
11936
11937
11938 kvm->arch.last_tsc_nsec = 0;
11939 kvm->arch.last_tsc_write = 0;
11940 }
11941
11942 }
11943 return 0;
11944 }
11945
11946 void kvm_arch_hardware_disable(void)
11947 {
11948 static_call(kvm_x86_hardware_disable)();
11949 drop_user_return_notifiers();
11950 }
11951
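/*
 * Copy the vendor module's (VMX or SVM) runtime ops into kvm_x86_ops and
 * patch the corresponding static calls.  Mandatory ops are sanity-checked
 * for being non-NULL; optional ops left NULL by the vendor are redirected
 * to a return-0 stub.
 */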
11952 static inline void kvm_ops_update(struct kvm_x86_init_ops *ops)
11953 {
11954 memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
11955
11956 #define __KVM_X86_OP(func) \
11957 static_call_update(kvm_x86_##func, kvm_x86_ops.func);
11958 #define KVM_X86_OP(func) \
11959 WARN_ON(!kvm_x86_ops.func); __KVM_X86_OP(func)
11960 #define KVM_X86_OP_OPTIONAL __KVM_X86_OP
11961 #define KVM_X86_OP_OPTIONAL_RET0(func) \
11962 static_call_update(kvm_x86_##func, (void *)kvm_x86_ops.func ? : \
11963 (void *)__static_call_return0);
11964 #include <asm/kvm-x86-ops.h>
11965 #undef __KVM_X86_OP
11966
11967 kvm_pmu_ops_update(ops->pmu_ops);
11968 }
11969
11970 int kvm_arch_hardware_setup(void *opaque)
11971 {
11972 struct kvm_x86_init_ops *ops = opaque;
11973 int r;
11974
11975 rdmsrl_safe(MSR_EFER, &host_efer);
11976
11977 if (boot_cpu_has(X86_FEATURE_XSAVES))
11978 rdmsrl(MSR_IA32_XSS, host_xss);
11979
11980 kvm_init_pmu_capability();
11981
11982 r = ops->hardware_setup();
11983 if (r != 0)
11984 return r;
11985
11986 kvm_ops_update(ops);
11987
11988 kvm_register_perf_callbacks(ops->handle_intel_pt_intr);
11989
11990 if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
11991 kvm_caps.supported_xss = 0;
11992
11993 #define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
11994 cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
11995 #undef __kvm_cpu_cap_has
11996
11997 if (kvm_caps.has_tsc_control) {
11998
11999
12000
12001
12002
12003
12004 u64 max = min(0x7fffffffULL,
12005 __scale_tsc(kvm_caps.max_tsc_scaling_ratio, tsc_khz));
12006 kvm_caps.max_guest_tsc_khz = max;
12007 }
12008 kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits;
12009 kvm_init_msr_list();
12010 return 0;
12011 }
12012
12013 void kvm_arch_hardware_unsetup(void)
12014 {
12015 kvm_unregister_perf_callbacks();
12016
12017 static_call(kvm_x86_hardware_unsetup)();
12018 }
12019
12020 int kvm_arch_check_processor_compat(void *opaque)
12021 {
12022 struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
12023 struct kvm_x86_init_ops *ops = opaque;
12024
12025 WARN_ON(!irqs_disabled());
12026
12027 if (__cr4_reserved_bits(cpu_has, c) !=
12028 __cr4_reserved_bits(cpu_has, &boot_cpu_data))
12029 return -EIO;
12030
12031 return ops->check_processor_compatibility();
12032 }
12033
12034 bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
12035 {
12036 return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
12037 }
12038 EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
12039
12040 bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
12041 {
12042 return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
12043 }
12044
12045 __read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
12046 EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
12047
12048 void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
12049 {
12050 struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
12051
12052 vcpu->arch.l1tf_flush_l1d = true;
12053 if (pmu->version && unlikely(pmu->event_count)) {
12054 pmu->need_cleanup = true;
12055 kvm_make_request(KVM_REQ_PMU, vcpu);
12056 }
12057 static_call(kvm_x86_sched_in)(vcpu, cpu);
12058 }
12059
12060 void kvm_arch_free_vm(struct kvm *kvm)
12061 {
12062 kfree(to_kvm_hv(kvm)->hv_pa_pg);
12063 __kvm_arch_free_vm(kvm);
12064 }
12065
12066
12067 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
12068 {
12069 int ret;
12070 unsigned long flags;
12071
12072 if (type)
12073 return -EINVAL;
12074
12075 ret = kvm_page_track_init(kvm);
12076 if (ret)
12077 goto out;
12078
12079 ret = kvm_mmu_init_vm(kvm);
12080 if (ret)
12081 goto out_page_track;
12082
12083 INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
12084 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
12085 atomic_set(&kvm->arch.noncoherent_dma_count, 0);
12086
12087 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
12088 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
12089 /* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
12090 set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
12091 &kvm->arch.irq_sources_bitmap);
12092
12093 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
12094 mutex_init(&kvm->arch.apic_map_lock);
12095 seqcount_raw_spinlock_init(&kvm->arch.pvclock_sc, &kvm->arch.tsc_write_lock);
12096 kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
12097
12098 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
12099 pvclock_update_vm_gtod_copy(kvm);
12100 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
12101
12102 kvm->arch.default_tsc_khz = max_tsc_khz ? : tsc_khz;
12103 kvm->arch.guest_can_read_msr_platform_info = true;
12104 kvm->arch.enable_pmu = enable_pmu;
12105
12106 #if IS_ENABLED(CONFIG_HYPERV)
12107 spin_lock_init(&kvm->arch.hv_root_tdp_lock);
12108 kvm->arch.hv_root_tdp = INVALID_PAGE;
12109 #endif
12110
12111 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
12112 INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
12113
12114 kvm_apicv_init(kvm);
12115 kvm_hv_init_vm(kvm);
12116 kvm_xen_init_vm(kvm);
12117
12118 return static_call(kvm_x86_vm_init)(kvm);
12119
12120 out_page_track:
12121 kvm_page_track_cleanup(kvm);
12122 out:
12123 return ret;
12124 }
12125
12126 int kvm_arch_post_init_vm(struct kvm *kvm)
12127 {
12128 return kvm_mmu_post_init_vm(kvm);
12129 }
12130
12131 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
12132 {
12133 vcpu_load(vcpu);
12134 kvm_mmu_unload(vcpu);
12135 vcpu_put(vcpu);
12136 }
12137
12138 static void kvm_unload_vcpu_mmus(struct kvm *kvm)
12139 {
12140 unsigned long i;
12141 struct kvm_vcpu *vcpu;
12142
12143 kvm_for_each_vcpu(i, vcpu, kvm) {
12144 kvm_clear_async_pf_completion_queue(vcpu);
12145 kvm_unload_vcpu_mmu(vcpu);
12146 }
12147 }
12148
12149 void kvm_arch_sync_events(struct kvm *kvm)
12150 {
12151 cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
12152 cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
12153 kvm_free_pit(kvm);
12154 }
12155
12156 /*
12157  * __x86_set_memory_region: set up or tear down a KVM-internal memory slot
12158  *
12159  * @kvm:  the VM
12160  * @id:   the slot ID to operate on
12161  * @gpa:  the guest physical address of the slot (ignored when deleting)
12162  * @size: the size of the slot, or 0 to delete an existing slot
12163  *
12164  * With @size > 0, an anonymous MAP_SHARED mapping is created in the
12165  * current process and installed as a private memslot in every address
12166  * space; with @size == 0, the existing slot (if any) is removed and its
12167  * mapping unmapped.
12168  *
12169  * Returns the host virtual address of the mapping on success and an
12170  * ERR_PTR() encoded error on failure, so callers must check the result
12171  * with IS_ERR().  The returned HVA is a user address: its accessibility
12172  * is not guaranteed and it must only be accessed with the user-access
12173  * helpers (e.g. __copy_{to,from}_user()).
12174  *
12175  * The caller must hold kvm->slots_lock.
12176  */
12177
12178 void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
12179 u32 size)
12180 {
12181 int i, r;
12182 unsigned long hva, old_npages;
12183 struct kvm_memslots *slots = kvm_memslots(kvm);
12184 struct kvm_memory_slot *slot;
12185
12186 /* Called with kvm->slots_lock held.  */
12187 if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
12188 return ERR_PTR_USR(-EINVAL);
12189
12190 slot = id_to_memslot(slots, id);
12191 if (size) {
12192 if (slot && slot->npages)
12193 return ERR_PTR_USR(-EEXIST);
12194
12195 /*
12196  * MAP_SHARED to prevent internal slot pages of this memory
12197  * region from being moved by fork()/COW.
12198  */
12199 hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
12200 MAP_SHARED | MAP_ANONYMOUS, 0);
12201 if (IS_ERR((void *)hva))
12202 return (void __user *)hva;
12203 } else {
12204 if (!slot || !slot->npages)
12205 return NULL;
12206
12207 old_npages = slot->npages;
12208 hva = slot->userspace_addr;
12209 }
12210
12211 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
12212 struct kvm_userspace_memory_region m;
12213
12214 m.slot = id | (i << 16);
12215 m.flags = 0;
12216 m.guest_phys_addr = gpa;
12217 m.userspace_addr = hva;
12218 m.memory_size = size;
12219 r = __kvm_set_memory_region(kvm, &m);
12220 if (r < 0)
12221 return ERR_PTR_USR(r);
12222 }
12223
12224 if (!size)
12225 vm_munmap(hva, old_npages * PAGE_SIZE);
12226
12227 return (void __user *)hva;
12228 }
12229 EXPORT_SYMBOL_GPL(__x86_set_memory_region);
12230
12231 void kvm_arch_pre_destroy_vm(struct kvm *kvm)
12232 {
12233 kvm_mmu_pre_destroy_vm(kvm);
12234 }
12235
12236 void kvm_arch_destroy_vm(struct kvm *kvm)
12237 {
12238 if (current->mm == kvm->mm) {
12239 /*
12240  * Free memory regions allocated on behalf of userspace,
12241  * unless the memory map has changed due to process exit
12242  * or fd copying.
12243  */
12244 mutex_lock(&kvm->slots_lock);
12245 __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
12246 0, 0);
12247 __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
12248 0, 0);
12249 __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
12250 mutex_unlock(&kvm->slots_lock);
12251 }
12252 kvm_unload_vcpu_mmus(kvm);
12253 static_call_cond(kvm_x86_vm_destroy)(kvm);
12254 kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
12255 kvm_pic_destroy(kvm);
12256 kvm_ioapic_destroy(kvm);
12257 kvm_destroy_vcpus(kvm);
12258 kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
12259 kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
12260 kvm_mmu_uninit_vm(kvm);
12261 kvm_page_track_cleanup(kvm);
12262 kvm_xen_destroy_vm(kvm);
12263 kvm_hv_destroy_vm(kvm);
12264 }
12265
12266 static void memslot_rmap_free(struct kvm_memory_slot *slot)
12267 {
12268 int i;
12269
12270 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
12271 kvfree(slot->arch.rmap[i]);
12272 slot->arch.rmap[i] = NULL;
12273 }
12274 }
12275
12276 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
12277 {
12278 int i;
12279
12280 memslot_rmap_free(slot);
12281
12282 for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
12283 kvfree(slot->arch.lpage_info[i - 1]);
12284 slot->arch.lpage_info[i - 1] = NULL;
12285 }
12286
12287 kvm_page_track_free_memslot(slot);
12288 }
12289
12290 int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages)
12291 {
12292 const int sz = sizeof(*slot->arch.rmap[0]);
12293 int i;
12294
12295 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
12296 int level = i + 1;
12297 int lpages = __kvm_mmu_slot_lpages(slot, npages, level);
12298
12299 if (slot->arch.rmap[i])
12300 continue;
12301
12302 slot->arch.rmap[i] = __vcalloc(lpages, sz, GFP_KERNEL_ACCOUNT);
12303 if (!slot->arch.rmap[i]) {
12304 memslot_rmap_free(slot);
12305 return -ENOMEM;
12306 }
12307 }
12308
12309 return 0;
12310 }
12311
12312 static int kvm_alloc_memslot_metadata(struct kvm *kvm,
12313 struct kvm_memory_slot *slot)
12314 {
12315 unsigned long npages = slot->npages;
12316 int i, r;
12317
12318 /*
12319  * Clear out the previous array pointers for the KVM_MR_MOVE case; the
12320  * old memslot's arrays are freed separately (see kvm_arch_free_memslot()
12321  * in kvm_arch_commit_memory_region()) once the move succeeds.
12322  */
12323 memset(&slot->arch, 0, sizeof(slot->arch));
12324
12325 if (kvm_memslots_have_rmaps(kvm)) {
12326 r = memslot_rmap_alloc(slot, npages);
12327 if (r)
12328 return r;
12329 }
12330
12331 for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
12332 struct kvm_lpage_info *linfo;
12333 unsigned long ugfn;
12334 int lpages;
12335 int level = i + 1;
12336
12337 lpages = __kvm_mmu_slot_lpages(slot, npages, level);
12338
12339 linfo = __vcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
12340 if (!linfo)
12341 goto out_free;
12342
12343 slot->arch.lpage_info[i - 1] = linfo;
12344
12345 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
12346 linfo[0].disallow_lpage = 1;
12347 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
12348 linfo[lpages - 1].disallow_lpage = 1;
12349 ugfn = slot->userspace_addr >> PAGE_SHIFT;
12350
12351 /*
12352  * Disable large pages if the gfn and hva are misaligned wrt each other.
12353  */
12354 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1)) {
12355 unsigned long j;
12356
12357 for (j = 0; j < lpages; ++j)
12358 linfo[j].disallow_lpage = 1;
12359 }
12360 }
12361
12362 if (kvm_page_track_create_memslot(kvm, slot, npages))
12363 goto out_free;
12364
12365 return 0;
12366
12367 out_free:
12368 memslot_rmap_free(slot);
12369
12370 for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
12371 kvfree(slot->arch.lpage_info[i - 1]);
12372 slot->arch.lpage_info[i - 1] = NULL;
12373 }
12374 return -ENOMEM;
12375 }
12376
12377 void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
12378 {
12379 struct kvm_vcpu *vcpu;
12380 unsigned long i;
12381
12382 /*
12383  * memslots->generation has been incremented.
12384  * mmio generation may have reached its maximum value.
12385  */
12386 kvm_mmu_invalidate_mmio_sptes(kvm, gen);
12387
12388 /* Force re-initialization of the per-vCPU caches tied to the memslots. */
12389 kvm_for_each_vcpu(i, vcpu, kvm)
12390 kvm_vcpu_kick(vcpu);
12391 }
12392
12393 int kvm_arch_prepare_memory_region(struct kvm *kvm,
12394 const struct kvm_memory_slot *old,
12395 struct kvm_memory_slot *new,
12396 enum kvm_mr_change change)
12397 {
12398 if (change == KVM_MR_CREATE || change == KVM_MR_MOVE) {
12399 if ((new->base_gfn + new->npages - 1) > kvm_mmu_max_gfn())
12400 return -EINVAL;
12401
12402 return kvm_alloc_memslot_metadata(kvm, new);
12403 }
12404
12405 if (change == KVM_MR_FLAGS_ONLY)
12406 memcpy(&new->arch, &old->arch, sizeof(old->arch));
12407 else if (WARN_ON_ONCE(change != KVM_MR_DELETE))
12408 return -EIO;
12409
12410 return 0;
12411 }
12412
12413
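/*
 * Track how many memslots have dirty logging enabled when the vendor
 * module provides CPU-assisted dirty logging (e.g. Intel PML).  All vCPUs
 * are asked to update their dirty-logging state when the count moves
 * between zero and non-zero.
 */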
12414 static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable)
12415 {
12416 struct kvm_arch *ka = &kvm->arch;
12417
12418 if (!kvm_x86_ops.cpu_dirty_log_size)
12419 return;
12420
12421 if ((enable && ++ka->cpu_dirty_logging_count == 1) ||
12422 (!enable && --ka->cpu_dirty_logging_count == 0))
12423 kvm_make_all_cpus_request(kvm, KVM_REQ_UPDATE_CPU_DIRTY_LOGGING);
12424
12425 WARN_ON_ONCE(ka->cpu_dirty_logging_count < 0);
12426 }
12427
12428 static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
12429 struct kvm_memory_slot *old,
12430 const struct kvm_memory_slot *new,
12431 enum kvm_mr_change change)
12432 {
12433 u32 old_flags = old ? old->flags : 0;
12434 u32 new_flags = new ? new->flags : 0;
12435 bool log_dirty_pages = new_flags & KVM_MEM_LOG_DIRTY_PAGES;
12436
12437 /*
12438  * Update CPU dirty logging if dirty logging is being toggled.  This
12439  * applies to all operations.
12440  */
12441 if ((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES)
12442 kvm_mmu_update_cpu_dirty_logging(kvm, log_dirty_pages);
12443
12444
12445
12446
12447
12448
12449
12450
12451
12452
12453
12454
12455
12456
12457
12458
12459 if ((change != KVM_MR_FLAGS_ONLY) || (new_flags & KVM_MEM_READONLY))
12460 return;
12461
12462 /*
12463  * READONLY and non-flags changes were filtered out above, and the only
12464  * other flag is LOG_DIRTY_PAGES, i.e. something is wrong if dirty
12465  * logging isn't being toggled on or off.
12466  */
12467 if (WARN_ON_ONCE(!((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES)))
12468 return;
12469
12470 if (!log_dirty_pages) {
12471
12472
12473
12474
12475
12476
12477
12478
12479
12480
12481
12482
12483
12484 kvm_mmu_zap_collapsible_sptes(kvm, new);
12485 } else {
12486 /*
12487  * Initially-all-set does not require write protecting any page,
12488  * because they're all assumed to be dirty.
12489  */
12490 if (kvm_dirty_log_manual_protect_and_init_set(kvm))
12491 return;
12492
12493 if (READ_ONCE(eager_page_split))
12494 kvm_mmu_slot_try_split_huge_pages(kvm, new, PG_LEVEL_4K);
12495
12496 if (kvm_x86_ops.cpu_dirty_log_size) {
12497 kvm_mmu_slot_leaf_clear_dirty(kvm, new);
12498 kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_2M);
12499 } else {
12500 kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
12501 }
12502
12503
12504
12505
12506
12507
12508
12509
12510
12511
12512
12513
12514
12515
12516
12517
12518
12519
12520
12521
12522
12523
12524
12525
12526
12527
12528
12529
12530
12531
12532
12533
12534
12535
12536
12537
12538
12539
12540
12541
12542
12543
12544
12545 kvm_arch_flush_remote_tlbs_memslot(kvm, new);
12546 }
12547 }
12548
12549 void kvm_arch_commit_memory_region(struct kvm *kvm,
12550 struct kvm_memory_slot *old,
12551 const struct kvm_memory_slot *new,
12552 enum kvm_mr_change change)
12553 {
12554 if (!kvm->arch.n_requested_mmu_pages &&
12555 (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
12556 unsigned long nr_mmu_pages;
12557
12558 nr_mmu_pages = kvm->nr_memslot_pages / KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO;
12559 nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
12560 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
12561 }
12562
12563 kvm_mmu_slot_apply_flags(kvm, old, new, change);
12564
12565 /* Free the arrays associated with the old memslot. */
12566 if (change == KVM_MR_MOVE)
12567 kvm_arch_free_memslot(kvm, old);
12568 }
12569
12570 void kvm_arch_flush_shadow_all(struct kvm *kvm)
12571 {
12572 kvm_mmu_zap_all(kvm);
12573 }
12574
12575 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
12576 struct kvm_memory_slot *slot)
12577 {
12578 kvm_page_track_flush_slot(kvm, slot);
12579 }
12580
12581 static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
12582 {
12583 return (is_guest_mode(vcpu) &&
12584 static_call(kvm_x86_guest_apic_has_interrupt)(vcpu));
12585 }
12586
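/*
 * Returns true if the vCPU has work that should wake it from a halted
 * or blocked state: completed async page faults, pending APIC events,
 * injectable exceptions, NMIs/SMIs/interrupts that can currently be
 * taken, pending Hyper-V timers or Xen events, or a pending triple fault.
 */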
12587 static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
12588 {
12589 if (!list_empty_careful(&vcpu->async_pf.done))
12590 return true;
12591
12592 if (kvm_apic_has_events(vcpu))
12593 return true;
12594
12595 if (vcpu->arch.pv.pv_unhalted)
12596 return true;
12597
12598 if (vcpu->arch.exception.pending)
12599 return true;
12600
12601 if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
12602 (vcpu->arch.nmi_pending &&
12603 static_call(kvm_x86_nmi_allowed)(vcpu, false)))
12604 return true;
12605
12606 if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
12607 (vcpu->arch.smi_pending &&
12608 static_call(kvm_x86_smi_allowed)(vcpu, false)))
12609 return true;
12610
12611 if (kvm_arch_interrupt_allowed(vcpu) &&
12612 (kvm_cpu_has_interrupt(vcpu) ||
12613 kvm_guest_apic_has_interrupt(vcpu)))
12614 return true;
12615
12616 if (kvm_hv_has_stimer_pending(vcpu))
12617 return true;
12618
12619 if (is_guest_mode(vcpu) &&
12620 kvm_x86_ops.nested_ops->hv_timer_pending &&
12621 kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
12622 return true;
12623
12624 if (kvm_xen_has_pending_events(vcpu))
12625 return true;
12626
12627 if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu))
12628 return true;
12629
12630 return false;
12631 }
12632
12633 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
12634 {
12635 return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
12636 }
12637
12638 bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
12639 {
12640 if (kvm_vcpu_apicv_active(vcpu) &&
12641 static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu))
12642 return true;
12643
12644 return false;
12645 }
12646
12647 bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
12648 {
12649 if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
12650 return true;
12651
12652 if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
12653 kvm_test_request(KVM_REQ_SMI, vcpu) ||
12654 kvm_test_request(KVM_REQ_EVENT, vcpu))
12655 return true;
12656
12657 return kvm_arch_dy_has_pending_interrupt(vcpu);
12658 }
12659
12660 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
12661 {
12662 if (vcpu->arch.guest_state_protected)
12663 return true;
12664
12665 return vcpu->arch.preempted_in_kernel;
12666 }
12667
12668 unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
12669 {
12670 return kvm_rip_read(vcpu);
12671 }
12672
12673 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
12674 {
12675 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
12676 }
12677
12678 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
12679 {
12680 return static_call(kvm_x86_interrupt_allowed)(vcpu, false);
12681 }
12682
12683 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
12684 {
12685 /* Can't read the RIP when guest state is protected, just return 0 */
12686 if (vcpu->arch.guest_state_protected)
12687 return 0;
12688
12689 if (is_64_bit_mode(vcpu))
12690 return kvm_rip_read(vcpu);
12691 return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
12692 kvm_rip_read(vcpu));
12693 }
12694 EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
12695
12696 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
12697 {
12698 return kvm_get_linear_rip(vcpu) == linear_rip;
12699 }
12700 EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
12701
12702 unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
12703 {
12704 unsigned long rflags;
12705
12706 rflags = static_call(kvm_x86_get_rflags)(vcpu);
12707 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
12708 rflags &= ~X86_EFLAGS_TF;
12709 return rflags;
12710 }
12711 EXPORT_SYMBOL_GPL(kvm_get_rflags);
12712
12713 static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
12714 {
12715 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
12716 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
12717 rflags |= X86_EFLAGS_TF;
12718 static_call(kvm_x86_set_rflags)(vcpu, rflags);
12719 }
12720
12721 void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
12722 {
12723 __kvm_set_rflags(vcpu, rflags);
12724 kvm_make_request(KVM_REQ_EVENT, vcpu);
12725 }
12726 EXPORT_SYMBOL_GPL(kvm_set_rflags);
12727
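/*
 * Outstanding async page faults are tracked in a small open-addressed
 * hash table of gfns (vcpu->arch.apf.gfns) using linear probing, with ~0
 * marking an empty slot.  The helpers below implement insertion, lookup
 * and the classic open-addressing deletion that repacks the probe chain.
 */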
12728 static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
12729 {
12730 BUILD_BUG_ON(!is_power_of_2(ASYNC_PF_PER_VCPU));
12731
12732 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
12733 }
12734
12735 static inline u32 kvm_async_pf_next_probe(u32 key)
12736 {
12737 return (key + 1) & (ASYNC_PF_PER_VCPU - 1);
12738 }
12739
12740 static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
12741 {
12742 u32 key = kvm_async_pf_hash_fn(gfn);
12743
12744 while (vcpu->arch.apf.gfns[key] != ~0)
12745 key = kvm_async_pf_next_probe(key);
12746
12747 vcpu->arch.apf.gfns[key] = gfn;
12748 }
12749
12750 static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
12751 {
12752 int i;
12753 u32 key = kvm_async_pf_hash_fn(gfn);
12754
12755 for (i = 0; i < ASYNC_PF_PER_VCPU &&
12756 (vcpu->arch.apf.gfns[key] != gfn &&
12757 vcpu->arch.apf.gfns[key] != ~0); i++)
12758 key = kvm_async_pf_next_probe(key);
12759
12760 return key;
12761 }
12762
12763 bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
12764 {
12765 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
12766 }
12767
12768 static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
12769 {
12770 u32 i, j, k;
12771
12772 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
12773
12774 if (WARN_ON_ONCE(vcpu->arch.apf.gfns[i] != gfn))
12775 return;
12776
12777 while (true) {
12778 vcpu->arch.apf.gfns[i] = ~0;
12779 do {
12780 j = kvm_async_pf_next_probe(j);
12781 if (vcpu->arch.apf.gfns[j] == ~0)
12782 return;
12783 k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
12784
12785 /*
12786  * Skip entries whose home slot k lies cyclically in ]i, j]; the first
12787  * entry whose home falls outside that range is moved into the hole at i.
12788  */
12789 } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
12790 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
12791 i = j;
12792 }
12793 }
12794
12795 static inline int apf_put_user_notpresent(struct kvm_vcpu *vcpu)
12796 {
12797 u32 reason = KVM_PV_REASON_PAGE_NOT_PRESENT;
12798
12799 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &reason,
12800 sizeof(reason));
12801 }
12802
12803 static inline int apf_put_user_ready(struct kvm_vcpu *vcpu, u32 token)
12804 {
12805 unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
12806
12807 return kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
12808 &token, offset, sizeof(token));
12809 }
12810
12811 static inline bool apf_pageready_slot_free(struct kvm_vcpu *vcpu)
12812 {
12813 unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
12814 u32 val;
12815
12816 if (kvm_read_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
12817 &val, offset, sizeof(val)))
12818 return false;
12819
12820 return !val;
12821 }
12822
12823 static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
12824 {
12825
12826 if (!kvm_pv_async_pf_enabled(vcpu))
12827 return false;
12828
12829 if (vcpu->arch.apf.send_user_only &&
12830 static_call(kvm_x86_get_cpl)(vcpu) == 0)
12831 return false;
12832
12833 if (is_guest_mode(vcpu)) {
12834 /*
12835  * L1 needs to opt in to getting the special #PF vmexits that are
12836  * used to deliver async page faults while L2 is running.
12837  */
12838 return vcpu->arch.apf.delivery_as_pf_vmexit;
12839 } else {
12840 /*
12841  * Play it safe in case the guest temporarily disables paging.
12842  * The real mode IDT in particular is unlikely to have a #PF
12843  * handler, let alone one that can correctly handle async #PF.
12844  */
12845 return is_paging(vcpu);
12846 }
12847 }
12848
12849 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
12850 {
12851 if (unlikely(!lapic_in_kernel(vcpu) ||
12852 kvm_event_needs_reinjection(vcpu) ||
12853 vcpu->arch.exception.pending))
12854 return false;
12855
12856 if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
12857 return false;
12858
12859 /*
12860  * If interrupts are off we cannot even use an artificial
12861  * halt state.
12862  */
12863 return kvm_arch_interrupt_allowed(vcpu);
12864 }
12865
12866 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
12867 struct kvm_async_pf *work)
12868 {
12869 struct x86_exception fault;
12870
12871 trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa);
12872 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
12873
12874 if (kvm_can_deliver_async_pf(vcpu) &&
12875 !apf_put_user_notpresent(vcpu)) {
12876 fault.vector = PF_VECTOR;
12877 fault.error_code_valid = true;
12878 fault.error_code = 0;
12879 fault.nested_page_fault = false;
12880 fault.address = work->arch.token;
12881 fault.async_page_fault = true;
12882 kvm_inject_page_fault(vcpu, &fault);
12883 return true;
12884 } else {
12885 /*
12886  * It is not possible to deliver a paravirtualized asynchronous
12887  * page fault, but putting the guest in an artificial halt state
12888  * can be beneficial nevertheless: if an interrupt arrives, we
12889  * can deliver it timely and perhaps the guest will schedule
12890  * another process.  When the instruction that triggered a page
12891  * fault is retried, hopefully the page will be ready in the host.
12892  */
12893 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
12894 return false;
12895 }
12896 }
12897
12898 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
12899 struct kvm_async_pf *work)
12900 {
12901 struct kvm_lapic_irq irq = {
12902 .delivery_mode = APIC_DM_FIXED,
12903 .vector = vcpu->arch.apf.vec
12904 };
12905
12906 if (work->wakeup_all)
12907 work->arch.token = ~0;
12908 else
12909 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
12910 trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa);
12911
12912 if ((work->wakeup_all || work->notpresent_injected) &&
12913 kvm_pv_async_pf_enabled(vcpu) &&
12914 !apf_put_user_ready(vcpu, work->arch.token)) {
12915 vcpu->arch.apf.pageready_pending = true;
12916 kvm_apic_set_irq(vcpu, &irq, NULL);
12917 }
12918
12919 vcpu->arch.apf.halted = false;
12920 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
12921 }
12922
12923 void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu)
12924 {
12925 kvm_make_request(KVM_REQ_APF_READY, vcpu);
12926 if (!vcpu->arch.apf.pageready_pending)
12927 kvm_vcpu_kick(vcpu);
12928 }
12929
12930 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
12931 {
12932 if (!kvm_pv_async_pf_enabled(vcpu))
12933 return true;
12934 else
12935 return kvm_lapic_enabled(vcpu) && apf_pageready_slot_free(vcpu);
12936 }
12937
12938 void kvm_arch_start_assignment(struct kvm *kvm)
12939 {
12940 if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1)
12941 static_call_cond(kvm_x86_pi_start_assignment)(kvm);
12942 }
12943 EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
12944
12945 void kvm_arch_end_assignment(struct kvm *kvm)
12946 {
12947 atomic_dec(&kvm->arch.assigned_device_count);
12948 }
12949 EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
12950
12951 bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm)
12952 {
12953 return arch_atomic_read(&kvm->arch.assigned_device_count);
12954 }
12955 EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
12956
12957 void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
12958 {
12959 atomic_inc(&kvm->arch.noncoherent_dma_count);
12960 }
12961 EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
12962
12963 void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
12964 {
12965 atomic_dec(&kvm->arch.noncoherent_dma_count);
12966 }
12967 EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
12968
12969 bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
12970 {
12971 return atomic_read(&kvm->arch.noncoherent_dma_count);
12972 }
12973 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
12974
12975 bool kvm_arch_has_irq_bypass(void)
12976 {
12977 return true;
12978 }
12979
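/*
 * IRQ bypass (e.g. VFIO with posted interrupts): when a producer attaches
 * to an irqfd, ask the vendor module to update the interrupt-remapping /
 * posted-interrupt entry so the device interrupt can be delivered to the
 * guest without a VM-exit, and account the VM as having an assigned device.
 */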
12980 int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
12981 struct irq_bypass_producer *prod)
12982 {
12983 struct kvm_kernel_irqfd *irqfd =
12984 container_of(cons, struct kvm_kernel_irqfd, consumer);
12985 int ret;
12986
12987 irqfd->producer = prod;
12988 kvm_arch_start_assignment(irqfd->kvm);
12989 ret = static_call(kvm_x86_pi_update_irte)(irqfd->kvm,
12990 prod->irq, irqfd->gsi, 1);
12991
12992 if (ret)
12993 kvm_arch_end_assignment(irqfd->kvm);
12994
12995 return ret;
12996 }
12997
12998 void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
12999 struct irq_bypass_producer *prod)
13000 {
13001 int ret;
13002 struct kvm_kernel_irqfd *irqfd =
13003 container_of(cons, struct kvm_kernel_irqfd, consumer);
13004
13005 WARN_ON(irqfd->producer != prod);
13006 irqfd->producer = NULL;
13007
13008
13009
13010
13011
13012
13013
13014 ret = static_call(kvm_x86_pi_update_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 0);
13015 if (ret)
13016 printk(KERN_INFO "irq bypass consumer (token %p) unregistration fails: %d\n",
13017 irqfd->consumer.token, ret);
13018
13019 kvm_arch_end_assignment(irqfd->kvm);
13020 }
13021
13022 int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
13023 uint32_t guest_irq, bool set)
13024 {
13025 return static_call(kvm_x86_pi_update_irte)(kvm, host_irq, guest_irq, set);
13026 }
13027
13028 bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
13029 struct kvm_kernel_irq_routing_entry *new)
13030 {
13031 if (new->type != KVM_IRQ_ROUTING_MSI)
13032 return true;
13033
13034 return !!memcmp(&old->msi, &new->msi, sizeof(new->msi));
13035 }
13036
13037 bool kvm_vector_hashing_enabled(void)
13038 {
13039 return vector_hashing;
13040 }
13041
13042 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
13043 {
13044 return (vcpu->arch.msr_kvm_poll_control & 1) == 0;
13045 }
13046 EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
13047
13048
13049 int kvm_spec_ctrl_test_value(u64 value)
13050 {
13051 /*
13052  * Test whether the host CPU accepts the requested SPEC_CTRL value by
13053  * attempting to write it (with IRQs disabled) and restoring the old
13054  * value afterwards; a faulting read or write means it is unsupported.
13055  */
13056 u64 saved_value;
13057 unsigned long flags;
13058 int ret = 0;
13059
13060 local_irq_save(flags);
13061
13062 if (rdmsrl_safe(MSR_IA32_SPEC_CTRL, &saved_value))
13063 ret = 1;
13064 else if (wrmsrl_safe(MSR_IA32_SPEC_CTRL, value))
13065 ret = 1;
13066 else
13067 wrmsrl(MSR_IA32_SPEC_CTRL, saved_value);
13068
13069 local_irq_restore(flags);
13070
13071 return ret;
13072 }
13073 EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value);
13074
13075 void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code)
13076 {
13077 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
13078 struct x86_exception fault;
13079 u64 access = error_code &
13080 (PFERR_WRITE_MASK | PFERR_FETCH_MASK | PFERR_USER_MASK);
13081
13082 if (!(error_code & PFERR_PRESENT_MASK) ||
13083 mmu->gva_to_gpa(vcpu, mmu, gva, access, &fault) != INVALID_GPA) {
13084 /*
13085  * If vcpu->arch.walk_mmu->gva_to_gpa succeeded, the page
13086  * tables probably do not match the TLB.  Just proceed
13087  * with the error code that the processor gave.
13088  */
13089 fault.vector = PF_VECTOR;
13090 fault.error_code_valid = true;
13091 fault.error_code = error_code;
13092 fault.nested_page_fault = false;
13093 fault.address = gva;
13094 fault.async_page_fault = false;
13095 }
13096 vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault);
13097 }
13098 EXPORT_SYMBOL_GPL(kvm_fixup_and_inject_pf_error);
13099
13100 /*
13101  * Handles kvm_read/write_guest_virt*() result and either injects #PF or
13102  * returns KVM_EXIT_INTERNAL_ERROR for cases not currently handled by KVM.
13103  * The return value indicates whether an exit to userspace is needed.
13104  */
13105 int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
13106 struct x86_exception *e)
13107 {
13108 if (r == X86EMUL_PROPAGATE_FAULT) {
13109 kvm_inject_emulated_page_fault(vcpu, e);
13110 return 1;
13111 }
13112
13113 /*
13114  * In case kvm_read/write_guest_virt*() failed with X86EMUL_IO_NEEDED,
13115  * KVM could in principle complete the access by exiting to userspace
13116  * and performing the I/O, but there doesn't seem to be a real use
13117  * case for such accesses, so simply report an emulation failure /
13118  * internal error for now.
13119  */
13120 kvm_prepare_emulation_failure_exit(vcpu);
13121
13122 return 0;
13123 }
13124 EXPORT_SYMBOL_GPL(kvm_handle_memory_failure);
13125
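/*
 * Common INVPCID emulation for vendor code that intercepts it.  The
 * descriptor is read from guest memory and validated, then translated
 * into the appropriate KVM-level invalidation: a single GVA, a single
 * PCID, or a full guest TLB flush for the (non-)global variants.
 */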
13126 int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
13127 {
13128 bool pcid_enabled;
13129 struct x86_exception e;
13130 struct {
13131 u64 pcid;
13132 u64 gla;
13133 } operand;
13134 int r;
13135
13136 r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
13137 if (r != X86EMUL_CONTINUE)
13138 return kvm_handle_memory_failure(vcpu, r, &e);
13139
13140 if (operand.pcid >> 12 != 0) {
13141 kvm_inject_gp(vcpu, 0);
13142 return 1;
13143 }
13144
13145 pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
13146
13147 switch (type) {
13148 case INVPCID_TYPE_INDIV_ADDR:
13149 if ((!pcid_enabled && (operand.pcid != 0)) ||
13150 is_noncanonical_address(operand.gla, vcpu)) {
13151 kvm_inject_gp(vcpu, 0);
13152 return 1;
13153 }
13154 kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
13155 return kvm_skip_emulated_instruction(vcpu);
13156
13157 case INVPCID_TYPE_SINGLE_CTXT:
13158 if (!pcid_enabled && (operand.pcid != 0)) {
13159 kvm_inject_gp(vcpu, 0);
13160 return 1;
13161 }
13162
13163 kvm_invalidate_pcid(vcpu, operand.pcid);
13164 return kvm_skip_emulated_instruction(vcpu);
13165
13166 case INVPCID_TYPE_ALL_NON_GLOBAL:
13167 /*
13168  * Currently, KVM doesn't mark global entries in the shadow
13169  * page tables, so a non-global flush just degenerates to a
13170  * global flush.  If needed, this could be optimized later
13171  * by keeping track of global entries in the shadow page
13172  * tables.
13173  */
13174 fallthrough;
13175 case INVPCID_TYPE_ALL_INCL_GLOBAL:
13176 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
13177 return kvm_skip_emulated_instruction(vcpu);
13178
13179 default:
13180 kvm_inject_gp(vcpu, 0);
13181 return 1;
13182 }
13183 }
13184 EXPORT_SYMBOL_GPL(kvm_handle_invpcid);
13185
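/*
 * Completion callback for SEV-ES MMIO: userspace has handled the current
 * fragment (up to 8 bytes at a time), so copy read data back, advance to
 * the next piece or fragment, and either resume the guest or queue the
 * next KVM_EXIT_MMIO round trip.
 */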
13186 static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu)
13187 {
13188 struct kvm_run *run = vcpu->run;
13189 struct kvm_mmio_fragment *frag;
13190 unsigned int len;
13191
13192 BUG_ON(!vcpu->mmio_needed);
13193
13194 /* Complete the previous fragment that userspace just handled. */
13195 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
13196 len = min(8u, frag->len);
13197 if (!vcpu->mmio_is_write)
13198 memcpy(frag->data, run->mmio.data, len);
13199
13200 if (frag->len <= 8) {
13201 /* Switch to the next fragment. */
13202 frag++;
13203 vcpu->mmio_cur_fragment++;
13204 } else {
13205 /* Go forward to the next piece of this fragment. */
13206 frag->data += len;
13207 frag->gpa += len;
13208 frag->len -= len;
13209 }
13210
13211 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
13212 vcpu->mmio_needed = 0;
13213 /*
13214  * Every fragment has been handled; let the vCPU resume.
13215  */
13216 return 1;
13217 }
13218
13219 /* More MMIO is needed: hand the next piece to userspace. */
13220 run->mmio.phys_addr = frag->gpa;
13221 run->mmio.len = min(8u, frag->len);
13222 run->mmio.is_write = vcpu->mmio_is_write;
13223 if (run->mmio.is_write)
13224 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
13225 run->exit_reason = KVM_EXIT_MMIO;
13226
13227 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
13228
13229 return 0;
13230 }
13231
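/*
 * Emulate an MMIO write for an SEV-ES guest using data supplied via the
 * GHCB.  Whatever the in-kernel device models cannot handle is packaged
 * as a single MMIO fragment and forwarded to userspace as KVM_EXIT_MMIO.
 */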
13232 int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
13233 void *data)
13234 {
13235 int handled;
13236 struct kvm_mmio_fragment *frag;
13237
13238 if (!data)
13239 return -EINVAL;
13240
13241 handled = write_emultor.read_write_mmio(vcpu, gpa, bytes, data);
13242 if (handled == bytes)
13243 return 1;
13244
13245 bytes -= handled;
13246 gpa += handled;
13247 data += handled;
13248
13249 /* Track the unhandled remainder as a single MMIO fragment. */
13250 frag = vcpu->mmio_fragments;
13251 vcpu->mmio_nr_fragments = 1;
13252 frag->len = bytes;
13253 frag->gpa = gpa;
13254 frag->data = data;
13255
13256 vcpu->mmio_needed = 1;
13257 vcpu->mmio_cur_fragment = 0;
13258
13259 vcpu->run->mmio.phys_addr = gpa;
13260 vcpu->run->mmio.len = min(8u, frag->len);
13261 vcpu->run->mmio.is_write = 1;
13262 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
13263 vcpu->run->exit_reason = KVM_EXIT_MMIO;
13264
13265 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
13266
13267 return 0;
13268 }
13269 EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_write);
13270
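/*
 * MMIO read counterpart of kvm_sev_es_mmio_write().  The destination
 * buffer is only filled once the data is available: either directly by
 * the in-kernel emulation or by complete_sev_es_emulated_mmio() after
 * userspace has serviced the KVM_EXIT_MMIO exit.
 */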
13271 int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
13272 void *data)
13273 {
13274 int handled;
13275 struct kvm_mmio_fragment *frag;
13276
13277 if (!data)
13278 return -EINVAL;
13279
13280 handled = read_emultor.read_write_mmio(vcpu, gpa, bytes, data);
13281 if (handled == bytes)
13282 return 1;
13283
13284 bytes -= handled;
13285 gpa += handled;
13286 data += handled;
13287
13288 /* Track the unhandled remainder as a single MMIO fragment. */
13289 frag = vcpu->mmio_fragments;
13290 vcpu->mmio_nr_fragments = 1;
13291 frag->len = bytes;
13292 frag->gpa = gpa;
13293 frag->data = data;
13294
13295 vcpu->mmio_needed = 1;
13296 vcpu->mmio_cur_fragment = 0;
13297
13298 vcpu->run->mmio.phys_addr = gpa;
13299 vcpu->run->mmio.len = min(8u, frag->len);
13300 vcpu->run->mmio.is_write = 0;
13301 vcpu->run->exit_reason = KVM_EXIT_MMIO;
13302
13303 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
13304
13305 return 0;
13306 }
13307 EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);
13308
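/*
 * SEV-ES string I/O (INS/OUTS) helpers.  The guest-provided buffer is
 * consumed in chunks of at most PAGE_SIZE bytes; sev_pio_data and
 * sev_pio_count track how much of it is still outstanding.
 */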
13309 static void advance_sev_es_emulated_pio(struct kvm_vcpu *vcpu, unsigned count, int size)
13310 {
13311 vcpu->arch.sev_pio_count -= count;
13312 vcpu->arch.sev_pio_data += count * size;
13313 }
13314
13315 static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
13316 unsigned int port);
13317
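/*
 * The forward declaration above is needed because the OUTS path and its
 * userspace-completion callback call each other until sev_pio_count
 * reaches zero.
 */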
13318 static int complete_sev_es_emulated_outs(struct kvm_vcpu *vcpu)
13319 {
13320 int size = vcpu->arch.pio.size;
13321 int port = vcpu->arch.pio.port;
13322
13323 vcpu->arch.pio.count = 0;
13324 if (vcpu->arch.sev_pio_count)
13325 return kvm_sev_es_outs(vcpu, size, port);
13326 return 1;
13327 }
13328
13329 static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
13330 unsigned int port)
13331 {
13332 for (;;) {
13333 unsigned int count =
13334 min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
13335 int ret = emulator_pio_out(vcpu, size, port, vcpu->arch.sev_pio_data, count);
13336
13337 /* The data has already been copied out by emulator_pio_out(). */
13338 advance_sev_es_emulated_pio(vcpu, count, size);
13339 if (!ret)
13340 break;
13341
13342 /* The chunk was handled entirely in the kernel; stop if nothing is left. */
13343 if (!vcpu->arch.sev_pio_count)
13344 return 1;
13345 }
13346
13347 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_outs;
13348 return 0;
13349 }
13350
13351 static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
13352 unsigned int port);
13353
13354 static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
13355 {
13356 unsigned count = vcpu->arch.pio.count;
13357 int size = vcpu->arch.pio.size;
13358 int port = vcpu->arch.pio.port;
13359
13360 complete_emulator_pio_in(vcpu, vcpu->arch.sev_pio_data);
13361 advance_sev_es_emulated_pio(vcpu, count, size);
13362 if (vcpu->arch.sev_pio_count)
13363 return kvm_sev_es_ins(vcpu, size, port);
13364 return 1;
13365 }
13366
13367 static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
13368 unsigned int port)
13369 {
13370 for (;;) {
13371 unsigned int count =
13372 min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
13373 if (!emulator_pio_in(vcpu, size, port, vcpu->arch.sev_pio_data, count))
13374 break;
13375
13376 /* In-kernel emulation handled this chunk; consume it and keep going. */
13377 advance_sev_es_emulated_pio(vcpu, count, size);
13378 if (!vcpu->arch.sev_pio_count)
13379 return 1;
13380 }
13381
13382 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins;
13383 return 0;
13384 }
13385
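/*
 * Entry point used by the SEV-ES GHCB code for emulated string I/O:
 * stash the buffer and repetition count, then hand off to the INS or
 * OUTS state machine above.
 */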
13386 int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
13387 unsigned int port, void *data, unsigned int count,
13388 int in)
13389 {
13390 vcpu->arch.sev_pio_data = data;
13391 vcpu->arch.sev_pio_count = count;
13392 return in ? kvm_sev_es_ins(vcpu, size, port)
13393 : kvm_sev_es_outs(vcpu, size, port);
13394 }
13395 EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
13396
13397 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry);
13398 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
13399 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
13400 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
13401 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
13402 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
13403 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
13404 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
13405 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
13406 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
13407 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
13408 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter_failed);
13409 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
13410 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
13411 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
13412 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
13413 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window_update);
13414 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
13415 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
13416 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
13417 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
13418 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
13419 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_kick_vcpu_slowpath);
13420 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_doorbell);
13421 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_accept_irq);
13422 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
13423 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
13424 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
13425 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
13426
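/*
 * kvm.ko itself only performs the MMU's module-scope setup here; the bulk
 * of initialization runs when a vendor module (kvm-intel or kvm-amd) loads
 * and calls kvm_init().
 */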
13427 static int __init kvm_x86_init(void)
13428 {
13429 kvm_mmu_x86_module_init();
13430 return 0;
13431 }
13432 module_init(kvm_x86_init);
13433
13434 static void __exit kvm_x86_exit(void)
13435 {
13436 /*
13437  * If module_init() is implemented, module_exit() must also be
13438  * implemented to allow the module to be unloaded.
13439  */
13440 }
13441 module_exit(kvm_x86_exit);