0001 #define pr_fmt(fmt) "SVM: " fmt
0002
0003 #include <linux/kvm_host.h>
0004
0005 #include "irq.h"
0006 #include "mmu.h"
0007 #include "kvm_cache_regs.h"
0008 #include "x86.h"
0009 #include "cpuid.h"
0010 #include "pmu.h"
0011
0012 #include <linux/module.h>
0013 #include <linux/mod_devicetable.h>
0014 #include <linux/kernel.h>
0015 #include <linux/vmalloc.h>
0016 #include <linux/highmem.h>
0017 #include <linux/amd-iommu.h>
0018 #include <linux/sched.h>
0019 #include <linux/trace_events.h>
0020 #include <linux/slab.h>
0021 #include <linux/hashtable.h>
0022 #include <linux/objtool.h>
0023 #include <linux/psp-sev.h>
0024 #include <linux/file.h>
0025 #include <linux/pagemap.h>
0026 #include <linux/swap.h>
0027 #include <linux/rwsem.h>
0028 #include <linux/cc_platform.h>
0029
0030 #include <asm/apic.h>
0031 #include <asm/perf_event.h>
0032 #include <asm/tlbflush.h>
0033 #include <asm/desc.h>
0034 #include <asm/debugreg.h>
0035 #include <asm/kvm_para.h>
0036 #include <asm/irq_remapping.h>
0037 #include <asm/spec-ctrl.h>
0038 #include <asm/cpu_device_id.h>
0039 #include <asm/traps.h>
0040 #include <asm/fpu/api.h>
0041
0042 #include <asm/virtext.h>
0043 #include "trace.h"
0044
0045 #include "svm.h"
0046 #include "svm_ops.h"
0047
0048 #include "kvm_onhyperv.h"
0049 #include "svm_onhyperv.h"
0050
0051 MODULE_AUTHOR("Qumranet");
0052 MODULE_LICENSE("GPL");
0053
0054 #ifdef MODULE
0055 static const struct x86_cpu_id svm_cpu_id[] = {
0056 X86_MATCH_FEATURE(X86_FEATURE_SVM, NULL),
0057 {}
0058 };
0059 MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
0060 #endif
0061
0062 #define SEG_TYPE_LDT 2
0063 #define SEG_TYPE_BUSY_TSS16 3
0064
0065 static bool erratum_383_found __read_mostly;
0066
0067 u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
0068
/*
 * AMD OS Visible Workaround (OSVW) state exposed to guests.  osvw_len starts
 * at 4 and is clamped to the smallest length reported by any online CPU;
 * osvw_status accumulates the worst-case erratum status bits across CPUs.
 */
0073 static uint64_t osvw_len = 4, osvw_status;
0074
0075 static DEFINE_PER_CPU(u64, current_tsc_ratio);
0076
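/*
 * x2APIC registers live in MSR space at APIC_BASE_MSR (0x800) plus the MMIO
 * offset shifted right by 4.  For example, the TPR (APIC_TASKPRI, offset
 * 0x80) becomes MSR 0x808, so X2APIC_MSR(APIC_TASKPRI) lets the
 * direct_access_msrs[] table below name x2APIC MSRs by their familiar APIC
 * register offsets.
 */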
0077 #define X2APIC_MSR(x) (APIC_BASE_MSR + (x >> 4))
0078
0079 static const struct svm_direct_access_msrs {
0080 u32 index;
0081 bool always;
0082 } direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = {
0083 { .index = MSR_STAR, .always = true },
0084 { .index = MSR_IA32_SYSENTER_CS, .always = true },
0085 { .index = MSR_IA32_SYSENTER_EIP, .always = false },
0086 { .index = MSR_IA32_SYSENTER_ESP, .always = false },
0087 #ifdef CONFIG_X86_64
0088 { .index = MSR_GS_BASE, .always = true },
0089 { .index = MSR_FS_BASE, .always = true },
0090 { .index = MSR_KERNEL_GS_BASE, .always = true },
0091 { .index = MSR_LSTAR, .always = true },
0092 { .index = MSR_CSTAR, .always = true },
0093 { .index = MSR_SYSCALL_MASK, .always = true },
0094 #endif
0095 { .index = MSR_IA32_SPEC_CTRL, .always = false },
0096 { .index = MSR_IA32_PRED_CMD, .always = false },
0097 { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
0098 { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
0099 { .index = MSR_IA32_LASTINTFROMIP, .always = false },
0100 { .index = MSR_IA32_LASTINTTOIP, .always = false },
0101 { .index = MSR_EFER, .always = false },
0102 { .index = MSR_IA32_CR_PAT, .always = false },
0103 { .index = MSR_AMD64_SEV_ES_GHCB, .always = true },
0104 { .index = MSR_TSC_AUX, .always = false },
0105 { .index = X2APIC_MSR(APIC_ID), .always = false },
0106 { .index = X2APIC_MSR(APIC_LVR), .always = false },
0107 { .index = X2APIC_MSR(APIC_TASKPRI), .always = false },
0108 { .index = X2APIC_MSR(APIC_ARBPRI), .always = false },
0109 { .index = X2APIC_MSR(APIC_PROCPRI), .always = false },
0110 { .index = X2APIC_MSR(APIC_EOI), .always = false },
0111 { .index = X2APIC_MSR(APIC_RRR), .always = false },
0112 { .index = X2APIC_MSR(APIC_LDR), .always = false },
0113 { .index = X2APIC_MSR(APIC_DFR), .always = false },
0114 { .index = X2APIC_MSR(APIC_SPIV), .always = false },
0115 { .index = X2APIC_MSR(APIC_ISR), .always = false },
0116 { .index = X2APIC_MSR(APIC_TMR), .always = false },
0117 { .index = X2APIC_MSR(APIC_IRR), .always = false },
0118 { .index = X2APIC_MSR(APIC_ESR), .always = false },
0119 { .index = X2APIC_MSR(APIC_ICR), .always = false },
0120 { .index = X2APIC_MSR(APIC_ICR2), .always = false },

/*
 * Note: APIC_LVTT (0x832) is deliberately absent.  AMD hardware does not
 * virtualize the TSC-deadline timer mode, which KVM emulates, so accesses
 * to LVTT must always be intercepted.
 */
0129 { .index = X2APIC_MSR(APIC_LVTTHMR), .always = false },
0130 { .index = X2APIC_MSR(APIC_LVTPC), .always = false },
0131 { .index = X2APIC_MSR(APIC_LVT0), .always = false },
0132 { .index = X2APIC_MSR(APIC_LVT1), .always = false },
0133 { .index = X2APIC_MSR(APIC_LVTERR), .always = false },
0134 { .index = X2APIC_MSR(APIC_TMICT), .always = false },
0135 { .index = X2APIC_MSR(APIC_TMCCT), .always = false },
0136 { .index = X2APIC_MSR(APIC_TDCR), .always = false },
0137 { .index = MSR_INVALID, .always = false },
0138 };
0139
/*
 * These module parameters configure Pause-Loop Exiting:
 *
 * pause_filter_count: on processors that support PAUSE filtering (indicated
 *	by CPUID Fn8000_000A_EDX), the VMCB provides a 16-bit pause filter
 *	count that is loaded into an internal counter on VMRUN.  Each executed
 *	PAUSE decrements the counter; when it reaches zero a #VMEXIT is
 *	generated if the PAUSE intercept is enabled.  See AMD APM Vol 2,
 *	Section 15.14.4 "Pause Intercept Filtering".
 *
 * pause_filter_thresh: processors with advanced PAUSE filtering add a 16-bit
 *	threshold, a cycle count used to reset the pause counter.  On each
 *	PAUSE the hardware compares the cycles elapsed since the previous
 *	PAUSE against this threshold: if fewer cycles elapsed the internal
 *	count is decremented, otherwise it is reset to pause_filter_count.
 *	This filters out PAUSE instructions that are not part of a tight
 *	spin loop.
 */
0170 static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP;
0171 module_param(pause_filter_thresh, ushort, 0444);
0172
0173 static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW;
0174 module_param(pause_filter_count, ushort, 0444);
0175
/* Default doubles per-vcpu window every exit. */
0177 static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
0178 module_param(pause_filter_count_grow, ushort, 0444);
0179
/* Default resets per-vcpu window every exit to pause_filter_count. */
0181 static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
0182 module_param(pause_filter_count_shrink, ushort, 0444);
0183
/* Default is to compute the maximum so we can never overflow. */
0185 static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
0186 module_param(pause_filter_count_max, ushort, 0444);
0187
/*
 * Use nested page tables (NPT) by default; svm_hardware_setup() forces this
 * off when the hardware or host configuration doesn't support it.
 */
0192 bool npt_enabled = true;
0193 module_param_named(npt, npt_enabled, bool, 0444);
0194
/* allow nested virtualization in KVM/SVM */
0196 static int nested = true;
0197 module_param(nested, int, S_IRUGO);
0198
/* enable/disable Next RIP Save */
0200 static int nrips = true;
0201 module_param(nrips, int, 0444);
0202
/* enable/disable Virtual VMLOAD VMSAVE */
0204 static int vls = true;
0205 module_param(vls, int, 0444);
0206
/* enable/disable Virtual GIF */
0208 int vgif = true;
0209 module_param(vgif, int, 0444);
0210
/* enable/disable LBR virtualization */
0212 static int lbrv = true;
0213 module_param(lbrv, int, 0444);
0214
0215 static int tsc_scaling = true;
0216 module_param(tsc_scaling, int, 0444);
0217
/*
 * enable/disable AVIC.  The defaults for APICv support differ between VMX
 * and SVM, so module_param_named() cannot be shared; AVIC stays off unless
 * explicitly requested.
 */
0222 static bool avic;
0223 module_param(avic, bool, 0444);
0224
0225 bool __read_mostly dump_invalid_vmcb;
0226 module_param(dump_invalid_vmcb, bool, 0644);
0227
0228
0229 bool intercept_smi = true;
0230 module_param(intercept_smi, bool, 0444);
0231
0232
0233 static bool svm_gp_erratum_intercept = true;
0234
0235 static u8 rsm_ins_bytes[] = "\x0f\xaa";
0236
0237 static unsigned long iopm_base;
0238
0239 struct kvm_ldttss_desc {
0240 u16 limit0;
0241 u16 base0;
0242 unsigned base1:8, type:5, dpl:2, p:1;
0243 unsigned limit1:4, zero0:3, g:1, base2:8;
0244 u32 base3;
0245 u32 zero1;
0246 } __attribute__((packed));
0247
0248 DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
0249
/*
 * Only MSR_TSC_AUX is switched via the user return hook.  EFER is switched
 * via the VMCB, and the SYSCALL/SYSENTER MSRs are handled by VMLOAD/VMSAVE.
 *
 * RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to
 * defer the restoration of TSC_AUX until the CPU returns to userspace.
 */
0257 static int tsc_aux_uret_slot __read_mostly = -1;
0258
0259 static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
0260
0261 #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
0262 #define MSRS_RANGE_SIZE 2048
0263 #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
0264
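/*
 * The MSR permission map reserves two bits per MSR (the first of the pair
 * for the read intercept, the second for the write intercept), split into
 * 2 KiB regions for the three architectural MSR ranges in msrpm_ranges[].
 * svm_msrpm_offset() returns the u32 index of the word holding an MSR's bit
 * pair.  Worked example (a sketch of the arithmetic only): MSR_STAR
 * (0xc0000081) falls in range 1, so the byte offset is (0x81 / 4) +
 * 1 * 2048 = 2080 and the returned u32 index is 2080 / 4 = 520; within that
 * word, set_msr_interception_bitmap() then uses bits 2 * (0x81 & 0xf) = 2
 * (read) and 3 (write).
 */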
0265 u32 svm_msrpm_offset(u32 msr)
0266 {
0267 u32 offset;
0268 int i;
0269
0270 for (i = 0; i < NUM_MSR_MAPS; i++) {
0271 if (msr < msrpm_ranges[i] ||
0272 msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
0273 continue;
0274
0275 offset = (msr - msrpm_ranges[i]) / 4;
0276 offset += (i * MSRS_RANGE_SIZE);
0277
0278
0279 return offset / 4;
0280 }
0281
0282
0283 return MSR_INVALID;
0284 }
0285
0286 static void svm_flush_tlb_current(struct kvm_vcpu *vcpu);
0287
0288 static int get_npt_level(void)
0289 {
0290 #ifdef CONFIG_X86_64
0291 return pgtable_l5_enabled() ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
0292 #else
0293 return PT32E_ROOT_LEVEL;
0294 #endif
0295 }
0296
0297 int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
0298 {
0299 struct vcpu_svm *svm = to_svm(vcpu);
0300 u64 old_efer = vcpu->arch.efer;
0301 vcpu->arch.efer = efer;
0302
0303 if (!npt_enabled) {
/* Shadow paging assumes NX to be available. */
0305 efer |= EFER_NX;
0306
0307 if (!(efer & EFER_LMA))
0308 efer &= ~EFER_LME;
0309 }
0310
0311 if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
0312 if (!(efer & EFER_SVME)) {
0313 svm_leave_nested(vcpu);
0314 svm_set_gif(svm, true);
/* #GP intercept is still needed for the VMware backdoor. */
0316 if (!enable_vmware_backdoor)
0317 clr_exception_intercept(svm, GP_VECTOR);

/*
 * Free the nested guest state, unless we are in SMM; in that case we
 * will return to the nested guest as soon as we leave SMM.
 */
0324 if (!is_smm(vcpu))
0325 svm_free_nested(svm);
0326
0327 } else {
0328 int ret = svm_allocate_nested(svm);
0329
0330 if (ret) {
0331 vcpu->arch.efer = old_efer;
0332 return ret;
0333 }
0334
/*
 * Never intercept #GP for SEV guests, KVM can't decrypt guest memory
 * to work around the erratum.
 */
0339 if (svm_gp_erratum_intercept && !sev_guest(vcpu->kvm))
0340 set_exception_intercept(svm, GP_VECTOR);
0341 }
0342 }
0343
0344 svm->vmcb->save.efer = efer | EFER_SVME;
0345 vmcb_mark_dirty(svm->vmcb, VMCB_CR);
0346 return 0;
0347 }
0348
0349 static int is_external_interrupt(u32 info)
0350 {
0351 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
0352 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
0353 }
0354
0355 static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
0356 {
0357 struct vcpu_svm *svm = to_svm(vcpu);
0358 u32 ret = 0;
0359
0360 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
0361 ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
0362 return ret;
0363 }
0364
0365 static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
0366 {
0367 struct vcpu_svm *svm = to_svm(vcpu);
0368
0369 if (mask == 0)
0370 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
0371 else
0372 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
0373
0374 }
0375
0376 static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
0377 bool commit_side_effects)
0378 {
0379 struct vcpu_svm *svm = to_svm(vcpu);
0380 unsigned long old_rflags;
0381
/*
 * SEV-ES does not expose the next RIP.  The RIP update is controlled by
 * the type of exit and the #VC handler in the guest.
 */
0386 if (sev_es_guest(vcpu->kvm))
0387 goto done;
0388
0389 if (nrips && svm->vmcb->control.next_rip != 0) {
0390 WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
0391 svm->next_rip = svm->vmcb->control.next_rip;
0392 }
0393
0394 if (!svm->next_rip) {
0395 if (unlikely(!commit_side_effects))
0396 old_rflags = svm->vmcb->save.rflags;
0397
0398 if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
0399 return 0;
0400
0401 if (unlikely(!commit_side_effects))
0402 svm->vmcb->save.rflags = old_rflags;
0403 } else {
0404 kvm_rip_write(vcpu, svm->next_rip);
0405 }
0406
0407 done:
0408 if (likely(commit_side_effects))
0409 svm_set_interrupt_shadow(vcpu, 0);
0410
0411 return 1;
0412 }
0413
0414 static int svm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
0415 {
0416 return __svm_skip_emulated_instruction(vcpu, true);
0417 }
0418
0419 static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu)
0420 {
0421 unsigned long rip, old_rip = kvm_rip_read(vcpu);
0422 struct vcpu_svm *svm = to_svm(vcpu);
0423
/*
 * Due to architectural shortcomings, the CPU doesn't always provide
 * NextRIP, e.g. if KVM intercepted an exception that occurred while
 * the CPU was vectoring an INTO/INT3 in the guest.  Temporarily skip
 * the instruction even if NextRIP is supported to acquire the next
 * RIP so that it can be shoved into the NextRIP field, otherwise
 * hardware will fail to advance guest RIP during event injection.
 * Drop the exception/interrupt if emulation fails and effectively
 * retry the instruction, it's the least awful option.  If NRIPS is
 * in use, the skip must not commit any side effects such as clearing
 * the interrupt shadow or RFLAGS.RF.
 */
0436 if (!__svm_skip_emulated_instruction(vcpu, !nrips))
0437 return -EIO;
0438
0439 rip = kvm_rip_read(vcpu);
0440
/*
 * Save the injection information, even when using next_rip, as the
 * VMCB's next_rip will be lost (cleared on VM-Exit) if the injection
 * doesn't complete due to a VM-Exit occurring while the CPU is
 * vectoring the event.  Decoding the instruction isn't guaranteed to
 * work as there may be no backing instruction, e.g. if the event is
 * being injected by L1 for L2, or if the guest is patching INT3 into
 * a different instruction.
 */
0450 svm->soft_int_injected = true;
0451 svm->soft_int_csbase = svm->vmcb->save.cs.base;
0452 svm->soft_int_old_rip = old_rip;
0453 svm->soft_int_next_rip = rip;
0454
0455 if (nrips)
0456 kvm_rip_write(vcpu, old_rip);
0457
0458 if (static_cpu_has(X86_FEATURE_NRIPS))
0459 svm->vmcb->control.next_rip = rip;
0460
0461 return 0;
0462 }
0463
0464 static void svm_queue_exception(struct kvm_vcpu *vcpu)
0465 {
0466 struct vcpu_svm *svm = to_svm(vcpu);
0467 unsigned nr = vcpu->arch.exception.nr;
0468 bool has_error_code = vcpu->arch.exception.has_error_code;
0469 u32 error_code = vcpu->arch.exception.error_code;
0470
0471 kvm_deliver_exception_payload(vcpu);
0472
0473 if (kvm_exception_is_soft(nr) &&
0474 svm_update_soft_interrupt_rip(vcpu))
0475 return;
0476
0477 svm->vmcb->control.event_inj = nr
0478 | SVM_EVTINJ_VALID
0479 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
0480 | SVM_EVTINJ_TYPE_EXEPT;
0481 svm->vmcb->control.event_inj_err = error_code;
0482 }
0483
0484 static void svm_init_erratum_383(void)
0485 {
0486 u32 low, high;
0487 int err;
0488 u64 val;
0489
0490 if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH))
0491 return;
0492
0493
0494 val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
0495 if (err)
0496 return;
0497
0498 val |= (1ULL << 47);
0499
0500 low = lower_32_bits(val);
0501 high = upper_32_bits(val);
0502
0503 native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
0504
0505 erratum_383_found = true;
0506 }
0507
0508 static void svm_init_osvw(struct kvm_vcpu *vcpu)
0509 {
/*
 * Guests should see errata 400 and 415 as fixed (assuming that
 * HLT and IO instructions are intercepted).
 */
0514 vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
0515 vcpu->arch.osvw.status = osvw_status & ~(6ULL);
0516
/*
 * By increasing VCPU's osvw.length to 3 we are telling the guest that
 * all osvw.status bits inside that length, including bit 0 (which is
 * reserved for erratum 298), are valid.  However, if the host
 * processor's osvw_len is 0 then osvw_status[0] carries no information.
 * We need to be conservative here and therefore we tell the guest that
 * erratum 298 is present (because we really don't know).
 */
0525 if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
0526 vcpu->arch.osvw.status |= 1;
0527 }
0528
0529 static int has_svm(void)
0530 {
0531 const char *msg;
0532
0533 if (!cpu_has_svm(&msg)) {
0534 printk(KERN_INFO "has_svm: %s\n", msg);
0535 return 0;
0536 }
0537
0538 if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
0539 pr_info("KVM is unsupported when running as an SEV guest\n");
0540 return 0;
0541 }
0542
0543 return 1;
0544 }
0545
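/*
 * MSR_AMD64_TSC_RATIO holds an 8.32 fixed-point multiplier, so
 * SVM_TSC_RATIO_DEFAULT (1ULL << 32) is the 1:1 ratio.  Writing the MSR is
 * not free, so current_tsc_ratio caches the value last programmed on each
 * physical CPU and the WRMSR is skipped when nothing changed, e.g. when
 * vCPUs with the same scaling ratio run back to back on one CPU.
 */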
0546 void __svm_write_tsc_multiplier(u64 multiplier)
0547 {
0548 preempt_disable();
0549
0550 if (multiplier == __this_cpu_read(current_tsc_ratio))
0551 goto out;
0552
0553 wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
0554 __this_cpu_write(current_tsc_ratio, multiplier);
0555 out:
0556 preempt_enable();
0557 }
0558
0559 static void svm_hardware_disable(void)
0560 {
0561
0562 if (tsc_scaling)
0563 __svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
0564
0565 cpu_svm_disable();
0566
0567 amd_pmu_disable_virt();
0568 }
0569
0570 static int svm_hardware_enable(void)
0571 {
0572
0573 struct svm_cpu_data *sd;
0574 uint64_t efer;
0575 struct desc_struct *gdt;
0576 int me = raw_smp_processor_id();
0577
0578 rdmsrl(MSR_EFER, efer);
0579 if (efer & EFER_SVME)
0580 return -EBUSY;
0581
0582 if (!has_svm()) {
0583 pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
0584 return -EINVAL;
0585 }
0586 sd = per_cpu(svm_data, me);
0587 if (!sd) {
0588 pr_err("%s: svm_data is NULL on %d\n", __func__, me);
0589 return -EINVAL;
0590 }
0591
0592 sd->asid_generation = 1;
0593 sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
0594 sd->next_asid = sd->max_asid + 1;
0595 sd->min_asid = max_sev_asid + 1;
0596
0597 gdt = get_current_gdt_rw();
0598 sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
0599
0600 wrmsrl(MSR_EFER, efer | EFER_SVME);
0601
0602 wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area));
0603
0604 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
/*
 * Write the default ratio even if TSC scaling is not going to be
 * used, so a stale multiplier left by a previous user of this CPU
 * can't leak into guests.
 */
0609 __svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
0610 }
0611
/*
 * Get OSVW bits.
 *
 * Note that it is possible to have a system with mixed processor
 * revisions and therefore different OSVW bits.  If bits are not the same
 * on different processors then choose the worst case (i.e. if the
 * erratum is present on one processor and not on another then assume
 * that the erratum is present everywhere).
 */
0622 if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
0623 uint64_t len, status = 0;
0624 int err;
0625
0626 len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
0627 if (!err)
0628 status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
0629 &err);
0630
0631 if (err)
0632 osvw_status = osvw_len = 0;
0633 else {
0634 if (len < osvw_len)
0635 osvw_len = len;
0636 osvw_status |= status;
0637 osvw_status &= (1ULL << osvw_len) - 1;
0638 }
0639 } else
0640 osvw_status = osvw_len = 0;
0641
0642 svm_init_erratum_383();
0643
0644 amd_pmu_enable_virt();
0645
0646 return 0;
0647 }
0648
0649 static void svm_cpu_uninit(int cpu)
0650 {
0651 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
0652
0653 if (!sd)
0654 return;
0655
0656 per_cpu(svm_data, cpu) = NULL;
0657 kfree(sd->sev_vmcbs);
0658 __free_page(sd->save_area);
0659 kfree(sd);
0660 }
0661
0662 static int svm_cpu_init(int cpu)
0663 {
0664 struct svm_cpu_data *sd;
0665 int ret = -ENOMEM;
0666
0667 sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
0668 if (!sd)
0669 return ret;
0670 sd->cpu = cpu;
0671 sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO);
0672 if (!sd->save_area)
0673 goto free_cpu_data;
0674
0675 ret = sev_cpu_init(sd);
0676 if (ret)
0677 goto free_save_area;
0678
0679 per_cpu(svm_data, cpu) = sd;
0680
0681 return 0;
0682
0683 free_save_area:
0684 __free_page(sd->save_area);
0685 free_cpu_data:
0686 kfree(sd);
0687 return ret;
0688
0689 }
0690
0691 static int direct_access_msr_slot(u32 msr)
0692 {
0693 u32 i;
0694
0695 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
0696 if (direct_access_msrs[i].index == msr)
0697 return i;
0698
0699 return -ENOENT;
0700 }
0701
0702 static void set_shadow_msr_intercept(struct kvm_vcpu *vcpu, u32 msr, int read,
0703 int write)
0704 {
0705 struct vcpu_svm *svm = to_svm(vcpu);
0706 int slot = direct_access_msr_slot(msr);
0707
0708 if (slot == -ENOENT)
0709 return;
0710
0711
0712 if (read)
0713 set_bit(slot, svm->shadow_msr_intercept.read);
0714 else
0715 clear_bit(slot, svm->shadow_msr_intercept.read);
0716
0717 if (write)
0718 set_bit(slot, svm->shadow_msr_intercept.write);
0719 else
0720 clear_bit(slot, svm->shadow_msr_intercept.write);
0721 }
0722
0723 static bool valid_msr_intercept(u32 index)
0724 {
0725 return direct_access_msr_slot(index) != -ENOENT;
0726 }
0727
0728 static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
0729 {
0730 u8 bit_write;
0731 unsigned long tmp;
0732 u32 offset;
0733 u32 *msrpm;
0734
0735 msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
0736 to_svm(vcpu)->msrpm;
0737
0738 offset = svm_msrpm_offset(msr);
0739 bit_write = 2 * (msr & 0x0f) + 1;
0740 tmp = msrpm[offset];
0741
0742 BUG_ON(offset == MSR_INVALID);
0743
0744 return !!test_bit(bit_write, &tmp);
0745 }
0746
0747 static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm,
0748 u32 msr, int read, int write)
0749 {
0750 struct vcpu_svm *svm = to_svm(vcpu);
0751 u8 bit_read, bit_write;
0752 unsigned long tmp;
0753 u32 offset;
0754
/*
 * If this WARN triggers, extend the direct_access_msrs list at the top
 * of this file.
 */
WARN_ON(!valid_msr_intercept(msr));

/* Force accesses denied by the userspace MSR filter to be intercepted. */
0762 if (read && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ))
0763 read = 0;
0764
0765 if (write && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE))
0766 write = 0;
0767
0768 offset = svm_msrpm_offset(msr);
0769 bit_read = 2 * (msr & 0x0f);
0770 bit_write = 2 * (msr & 0x0f) + 1;
0771 tmp = msrpm[offset];
0772
0773 BUG_ON(offset == MSR_INVALID);
0774
0775 read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp);
0776 write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
0777
0778 msrpm[offset] = tmp;
0779
0780 svm_hv_vmcb_dirty_nested_enlightenments(vcpu);
0781 svm->nested.force_msr_bitmap_recalc = true;
0782 }
0783
0784 void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
0785 int read, int write)
0786 {
0787 set_shadow_msr_intercept(vcpu, msr, read, write);
0788 set_msr_interception_bitmap(vcpu, msrpm, msr, read, write);
0789 }
0790
0791 u32 *svm_vcpu_alloc_msrpm(void)
0792 {
0793 unsigned int order = get_order(MSRPM_SIZE);
0794 struct page *pages = alloc_pages(GFP_KERNEL_ACCOUNT, order);
0795 u32 *msrpm;
0796
0797 if (!pages)
0798 return NULL;
0799
0800 msrpm = page_address(pages);
0801 memset(msrpm, 0xff, PAGE_SIZE * (1 << order));
0802
0803 return msrpm;
0804 }
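
/*
 * Note that the bitmap above is initialized to all ones, i.e. every MSR
 * access is intercepted by default; svm_vcpu_init_msrpm() below then clears
 * the bit pairs for the MSRs marked "always" in direct_access_msrs[] so the
 * guest can access those without a #VMEXIT.
 */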
0805
0806 void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm)
0807 {
0808 int i;
0809
0810 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
0811 if (!direct_access_msrs[i].always)
0812 continue;
0813 set_msr_interception(vcpu, msrpm, direct_access_msrs[i].index, 1, 1);
0814 }
0815 }
0816
0817 void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept)
0818 {
0819 int i;
0820
0821 if (intercept == svm->x2avic_msrs_intercepted)
0822 return;
0823
0824 if (avic_mode != AVIC_MODE_X2 ||
0825 !apic_x2apic_mode(svm->vcpu.arch.apic))
0826 return;
0827
0828 for (i = 0; i < MAX_DIRECT_ACCESS_MSRS; i++) {
0829 int index = direct_access_msrs[i].index;
0830
0831 if ((index < APIC_BASE_MSR) ||
0832 (index > APIC_BASE_MSR + 0xff))
0833 continue;
0834 set_msr_interception(&svm->vcpu, svm->msrpm, index,
0835 !intercept, !intercept);
0836 }
0837
0838 svm->x2avic_msrs_intercepted = intercept;
0839 }
0840
0841 void svm_vcpu_free_msrpm(u32 *msrpm)
0842 {
0843 __free_pages(virt_to_page(msrpm), get_order(MSRPM_SIZE));
0844 }
0845
0846 static void svm_msr_filter_changed(struct kvm_vcpu *vcpu)
0847 {
0848 struct vcpu_svm *svm = to_svm(vcpu);
0849 u32 i;
0850
/*
 * Set intercept permissions for all direct access MSRs again.  They
 * will automatically get filtered through the MSR filter, so we are
 * back in sync after this.
 */
0856 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
0857 u32 msr = direct_access_msrs[i].index;
0858 u32 read = test_bit(i, svm->shadow_msr_intercept.read);
0859 u32 write = test_bit(i, svm->shadow_msr_intercept.write);
0860
0861 set_msr_interception_bitmap(vcpu, svm->msrpm, msr, read, write);
0862 }
0863 }
0864
0865 static void add_msr_offset(u32 offset)
0866 {
0867 int i;
0868
0869 for (i = 0; i < MSRPM_OFFSETS; ++i) {
/* Offset already tracked? */
if (msrpm_offsets[i] == offset)
return;

/* Slot already used by another offset? */
if (msrpm_offsets[i] != MSR_INVALID)
continue;

/* Free slot: record the new offset. */
msrpm_offsets[i] = offset;
0881
0882 return;
0883 }
0884
/*
 * If this BUG triggers, the msrpm_offsets table has overflowed and
 * MSRPM_OFFSETS needs to be increased.
 */
0889 BUG();
0890 }
0891
0892 static void init_msrpm_offsets(void)
0893 {
0894 int i;
0895
0896 memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
0897
0898 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
0899 u32 offset;
0900
0901 offset = svm_msrpm_offset(direct_access_msrs[i].index);
0902 BUG_ON(offset == MSR_INVALID);
0903
0904 add_msr_offset(offset);
0905 }
0906 }
0907
0908 void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
0909 {
0910 to_vmcb->save.dbgctl = from_vmcb->save.dbgctl;
0911 to_vmcb->save.br_from = from_vmcb->save.br_from;
0912 to_vmcb->save.br_to = from_vmcb->save.br_to;
0913 to_vmcb->save.last_excp_from = from_vmcb->save.last_excp_from;
0914 to_vmcb->save.last_excp_to = from_vmcb->save.last_excp_to;
0915
0916 vmcb_mark_dirty(to_vmcb, VMCB_LBR);
0917 }
0918
0919 static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
0920 {
0921 struct vcpu_svm *svm = to_svm(vcpu);
0922
0923 svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
0924 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
0925 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
0926 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
0927 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
0928
/* Move the LBR msrs to the vmcb02 so that the guest can see them. */
0930 if (is_guest_mode(vcpu))
0931 svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
0932 }
0933
0934 static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
0935 {
0936 struct vcpu_svm *svm = to_svm(vcpu);
0937
0938 svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
0939 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
0940 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
0941 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
0942 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
0943
/*
 * Move the LBR msrs back to the vmcb01 to avoid copying them on every
 * nested guest entry.
 */
0948 if (is_guest_mode(vcpu))
0949 svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
0950 }
0951
0952 static int svm_get_lbr_msr(struct vcpu_svm *svm, u32 index)
0953 {
/*
 * If LBR virtualization is disabled, the LBR MSRs are always kept in
 * vmcb01.  If it is enabled and L2 runs with its own LBR state, the
 * MSRs live in vmcb02 instead; read from whichever VMCB currently
 * holds them, as indicated by LBR_CTL_ENABLE_MASK on the active VMCB.
 */
0961 struct vmcb *vmcb =
0962 (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) ?
0963 svm->vmcb : svm->vmcb01.ptr;
0964
0965 switch (index) {
0966 case MSR_IA32_DEBUGCTLMSR:
0967 return vmcb->save.dbgctl;
0968 case MSR_IA32_LASTBRANCHFROMIP:
0969 return vmcb->save.br_from;
0970 case MSR_IA32_LASTBRANCHTOIP:
0971 return vmcb->save.br_to;
0972 case MSR_IA32_LASTINTFROMIP:
0973 return vmcb->save.last_excp_from;
0974 case MSR_IA32_LASTINTTOIP:
0975 return vmcb->save.last_excp_to;
0976 default:
0977 KVM_BUG(false, svm->vcpu.kvm,
0978 "%s: Unknown MSR 0x%x", __func__, index);
0979 return 0;
0980 }
0981 }
0982
0983 void svm_update_lbrv(struct kvm_vcpu *vcpu)
0984 {
0985 struct vcpu_svm *svm = to_svm(vcpu);
0986
0987 bool enable_lbrv = svm_get_lbr_msr(svm, MSR_IA32_DEBUGCTLMSR) &
0988 DEBUGCTLMSR_LBR;
0989
0990 bool current_enable_lbrv = !!(svm->vmcb->control.virt_ext &
0991 LBR_CTL_ENABLE_MASK);
0992
0993 if (unlikely(is_guest_mode(vcpu) && svm->lbrv_enabled))
0994 if (unlikely(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))
0995 enable_lbrv = true;
0996
0997 if (enable_lbrv == current_enable_lbrv)
0998 return;
0999
1000 if (enable_lbrv)
1001 svm_enable_lbrv(vcpu);
1002 else
1003 svm_disable_lbrv(vcpu);
1004 }
1005
1006 void disable_nmi_singlestep(struct vcpu_svm *svm)
1007 {
1008 svm->nmi_singlestep = false;
1009
1010 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
1011
1012 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
1013 svm->vmcb->save.rflags &= ~X86_EFLAGS_TF;
1014 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
1015 svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
1016 }
1017 }
1018
1019 static void grow_ple_window(struct kvm_vcpu *vcpu)
1020 {
1021 struct vcpu_svm *svm = to_svm(vcpu);
1022 struct vmcb_control_area *control = &svm->vmcb->control;
1023 int old = control->pause_filter_count;
1024
1025 if (kvm_pause_in_guest(vcpu->kvm))
1026 return;
1027
1028 control->pause_filter_count = __grow_ple_window(old,
1029 pause_filter_count,
1030 pause_filter_count_grow,
1031 pause_filter_count_max);
1032
1033 if (control->pause_filter_count != old) {
1034 vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1035 trace_kvm_ple_window_update(vcpu->vcpu_id,
1036 control->pause_filter_count, old);
1037 }
1038 }
1039
1040 static void shrink_ple_window(struct kvm_vcpu *vcpu)
1041 {
1042 struct vcpu_svm *svm = to_svm(vcpu);
1043 struct vmcb_control_area *control = &svm->vmcb->control;
1044 int old = control->pause_filter_count;
1045
1046 if (kvm_pause_in_guest(vcpu->kvm))
1047 return;
1048
1049 control->pause_filter_count =
1050 __shrink_ple_window(old,
1051 pause_filter_count,
1052 pause_filter_count_shrink,
1053 pause_filter_count);
1054 if (control->pause_filter_count != old) {
1055 vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1056 trace_kvm_ple_window_update(vcpu->vcpu_id,
1057 control->pause_filter_count, old);
1058 }
1059 }
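
/*
 * Rough sketch of the Pause-Loop-Exiting window dynamics implemented by the
 * two helpers above, assuming the stock defaults (pause_filter_count = 3000,
 * grow factor 2, shrink divisor 0 via KVM_DEFAULT_PLE_WINDOW_GROW/SHRINK):
 * each PAUSE vmexit doubles the filter count (3000 -> 6000 -> 12000, capped
 * at pause_filter_count_max), while shrinking with a divisor of 0 snaps the
 * window straight back to pause_filter_count.  Both helpers are no-ops when
 * PAUSE exiting is disabled for the VM.
 */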
1060
1061 static void svm_hardware_unsetup(void)
1062 {
1063 int cpu;
1064
1065 sev_hardware_unsetup();
1066
1067 for_each_possible_cpu(cpu)
1068 svm_cpu_uninit(cpu);
1069
1070 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT),
1071 get_order(IOPM_SIZE));
1072 iopm_base = 0;
1073 }
1074
1075 static void init_seg(struct vmcb_seg *seg)
1076 {
1077 seg->selector = 0;
1078 seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
1079 SVM_SELECTOR_WRITE_MASK;
1080 seg->limit = 0xffff;
1081 seg->base = 0;
1082 }
1083
1084 static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
1085 {
1086 seg->selector = 0;
1087 seg->attrib = SVM_SELECTOR_P_MASK | type;
1088 seg->limit = 0xffff;
1089 seg->base = 0;
1090 }
1091
1092 static u64 svm_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
1093 {
1094 struct vcpu_svm *svm = to_svm(vcpu);
1095
1096 return svm->nested.ctl.tsc_offset;
1097 }
1098
1099 static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
1100 {
1101 struct vcpu_svm *svm = to_svm(vcpu);
1102
1103 return svm->tsc_ratio_msr;
1104 }
1105
1106 static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1107 {
1108 struct vcpu_svm *svm = to_svm(vcpu);
1109
1110 svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset;
1111 svm->vmcb->control.tsc_offset = offset;
1112 vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1113 }
1114
1115 static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
1116 {
1117 __svm_write_tsc_multiplier(multiplier);
1118 }
1119
/* Evaluate instruction intercepts that depend on guest CPUID features. */
1122 static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
1123 struct vcpu_svm *svm)
1124 {
/*
 * Intercept INVPCID if shadow paging is enabled to sync/free shadow
 * roots, or if INVPCID is disabled in the guest to inject #UD.
 */
1129 if (kvm_cpu_cap_has(X86_FEATURE_INVPCID)) {
1130 if (!npt_enabled ||
1131 !guest_cpuid_has(&svm->vcpu, X86_FEATURE_INVPCID))
1132 svm_set_intercept(svm, INTERCEPT_INVPCID);
1133 else
1134 svm_clr_intercept(svm, INTERCEPT_INVPCID);
1135 }
1136
1137 if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) {
1138 if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
1139 svm_clr_intercept(svm, INTERCEPT_RDTSCP);
1140 else
1141 svm_set_intercept(svm, INTERCEPT_RDTSCP);
1142 }
1143 }
1144
1145 static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
1146 {
1147 struct vcpu_svm *svm = to_svm(vcpu);
1148
1149 if (guest_cpuid_is_intel(vcpu)) {
/*
 * We must intercept SYSENTER_EIP and SYSENTER_ESP accesses because the
 * CPU only stores 32 bits of them; for the same reason virtual
 * VMLOAD/VMSAVE can't be used for an Intel-flavored guest.
 */
1155 svm_set_intercept(svm, INTERCEPT_VMLOAD);
1156 svm_set_intercept(svm, INTERCEPT_VMSAVE);
1157 svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
1158
1159 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0);
1160 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0);
1161
1162 svm->v_vmload_vmsave_enabled = false;
1163 } else {
/*
 * If hardware supports Virtual VMLOAD VMSAVE then enable it in the
 * VMCB and clear the intercepts to avoid #VMEXIT.
 */
1168 if (vls) {
1169 svm_clr_intercept(svm, INTERCEPT_VMLOAD);
1170 svm_clr_intercept(svm, INTERCEPT_VMSAVE);
1171 svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
1172 }
1173
1174 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
1175 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
1176 }
1177 }
1178
1179 static void init_vmcb(struct kvm_vcpu *vcpu)
1180 {
1181 struct vcpu_svm *svm = to_svm(vcpu);
1182 struct vmcb *vmcb = svm->vmcb01.ptr;
1183 struct vmcb_control_area *control = &vmcb->control;
1184 struct vmcb_save_area *save = &vmcb->save;
1185
1186 svm_set_intercept(svm, INTERCEPT_CR0_READ);
1187 svm_set_intercept(svm, INTERCEPT_CR3_READ);
1188 svm_set_intercept(svm, INTERCEPT_CR4_READ);
1189 svm_set_intercept(svm, INTERCEPT_CR0_WRITE);
1190 svm_set_intercept(svm, INTERCEPT_CR3_WRITE);
1191 svm_set_intercept(svm, INTERCEPT_CR4_WRITE);
1192 if (!kvm_vcpu_apicv_active(vcpu))
1193 svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
1194
1195 set_dr_intercepts(svm);
1196
1197 set_exception_intercept(svm, PF_VECTOR);
1198 set_exception_intercept(svm, UD_VECTOR);
1199 set_exception_intercept(svm, MC_VECTOR);
1200 set_exception_intercept(svm, AC_VECTOR);
1201 set_exception_intercept(svm, DB_VECTOR);
1202
/*
 * Guest access to VMware backdoor ports could legitimately trigger
 * #GP because of the TSS I/O permission bitmap.  We intercept those
 * #GPs and allow the access anyway, as VMware does.  Don't intercept
 * #GP for SEV guests, as KVM can't decrypt guest memory to decode the
 * faulting instruction.
 */
1209 if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
1210 set_exception_intercept(svm, GP_VECTOR);
1211
1212 svm_set_intercept(svm, INTERCEPT_INTR);
1213 svm_set_intercept(svm, INTERCEPT_NMI);
1214
1215 if (intercept_smi)
1216 svm_set_intercept(svm, INTERCEPT_SMI);
1217
1218 svm_set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
1219 svm_set_intercept(svm, INTERCEPT_RDPMC);
1220 svm_set_intercept(svm, INTERCEPT_CPUID);
1221 svm_set_intercept(svm, INTERCEPT_INVD);
1222 svm_set_intercept(svm, INTERCEPT_INVLPG);
1223 svm_set_intercept(svm, INTERCEPT_INVLPGA);
1224 svm_set_intercept(svm, INTERCEPT_IOIO_PROT);
1225 svm_set_intercept(svm, INTERCEPT_MSR_PROT);
1226 svm_set_intercept(svm, INTERCEPT_TASK_SWITCH);
1227 svm_set_intercept(svm, INTERCEPT_SHUTDOWN);
1228 svm_set_intercept(svm, INTERCEPT_VMRUN);
1229 svm_set_intercept(svm, INTERCEPT_VMMCALL);
1230 svm_set_intercept(svm, INTERCEPT_VMLOAD);
1231 svm_set_intercept(svm, INTERCEPT_VMSAVE);
1232 svm_set_intercept(svm, INTERCEPT_STGI);
1233 svm_set_intercept(svm, INTERCEPT_CLGI);
1234 svm_set_intercept(svm, INTERCEPT_SKINIT);
1235 svm_set_intercept(svm, INTERCEPT_WBINVD);
1236 svm_set_intercept(svm, INTERCEPT_XSETBV);
1237 svm_set_intercept(svm, INTERCEPT_RDPRU);
1238 svm_set_intercept(svm, INTERCEPT_RSM);
1239
1240 if (!kvm_mwait_in_guest(vcpu->kvm)) {
1241 svm_set_intercept(svm, INTERCEPT_MONITOR);
1242 svm_set_intercept(svm, INTERCEPT_MWAIT);
1243 }
1244
1245 if (!kvm_hlt_in_guest(vcpu->kvm))
1246 svm_set_intercept(svm, INTERCEPT_HLT);
1247
1248 control->iopm_base_pa = __sme_set(iopm_base);
1249 control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
1250 control->int_ctl = V_INTR_MASKING_MASK;
1251
1252 init_seg(&save->es);
1253 init_seg(&save->ss);
1254 init_seg(&save->ds);
1255 init_seg(&save->fs);
1256 init_seg(&save->gs);
1257
1258 save->cs.selector = 0xf000;
1259 save->cs.base = 0xffff0000;
1260
1261 save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
1262 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
1263 save->cs.limit = 0xffff;
1264
1265 save->gdtr.base = 0;
1266 save->gdtr.limit = 0xffff;
1267 save->idtr.base = 0;
1268 save->idtr.limit = 0xffff;
1269
1270 init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
1271 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
1272
1273 if (npt_enabled) {
1274
1275 control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
1276 svm_clr_intercept(svm, INTERCEPT_INVLPG);
1277 clr_exception_intercept(svm, PF_VECTOR);
1278 svm_clr_intercept(svm, INTERCEPT_CR3_READ);
1279 svm_clr_intercept(svm, INTERCEPT_CR3_WRITE);
1280 save->g_pat = vcpu->arch.pat;
1281 save->cr3 = 0;
1282 }
1283 svm->current_vmcb->asid_generation = 0;
1284 svm->asid = 0;
1285
1286 svm->nested.vmcb12_gpa = INVALID_GPA;
1287 svm->nested.last_vmcb12_gpa = INVALID_GPA;
1288
1289 if (!kvm_pause_in_guest(vcpu->kvm)) {
1290 control->pause_filter_count = pause_filter_count;
1291 if (pause_filter_thresh)
1292 control->pause_filter_thresh = pause_filter_thresh;
1293 svm_set_intercept(svm, INTERCEPT_PAUSE);
1294 } else {
1295 svm_clr_intercept(svm, INTERCEPT_PAUSE);
1296 }
1297
1298 svm_recalc_instruction_intercepts(vcpu, svm);
1299
/*
 * If the host supports V_SPEC_CTRL then disable the interception of
 * MSR_IA32_SPEC_CTRL.
 */
1304 if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL))
1305 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
1306
1307 if (kvm_vcpu_apicv_active(vcpu))
1308 avic_init_vmcb(svm, vmcb);
1309
1310 if (vgif) {
1311 svm_clr_intercept(svm, INTERCEPT_STGI);
1312 svm_clr_intercept(svm, INTERCEPT_CLGI);
1313 svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
1314 }
1315
1316 if (sev_guest(vcpu->kvm))
1317 sev_init_vmcb(svm);
1318
1319 svm_hv_init_vmcb(vmcb);
1320 init_vmcb_after_set_cpuid(vcpu);
1321
1322 vmcb_mark_all_dirty(vmcb);
1323
1324 enable_gif(svm);
1325 }
1326
1327 static void __svm_vcpu_reset(struct kvm_vcpu *vcpu)
1328 {
1329 struct vcpu_svm *svm = to_svm(vcpu);
1330
1331 svm_vcpu_init_msrpm(vcpu, svm->msrpm);
1332
1333 svm_init_osvw(vcpu);
1334 vcpu->arch.microcode_version = 0x01000065;
1335 svm->tsc_ratio_msr = kvm_caps.default_tsc_scaling_ratio;
1336
1337 if (sev_es_guest(vcpu->kvm))
1338 sev_es_vcpu_reset(svm);
1339 }
1340
1341 static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
1342 {
1343 struct vcpu_svm *svm = to_svm(vcpu);
1344
1345 svm->spec_ctrl = 0;
1346 svm->virt_spec_ctrl = 0;
1347
1348 init_vmcb(vcpu);
1349
1350 if (!init_event)
1351 __svm_vcpu_reset(vcpu);
1352 }
1353
1354 void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb)
1355 {
1356 svm->current_vmcb = target_vmcb;
1357 svm->vmcb = target_vmcb->ptr;
1358 }
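
/*
 * svm->vmcb01 always holds L1's control/save state; a separate vmcb02
 * (managed by the nested code) is used while L2 runs.  svm_switch_vmcb()
 * above simply repoints current_vmcb/vmcb at whichever of the two is active,
 * e.g. on nested VMRUN and nested #VMEXIT.
 */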
1359
1360 static int svm_vcpu_create(struct kvm_vcpu *vcpu)
1361 {
1362 struct vcpu_svm *svm;
1363 struct page *vmcb01_page;
1364 struct page *vmsa_page = NULL;
1365 int err;
1366
1367 BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
1368 svm = to_svm(vcpu);
1369
1370 err = -ENOMEM;
1371 vmcb01_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
1372 if (!vmcb01_page)
1373 goto out;
1374
1375 if (sev_es_guest(vcpu->kvm)) {
/*
 * SEV-ES guests require a separate VMSA page used to contain the
 * encrypted register state of the guest.
 */
1380 vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
1381 if (!vmsa_page)
1382 goto error_free_vmcb_page;
1383
/*
 * SEV-ES guests maintain an encrypted version of their FPU state
 * which is restored and saved on VMRUN and VMEXIT.  Mark the guest
 * FPU state as confidential so KVM never tries to XSAVE/XRSTOR it.
 */
1390 fpstate_set_confidential(&vcpu->arch.guest_fpu);
1391 }
1392
1393 err = avic_init_vcpu(svm);
1394 if (err)
1395 goto error_free_vmsa_page;
1396
1397 svm->msrpm = svm_vcpu_alloc_msrpm();
1398 if (!svm->msrpm) {
1399 err = -ENOMEM;
1400 goto error_free_vmsa_page;
1401 }
1402
1403 svm->x2avic_msrs_intercepted = true;
1404
1405 svm->vmcb01.ptr = page_address(vmcb01_page);
1406 svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT);
1407 svm_switch_vmcb(svm, &svm->vmcb01);
1408
1409 if (vmsa_page)
1410 svm->sev_es.vmsa = page_address(vmsa_page);
1411
1412 svm->guest_state_loaded = false;
1413
1414 return 0;
1415
1416 error_free_vmsa_page:
1417 if (vmsa_page)
1418 __free_page(vmsa_page);
1419 error_free_vmcb_page:
1420 __free_page(vmcb01_page);
1421 out:
1422 return err;
1423 }
1424
1425 static void svm_clear_current_vmcb(struct vmcb *vmcb)
1426 {
1427 int i;
1428
1429 for_each_online_cpu(i)
1430 cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
1431 }
1432
1433 static void svm_vcpu_free(struct kvm_vcpu *vcpu)
1434 {
1435 struct vcpu_svm *svm = to_svm(vcpu);
1436
/*
 * The VMCB page can be recycled, causing a false negative in
 * svm_vcpu_load(), so ensure that no logical CPU still has this VMCB
 * cached as its current_vmcb.
 */
1442 svm_clear_current_vmcb(svm->vmcb);
1443
1444 svm_free_nested(svm);
1445
1446 sev_free_vcpu(vcpu);
1447
1448 __free_page(pfn_to_page(__sme_clr(svm->vmcb01.pa) >> PAGE_SHIFT));
1449 __free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE));
1450 }
1451
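/*
 * Host state handled below: VMSAVE stashes the host segment/TR/GS state and
 * the SYSCALL/SYSENTER MSRs in the per-CPU save_area so they can be restored
 * after #VMEXIT (via VMLOAD, or automatically by hardware for SEV-ES); for
 * SEV-ES the host save area additionally carries state filled in by
 * sev_es_prepare_switch_to_guest().  TSC_AUX is swapped lazily through the
 * user-return MSR mechanism rather than on every switch.
 */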
1452 static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
1453 {
1454 struct vcpu_svm *svm = to_svm(vcpu);
1455 struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
1456
1457 if (sev_es_guest(vcpu->kvm))
1458 sev_es_unmap_ghcb(svm);
1459
1460 if (svm->guest_state_loaded)
1461 return;
1462
/*
 * Save additional host state that will be restored on VMEXIT (sev-es)
 * or by a subsequent VMLOAD of the host save area.
 */
1467 vmsave(__sme_page_pa(sd->save_area));
1468 if (sev_es_guest(vcpu->kvm)) {
1469 struct sev_es_save_area *hostsa;
1470 hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
1471
1472 sev_es_prepare_switch_to_guest(hostsa);
1473 }
1474
1475 if (tsc_scaling)
1476 __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
1477
1478 if (likely(tsc_aux_uret_slot >= 0))
1479 kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
1480
1481 svm->guest_state_loaded = true;
1482 }
1483
1484 static void svm_prepare_host_switch(struct kvm_vcpu *vcpu)
1485 {
1486 to_svm(vcpu)->guest_state_loaded = false;
1487 }
1488
1489 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1490 {
1491 struct vcpu_svm *svm = to_svm(vcpu);
1492 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
1493
1494 if (sd->current_vmcb != svm->vmcb) {
1495 sd->current_vmcb = svm->vmcb;
1496 indirect_branch_prediction_barrier();
1497 }
1498 if (kvm_vcpu_apicv_active(vcpu))
1499 avic_vcpu_load(vcpu, cpu);
1500 }
1501
1502 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
1503 {
1504 if (kvm_vcpu_apicv_active(vcpu))
1505 avic_vcpu_put(vcpu);
1506
1507 svm_prepare_host_switch(vcpu);
1508
1509 ++vcpu->stat.host_state_reload;
1510 }
1511
1512 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1513 {
1514 struct vcpu_svm *svm = to_svm(vcpu);
1515 unsigned long rflags = svm->vmcb->save.rflags;
1516
1517 if (svm->nmi_singlestep) {
1518
1519 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
1520 rflags &= ~X86_EFLAGS_TF;
1521 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
1522 rflags &= ~X86_EFLAGS_RF;
1523 }
1524 return rflags;
1525 }
1526
1527 static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1528 {
1529 if (to_svm(vcpu)->nmi_singlestep)
1530 rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
1531
/*
 * Any change of EFLAGS.VM is accompanied by a reload of SS (caused by
 * either a task switch or an inter-privilege IRET), so the CPL does not
 * need to be updated here.
 */
1537 to_svm(vcpu)->vmcb->save.rflags = rflags;
1538 }
1539
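/*
 * For SEV-ES guests RFLAGS lives in the encrypted VMSA and is not readable
 * by KVM, so the interrupt-window state below is taken from the guest
 * interrupt mask that hardware mirrors into int_state instead of RFLAGS.IF.
 */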
1540 static bool svm_get_if_flag(struct kvm_vcpu *vcpu)
1541 {
1542 struct vmcb *vmcb = to_svm(vcpu)->vmcb;
1543
1544 return sev_es_guest(vcpu->kvm)
1545 ? vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK
1546 : kvm_get_rflags(vcpu) & X86_EFLAGS_IF;
1547 }
1548
1549 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
1550 {
1551 kvm_register_mark_available(vcpu, reg);
1552
1553 switch (reg) {
1554 case VCPU_EXREG_PDPTR:
/*
 * When !npt_enabled, mmu->pdptrs[] is already up to date because it
 * is refreshed on every relevant CR write, as required by the SDM.
 */
1559 if (npt_enabled)
1560 load_pdptrs(vcpu, kvm_read_cr3(vcpu));
1561 break;
1562 default:
1563 KVM_BUG_ON(1, vcpu->kvm);
1564 }
1565 }
1566
1567 static void svm_set_vintr(struct vcpu_svm *svm)
1568 {
1569 struct vmcb_control_area *control;
1570
/* The following fields are ignored when AVIC is enabled. */
1574 WARN_ON(kvm_vcpu_apicv_activated(&svm->vcpu));
1575
1576 svm_set_intercept(svm, INTERCEPT_VINTR);
1577
/*
 * This is just a dummy VINTR to actually cause a vmexit to happen.
 * Actual injection of virtual interrupts happens through EVENTINJ.
 */
1582 control = &svm->vmcb->control;
1583 control->int_vector = 0x0;
1584 control->int_ctl &= ~V_INTR_PRIO_MASK;
1585 control->int_ctl |= V_IRQ_MASK |
(0xf << V_INTR_PRIO_SHIFT);
1587 vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
1588 }
1589
1590 static void svm_clear_vintr(struct vcpu_svm *svm)
1591 {
1592 svm_clr_intercept(svm, INTERCEPT_VINTR);

/* Drop int_ctl fields related to VINTR injection. */
1595 svm->vmcb->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK;
1596 if (is_guest_mode(&svm->vcpu)) {
1597 svm->vmcb01.ptr->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK;
1598
1599 WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) !=
1600 (svm->nested.ctl.int_ctl & V_TPR_MASK));
1601
1602 svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl &
1603 V_IRQ_INJECTION_BITS_MASK;
1604
1605 svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
1606 }
1607
1608 vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
1609 }
1610
1611 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
1612 {
1613 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1614 struct vmcb_save_area *save01 = &to_svm(vcpu)->vmcb01.ptr->save;
1615
1616 switch (seg) {
1617 case VCPU_SREG_CS: return &save->cs;
1618 case VCPU_SREG_DS: return &save->ds;
1619 case VCPU_SREG_ES: return &save->es;
1620 case VCPU_SREG_FS: return &save01->fs;
1621 case VCPU_SREG_GS: return &save01->gs;
1622 case VCPU_SREG_SS: return &save->ss;
1623 case VCPU_SREG_TR: return &save01->tr;
1624 case VCPU_SREG_LDTR: return &save01->ldtr;
1625 }
1626 BUG();
1627 return NULL;
1628 }
1629
1630 static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
1631 {
1632 struct vmcb_seg *s = svm_seg(vcpu, seg);
1633
1634 return s->base;
1635 }
1636
1637 static void svm_get_segment(struct kvm_vcpu *vcpu,
1638 struct kvm_segment *var, int seg)
1639 {
1640 struct vmcb_seg *s = svm_seg(vcpu, seg);
1641
1642 var->base = s->base;
1643 var->limit = s->limit;
1644 var->selector = s->selector;
1645 var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
1646 var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
1647 var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
1648 var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
1649 var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
1650 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
1651 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
1652
/*
 * AMD CPUs circa 2014 track the G bit for all segments except CS.
 * However, the SVM spec states that the G bit is not observed by the
 * CPU, and some VMware virtual CPUs drop the G bit for all segments.
 * So let's synthesize a legal G bit for all segments, this helps
 * running KVM nested.  It also helps cross-vendor migration, because
 * Intel's vmentry has a check on the 'G' bit.
 */
1661 var->g = s->limit > 0xfffff;
1662
/*
 * AMD's VMCB does not have an explicit unusable field, so emulate it
 * for cross vendor migration purposes by "not present".
 */
1667 var->unusable = !var->present;
1668
1669 switch (seg) {
1670 case VCPU_SREG_TR:
/*
 * Work around a bug where the busy flag in the tr selector
 * isn't exposed.
 */
1675 var->type |= 0x2;
1676 break;
1677 case VCPU_SREG_DS:
1678 case VCPU_SREG_ES:
1679 case VCPU_SREG_FS:
1680 case VCPU_SREG_GS:
/*
 * The accessed bit must always be set in the segment descriptor
 * cache, although it can be cleared in the descriptor, the cached
 * bit always remains at 1.  Since Intel has a check on this, set it
 * here to support cross-vendor migration.
 */
1688 if (!var->unusable)
1689 var->type |= 0x1;
1690 break;
1691 case VCPU_SREG_SS:
/*
 * On AMD CPUs sometimes the DB bit in the segment descriptor is left
 * set even though the whole segment has been made unusable.  Clear it
 * here to pass an Intel VMX entry check when cross vendor migrating.
 */
if (var->unusable)
var->db = 0;

/* This is symmetric with svm_set_segment(). */
1701 var->dpl = to_svm(vcpu)->vmcb->save.cpl;
1702 break;
1703 }
1704 }
1705
1706 static int svm_get_cpl(struct kvm_vcpu *vcpu)
1707 {
1708 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1709
1710 return save->cpl;
1711 }
1712
1713 static void svm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
1714 {
1715 struct kvm_segment cs;
1716
1717 svm_get_segment(vcpu, &cs, VCPU_SREG_CS);
1718 *db = cs.db;
1719 *l = cs.l;
1720 }
1721
1722 static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1723 {
1724 struct vcpu_svm *svm = to_svm(vcpu);
1725
1726 dt->size = svm->vmcb->save.idtr.limit;
1727 dt->address = svm->vmcb->save.idtr.base;
1728 }
1729
1730 static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1731 {
1732 struct vcpu_svm *svm = to_svm(vcpu);
1733
1734 svm->vmcb->save.idtr.limit = dt->size;
svm->vmcb->save.idtr.base = dt->address;
1736 vmcb_mark_dirty(svm->vmcb, VMCB_DT);
1737 }
1738
1739 static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1740 {
1741 struct vcpu_svm *svm = to_svm(vcpu);
1742
1743 dt->size = svm->vmcb->save.gdtr.limit;
1744 dt->address = svm->vmcb->save.gdtr.base;
1745 }
1746
1747 static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1748 {
1749 struct vcpu_svm *svm = to_svm(vcpu);
1750
1751 svm->vmcb->save.gdtr.limit = dt->size;
svm->vmcb->save.gdtr.base = dt->address;
1753 vmcb_mark_dirty(svm->vmcb, VMCB_DT);
1754 }
1755
1756 static void sev_post_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1757 {
1758 struct vcpu_svm *svm = to_svm(vcpu);
1759
/*
 * For guests that don't set guest_state_protected, the CR3 update is
 * handled via kvm_mmu_load() while entering the guest.  For guests
 * that do (SEV-ES/SEV-SNP), the CR3 update needs to be written to the
 * VMCB save area now, since the save area will become the initial
 * contents of the VMSA, and future VMCB save area updates won't be
 * seen.
 */
1768 if (sev_es_guest(vcpu->kvm)) {
1769 svm->vmcb->save.cr3 = cr3;
1770 vmcb_mark_dirty(svm->vmcb, VMCB_CR);
1771 }
1772 }
1773
1774 void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1775 {
1776 struct vcpu_svm *svm = to_svm(vcpu);
1777 u64 hcr0 = cr0;
1778 bool old_paging = is_paging(vcpu);
1779
1780 #ifdef CONFIG_X86_64
1781 if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
1782 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
1783 vcpu->arch.efer |= EFER_LMA;
1784 svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
1785 }
1786
1787 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
1788 vcpu->arch.efer &= ~EFER_LMA;
1789 svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
1790 }
1791 }
1792 #endif
1793 vcpu->arch.cr0 = cr0;
1794
1795 if (!npt_enabled) {
1796 hcr0 |= X86_CR0_PG | X86_CR0_WP;
1797 if (old_paging != is_paging(vcpu))
1798 svm_set_cr4(vcpu, kvm_read_cr4(vcpu));
1799 }
1800
/*
 * Re-enable caching here because the QEMU BIOS does not do it -
 * this results in some delay at reboot.
 */
1806 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
1807 hcr0 &= ~(X86_CR0_CD | X86_CR0_NW);
1808
1809 svm->vmcb->save.cr0 = hcr0;
1810 vmcb_mark_dirty(svm->vmcb, VMCB_CR);
1811
/*
 * SEV-ES guests must always keep the CR intercepts cleared; CR
 * tracking is done using the CR write traps.
 */
1816 if (sev_es_guest(vcpu->kvm))
1817 return;
1818
1819 if (hcr0 == cr0) {
1820
1821 svm_clr_intercept(svm, INTERCEPT_CR0_READ);
1822 svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
1823 } else {
1824 svm_set_intercept(svm, INTERCEPT_CR0_READ);
1825 svm_set_intercept(svm, INTERCEPT_CR0_WRITE);
1826 }
1827 }
1828
1829 static bool svm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1830 {
1831 return true;
1832 }
1833
1834 void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1835 {
1836 unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
1837 unsigned long old_cr4 = vcpu->arch.cr4;
1838
1839 if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
1840 svm_flush_tlb_current(vcpu);
1841
1842 vcpu->arch.cr4 = cr4;
1843 if (!npt_enabled) {
1844 cr4 |= X86_CR4_PAE;
1845
1846 if (!is_paging(vcpu))
1847 cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
1848 }
1849 cr4 |= host_cr4_mce;
1850 to_svm(vcpu)->vmcb->save.cr4 = cr4;
1851 vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
1852
1853 if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
1854 kvm_update_cpuid_runtime(vcpu);
1855 }
1856
1857 static void svm_set_segment(struct kvm_vcpu *vcpu,
1858 struct kvm_segment *var, int seg)
1859 {
1860 struct vcpu_svm *svm = to_svm(vcpu);
1861 struct vmcb_seg *s = svm_seg(vcpu, seg);
1862
1863 s->base = var->base;
1864 s->limit = var->limit;
1865 s->selector = var->selector;
1866 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
1867 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
1868 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
1869 s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT;
1870 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
1871 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
1872 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
1873 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
1874
/*
 * This is always accurate, except if SYSRET returned to a segment
 * with SS.DPL != 3.  Intel does not have this quirk, and always
 * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it
 * would entail passing the CPL to userspace and back.
 */
if (seg == VCPU_SREG_SS)
/* This is symmetric with svm_get_segment(). */
1883 svm->vmcb->save.cpl = (var->dpl & 3);
1884
1885 vmcb_mark_dirty(svm->vmcb, VMCB_SEG);
1886 }
1887
1888 static void svm_update_exception_bitmap(struct kvm_vcpu *vcpu)
1889 {
1890 struct vcpu_svm *svm = to_svm(vcpu);
1891
1892 clr_exception_intercept(svm, BP_VECTOR);
1893
1894 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
1895 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
1896 set_exception_intercept(svm, BP_VECTOR);
1897 }
1898 }
1899
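/*
 * ASIDs are handed out per physical CPU: pre_svm_run() calls new_asid()
 * whenever a vCPU's cached asid_generation no longer matches the CPU's.
 * When the ASID pool above min_asid is exhausted, the generation is bumped
 * and TLB_CONTROL_FLUSH_ALL_ASID forces a full TLB flush before ASIDs are
 * reused; min_asid itself sits above the range reserved for SEV guests
 * (max_sev_asid + 1, set in svm_hardware_enable()).
 */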
1900 static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1901 {
1902 if (sd->next_asid > sd->max_asid) {
1903 ++sd->asid_generation;
1904 sd->next_asid = sd->min_asid;
1905 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
1906 vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
1907 }
1908
1909 svm->current_vmcb->asid_generation = sd->asid_generation;
1910 svm->asid = sd->next_asid++;
1911 }
1912
1913 static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value)
1914 {
1915 struct vmcb *vmcb = svm->vmcb;
1916
1917 if (svm->vcpu.arch.guest_state_protected)
1918 return;
1919
1920 if (unlikely(value != vmcb->save.dr6)) {
1921 vmcb->save.dr6 = value;
1922 vmcb_mark_dirty(vmcb, VMCB_DR);
1923 }
1924 }
1925
1926 static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
1927 {
1928 struct vcpu_svm *svm = to_svm(vcpu);
1929
1930 if (vcpu->arch.guest_state_protected)
1931 return;
1932
1933 get_debugreg(vcpu->arch.db[0], 0);
1934 get_debugreg(vcpu->arch.db[1], 1);
1935 get_debugreg(vcpu->arch.db[2], 2);
1936 get_debugreg(vcpu->arch.db[3], 3);
1937
1938
1939
1940
1941 vcpu->arch.dr6 = svm->vmcb->save.dr6;
1942 vcpu->arch.dr7 = svm->vmcb->save.dr7;
1943 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
1944 set_dr_intercepts(svm);
1945 }
1946
1947 static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
1948 {
1949 struct vcpu_svm *svm = to_svm(vcpu);
1950
1951 if (vcpu->arch.guest_state_protected)
1952 return;
1953
1954 svm->vmcb->save.dr7 = value;
1955 vmcb_mark_dirty(svm->vmcb, VMCB_DR);
1956 }
1957
1958 static int pf_interception(struct kvm_vcpu *vcpu)
1959 {
1960 struct vcpu_svm *svm = to_svm(vcpu);
1961
1962 u64 fault_address = svm->vmcb->control.exit_info_2;
1963 u64 error_code = svm->vmcb->control.exit_info_1;
1964
1965 return kvm_handle_page_fault(vcpu, error_code, fault_address,
1966 static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
1967 svm->vmcb->control.insn_bytes : NULL,
1968 svm->vmcb->control.insn_len);
1969 }
1970
1971 static int npf_interception(struct kvm_vcpu *vcpu)
1972 {
1973 struct vcpu_svm *svm = to_svm(vcpu);
1974
1975 u64 fault_address = svm->vmcb->control.exit_info_2;
1976 u64 error_code = svm->vmcb->control.exit_info_1;
1977
1978 trace_kvm_page_fault(fault_address, error_code);
1979 return kvm_mmu_page_fault(vcpu, fault_address, error_code,
1980 static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
1981 svm->vmcb->control.insn_bytes : NULL,
1982 svm->vmcb->control.insn_len);
1983 }
1984
1985 static int db_interception(struct kvm_vcpu *vcpu)
1986 {
1987 struct kvm_run *kvm_run = vcpu->run;
1988 struct vcpu_svm *svm = to_svm(vcpu);
1989
1990 if (!(vcpu->guest_debug &
1991 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
1992 !svm->nmi_singlestep) {
1993 u32 payload = svm->vmcb->save.dr6 ^ DR6_ACTIVE_LOW;
1994 kvm_queue_exception_p(vcpu, DB_VECTOR, payload);
1995 return 1;
1996 }
1997
1998 if (svm->nmi_singlestep) {
1999 disable_nmi_singlestep(svm);
2000
2001 kvm_make_request(KVM_REQ_EVENT, vcpu);
2002 }
2003
2004 if (vcpu->guest_debug &
2005 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
2006 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2007 kvm_run->debug.arch.dr6 = svm->vmcb->save.dr6;
2008 kvm_run->debug.arch.dr7 = svm->vmcb->save.dr7;
2009 kvm_run->debug.arch.pc =
2010 svm->vmcb->save.cs.base + svm->vmcb->save.rip;
2011 kvm_run->debug.arch.exception = DB_VECTOR;
2012 return 0;
2013 }
2014
2015 return 1;
2016 }
2017
2018 static int bp_interception(struct kvm_vcpu *vcpu)
2019 {
2020 struct vcpu_svm *svm = to_svm(vcpu);
2021 struct kvm_run *kvm_run = vcpu->run;
2022
2023 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2024 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
2025 kvm_run->debug.arch.exception = BP_VECTOR;
2026 return 0;
2027 }
2028
2029 static int ud_interception(struct kvm_vcpu *vcpu)
2030 {
2031 return handle_ud(vcpu);
2032 }
2033
2034 static int ac_interception(struct kvm_vcpu *vcpu)
2035 {
2036 kvm_queue_exception_e(vcpu, AC_VECTOR, 0);
2037 return 1;
2038 }
2039
2040 static bool is_erratum_383(void)
2041 {
2042 int err, i;
2043 u64 value;
2044
2045 if (!erratum_383_found)
2046 return false;
2047
2048 value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
2049 if (err)
2050 return false;
2051
2052
2053 value &= ~(1ULL << 62);
2054
2055 if (value != 0xb600000000010015ULL)
2056 return false;
2057
2058
2059 for (i = 0; i < 6; ++i)
2060 native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
2061
2062 value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
2063 if (!err) {
2064 u32 low, high;
2065
2066 value &= ~(1ULL << 2);
2067 low = lower_32_bits(value);
2068 high = upper_32_bits(value);
2069
2070 native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
2071 }
2072
2073
2074 __flush_tlb_all();
2075
2076 return true;
2077 }
2078
2079 static void svm_handle_mce(struct kvm_vcpu *vcpu)
2080 {
2081 if (is_erratum_383()) {
/*
 * Erratum 383 triggered.  Guest state is corrupt, so kill the guest.
 */
2086 pr_err("KVM: Guest triggered AMD Erratum 383\n");
2087
2088 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2089
2090 return;
2091 }
2092
/*
 * On an #MC intercept the MCE handler is not called automatically in
 * the host, so do it by hand here.
 */
2097 kvm_machine_check();
2098 }
2099
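/*
 * The #MC intercept handler below is intentionally a no-op: the actual
 * machine check is handled by svm_handle_mce(), invoked from the vcpu_run
 * path right after #VMEXIT while still on the CPU that took the #MC, so by
 * the time the generic exit handler gets here there is nothing left to do.
 */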
2100 static int mc_interception(struct kvm_vcpu *vcpu)
2101 {
2102 return 1;
2103 }
2104
2105 static int shutdown_interception(struct kvm_vcpu *vcpu)
2106 {
2107 struct kvm_run *kvm_run = vcpu->run;
2108 struct vcpu_svm *svm = to_svm(vcpu);
2109
/*
 * The VM save area of an SEV-ES guest has already been encrypted so
 * it cannot be reinitialized - just terminate.
 */
2114 if (sev_es_guest(vcpu->kvm))
2115 return -EINVAL;
2116
/*
 * The VMCB is undefined after a SHUTDOWN intercept.  Wipe it and INIT
 * the vCPU so the VMCB is back in a known-good state before the
 * shutdown is reported to userspace.
 */
2125 clear_page(svm->vmcb);
2126 kvm_vcpu_reset(vcpu, true);
2127
2128 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
2129 return 0;
2130 }
2131
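/*
 * io_interception() below decodes exit_info_1 per the APM's IOIO intercept
 * layout: bit 0 is the direction (1 = IN), bit 2 flags a string instruction,
 * the SZ8/16/32 bits yield the 1/2/4-byte operand size and bits 31:16 carry
 * the port.  For example, "out %al, $0x80" produces in = 0, string = 0,
 * size = 1, port = 0x80, with exit_info_2 holding the RIP of the next
 * instruction.
 */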
2132 static int io_interception(struct kvm_vcpu *vcpu)
2133 {
2134 struct vcpu_svm *svm = to_svm(vcpu);
2135 u32 io_info = svm->vmcb->control.exit_info_1;
2136 int size, in, string;
2137 unsigned port;
2138
2139 ++vcpu->stat.io_exits;
2140 string = (io_info & SVM_IOIO_STR_MASK) != 0;
2141 in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
2142 port = io_info >> 16;
2143 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
2144
2145 if (string) {
2146 if (sev_es_guest(vcpu->kvm))
2147 return sev_es_string_io(svm, size, port, in);
2148 else
2149 return kvm_emulate_instruction(vcpu, 0);
2150 }
2151
2152 svm->next_rip = svm->vmcb->control.exit_info_2;
2153
2154 return kvm_fast_pio(vcpu, size, port, in);
2155 }
2156
2157 static int nmi_interception(struct kvm_vcpu *vcpu)
2158 {
2159 return 1;
2160 }
2161
2162 static int smi_interception(struct kvm_vcpu *vcpu)
2163 {
2164 return 1;
2165 }
2166
2167 static int intr_interception(struct kvm_vcpu *vcpu)
2168 {
2169 ++vcpu->stat.irq_exits;
2170 return 1;
2171 }
2172
2173 static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload)
2174 {
2175 struct vcpu_svm *svm = to_svm(vcpu);
2176 struct vmcb *vmcb12;
2177 struct kvm_host_map map;
2178 int ret;
2179
2180 if (nested_svm_check_permissions(vcpu))
2181 return 1;
2182
2183 ret = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
2184 if (ret) {
2185 if (ret == -EINVAL)
2186 kvm_inject_gp(vcpu, 0);
2187 return 1;
2188 }
2189
2190 vmcb12 = map.hva;
2191
2192 ret = kvm_skip_emulated_instruction(vcpu);
2193
2194 if (vmload) {
2195 svm_copy_vmloadsave_state(svm->vmcb, vmcb12);
2196 svm->sysenter_eip_hi = 0;
2197 svm->sysenter_esp_hi = 0;
2198 } else {
2199 svm_copy_vmloadsave_state(vmcb12, svm->vmcb);
2200 }
2201
2202 kvm_vcpu_unmap(vcpu, &map, true);
2203
2204 return ret;
2205 }
2206
2207 static int vmload_interception(struct kvm_vcpu *vcpu)
2208 {
2209 return vmload_vmsave_interception(vcpu, true);
2210 }
2211
2212 static int vmsave_interception(struct kvm_vcpu *vcpu)
2213 {
2214 return vmload_vmsave_interception(vcpu, false);
2215 }
2216
2217 static int vmrun_interception(struct kvm_vcpu *vcpu)
2218 {
2219 if (nested_svm_check_permissions(vcpu))
2220 return 1;
2221
2222 return nested_svm_vmrun(vcpu);
2223 }
2224
2225 enum {
2226 NONE_SVM_INSTR,
2227 SVM_INSTR_VMRUN,
2228 SVM_INSTR_VMLOAD,
2229 SVM_INSTR_VMSAVE,
2230 };
2231
2232
2233 static int svm_instr_opcode(struct kvm_vcpu *vcpu)
2234 {
2235 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
2236
2237 if (ctxt->b != 0x1 || ctxt->opcode_len != 2)
2238 return NONE_SVM_INSTR;
2239
2240 switch (ctxt->modrm) {
2241 case 0xd8:
2242 return SVM_INSTR_VMRUN;
2243 case 0xda:
2244 return SVM_INSTR_VMLOAD;
2245 case 0xdb:
2246 return SVM_INSTR_VMSAVE;
2247 default:
2248 break;
2249 }
2250
2251 return NONE_SVM_INSTR;
2252 }
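
/*
 * The check above relies on the encodings VMRUN = 0f 01 d8,
 * VMLOAD = 0f 01 da and VMSAVE = 0f 01 db: after decode, ctxt->b holds the
 * final opcode byte (0x01 of the two-byte 0f 01 opcode) and ctxt->modrm
 * distinguishes the three instructions.
 */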
2253
2254 static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
2255 {
2256 const int guest_mode_exit_codes[] = {
2257 [SVM_INSTR_VMRUN] = SVM_EXIT_VMRUN,
2258 [SVM_INSTR_VMLOAD] = SVM_EXIT_VMLOAD,
2259 [SVM_INSTR_VMSAVE] = SVM_EXIT_VMSAVE,
2260 };
2261 int (*const svm_instr_handlers[])(struct kvm_vcpu *vcpu) = {
2262 [SVM_INSTR_VMRUN] = vmrun_interception,
2263 [SVM_INSTR_VMLOAD] = vmload_interception,
2264 [SVM_INSTR_VMSAVE] = vmsave_interception,
2265 };
2266 struct vcpu_svm *svm = to_svm(vcpu);
2267 int ret;
2268
2269 if (is_guest_mode(vcpu)) {
2270
2271 ret = nested_svm_simple_vmexit(svm, guest_mode_exit_codes[opcode]);
2272 if (ret)
2273 return ret;
2274 return 1;
2275 }
2276 return svm_instr_handlers[opcode](vcpu);
2277 }
2278
/*
 * Work around Erratum 1096 (Fam 17h, models 00h-0Fh): when a guest executes
 * VMRUN/VMLOAD/VMSAVE with RAX pointing at certain reserved memory regions,
 * the CPU raises #GP before checking the instruction intercept, so the
 * expected #VMEXIT never happens.  Decode the faulting instruction here and
 * either emulate the intended SVM exit, hand VMware-backdoor accesses to the
 * emulator, or reinject the #GP.
 */
2287 static int gp_interception(struct kvm_vcpu *vcpu)
2288 {
2289 struct vcpu_svm *svm = to_svm(vcpu);
2290 u32 error_code = svm->vmcb->control.exit_info_1;
2291 int opcode;
2292
2293 /* Both #GP cases described above are reported with a zero error code. */
2294 if (error_code)
2295 goto reinject;
2296
2297 /* Decode the instruction for use by svm_instr_opcode() and the emulation below. */
2298 if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
2299 goto reinject;
2300
2301 opcode = svm_instr_opcode(vcpu);
2302
2303 if (opcode == NONE_SVM_INSTR) {
2304 if (!enable_vmware_backdoor)
2305 goto reinject;
2306
2307 /*
2308 * VMware backdoor emulation on #GP interception only handles
2309 * IN{S}, OUT{S}, and RDPMC.
2310 */
2311 if (!is_guest_mode(vcpu))
2312 return kvm_emulate_instruction(vcpu,
2313 EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
2314 } else {
2315 /* All SVM instructions expect a page aligned RAX. */
2316 if (svm->vmcb->save.rax & ~PAGE_MASK)
2317 goto reinject;
2318
2319 return emulate_svm_instr(vcpu, opcode);
2320 }
2321
2322 reinject:
2323 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
2324 return 1;
2325 }
2326
2327 void svm_set_gif(struct vcpu_svm *svm, bool value)
2328 {
2329 if (value) {
2330 /*
2331 * If VGIF is enabled, the STGI intercept is only added to
2332 * detect the opening of the SMI/NMI window; remove it now.
2333 * Likewise, clear the VINTR intercept, we will set it
2334 * again while processing KVM_REQ_EVENT if needed.
2335 */
2336 if (vgif)
2337 svm_clr_intercept(svm, INTERCEPT_STGI);
2338 if (svm_is_intercept(svm, INTERCEPT_VINTR))
2339 svm_clear_vintr(svm);
2340
2341 enable_gif(svm);
2342 if (svm->vcpu.arch.smi_pending ||
2343 svm->vcpu.arch.nmi_pending ||
2344 kvm_cpu_has_injectable_intr(&svm->vcpu))
2345 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
2346 } else {
2347 disable_gif(svm);
2348
2349 /*
2350 * After a CLGI no interrupts should come. But if vGIF is in
2351 * use, we still rely on the VINTR intercept (rather than STGI)
2352 * to detect an open interrupt window.
2353 */
2354 if (!vgif)
2355 svm_clear_vintr(svm);
2356 }
2357 }
2358
2359 static int stgi_interception(struct kvm_vcpu *vcpu)
2360 {
2361 int ret;
2362
2363 if (nested_svm_check_permissions(vcpu))
2364 return 1;
2365
2366 ret = kvm_skip_emulated_instruction(vcpu);
2367 svm_set_gif(to_svm(vcpu), true);
2368 return ret;
2369 }
2370
2371 static int clgi_interception(struct kvm_vcpu *vcpu)
2372 {
2373 int ret;
2374
2375 if (nested_svm_check_permissions(vcpu))
2376 return 1;
2377
2378 ret = kvm_skip_emulated_instruction(vcpu);
2379 svm_set_gif(to_svm(vcpu), false);
2380 return ret;
2381 }
2382
2383 static int invlpga_interception(struct kvm_vcpu *vcpu)
2384 {
2385 gva_t gva = kvm_rax_read(vcpu);
2386 u32 asid = kvm_rcx_read(vcpu);
2387
2388 /* Outside long mode only the low 32 bits of the address are used. */
2389 if (!is_long_mode(vcpu))
2390 gva = (u32)gva;
2391
2392 trace_kvm_invlpga(to_svm(vcpu)->vmcb->save.rip, asid, gva);
2393
2394 /* Treat INVLPGA the same as INVLPG; the ASID operand is not used here. */
2395 kvm_mmu_invlpg(vcpu, gva);
2396
2397 return kvm_skip_emulated_instruction(vcpu);
2398 }
2399
2400 static int skinit_interception(struct kvm_vcpu *vcpu)
2401 {
2402 trace_kvm_skinit(to_svm(vcpu)->vmcb->save.rip, kvm_rax_read(vcpu));
2403
2404 kvm_queue_exception(vcpu, UD_VECTOR);
2405 return 1;
2406 }
2407
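/*
 * Decode the task-switch exit information (TSS selector, switch reason, and
 * any error code from the original event) and hand the actual switch to the
 * common emulation code via kvm_task_switch().
 */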
2408 static int task_switch_interception(struct kvm_vcpu *vcpu)
2409 {
2410 struct vcpu_svm *svm = to_svm(vcpu);
2411 u16 tss_selector;
2412 int reason;
2413 int int_type = svm->vmcb->control.exit_int_info &
2414 SVM_EXITINTINFO_TYPE_MASK;
2415 int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
2416 uint32_t type =
2417 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
2418 uint32_t idt_v =
2419 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
2420 bool has_error_code = false;
2421 u32 error_code = 0;
2422
2423 tss_selector = (u16)svm->vmcb->control.exit_info_1;
2424
2425 if (svm->vmcb->control.exit_info_2 &
2426 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
2427 reason = TASK_SWITCH_IRET;
2428 else if (svm->vmcb->control.exit_info_2 &
2429 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
2430 reason = TASK_SWITCH_JMP;
2431 else if (idt_v)
2432 reason = TASK_SWITCH_GATE;
2433 else
2434 reason = TASK_SWITCH_CALL;
2435
2436 if (reason == TASK_SWITCH_GATE) {
2437 switch (type) {
2438 case SVM_EXITINTINFO_TYPE_NMI:
2439 vcpu->arch.nmi_injected = false;
2440 break;
2441 case SVM_EXITINTINFO_TYPE_EXEPT:
2442 if (svm->vmcb->control.exit_info_2 &
2443 (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
2444 has_error_code = true;
2445 error_code =
2446 (u32)svm->vmcb->control.exit_info_2;
2447 }
2448 kvm_clear_exception_queue(vcpu);
2449 break;
2450 case SVM_EXITINTINFO_TYPE_INTR:
2451 case SVM_EXITINTINFO_TYPE_SOFT:
2452 kvm_clear_interrupt_queue(vcpu);
2453 break;
2454 default:
2455 break;
2456 }
2457 }
2458
2459 if (reason != TASK_SWITCH_GATE ||
2460 int_type == SVM_EXITINTINFO_TYPE_SOFT ||
2461 (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
2462 (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
2463 if (!svm_skip_emulated_instruction(vcpu))
2464 return 0;
2465 }
2466
2467 if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
2468 int_vec = -1;
2469
2470 return kvm_task_switch(vcpu, tss_selector, int_vec, reason,
2471 has_error_code, error_code);
2472 }
2473
2474 static int iret_interception(struct kvm_vcpu *vcpu)
2475 {
2476 struct vcpu_svm *svm = to_svm(vcpu);
2477
2478 ++vcpu->stat.nmi_window_exits;
2479 vcpu->arch.hflags |= HF_IRET_MASK;
2480 if (!sev_es_guest(vcpu->kvm)) {
2481 svm_clr_intercept(svm, INTERCEPT_IRET);
2482 svm->nmi_iret_rip = kvm_rip_read(vcpu);
2483 }
2484 kvm_make_request(KVM_REQ_EVENT, vcpu);
2485 return 1;
2486 }
2487
2488 static int invlpg_interception(struct kvm_vcpu *vcpu)
2489 {
2490 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2491 return kvm_emulate_instruction(vcpu, 0);
2492
2493 kvm_mmu_invlpg(vcpu, to_svm(vcpu)->vmcb->control.exit_info_1);
2494 return kvm_skip_emulated_instruction(vcpu);
2495 }
2496
2497 static int emulate_on_interception(struct kvm_vcpu *vcpu)
2498 {
2499 return kvm_emulate_instruction(vcpu, 0);
2500 }
2501
2502 static int rsm_interception(struct kvm_vcpu *vcpu)
2503 {
2504 return kvm_emulate_instruction_from_buffer(vcpu, rsm_ins_bytes, 2);
2505 }
2506
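/*
 * With a selective CR0 intercept, L1 only wants a VMEXIT when bits other than
 * CR0.TS and CR0.MP change. Synthesize SVM_EXIT_CR0_SEL_WRITE for L1 when
 * that is the case; otherwise let L0 handle the write.
 */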
2507 static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu,
2508 unsigned long val)
2509 {
2510 struct vcpu_svm *svm = to_svm(vcpu);
2511 unsigned long cr0 = vcpu->arch.cr0;
2512 bool ret = false;
2513
2514 if (!is_guest_mode(vcpu) ||
2515 (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))))
2516 return false;
2517
2518 cr0 &= ~SVM_CR0_SELECTIVE_MASK;
2519 val &= ~SVM_CR0_SELECTIVE_MASK;
2520
2521 if (cr0 ^ val) {
2522 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
2523 ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
2524 }
2525
2526 return ret;
2527 }
2528
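/* Decode assists set bit 63 of EXITINFO1 when the register operand information is valid. */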
2529 #define CR_VALID (1ULL << 63)
2530
2531 static int cr_interception(struct kvm_vcpu *vcpu)
2532 {
2533 struct vcpu_svm *svm = to_svm(vcpu);
2534 int reg, cr;
2535 unsigned long val;
2536 int err;
2537
2538 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2539 return emulate_on_interception(vcpu);
2540
2541 if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
2542 return emulate_on_interception(vcpu);
2543
2544 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2545 if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
2546 cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
2547 else
2548 cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
2549
2550 err = 0;
2551 if (cr >= 16) {
2552 cr -= 16;
2553 val = kvm_register_read(vcpu, reg);
2554 trace_kvm_cr_write(cr, val);
2555 switch (cr) {
2556 case 0:
2557 if (!check_selective_cr0_intercepted(vcpu, val))
2558 err = kvm_set_cr0(vcpu, val);
2559 else
2560 return 1;
2561
2562 break;
2563 case 3:
2564 err = kvm_set_cr3(vcpu, val);
2565 break;
2566 case 4:
2567 err = kvm_set_cr4(vcpu, val);
2568 break;
2569 case 8:
2570 err = kvm_set_cr8(vcpu, val);
2571 break;
2572 default:
2573 WARN(1, "unhandled write to CR%d", cr);
2574 kvm_queue_exception(vcpu, UD_VECTOR);
2575 return 1;
2576 }
2577 } else {
2578 switch (cr) {
2579 case 0:
2580 val = kvm_read_cr0(vcpu);
2581 break;
2582 case 2:
2583 val = vcpu->arch.cr2;
2584 break;
2585 case 3:
2586 val = kvm_read_cr3(vcpu);
2587 break;
2588 case 4:
2589 val = kvm_read_cr4(vcpu);
2590 break;
2591 case 8:
2592 val = kvm_get_cr8(vcpu);
2593 break;
2594 default:
2595 WARN(1, "unhandled read from CR%d", cr);
2596 kvm_queue_exception(vcpu, UD_VECTOR);
2597 return 1;
2598 }
2599 kvm_register_write(vcpu, reg, val);
2600 trace_kvm_cr_read(cr, val);
2601 }
2602 return kvm_complete_insn_gp(vcpu, err);
2603 }
2604
2605 static int cr_trap(struct kvm_vcpu *vcpu)
2606 {
2607 struct vcpu_svm *svm = to_svm(vcpu);
2608 unsigned long old_value, new_value;
2609 unsigned int cr;
2610 int ret = 0;
2611
2612 new_value = (unsigned long)svm->vmcb->control.exit_info_1;
2613
2614 cr = svm->vmcb->control.exit_code - SVM_EXIT_CR0_WRITE_TRAP;
2615 switch (cr) {
2616 case 0:
2617 old_value = kvm_read_cr0(vcpu);
2618 svm_set_cr0(vcpu, new_value);
2619
2620 kvm_post_set_cr0(vcpu, old_value, new_value);
2621 break;
2622 case 4:
2623 old_value = kvm_read_cr4(vcpu);
2624 svm_set_cr4(vcpu, new_value);
2625
2626 kvm_post_set_cr4(vcpu, old_value, new_value);
2627 break;
2628 case 8:
2629 ret = kvm_set_cr8(vcpu, new_value);
2630 break;
2631 default:
2632 WARN(1, "unhandled CR%d write trap", cr);
2633 kvm_queue_exception(vcpu, UD_VECTOR);
2634 return 1;
2635 }
2636
2637 return kvm_complete_insn_gp(vcpu, ret);
2638 }
2639
2640 static int dr_interception(struct kvm_vcpu *vcpu)
2641 {
2642 struct vcpu_svm *svm = to_svm(vcpu);
2643 int reg, dr;
2644 unsigned long val;
2645 int err = 0;
2646
2647 if (vcpu->guest_debug == 0) {
2648 /*
2649 * No more DR vmexits; force a reload of the debug registers
2650 * and reenter on this instruction. The next vmexit will
2651 * retrieve the full state of the debug registers.
2652 */
2653 clr_dr_intercepts(svm);
2654 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
2655 return 1;
2656 }
2657
2658 if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
2659 return emulate_on_interception(vcpu);
2660
2661 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2662 dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
2663 if (dr >= 16) {
2664 dr -= 16;
2665 val = kvm_register_read(vcpu, reg);
2666 err = kvm_set_dr(vcpu, dr, val);
2667 } else {
2668 kvm_get_dr(vcpu, dr, &val);
2669 kvm_register_write(vcpu, reg, val);
2670 }
2671
2672 return kvm_complete_insn_gp(vcpu, err);
2673 }
2674
2675 static int cr8_write_interception(struct kvm_vcpu *vcpu)
2676 {
2677 int r;
2678
2679 u8 cr8_prev = kvm_get_cr8(vcpu);
2680
2681 r = cr_interception(vcpu);
2682 if (lapic_in_kernel(vcpu))
2683 return r;
2684 if (cr8_prev <= kvm_get_cr8(vcpu))
2685 return r;
2686 vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
2687 return 0;
2688 }
2689
2690 static int efer_trap(struct kvm_vcpu *vcpu)
2691 {
2692 struct msr_data msr_info;
2693 int ret;
2694
2695 /*
2696 * Clear the EFER_SVME bit from EFER. The SVM code always sets
2697 * this bit in svm_set_efer(), but __kvm_valid_efer() checks it
2698 * against whether the guest has X86_FEATURE_SVM; masking it off
2699 * avoids a spurious failure for guests without that feature.
2700 */
2701 msr_info.host_initiated = false;
2702 msr_info.index = MSR_EFER;
2703 msr_info.data = to_svm(vcpu)->vmcb->control.exit_info_1 & ~EFER_SVME;
2704 ret = kvm_set_msr_common(vcpu, &msr_info);
2705
2706 return kvm_complete_insn_gp(vcpu, ret);
2707 }
2708
2709 static int svm_get_msr_feature(struct kvm_msr_entry *msr)
2710 {
2711 msr->data = 0;
2712
2713 switch (msr->index) {
2714 case MSR_F10H_DECFG:
2715 if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
2716 msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
2717 break;
2718 case MSR_IA32_PERF_CAPABILITIES:
2719 return 0;
2720 default:
2721 return KVM_MSR_RET_INVALID;
2722 }
2723
2724 return 0;
2725 }
2726
2727 static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2728 {
2729 struct vcpu_svm *svm = to_svm(vcpu);
2730
2731 switch (msr_info->index) {
2732 case MSR_AMD64_TSC_RATIO:
2733 if (!msr_info->host_initiated && !svm->tsc_scaling_enabled)
2734 return 1;
2735 msr_info->data = svm->tsc_ratio_msr;
2736 break;
2737 case MSR_STAR:
2738 msr_info->data = svm->vmcb01.ptr->save.star;
2739 break;
2740 #ifdef CONFIG_X86_64
2741 case MSR_LSTAR:
2742 msr_info->data = svm->vmcb01.ptr->save.lstar;
2743 break;
2744 case MSR_CSTAR:
2745 msr_info->data = svm->vmcb01.ptr->save.cstar;
2746 break;
2747 case MSR_KERNEL_GS_BASE:
2748 msr_info->data = svm->vmcb01.ptr->save.kernel_gs_base;
2749 break;
2750 case MSR_SYSCALL_MASK:
2751 msr_info->data = svm->vmcb01.ptr->save.sfmask;
2752 break;
2753 #endif
2754 case MSR_IA32_SYSENTER_CS:
2755 msr_info->data = svm->vmcb01.ptr->save.sysenter_cs;
2756 break;
2757 case MSR_IA32_SYSENTER_EIP:
2758 msr_info->data = (u32)svm->vmcb01.ptr->save.sysenter_eip;
2759 if (guest_cpuid_is_intel(vcpu))
2760 msr_info->data |= (u64)svm->sysenter_eip_hi << 32;
2761 break;
2762 case MSR_IA32_SYSENTER_ESP:
2763 msr_info->data = svm->vmcb01.ptr->save.sysenter_esp;
2764 if (guest_cpuid_is_intel(vcpu))
2765 msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
2766 break;
2767 case MSR_TSC_AUX:
2768 msr_info->data = svm->tsc_aux;
2769 break;
2770 case MSR_IA32_DEBUGCTLMSR:
2771 case MSR_IA32_LASTBRANCHFROMIP:
2772 case MSR_IA32_LASTBRANCHTOIP:
2773 case MSR_IA32_LASTINTFROMIP:
2774 case MSR_IA32_LASTINTTOIP:
2775 msr_info->data = svm_get_lbr_msr(svm, msr_info->index);
2776 break;
2777 case MSR_VM_HSAVE_PA:
2778 msr_info->data = svm->nested.hsave_msr;
2779 break;
2780 case MSR_VM_CR:
2781 msr_info->data = svm->nested.vm_cr_msr;
2782 break;
2783 case MSR_IA32_SPEC_CTRL:
2784 if (!msr_info->host_initiated &&
2785 !guest_has_spec_ctrl_msr(vcpu))
2786 return 1;
2787
2788 if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL))
2789 msr_info->data = svm->vmcb->save.spec_ctrl;
2790 else
2791 msr_info->data = svm->spec_ctrl;
2792 break;
2793 case MSR_AMD64_VIRT_SPEC_CTRL:
2794 if (!msr_info->host_initiated &&
2795 !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
2796 return 1;
2797
2798 msr_info->data = svm->virt_spec_ctrl;
2799 break;
2800 case MSR_F15H_IC_CFG: {
2801
2802 int family, model;
2803
2804 family = guest_cpuid_family(vcpu);
2805 model = guest_cpuid_model(vcpu);
2806
2807 if (family < 0 || model < 0)
2808 return kvm_get_msr_common(vcpu, msr_info);
2809
2810 msr_info->data = 0;
2811
2812 if (family == 0x15 &&
2813 (model >= 0x2 && model < 0x20))
2814 msr_info->data = 0x1E;
2815 }
2816 break;
2817 case MSR_F10H_DECFG:
2818 msr_info->data = svm->msr_decfg;
2819 break;
2820 default:
2821 return kvm_get_msr_common(vcpu, msr_info);
2822 }
2823 return 0;
2824 }
2825
2826 static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
2827 {
2828 struct vcpu_svm *svm = to_svm(vcpu);
2829 if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->sev_es.ghcb))
2830 return kvm_complete_insn_gp(vcpu, err);
2831
2832 ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 1);
2833 ghcb_set_sw_exit_info_2(svm->sev_es.ghcb,
2834 X86_TRAP_GP |
2835 SVM_EVTINJ_TYPE_EXEPT |
2836 SVM_EVTINJ_VALID);
2837 return 1;
2838 }
2839
2840 static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
2841 {
2842 struct vcpu_svm *svm = to_svm(vcpu);
2843 int svm_dis, chg_mask;
2844
2845 if (data & ~SVM_VM_CR_VALID_MASK)
2846 return 1;
2847
2848 chg_mask = SVM_VM_CR_VALID_MASK;
2849
2850 if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
2851 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
2852
2853 svm->nested.vm_cr_msr &= ~chg_mask;
2854 svm->nested.vm_cr_msr |= (data & chg_mask);
2855
2856 svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
2857
2858 /* Check for SVMDIS being set while EFER.SVME is set. */
2859 if (svm_dis && (vcpu->arch.efer & EFER_SVME))
2860 return 1;
2861
2862 return 0;
2863 }
2864
2865 static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
2866 {
2867 struct vcpu_svm *svm = to_svm(vcpu);
2868 int r;
2869
2870 u32 ecx = msr->index;
2871 u64 data = msr->data;
2872 switch (ecx) {
2873 case MSR_AMD64_TSC_RATIO:
2874
2875 if (!svm->tsc_scaling_enabled) {
2876
2877 if (!msr->host_initiated)
2878 return 1;
2879
2880 /*
2881 * If TSC scaling is not enabled, the MSR must stay at its
2882 * default value. Tolerate a host-initiated write of 0 (or of
2883 * the current value), since some VMMs write 0 here even when
2884 * TSC scaling is not exposed to the guest; reject anything
2885 * else.
2886 */
2887 if (data != 0 && data != svm->tsc_ratio_msr)
2888 return 1;
2889 break;
2890 }
2891
2892 if (data & SVM_TSC_RATIO_RSVD)
2893 return 1;
2894
2895 svm->tsc_ratio_msr = data;
2896
2897 if (svm->tsc_scaling_enabled && is_guest_mode(vcpu))
2898 nested_svm_update_tsc_ratio_msr(vcpu);
2899
2900 break;
2901 case MSR_IA32_CR_PAT:
2902 if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
2903 return 1;
2904 vcpu->arch.pat = data;
2905 svm->vmcb01.ptr->save.g_pat = data;
2906 if (is_guest_mode(vcpu))
2907 nested_vmcb02_compute_g_pat(svm);
2908 vmcb_mark_dirty(svm->vmcb, VMCB_NPT);
2909 break;
2910 case MSR_IA32_SPEC_CTRL:
2911 if (!msr->host_initiated &&
2912 !guest_has_spec_ctrl_msr(vcpu))
2913 return 1;
2914
2915 if (kvm_spec_ctrl_test_value(data))
2916 return 1;
2917
2918 if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL))
2919 svm->vmcb->save.spec_ctrl = data;
2920 else
2921 svm->spec_ctrl = data;
2922 if (!data)
2923 break;
2924
2925 /*
2926 * For the non-nested case:
2927 * When the MSR is written to a non-zero value for the first
2928 * time, disable the write intercept so that the guest can
2929 * access the MSR directly.
2930 *
2931 * For the nested case:
2932 * The L2 MSR bitmap is merged in nested_svm_vmrun_msrpm();
2933 * updating the L1 bit here means the merged bitmap will pass
2934 * the MSR through as well.
2935 */
2936 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
2937 break;
2938 case MSR_IA32_PRED_CMD:
2939 if (!msr->host_initiated &&
2940 !guest_has_pred_cmd_msr(vcpu))
2941 return 1;
2942
2943 if (data & ~PRED_CMD_IBPB)
2944 return 1;
2945 if (!boot_cpu_has(X86_FEATURE_IBPB))
2946 return 1;
2947 if (!data)
2948 break;
2949
2950 wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
2951 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
2952 break;
2953 case MSR_AMD64_VIRT_SPEC_CTRL:
2954 if (!msr->host_initiated &&
2955 !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
2956 return 1;
2957
2958 if (data & ~SPEC_CTRL_SSBD)
2959 return 1;
2960
2961 svm->virt_spec_ctrl = data;
2962 break;
2963 case MSR_STAR:
2964 svm->vmcb01.ptr->save.star = data;
2965 break;
2966 #ifdef CONFIG_X86_64
2967 case MSR_LSTAR:
2968 svm->vmcb01.ptr->save.lstar = data;
2969 break;
2970 case MSR_CSTAR:
2971 svm->vmcb01.ptr->save.cstar = data;
2972 break;
2973 case MSR_KERNEL_GS_BASE:
2974 svm->vmcb01.ptr->save.kernel_gs_base = data;
2975 break;
2976 case MSR_SYSCALL_MASK:
2977 svm->vmcb01.ptr->save.sfmask = data;
2978 break;
2979 #endif
2980 case MSR_IA32_SYSENTER_CS:
2981 svm->vmcb01.ptr->save.sysenter_cs = data;
2982 break;
2983 case MSR_IA32_SYSENTER_EIP:
2984 svm->vmcb01.ptr->save.sysenter_eip = (u32)data;
2985 /*
2986 * MSR_IA32_SYSENTER_{EIP|ESP} are only intercepted when the
2987 * guest is configured with an Intel vendor ID (for cross
2988 * vendor migration). In that case the intercept is used to
2989 * track the high 32 bits of these MSRs, matching Intel's
2990 * SYSENTER/SYSEXIT behaviour.
2991 */
2992 svm->sysenter_eip_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
2993 break;
2994 case MSR_IA32_SYSENTER_ESP:
2995 svm->vmcb01.ptr->save.sysenter_esp = (u32)data;
2996 svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
2997 break;
2998 case MSR_TSC_AUX:
2999 /*
3000 * TSC_AUX is usually changed only during boot and never read
3001 * directly. Intercept TSC_AUX instead of exposing it to the
3002 * guest via direct_access_msrs, and switch it via user return.
3003 */
3004 preempt_disable();
3005 r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
3006 preempt_enable();
3007 if (r)
3008 return 1;
3009
3010 svm->tsc_aux = data;
3011 break;
3012 case MSR_IA32_DEBUGCTLMSR:
3013 if (!lbrv) {
3014 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
3015 __func__, data);
3016 break;
3017 }
3018 if (data & DEBUGCTL_RESERVED_BITS)
3019 return 1;
3020
3021 if (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK)
3022 svm->vmcb->save.dbgctl = data;
3023 else
3024 svm->vmcb01.ptr->save.dbgctl = data;
3025
3026 svm_update_lbrv(vcpu);
3027
3028 break;
3029 case MSR_VM_HSAVE_PA:
3030 /*
3031 * Old kernels did not validate the value written to
3032 * MSR_VM_HSAVE_PA. Allow KVM_SET_MSR to set an invalid
3033 * value here in order to allow live migration of guests that
3034 * originate from such kernels.
3035 */
3036 if (!msr->host_initiated && !page_address_valid(vcpu, data))
3037 return 1;
3038
3039 svm->nested.hsave_msr = data & PAGE_MASK;
3040 break;
3041 case MSR_VM_CR:
3042 return svm_set_vm_cr(vcpu, data);
3043 case MSR_VM_IGNNE:
3044 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
3045 break;
3046 case MSR_F10H_DECFG: {
3047 struct kvm_msr_entry msr_entry;
3048
3049 msr_entry.index = msr->index;
3050 if (svm_get_msr_feature(&msr_entry))
3051 return 1;
3052
3053 /* Check the supported bits. */
3054 if (data & ~msr_entry.data)
3055 return 1;
3056
3057 /* Don't allow the guest to change a bit, #GP. */
3058 if (!msr->host_initiated && (data ^ msr_entry.data))
3059 return 1;
3060
3061 svm->msr_decfg = data;
3062 break;
3063 }
3064 default:
3065 return kvm_set_msr_common(vcpu, msr);
3066 }
3067 return 0;
3068 }
3069
3070 static int msr_interception(struct kvm_vcpu *vcpu)
3071 {
3072 if (to_svm(vcpu)->vmcb->control.exit_info_1)
3073 return kvm_emulate_wrmsr(vcpu);
3074 else
3075 return kvm_emulate_rdmsr(vcpu);
3076 }
3077
3078 static int interrupt_window_interception(struct kvm_vcpu *vcpu)
3079 {
3080 kvm_make_request(KVM_REQ_EVENT, vcpu);
3081 svm_clear_vintr(to_svm(vcpu));
3082
3083
3084 /*
3085 * With AVIC, the only reason to end up here is a pending ExtINT:
3086 * AVIC was temporarily inhibited in order to request the IRQ
3087 * window, so re-enable it now.
3088 *
3089 * If running nested, still remove the VM wide AVIC inhibit to
3090 * support the case in which the interrupt window was requested
3091 * while the vCPU was not running nested; vCPUs that are running
3092 * nested keep their own nested-specific inhibit and will not
3093 * re-enable AVIC here anyway.
3094 */
3095 kvm_clear_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN);
3096
3097 ++vcpu->stat.irq_window_exits;
3098 return 1;
3099 }
3100
3101 static int pause_interception(struct kvm_vcpu *vcpu)
3102 {
3103 bool in_kernel;
3104
3105 /*
3106 * CPL is not made available for an SEV-ES guest, so treat such a
3107 * guest as not running in the kernel for the directed-yield check.
3108 */
3109 in_kernel = !sev_es_guest(vcpu->kvm) && svm_get_cpl(vcpu) == 0;
3110
3111 grow_ple_window(vcpu);
3112
3113 kvm_vcpu_on_spin(vcpu, in_kernel);
3114 return kvm_skip_emulated_instruction(vcpu);
3115 }
3116
3117 static int invpcid_interception(struct kvm_vcpu *vcpu)
3118 {
3119 struct vcpu_svm *svm = to_svm(vcpu);
3120 unsigned long type;
3121 gva_t gva;
3122
3123 if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
3124 kvm_queue_exception(vcpu, UD_VECTOR);
3125 return 1;
3126 }
3127
3128 /*
3129 * With decode assists enabled, the VMCB exit info fields carry
3130 * the INVPCID operands (the invalidation type and the linear
3131 * address of the descriptor); kvm_handle_invpcid() validates both.
3132 */
3133 type = svm->vmcb->control.exit_info_2;
3134 gva = svm->vmcb->control.exit_info_1;
3135
3136 return kvm_handle_invpcid(vcpu, type, gva);
3137 }
3138
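/*
 * Dispatch table from SVM exit codes to handlers. Handlers return 1 to
 * resume the guest, 0 to exit to userspace, or a negative value on error.
 */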
3139 static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
3140 [SVM_EXIT_READ_CR0] = cr_interception,
3141 [SVM_EXIT_READ_CR3] = cr_interception,
3142 [SVM_EXIT_READ_CR4] = cr_interception,
3143 [SVM_EXIT_READ_CR8] = cr_interception,
3144 [SVM_EXIT_CR0_SEL_WRITE] = cr_interception,
3145 [SVM_EXIT_WRITE_CR0] = cr_interception,
3146 [SVM_EXIT_WRITE_CR3] = cr_interception,
3147 [SVM_EXIT_WRITE_CR4] = cr_interception,
3148 [SVM_EXIT_WRITE_CR8] = cr8_write_interception,
3149 [SVM_EXIT_READ_DR0] = dr_interception,
3150 [SVM_EXIT_READ_DR1] = dr_interception,
3151 [SVM_EXIT_READ_DR2] = dr_interception,
3152 [SVM_EXIT_READ_DR3] = dr_interception,
3153 [SVM_EXIT_READ_DR4] = dr_interception,
3154 [SVM_EXIT_READ_DR5] = dr_interception,
3155 [SVM_EXIT_READ_DR6] = dr_interception,
3156 [SVM_EXIT_READ_DR7] = dr_interception,
3157 [SVM_EXIT_WRITE_DR0] = dr_interception,
3158 [SVM_EXIT_WRITE_DR1] = dr_interception,
3159 [SVM_EXIT_WRITE_DR2] = dr_interception,
3160 [SVM_EXIT_WRITE_DR3] = dr_interception,
3161 [SVM_EXIT_WRITE_DR4] = dr_interception,
3162 [SVM_EXIT_WRITE_DR5] = dr_interception,
3163 [SVM_EXIT_WRITE_DR6] = dr_interception,
3164 [SVM_EXIT_WRITE_DR7] = dr_interception,
3165 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
3166 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
3167 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
3168 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
3169 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
3170 [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
3171 [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
3172 [SVM_EXIT_INTR] = intr_interception,
3173 [SVM_EXIT_NMI] = nmi_interception,
3174 [SVM_EXIT_SMI] = smi_interception,
3175 [SVM_EXIT_VINTR] = interrupt_window_interception,
3176 [SVM_EXIT_RDPMC] = kvm_emulate_rdpmc,
3177 [SVM_EXIT_CPUID] = kvm_emulate_cpuid,
3178 [SVM_EXIT_IRET] = iret_interception,
3179 [SVM_EXIT_INVD] = kvm_emulate_invd,
3180 [SVM_EXIT_PAUSE] = pause_interception,
3181 [SVM_EXIT_HLT] = kvm_emulate_halt,
3182 [SVM_EXIT_INVLPG] = invlpg_interception,
3183 [SVM_EXIT_INVLPGA] = invlpga_interception,
3184 [SVM_EXIT_IOIO] = io_interception,
3185 [SVM_EXIT_MSR] = msr_interception,
3186 [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
3187 [SVM_EXIT_SHUTDOWN] = shutdown_interception,
3188 [SVM_EXIT_VMRUN] = vmrun_interception,
3189 [SVM_EXIT_VMMCALL] = kvm_emulate_hypercall,
3190 [SVM_EXIT_VMLOAD] = vmload_interception,
3191 [SVM_EXIT_VMSAVE] = vmsave_interception,
3192 [SVM_EXIT_STGI] = stgi_interception,
3193 [SVM_EXIT_CLGI] = clgi_interception,
3194 [SVM_EXIT_SKINIT] = skinit_interception,
3195 [SVM_EXIT_RDTSCP] = kvm_handle_invalid_op,
3196 [SVM_EXIT_WBINVD] = kvm_emulate_wbinvd,
3197 [SVM_EXIT_MONITOR] = kvm_emulate_monitor,
3198 [SVM_EXIT_MWAIT] = kvm_emulate_mwait,
3199 [SVM_EXIT_XSETBV] = kvm_emulate_xsetbv,
3200 [SVM_EXIT_RDPRU] = kvm_handle_invalid_op,
3201 [SVM_EXIT_EFER_WRITE_TRAP] = efer_trap,
3202 [SVM_EXIT_CR0_WRITE_TRAP] = cr_trap,
3203 [SVM_EXIT_CR4_WRITE_TRAP] = cr_trap,
3204 [SVM_EXIT_CR8_WRITE_TRAP] = cr_trap,
3205 [SVM_EXIT_INVPCID] = invpcid_interception,
3206 [SVM_EXIT_NPF] = npf_interception,
3207 [SVM_EXIT_RSM] = rsm_interception,
3208 [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
3209 [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
3210 [SVM_EXIT_VMGEXIT] = sev_handle_vmgexit,
3211 };
3212
3213 static void dump_vmcb(struct kvm_vcpu *vcpu)
3214 {
3215 struct vcpu_svm *svm = to_svm(vcpu);
3216 struct vmcb_control_area *control = &svm->vmcb->control;
3217 struct vmcb_save_area *save = &svm->vmcb->save;
3218 struct vmcb_save_area *save01 = &svm->vmcb01.ptr->save;
3219
3220 if (!dump_invalid_vmcb) {
3221 pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
3222 return;
3223 }
3224
3225 pr_err("VMCB %p, last attempted VMRUN on CPU %d\n",
3226 svm->current_vmcb->ptr, vcpu->arch.last_vmentry_cpu);
3227 pr_err("VMCB Control Area:\n");
3228 pr_err("%-20s%04x\n", "cr_read:", control->intercepts[INTERCEPT_CR] & 0xffff);
3229 pr_err("%-20s%04x\n", "cr_write:", control->intercepts[INTERCEPT_CR] >> 16);
3230 pr_err("%-20s%04x\n", "dr_read:", control->intercepts[INTERCEPT_DR] & 0xffff);
3231 pr_err("%-20s%04x\n", "dr_write:", control->intercepts[INTERCEPT_DR] >> 16);
3232 pr_err("%-20s%08x\n", "exceptions:", control->intercepts[INTERCEPT_EXCEPTION]);
3233 pr_err("%-20s%08x %08x\n", "intercepts:",
3234 control->intercepts[INTERCEPT_WORD3],
3235 control->intercepts[INTERCEPT_WORD4]);
3236 pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
3237 pr_err("%-20s%d\n", "pause filter threshold:",
3238 control->pause_filter_thresh);
3239 pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
3240 pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
3241 pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
3242 pr_err("%-20s%d\n", "asid:", control->asid);
3243 pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
3244 pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
3245 pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
3246 pr_err("%-20s%08x\n", "int_state:", control->int_state);
3247 pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
3248 pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
3249 pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
3250 pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
3251 pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
3252 pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
3253 pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
3254 pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
3255 pr_err("%-20s%016llx\n", "ghcb:", control->ghcb_gpa);
3256 pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
3257 pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
3258 pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
3259 pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
3260 pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
3261 pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
3262 pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id);
3263 pr_err("%-20s%016llx\n", "vmsa_pa:", control->vmsa_pa);
3264 pr_err("VMCB State Save Area:\n");
3265 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3266 "es:",
3267 save->es.selector, save->es.attrib,
3268 save->es.limit, save->es.base);
3269 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3270 "cs:",
3271 save->cs.selector, save->cs.attrib,
3272 save->cs.limit, save->cs.base);
3273 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3274 "ss:",
3275 save->ss.selector, save->ss.attrib,
3276 save->ss.limit, save->ss.base);
3277 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3278 "ds:",
3279 save->ds.selector, save->ds.attrib,
3280 save->ds.limit, save->ds.base);
3281 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3282 "fs:",
3283 save01->fs.selector, save01->fs.attrib,
3284 save01->fs.limit, save01->fs.base);
3285 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3286 "gs:",
3287 save01->gs.selector, save01->gs.attrib,
3288 save01->gs.limit, save01->gs.base);
3289 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3290 "gdtr:",
3291 save->gdtr.selector, save->gdtr.attrib,
3292 save->gdtr.limit, save->gdtr.base);
3293 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3294 "ldtr:",
3295 save01->ldtr.selector, save01->ldtr.attrib,
3296 save01->ldtr.limit, save01->ldtr.base);
3297 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3298 "idtr:",
3299 save->idtr.selector, save->idtr.attrib,
3300 save->idtr.limit, save->idtr.base);
3301 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3302 "tr:",
3303 save01->tr.selector, save01->tr.attrib,
3304 save01->tr.limit, save01->tr.base);
3305 pr_err("vmpl: %d cpl: %d efer: %016llx\n",
3306 save->vmpl, save->cpl, save->efer);
3307 pr_err("%-15s %016llx %-13s %016llx\n",
3308 "cr0:", save->cr0, "cr2:", save->cr2);
3309 pr_err("%-15s %016llx %-13s %016llx\n",
3310 "cr3:", save->cr3, "cr4:", save->cr4);
3311 pr_err("%-15s %016llx %-13s %016llx\n",
3312 "dr6:", save->dr6, "dr7:", save->dr7);
3313 pr_err("%-15s %016llx %-13s %016llx\n",
3314 "rip:", save->rip, "rflags:", save->rflags);
3315 pr_err("%-15s %016llx %-13s %016llx\n",
3316 "rsp:", save->rsp, "rax:", save->rax);
3317 pr_err("%-15s %016llx %-13s %016llx\n",
3318 "star:", save01->star, "lstar:", save01->lstar);
3319 pr_err("%-15s %016llx %-13s %016llx\n",
3320 "cstar:", save01->cstar, "sfmask:", save01->sfmask);
3321 pr_err("%-15s %016llx %-13s %016llx\n",
3322 "kernel_gs_base:", save01->kernel_gs_base,
3323 "sysenter_cs:", save01->sysenter_cs);
3324 pr_err("%-15s %016llx %-13s %016llx\n",
3325 "sysenter_esp:", save01->sysenter_esp,
3326 "sysenter_eip:", save01->sysenter_eip);
3327 pr_err("%-15s %016llx %-13s %016llx\n",
3328 "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
3329 pr_err("%-15s %016llx %-13s %016llx\n",
3330 "br_from:", save->br_from, "br_to:", save->br_to);
3331 pr_err("%-15s %016llx %-13s %016llx\n",
3332 "excp_from:", save->last_excp_from,
3333 "excp_to:", save->last_excp_to);
3334 }
3335
3336 static bool svm_check_exit_valid(u64 exit_code)
3337 {
3338 return (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
3339 svm_exit_handlers[exit_code]);
3340 }
3341
3342 static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
3343 {
3344 vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code);
3345 dump_vmcb(vcpu);
3346 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3347 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
3348 vcpu->run->internal.ndata = 2;
3349 vcpu->run->internal.data[0] = exit_code;
3350 vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
3351 return 0;
3352 }
3353
3354 int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code)
3355 {
3356 if (!svm_check_exit_valid(exit_code))
3357 return svm_handle_invalid_exit(vcpu, exit_code);
3358
3359 #ifdef CONFIG_RETPOLINE
3360 if (exit_code == SVM_EXIT_MSR)
3361 return msr_interception(vcpu);
3362 else if (exit_code == SVM_EXIT_VINTR)
3363 return interrupt_window_interception(vcpu);
3364 else if (exit_code == SVM_EXIT_INTR)
3365 return intr_interception(vcpu);
3366 else if (exit_code == SVM_EXIT_HLT)
3367 return kvm_emulate_halt(vcpu);
3368 else if (exit_code == SVM_EXIT_NPF)
3369 return npf_interception(vcpu);
3370 #endif
3371 return svm_exit_handlers[exit_code](vcpu);
3372 }
3373
3374 static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
3375 u64 *info1, u64 *info2,
3376 u32 *intr_info, u32 *error_code)
3377 {
3378 struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
3379
3380 *reason = control->exit_code;
3381 *info1 = control->exit_info_1;
3382 *info2 = control->exit_info_2;
3383 *intr_info = control->exit_int_info;
3384 if ((*intr_info & SVM_EXITINTINFO_VALID) &&
3385 (*intr_info & SVM_EXITINTINFO_VALID_ERR))
3386 *error_code = control->exit_int_info_err;
3387 else
3388 *error_code = 0;
3389 }
3390
3391 static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
3392 {
3393 struct vcpu_svm *svm = to_svm(vcpu);
3394 struct kvm_run *kvm_run = vcpu->run;
3395 u32 exit_code = svm->vmcb->control.exit_code;
3396
3397 trace_kvm_exit(vcpu, KVM_ISA_SVM);
3398
3399 /* SEV-ES guests must use the CR write traps to track CR registers. */
3400 if (!sev_es_guest(vcpu->kvm)) {
3401 if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
3402 vcpu->arch.cr0 = svm->vmcb->save.cr0;
3403 if (npt_enabled)
3404 vcpu->arch.cr3 = svm->vmcb->save.cr3;
3405 }
3406
3407 if (is_guest_mode(vcpu)) {
3408 int vmexit;
3409
3410 trace_kvm_nested_vmexit(vcpu, KVM_ISA_SVM);
3411
3412 vmexit = nested_svm_exit_special(svm);
3413
3414 if (vmexit == NESTED_EXIT_CONTINUE)
3415 vmexit = nested_svm_exit_handled(svm);
3416
3417 if (vmexit == NESTED_EXIT_DONE)
3418 return 1;
3419 }
3420
3421 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
3422 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
3423 kvm_run->fail_entry.hardware_entry_failure_reason
3424 = svm->vmcb->control.exit_code;
3425 kvm_run->fail_entry.cpu = vcpu->arch.last_vmentry_cpu;
3426 dump_vmcb(vcpu);
3427 return 0;
3428 }
3429
3430 if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
3431 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
3432 exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
3433 exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
3434 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
3435 "exit_code 0x%x\n",
3436 __func__, svm->vmcb->control.exit_int_info,
3437 exit_code);
3438
3439 if (exit_fastpath != EXIT_FASTPATH_NONE)
3440 return 1;
3441
3442 return svm_invoke_exit_handler(vcpu, exit_code);
3443 }
3444
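/*
 * Reload the host TR after VMEXIT. LTR faults on a busy TSS descriptor, so
 * flip the descriptor type back to "available 64-bit TSS" (9) first.
 */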
3445 static void reload_tss(struct kvm_vcpu *vcpu)
3446 {
3447 struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
3448
3449 sd->tss_desc->type = 9;
3450 load_TR_desc();
3451 }
3452
3453 static void pre_svm_run(struct kvm_vcpu *vcpu)
3454 {
3455 struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
3456 struct vcpu_svm *svm = to_svm(vcpu);
3457
3458 /*
3459 * If the previous vmrun of this vmcb occurred on a different
3460 * physical cpu, mark the vmcb dirty and assign a new asid:
3461 * vmcb clean bits and KVM's asids are per logical CPU.
3462 */
3463 if (unlikely(svm->current_vmcb->cpu != vcpu->cpu)) {
3464 svm->current_vmcb->asid_generation = 0;
3465 vmcb_mark_all_dirty(svm->vmcb);
3466 svm->current_vmcb->cpu = vcpu->cpu;
3467 }
3468
3469 if (sev_guest(vcpu->kvm))
3470 return pre_sev_run(svm, vcpu->cpu);
3471
3472 /* Allocate a fresh ASID if this CPU's ASID generation has moved on. */
3473 if (svm->current_vmcb->asid_generation != sd->asid_generation)
3474 new_asid(svm, sd);
3475 }
3476
3477 static void svm_inject_nmi(struct kvm_vcpu *vcpu)
3478 {
3479 struct vcpu_svm *svm = to_svm(vcpu);
3480
3481 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
3482
3483 if (svm->nmi_l1_to_l2)
3484 return;
3485
3486 vcpu->arch.hflags |= HF_NMI_MASK;
3487 if (!sev_es_guest(vcpu->kvm))
3488 svm_set_intercept(svm, INTERCEPT_IRET);
3489 ++vcpu->stat.nmi_injections;
3490 }
3491
3492 static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
3493 {
3494 struct vcpu_svm *svm = to_svm(vcpu);
3495 u32 type;
3496
3497 if (vcpu->arch.interrupt.soft) {
3498 if (svm_update_soft_interrupt_rip(vcpu))
3499 return;
3500
3501 type = SVM_EVTINJ_TYPE_SOFT;
3502 } else {
3503 type = SVM_EVTINJ_TYPE_INTR;
3504 }
3505
3506 trace_kvm_inj_virq(vcpu->arch.interrupt.nr,
3507 vcpu->arch.interrupt.soft, reinjected);
3508 ++vcpu->stat.irq_injections;
3509
3510 svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
3511 SVM_EVTINJ_VALID | type;
3512 }
3513
3514 void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
3515 int trig_mode, int vector)
3516 {
3517 /*
3518 * apic->apicv_active must be read after vcpu->mode; the acquire
3519 * below pairs with the store of vcpu->mode in vcpu_enter_guest().
3520 */
3521 bool in_guest_mode = (smp_load_acquire(&vcpu->mode) == IN_GUEST_MODE);
3522
3523 /* Note, this is only reached when the local APIC is in-kernel. */
3524 if (!READ_ONCE(vcpu->arch.apic->apicv_active)) {
3525 /* Process the interrupt via inject_pending_event(). */
3526 kvm_make_request(KVM_REQ_EVENT, vcpu);
3527 kvm_vcpu_kick(vcpu);
3528 return;
3529 }
3530
3531 trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode, trig_mode, vector);
3532 if (in_guest_mode) {
3533 /*
3534 * Signal the doorbell to tell hardware to inject the IRQ. If
3535 * the vCPU exits the guest before the doorbell chimes, hardware
3536 * will automatically process AVIC interrupts at the next VMRUN.
3537 */
3538 avic_ring_doorbell(vcpu);
3539 } else {
3540 /*
3541 * Wake the vCPU if it was blocking. KVM will then detect the
3542 * pending IRQ when checking if the vCPU has a wake event.
3543 */
3544 kvm_vcpu_wake_up(vcpu);
3545 }
3546 }
3547
3548 static void svm_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
3549 int trig_mode, int vector)
3550 {
3551 kvm_lapic_set_irr(vector, apic);
3552
3553 /*
3554 * Pairs with the memory barrier after setting vcpu->mode in
3555 * vcpu_enter_guest() so that the write to the vIRR is ordered
3556 * before the read of vcpu->mode in
3557 * svm_complete_interrupt_delivery(): either VMRUN sees the new
3558 * vIRR entry, or the doorbell/wakeup path is taken.
3559 */
3560 smp_mb__after_atomic();
3561 svm_complete_interrupt_delivery(apic->vcpu, delivery_mode, trig_mode, vector);
3562 }
3563
3564 static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
3565 {
3566 struct vcpu_svm *svm = to_svm(vcpu);
3567
3568 /*
3569 * SEV-ES guests must always keep the CR intercepts cleared; CR
3570 * tracking is done using the CR write traps instead.
3571 */
3572 if (sev_es_guest(vcpu->kvm))
3573 return;
3574
3575 if (nested_svm_virtualize_tpr(vcpu))
3576 return;
3577
3578 svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);
3579
3580 if (irr == -1)
3581 return;
3582
3583 if (tpr >= irr)
3584 svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
3585 }
3586
3587 bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
3588 {
3589 struct vcpu_svm *svm = to_svm(vcpu);
3590 struct vmcb *vmcb = svm->vmcb;
3591 bool ret;
3592
3593 if (!gif_set(svm))
3594 return true;
3595
3596 if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
3597 return false;
3598
3599 ret = (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
3600 (vcpu->arch.hflags & HF_NMI_MASK);
3601
3602 return ret;
3603 }
3604
3605 static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
3606 {
3607 struct vcpu_svm *svm = to_svm(vcpu);
3608 if (svm->nested.nested_run_pending)
3609 return -EBUSY;
3610
3611 if (svm_nmi_blocked(vcpu))
3612 return 0;
3613
3614 /* An NMI must not be injected into L2 if it's supposed to VM-Exit. */
3615 if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
3616 return -EBUSY;
3617 return 1;
3618 }
3619
3620 static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
3621 {
3622 return !!(vcpu->arch.hflags & HF_NMI_MASK);
3623 }
3624
3625 static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
3626 {
3627 struct vcpu_svm *svm = to_svm(vcpu);
3628
3629 if (masked) {
3630 vcpu->arch.hflags |= HF_NMI_MASK;
3631 if (!sev_es_guest(vcpu->kvm))
3632 svm_set_intercept(svm, INTERCEPT_IRET);
3633 } else {
3634 vcpu->arch.hflags &= ~HF_NMI_MASK;
3635 if (!sev_es_guest(vcpu->kvm))
3636 svm_clr_intercept(svm, INTERCEPT_IRET);
3637 }
3638 }
3639
3640 bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
3641 {
3642 struct vcpu_svm *svm = to_svm(vcpu);
3643 struct vmcb *vmcb = svm->vmcb;
3644
3645 if (!gif_set(svm))
3646 return true;
3647
3648 if (is_guest_mode(vcpu)) {
3649 /* With V_INTR_MASKING, L1's RFLAGS.IF gates delivery; otherwise L2's RFLAGS.IF does. */
3650 if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
3651 ? !(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF)
3652 : !(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
3653 return true;
3654
3655 /* ...but VM-Exits to L1 are not blocked by the interrupt shadow. */
3656 if (nested_exit_on_intr(svm))
3657 return false;
3658 } else {
3659 if (!svm_get_if_flag(vcpu))
3660 return true;
3661 }
3662
3663 return (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK);
3664 }
3665
3666 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
3667 {
3668 struct vcpu_svm *svm = to_svm(vcpu);
3669
3670 if (svm->nested.nested_run_pending)
3671 return -EBUSY;
3672
3673 if (svm_interrupt_blocked(vcpu))
3674 return 0;
3675
3676 /*
3677 * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
3678 * e.g. if the IRQ arrived asynchronously after checking nested events.
3679 */
3680 if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(svm))
3681 return -EBUSY;
3682
3683 return 1;
3684 }
3685
3686 static void svm_enable_irq_window(struct kvm_vcpu *vcpu)
3687 {
3688 struct vcpu_svm *svm = to_svm(vcpu);
3689
3690 /*
3691 * In case GIF=0 we can't rely on the CPU to tell us when GIF
3692 * becomes 1, since that's a separate STGI/VMRUN intercept. The
3693 * next time we get that intercept, this function will be called
3694 * again and we'll get the VINTR intercept. However, if the vGIF
3695 * feature is enabled, the STGI interception will not occur, so
3696 * enable the irq window assuming hardware will set the GIF.
3697 */
3698 if (vgif || gif_set(svm)) {
3699 /*
3700 * The IRQ window is not needed when AVIC is enabled, unless an
3701 * ExtINT is pending, since ExtINTs cannot be injected via AVIC.
3702 * In that case KVM temporarily inhibits AVIC and falls back to
3703 * injecting the IRQ via V_IRQ.
3704 *
3705 * If running nested, AVIC is already locally inhibited on this
3706 * vCPU, therefore there is no need to request the VM wide AVIC
3707 * inhibition.
3708 */
3709 if (!is_guest_mode(vcpu))
3710 kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN);
3711
3712 svm_set_vintr(svm);
3713 }
3714 }
3715
3716 static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
3717 {
3718 struct vcpu_svm *svm = to_svm(vcpu);
3719
3720 if ((vcpu->arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) == HF_NMI_MASK)
3721 return;
3722
3723 if (!gif_set(svm)) {
3724 if (vgif)
3725 svm_set_intercept(svm, INTERCEPT_STGI);
3726 return;
3727 }
3728
3729 /*
3730 * Something prevents the NMI from being injected (IRET, exception
3731 * injection or an interrupt shadow); single-step over the blockage.
3732 */
3733 svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
3734 svm->nmi_singlestep = true;
3735 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
3736 }
3737
3738 static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
3739 {
3740 struct vcpu_svm *svm = to_svm(vcpu);
3741
3742 /*
3743 * Flush only the current ASID even if the TLB flush was invoked
3744 * via kvm_flush_remote_tlbs(). Although flushing remote TLBs
3745 * requires all ASIDs to be flushed, KVM uses a single ASID for
3746 * L1 and L2, and unconditionally flushes the TLB on nested
3747 * VM-Enter and VM-Exit (via kvm_mmu_reset_context()).
3748 */
3749 if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
3750 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
3751 else
3752 svm->current_vmcb->asid_generation--;
3753 }
3754
3755 static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
3756 {
3757 struct vcpu_svm *svm = to_svm(vcpu);
3758
3759 invlpga(gva, svm->vmcb->control.asid);
3760 }
3761
3762 static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
3763 {
3764 struct vcpu_svm *svm = to_svm(vcpu);
3765
3766 if (nested_svm_virtualize_tpr(vcpu))
3767 return;
3768
3769 if (!svm_is_intercept(svm, INTERCEPT_CR8_WRITE)) {
3770 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
3771 kvm_set_cr8(vcpu, cr8);
3772 }
3773 }
3774
3775 static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
3776 {
3777 struct vcpu_svm *svm = to_svm(vcpu);
3778 u64 cr8;
3779
3780 if (nested_svm_virtualize_tpr(vcpu) ||
3781 kvm_vcpu_apicv_active(vcpu))
3782 return;
3783
3784 cr8 = kvm_get_cr8(vcpu);
3785 svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
3786 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
3787 }
3788
3789 static void svm_complete_soft_interrupt(struct kvm_vcpu *vcpu, u8 vector,
3790 int type)
3791 {
3792 bool is_exception = (type == SVM_EXITINTINFO_TYPE_EXEPT);
3793 bool is_soft = (type == SVM_EXITINTINFO_TYPE_SOFT);
3794 struct vcpu_svm *svm = to_svm(vcpu);
3795
3796 /*
3797 * If NRIPS is enabled, KVM must snapshot the pre-VMRUN next_rip
3798 * that is associated with the original soft exception/interrupt.
3799 * next_rip is cleared on all exits that can occur while vectoring
3800 * an event, so KVM needs to set it manually for re-injection.
3801 * Unlike the !nrips case below, this is done if and only if KVM
3802 * is re-injecting the same event, i.e. only if the event is a
3803 * soft exception/interrupt; otherwise next_rip is unused on VMRUN.
3804 */
3805 if (nrips && (is_soft || (is_exception && kvm_exception_is_soft(vector))) &&
3806 kvm_is_linear_rip(vcpu, svm->soft_int_old_rip + svm->soft_int_csbase))
3807 svm->vmcb->control.next_rip = svm->soft_int_next_rip;
3808
3809 /*
3810 * If NRIPS isn't enabled, KVM must manually advance RIP before
3811 * injecting the soft exception/interrupt. That advancement has
3812 * to be unwound here, i.e. RIP is rolled back to its
3813 * pre-injection value, if vectoring did not complete and the
3814 * original event is being re-injected.
3815 */
3816 else if (!nrips && (is_soft || is_exception) &&
3817 kvm_is_linear_rip(vcpu, svm->soft_int_next_rip + svm->soft_int_csbase))
3818 kvm_rip_write(vcpu, svm->soft_int_old_rip);
3819 }
3820
3821 static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
3822 {
3823 struct vcpu_svm *svm = to_svm(vcpu);
3824 u8 vector;
3825 int type;
3826 u32 exitintinfo = svm->vmcb->control.exit_int_info;
3827 bool nmi_l1_to_l2 = svm->nmi_l1_to_l2;
3828 bool soft_int_injected = svm->soft_int_injected;
3829
3830 svm->nmi_l1_to_l2 = false;
3831 svm->soft_int_injected = false;
3832
3833 /*
3834 * If RIP has moved since HF_IRET_MASK was set (SEV-ES guests skip
3835 * the RIP check), the intercepted IRET has executed: unmask NMIs.
3836 */
3837 if ((vcpu->arch.hflags & HF_IRET_MASK) &&
3838 (sev_es_guest(vcpu->kvm) ||
3839 kvm_rip_read(vcpu) != svm->nmi_iret_rip)) {
3840 vcpu->arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
3841 kvm_make_request(KVM_REQ_EVENT, vcpu);
3842 }
3843
3844 vcpu->arch.nmi_injected = false;
3845 kvm_clear_exception_queue(vcpu);
3846 kvm_clear_interrupt_queue(vcpu);
3847
3848 if (!(exitintinfo & SVM_EXITINTINFO_VALID))
3849 return;
3850
3851 kvm_make_request(KVM_REQ_EVENT, vcpu);
3852
3853 vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
3854 type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
3855
3856 if (soft_int_injected)
3857 svm_complete_soft_interrupt(vcpu, vector, type);
3858
3859 switch (type) {
3860 case SVM_EXITINTINFO_TYPE_NMI:
3861 vcpu->arch.nmi_injected = true;
3862 svm->nmi_l1_to_l2 = nmi_l1_to_l2;
3863 break;
3864 case SVM_EXITINTINFO_TYPE_EXEPT:
3865 /*
3866 * Never re-inject a #VC exception.
3867 */
3868 if (vector == X86_TRAP_VC)
3869 break;
3870
3871 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
3872 u32 err = svm->vmcb->control.exit_int_info_err;
3873 kvm_requeue_exception_e(vcpu, vector, err);
3874
3875 } else
3876 kvm_requeue_exception(vcpu, vector);
3877 break;
3878 case SVM_EXITINTINFO_TYPE_INTR:
3879 kvm_queue_interrupt(vcpu, vector, false);
3880 break;
3881 case SVM_EXITINTINFO_TYPE_SOFT:
3882 kvm_queue_interrupt(vcpu, vector, true);
3883 break;
3884 default:
3885 break;
3886 }
3887
3888 }
3889
3890 static void svm_cancel_injection(struct kvm_vcpu *vcpu)
3891 {
3892 struct vcpu_svm *svm = to_svm(vcpu);
3893 struct vmcb_control_area *control = &svm->vmcb->control;
3894
3895 control->exit_int_info = control->event_inj;
3896 control->exit_int_info_err = control->event_inj_err;
3897 control->event_inj = 0;
3898 svm_complete_interrupts(vcpu);
3899 }
3900
3901 static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
3902 {
3903 return 1;
3904 }
3905
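/*
 * Only WRMSR exits are handled in the fastpath: a non-zero EXITINFO1 on an
 * MSR exit indicates a write.
 */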
3906 static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
3907 {
3908 if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
3909 to_svm(vcpu)->vmcb->control.exit_info_1)
3910 return handle_fastpath_set_msr_irqoff(vcpu);
3911
3912 return EXIT_FASTPATH_NONE;
3913 }
3914
3915 static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
3916 {
3917 struct vcpu_svm *svm = to_svm(vcpu);
3918 unsigned long vmcb_pa = svm->current_vmcb->pa;
3919
3920 guest_state_enter_irqoff();
3921
3922 if (sev_es_guest(vcpu->kvm)) {
3923 __svm_sev_es_vcpu_run(vmcb_pa);
3924 } else {
3925 struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
3926
3927 /*
3928 * Use a single vmcb (vmcb01, because it's always valid) for
3929 * context switching guest state via VMLOAD/VMSAVE, that way
3930 * the state doesn't need to be copied between vmcb01 and
3931 * vmcb02 when switching vmcbs for nested virtualization.
3932 */
3933 vmload(svm->vmcb01.pa);
3934 __svm_vcpu_run(vmcb_pa, (unsigned long *)&vcpu->arch.regs);
3935 vmsave(svm->vmcb01.pa);
3936
3937 vmload(__sme_page_pa(sd->save_area));
3938 }
3939
3940 guest_state_exit_irqoff();
3941 }
3942
3943 static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
3944 {
3945 struct vcpu_svm *svm = to_svm(vcpu);
3946
3947 trace_kvm_entry(vcpu);
3948
3949 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
3950 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
3951 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
3952
3953 /*
3954 * Disable single-stepping if we're injecting an interrupt or
3955 * exception. We don't want our modified rflags to be pushed on
3956 * the stack where we might not be able to easily reset them if
3957 * we disabled NMI singlestep later.
3958 */
3959 if (svm->nmi_singlestep && svm->vmcb->control.event_inj) {
3960 /*
3961 * Event injection happens before external interrupts cause a
3962 * vmexit and interrupts are disabled here, so smp_send_reschedule
3963 * is enough to force an immediate vmexit.
3964 */
3965 disable_nmi_singlestep(svm);
3966 smp_send_reschedule(vcpu->cpu);
3967 }
3968
3969 pre_svm_run(vcpu);
3970
3971 sync_lapic_to_cr8(vcpu);
3972
3973 if (unlikely(svm->asid != svm->vmcb->control.asid)) {
3974 svm->vmcb->control.asid = svm->asid;
3975 vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
3976 }
3977 svm->vmcb->save.cr2 = vcpu->arch.cr2;
3978
3979 svm_hv_update_vp_id(svm->vmcb, vcpu);
3980
3981 /*
3982 * Run with an all-zero DR6 unless the guest can write DR6 freely,
3983 * so that KVM can determine the exact cause of a #DB.
3984 */
3985 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
3986 svm_set_dr6(svm, vcpu->arch.dr6);
3987 else
3988 svm_set_dr6(svm, DR6_ACTIVE_LOW);
3989
3990 clgi();
3991 kvm_load_guest_xsave_state(vcpu);
3992
3993 kvm_wait_lapic_expire(vcpu);
3994
3995 /*
3996 * If this vCPU has touched SPEC_CTRL, restore the guest's value
3997 * if it's non-zero. Since vmentry is serialising on affected
3998 * CPUs, there is no need to worry about the conditional branch
3999 * over the wrmsr being speculatively taken.
4000 */
4001 if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
4002 x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
4003
4004 svm_vcpu_enter_exit(vcpu);
4005
4006 /*
4007 * We do not use IBRS in the kernel. If this vCPU has used the
4008 * SPEC_CTRL MSR it may have left it on; save the value and
4009 * turn it off. This is much more efficient than blindly adding
4010 * it to the atomic save/restore list. Especially as the former
4011 * (saving guest MSRs on vmexit) does not even exist in KVM.
4012 *
4013 * For the non-nested case:
4014 * If the L01 MSR bitmap does not intercept the MSR, then we
4015 * need to save it.
4016 *
4017 * For the nested case:
4018 * If the L02 MSR bitmap does not intercept the MSR, then we
4019 * need to save it.
4020 */
4021 if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) &&
4022 unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
4023 svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
4024
4025 if (!sev_es_guest(vcpu->kvm))
4026 reload_tss(vcpu);
4027
4028 if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
4029 x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
4030
4031 if (!sev_es_guest(vcpu->kvm)) {
4032 vcpu->arch.cr2 = svm->vmcb->save.cr2;
4033 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
4034 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
4035 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
4036 }
4037 vcpu->arch.regs_dirty = 0;
4038
4039 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
4040 kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
4041
4042 kvm_load_host_xsave_state(vcpu);
4043 stgi();
4044
4045 /* Any pending NMI will happen here. */
4046
4047 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
4048 kvm_after_interrupt(vcpu);
4049
4050 sync_cr8_to_lapic(vcpu);
4051
4052 svm->next_rip = 0;
4053 if (is_guest_mode(vcpu)) {
4054 nested_sync_control_from_vmcb02(svm);
4055
4056 /* Track VMRUNs that have made it past consistency checking. */
4057 if (svm->nested.nested_run_pending &&
4058 svm->vmcb->control.exit_code != SVM_EXIT_ERR)
4059 ++vcpu->stat.nested_run;
4060
4061 svm->nested.nested_run_pending = 0;
4062 }
4063
4064 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
4065 vmcb_mark_all_clean(svm->vmcb);
4066
4067 /* If the exit was due to #PF, check for an async page fault. */
4068 if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
4069 vcpu->arch.apf.host_apf_flags =
4070 kvm_read_and_reset_apf_flags();
4071
4072 vcpu->arch.regs_avail &= ~SVM_REGS_LAZY_LOAD_SET;
4073
4074 /*
4075 * Machine-check intercepts need to be handled here, before the
4076 * vcpu has a chance to change the physical cpu.
4077 */
4078 if (unlikely(svm->vmcb->control.exit_code ==
4079 SVM_EXIT_EXCP_BASE + MC_VECTOR))
4080 svm_handle_mce(vcpu);
4081
4082 svm_complete_interrupts(vcpu);
4083
4084 if (is_guest_mode(vcpu))
4085 return EXIT_FASTPATH_NONE;
4086
4087 return svm_exit_handlers_fastpath(vcpu);
4088 }
4089
4090 static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
4091 int root_level)
4092 {
4093 struct vcpu_svm *svm = to_svm(vcpu);
4094 unsigned long cr3;
4095
4096 if (npt_enabled) {
4097 svm->vmcb->control.nested_cr3 = __sme_set(root_hpa);
4098 vmcb_mark_dirty(svm->vmcb, VMCB_NPT);
4099
4100 hv_track_root_tdp(vcpu, root_hpa);
4101
4102 cr3 = vcpu->arch.cr3;
4103 } else if (root_level >= PT64_ROOT_4LEVEL) {
4104 cr3 = __sme_set(root_hpa) | kvm_get_active_pcid(vcpu);
4105 } else {
4106 /* PCID in the guest should be impossible with a 32-bit MMU. */
4107 WARN_ON_ONCE(kvm_get_active_pcid(vcpu));
4108 cr3 = root_hpa;
4109 }
4110
4111 svm->vmcb->save.cr3 = cr3;
4112 vmcb_mark_dirty(svm->vmcb, VMCB_CR);
4113 }
4114
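/* SVM is unusable if the SVMDIS bit is set in MSR_VM_CR (e.g. by the BIOS). */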
4115 static int is_disabled(void)
4116 {
4117 u64 vm_cr;
4118
4119 rdmsrl(MSR_VM_CR, vm_cr);
4120 if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
4121 return 1;
4122
4123 return 0;
4124 }
4125
4126 static void
4127 svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
4128 {
4129 /*
4130 * Patch in the VMMCALL instruction (0f 01 d9):
4131 */
4132 hypercall[0] = 0x0f;
4133 hypercall[1] = 0x01;
4134 hypercall[2] = 0xd9;
4135 }
4136
4137 static int __init svm_check_processor_compat(void)
4138 {
4139 return 0;
4140 }
4141
4142 /*
4143 * The kvm parameter can be NULL (module initialization, or
4144 * invocation before VM creation). Check it before using it.
4145 */
4146 static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
4147 {
4148 switch (index) {
4149 case MSR_IA32_MCG_EXT_CTL:
4150 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
4151 return false;
4152 case MSR_IA32_SMBASE:
4153 /* SEV-ES guests do not support SMM, so report false. */
4154 if (kvm && sev_es_guest(kvm))
4155 return false;
4156 break;
4157 default:
4158 break;
4159 }
4160
4161 return true;
4162 }
4163
4164 static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
4165 {
4166 struct vcpu_svm *svm = to_svm(vcpu);
4167 struct kvm_cpuid_entry2 *best;
4168
4169 vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
4170 boot_cpu_has(X86_FEATURE_XSAVE) &&
4171 boot_cpu_has(X86_FEATURE_XSAVES);
4172
4173 /* Update the per-vCPU NRIPS enabled cache. */
4174 svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
4175 guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
4176
4177 svm->tsc_scaling_enabled = tsc_scaling && guest_cpuid_has(vcpu, X86_FEATURE_TSCRATEMSR);
4178 svm->lbrv_enabled = lbrv && guest_cpuid_has(vcpu, X86_FEATURE_LBRV);
4179
4180 svm->v_vmload_vmsave_enabled = vls && guest_cpuid_has(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
4181
4182 svm->pause_filter_enabled = kvm_cpu_cap_has(X86_FEATURE_PAUSEFILTER) &&
4183 guest_cpuid_has(vcpu, X86_FEATURE_PAUSEFILTER);
4184
4185 svm->pause_threshold_enabled = kvm_cpu_cap_has(X86_FEATURE_PFTHRESHOLD) &&
4186 guest_cpuid_has(vcpu, X86_FEATURE_PFTHRESHOLD);
4187
4188 svm->vgif_enabled = vgif && guest_cpuid_has(vcpu, X86_FEATURE_VGIF);
4189
4190 svm_recalc_instruction_intercepts(vcpu, svm);
4191
4192 /* For SEV guests, the memory encryption bit is not reserved in CR3. */
4193 if (sev_guest(vcpu->kvm)) {
4194 best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
4195 if (best)
4196 vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
4197 }
4198
4199 init_vmcb_after_set_cpuid(vcpu);
4200 }
4201
4202 static bool svm_has_wbinvd_exit(void)
4203 {
4204 return true;
4205 }
4206
4207 #define PRE_EX(exit) { .exit_code = (exit), \
4208 .stage = X86_ICPT_PRE_EXCEPT, }
4209 #define POST_EX(exit) { .exit_code = (exit), \
4210 .stage = X86_ICPT_POST_EXCEPT, }
4211 #define POST_MEM(exit) { .exit_code = (exit), \
4212 .stage = X86_ICPT_POST_MEMACCESS, }
4213
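/*
 * Map emulator intercept IDs to SVM exit codes (and the stage at which they
 * are checked) so that emulated instructions can be reflected to a nested
 * hypervisor via svm_check_intercept().
 */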
4214 static const struct __x86_intercept {
4215 u32 exit_code;
4216 enum x86_intercept_stage stage;
4217 } x86_intercept_map[] = {
4218 [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0),
4219 [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0),
4220 [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0),
4221 [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0),
4222 [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0),
4223 [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0),
4224 [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0),
4225 [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ),
4226 [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ),
4227 [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE),
4228 [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE),
4229 [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ),
4230 [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ),
4231 [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE),
4232 [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE),
4233 [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN),
4234 [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL),
4235 [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD),
4236 [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE),
4237 [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI),
4238 [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI),
4239 [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT),
4240 [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA),
4241 [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP),
4242 [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR),
4243 [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT),
4244 [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG),
4245 [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD),
4246 [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD),
4247 [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR),
4248 [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC),
4249 [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR),
4250 [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC),
4251 [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID),
4252 [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM),
4253 [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE),
4254 [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF),
4255 [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF),
4256 [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT),
4257 [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET),
4258 [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP),
4259 [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT),
4260 [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO),
4261 [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
4262 [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
4263 [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
4264 [x86_intercept_xsetbv] = PRE_EX(SVM_EXIT_XSETBV),
4265 };
4266
4267 #undef PRE_EX
4268 #undef POST_EX
4269 #undef POST_MEM
4270
4271 static int svm_check_intercept(struct kvm_vcpu *vcpu,
4272 struct x86_instruction_info *info,
4273 enum x86_intercept_stage stage,
4274 struct x86_exception *exception)
4275 {
4276 struct vcpu_svm *svm = to_svm(vcpu);
4277 int vmexit, ret = X86EMUL_CONTINUE;
4278 struct __x86_intercept icpt_info;
4279 struct vmcb *vmcb = svm->vmcb;
4280
4281 if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
4282 goto out;
4283
4284 icpt_info = x86_intercept_map[info->intercept];
4285
4286 if (stage != icpt_info.stage)
4287 goto out;
4288
4289 switch (icpt_info.exit_code) {
4290 case SVM_EXIT_READ_CR0:
4291 if (info->intercept == x86_intercept_cr_read)
4292 icpt_info.exit_code += info->modrm_reg;
4293 break;
4294 case SVM_EXIT_WRITE_CR0: {
4295 unsigned long cr0, val;
4296
4297 if (info->intercept == x86_intercept_cr_write)
4298 icpt_info.exit_code += info->modrm_reg;
4299
4300 if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
4301 info->intercept == x86_intercept_clts)
4302 break;
4303
4304 if (!(vmcb12_is_intercept(&svm->nested.ctl,
4305 INTERCEPT_SELECTIVE_CR0)))
4306 break;
4307
4308 cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
4309 val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
4310
4311 if (info->intercept == x86_intercept_lmsw) {
4312 cr0 &= 0xfUL;
4313 val &= 0xfUL;
4314
4315 if (cr0 & X86_CR0_PE)
4316 val |= X86_CR0_PE;
4317 }
4318
4319 if (cr0 ^ val)
4320 icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
4321
4322 break;
4323 }
4324 case SVM_EXIT_READ_DR0:
4325 case SVM_EXIT_WRITE_DR0:
4326 icpt_info.exit_code += info->modrm_reg;
4327 break;
4328 case SVM_EXIT_MSR:
4329 if (info->intercept == x86_intercept_wrmsr)
4330 vmcb->control.exit_info_1 = 1;
4331 else
4332 vmcb->control.exit_info_1 = 0;
4333 break;
4334 case SVM_EXIT_PAUSE:
4335 /*
4336 * PAUSE is encoded as NOP with a REPE prefix; only treat the
4337 * instruction as an intercepted PAUSE if the prefix is present.
4338 */
4339 if (info->rep_prefix != REPE_PREFIX)
4340 goto out;
4341 break;
4342 case SVM_EXIT_IOIO: {
4343 u64 exit_info;
4344 u32 bytes;
4345
4346 if (info->intercept == x86_intercept_in ||
4347 info->intercept == x86_intercept_ins) {
4348 exit_info = ((info->src_val & 0xffff) << 16) |
4349 SVM_IOIO_TYPE_MASK;
4350 bytes = info->dst_bytes;
4351 } else {
4352 exit_info = (info->dst_val & 0xffff) << 16;
4353 bytes = info->src_bytes;
4354 }
4355
4356 if (info->intercept == x86_intercept_outs ||
4357 info->intercept == x86_intercept_ins)
4358 exit_info |= SVM_IOIO_STR_MASK;
4359
4360 if (info->rep_prefix)
4361 exit_info |= SVM_IOIO_REP_MASK;
4362
4363 bytes = min(bytes, 4u);
4364
4365 exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
4366
4367 exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
4368
4369 vmcb->control.exit_info_1 = exit_info;
4370 vmcb->control.exit_info_2 = info->next_rip;
4371
4372 break;
4373 }
4374 default:
4375 break;
4376 }
4377
4378 /* next_rip can only be provided when the CPU supports NRIPS. */
4379 if (static_cpu_has(X86_FEATURE_NRIPS))
4380 vmcb->control.next_rip = info->next_rip;
4381 vmcb->control.exit_code = icpt_info.exit_code;
4382 vmexit = nested_svm_exit_handled(svm);
4383
4384 ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
4385 : X86EMUL_CONTINUE;
4386
4387 out:
4388 return ret;
4389 }
4390
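/*
 * Added descriptive comment: an SVM_EXIT_INTR exit is caused by a host
 * interrupt and therefore occurred on an instruction boundary; note that so
 * common x86 code can account preemption during IRQ handling accordingly.
 */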
4391 static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
4392 {
4393 if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR)
4394 vcpu->arch.at_instruction_boundary = true;
4395 }
4396
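/*
 * Added descriptive comment: the PAUSE filter count (PLE window) is grown in
 * the PAUSE intercept handler and shrunk back toward its default each time
 * the vCPU is scheduled in, so a spin-happy vCPU doesn't keep an inflated
 * window forever.
 */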
4397 static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
4398 {
4399 if (!kvm_pause_in_guest(vcpu->kvm))
4400 shrink_ple_window(vcpu);
4401 }
4402
4403 static void svm_setup_mce(struct kvm_vcpu *vcpu)
4404 {
4405 /* MCG_CAP bits [63:9] are reserved. */
4406 vcpu->arch.mcg_cap &= 0x1ff;
4407 }
4408
4409 bool svm_smi_blocked(struct kvm_vcpu *vcpu)
4410 {
4411 struct vcpu_svm *svm = to_svm(vcpu);
4412
4413 /* Per APM Vol.2 15.22.2 "Response to SMI": SMIs are blocked while GIF=0. */
4414 if (!gif_set(svm))
4415 return true;
4416
4417 return is_smm(vcpu);
4418 }
4419
4420 static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
4421 {
4422 struct vcpu_svm *svm = to_svm(vcpu);
4423 if (svm->nested.nested_run_pending)
4424 return -EBUSY;
4425
4426 if (svm_smi_blocked(vcpu))
4427 return 0;
4428
4429 /* An SMI must not be injected into L2 if it's supposed to VM-Exit. */
4430 if (for_injection && is_guest_mode(vcpu) && nested_exit_on_smi(svm))
4431 return -EBUSY;
4432
4433 return 1;
4434 }
4435
4436 static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
4437 {
4438 struct vcpu_svm *svm = to_svm(vcpu);
4439 struct kvm_host_map map_save;
4440 int ret;
4441
4442 if (!is_guest_mode(vcpu))
4443 return 0;
4444
4445 /* FED8h - SVM Guest flag */
4446 put_smstate(u64, smstate, 0x7ed8, 1);
4447 /* FEE0h - SVM Guest VMCB Physical Address */
4448 put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
4449
4450 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
4451 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
4452 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
4453
4454 ret = nested_svm_simple_vmexit(svm, SVM_EXIT_SW);
4455 if (ret)
4456 return ret;
4457
4458 /*
4459  * KVM uses VMCB01 to store L1 host state while L2 runs, but VMCB01 is
4460  * also the VMCB that will be used while in SMM, so that state would be
4461  * lost.  Temporarily save the non-VMLOAD/VMSAVE state to the host save
4462  * area pointed to by MSR_VM_HSAVE_PA.  The APM guarantees that the host
4463  * save area layout matches the guest save area at offset 0x400, i.e. the
4464  * offset of 'struct vmcb_save_area' within 'struct vmcb'.
4465  *
4466  * Note: the HSAVE area may also be used by the L1 hypervisor to stash
4467  * additional host context beyond what the APM defines, while KVM always
4468  * uses just one HSAVE area; see the comment in nested_svm_vmexit().
4469  */
4470 if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
4471 &map_save) == -EINVAL)
4472 return 1;
4473
4474 BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
4475
4476 svm_copy_vmrun_state(map_save.hva + 0x400,
4477 &svm->vmcb01.ptr->save);
4478
4479 kvm_vcpu_unmap(vcpu, &map_save, true);
4480 return 0;
4481 }
4482
4483 static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
4484 {
4485 struct vcpu_svm *svm = to_svm(vcpu);
4486 struct kvm_host_map map, map_save;
4487 u64 saved_efer, vmcb12_gpa;
4488 struct vmcb *vmcb12;
4489 int ret;
4490
4491 if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
4492 return 0;
4493
4494 /* FED8h - SVM Guest flag: nothing to restore if the SMI wasn't taken from L2. */
4495 if (!GET_SMSTATE(u64, smstate, 0x7ed8))
4496 return 0;
4497
4498 if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
4499 return 1;
4500
4501 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
4502 if (!(saved_efer & EFER_SVME))
4503 return 1;
4504
4505 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
4506 if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
4507 return 1;
4508
4509 ret = 1;
4510 if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save) == -EINVAL)
4511 goto unmap_map;
4512
4513 if (svm_allocate_nested(svm))
4514 goto unmap_save;
4515
4516 /*
4517  * Restore L1 host state from the L1 HSAVE area, as VMCB01 was used
4518  * while in SMM (see svm_enter_smm()).
4519  */
4520
4521 svm_copy_vmrun_state(&svm->vmcb01.ptr->save, map_save.hva + 0x400);
4522
4523 /*
4524  * Re-enter the nested guest that was interrupted by the SMI.
4525  */
4526
4527 vmcb_mark_all_dirty(svm->vmcb01.ptr);
4528
4529 vmcb12 = map.hva;
4530 nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
4531 nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
4532 ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
4533
4534 if (ret)
4535 goto unmap_save;
4536
4537 svm->nested.nested_run_pending = 1;
4538
4539 unmap_save:
4540 kvm_vcpu_unmap(vcpu, &map_save, true);
4541 unmap_map:
4542 kvm_vcpu_unmap(vcpu, &map, true);
4543 return ret;
4544 }
4545
4546 static void svm_enable_smi_window(struct kvm_vcpu *vcpu)
4547 {
4548 struct vcpu_svm *svm = to_svm(vcpu);
4549
4550 if (!gif_set(svm)) {
4551 if (vgif)
4552 svm_set_intercept(svm, INTERCEPT_STGI);
4553 /* STGI will cause a vm exit */
4554 } else {
4555 /* We must be in SMM; RSM will cause a vmexit anyway. */
4556 }
4557 }
4558
4559 static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
4560 void *insn, int insn_len)
4561 {
4562 bool smep, smap, is_user;
4563 unsigned long cr4;
4564 u64 error_code;
4565
4566 /* Emulation is always possible when KVM has access to all guest state. */
4567 if (!sev_guest(vcpu->kvm))
4568 return true;
4569
4570 /* #UD and #GP should never be intercepted for SEV guests. */
4571 WARN_ON_ONCE(emul_type & (EMULTYPE_TRAP_UD |
4572 EMULTYPE_TRAP_UD_FORCED |
4573 EMULTYPE_VMWARE_GP));
4574
4575 /*
4576  * Emulation is impossible for SEV-ES guests as KVM doesn't have access
4577  * to guest register state.
4578  */
4579 if (sev_es_guest(vcpu->kvm))
4580 return false;
4581
4582 /*
4583  * Emulation is possible if the instruction is already decoded, e.g.
4584  * when completing I/O after returning from userspace.
4585  */
4586 if (emul_type & EMULTYPE_NO_DECODE)
4587 return true;
4588
4589 /*
4590  * Emulation is possible for SEV guests if and only if a prefilled
4591  * buffer containing the bytes of the intercepted instruction is
4592  * available.  SEV guest memory is encrypted with a guest-specific key
4593  * and cannot be decrypted by KVM, i.e. KVM emulates a fake read stream
4594  * from the bytes that the CPU's DecodeAssist feature captured in the
4595  * VMCB at the time of the intercept.
4596  *
4597  * If no buffer was provided at all, KVM has no way to fetch or guess
4598  * the instruction, so emulation cannot proceed.  Queue a #UD for the
4599  * guest and decline to emulate; injecting #UD is the least bad option,
4600  * as it at least gives the guest a chance to react, whereas silently
4601  * resuming would likely just re-trigger the same exit and leave the
4602  * vCPU spinning.
4603  */
4604 if (unlikely(!insn)) {
4605 kvm_queue_exception(vcpu, UD_VECTOR);
4606 return false;
4607 }
4608
4609 /*
4610  * Emulate for SEV guests if the insn buffer is not empty.  The buffer
4611  * will be empty if the DecodeAssist microcode cannot fetch bytes for
4612  * the faulting instruction because the code fetch itself faulted, e.g.
4613  * the guest attempted to fetch from emulated MMIO or a guest page
4614  * table used to translate CS:RIP resides in emulated MMIO.
4615  */
4616 if (likely(insn_len))
4617 return true;
4618
4619 /*
4620  * At this point KVM is dealing with an SEV guest, DecodeAssist is in
4621  * use, and the instruction-bytes buffer came back empty.  That is
4622  * expected, and the guest can simply be resumed, when the #NPF
4623  * occurred while walking the guest's page tables or on the code fetch
4624  * itself (PFERR_GUEST_PAGE / PFERR_FETCH below), as no instruction
4625  * bytes are provided for those cases.
4626  *
4627  * Otherwise, detect and work around errata 1096 (Fam17h 00-0Fh):
4628  *
4629  * When the CPU raises a #NPF on a guest data access with CR4.SMAP=1,
4630  * it is possible that the microcode implementing DecodeAssist fails
4631  * to read the instruction bytes at CS:RIP.  The microcode performs
4632  * the read with supervisor privileges, so if CS:RIP points at a
4633  * user-accessible page the read trips over SMAP, the microcode gives
4634  * up, and the VMCB ends up reporting zero instruction bytes even
4635  * though the guest's own fetch of the instruction was fine.
4636  *
4637  * Detection heuristic:
4638  *
4639  *  - The erratum requires CR4.SMAP=1, otherwise the microcode read
4640  *    cannot fail this way.
4641  *
4642  *  - If CR4.SMEP=1, the guest kernel cannot be executing from a
4643  *    user-accessible page in the first place, so the erratum can only
4644  *    be hit from CPL 3.
4645  *
4646  * Hence the erratum is assumed when SMAP is enabled and either SMEP
4647  * is disabled or the vCPU was at CPL 3.  A guest kernel that runs
4648  * code from user-accessible pages with SMEP disabled looks identical
4649  * to the erratum and is treated the same way.
4650  */
4651 error_code = to_svm(vcpu)->vmcb->control.exit_info_1;
4652 if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK))
4653 goto resume_guest;
4654
4655 cr4 = kvm_read_cr4(vcpu);
4656 smep = cr4 & X86_CR4_SMEP;
4657 smap = cr4 & X86_CR4_SMAP;
4658 is_user = svm_get_cpl(vcpu) == 3;
4659 if (smap && (!smep || is_user)) {
4660 pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n");
4661 /*
4662  * KVM has no instruction bytes and no way to fetch them, so real
4663  * emulation is impossible.  If the fault happened in guest userspace,
4664  * arbitrarily inject #GP: it keeps the guest alive and a sane guest
4665  * kernel will simply kill the offending task.  If the fault happened
4666  * in the guest kernel, there is no safe way to recover, so request a
4667  * triple fault and shut the VM down.
4668  *
4669  * In both cases the event is delivered on the next VM-Entry via the
4670  * "resume_guest" path below, i.e. without emulating anything.
4671  */
4672 if (is_user)
4673 kvm_inject_gp(vcpu, 0);
4674 else
4675 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
4676 }
4677
4678 resume_guest:
4679 /*
4680  * Returning false tells the common emulation code not to emulate and
4681  * to simply re-enter the guest.
4682  *
4683  * If the erratum was not hit, the vCPU resumes and re-executes the
4684  * faulting access; the #NPF is either resolved by the normal NPT
4685  * fault handling (e.g. the page gets mapped in) or it legitimately
4686  * recurs.
4687  *
4688  * If the erratum (or a misbehaving guest) was detected above, the
4689  * queued #GP or the requested triple fault is delivered on the next
4690  * VM-Entry instead, so the guest does not silently spin forever on a
4691  * fault that KVM cannot service.
4692  */
4693 return false;
4694 }
4695
4696 static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
4697 {
4698 struct vcpu_svm *svm = to_svm(vcpu);
4699
4700 /*
4701  * INIT is blocked while GIF is clear.  The second condition keeps INIT
4702  * latched when the vCPU is in guest mode and vmcb12 intercepts INIT;
4703  * TODO: to properly emulate that intercept,
4704  * svm_check_nested_events() should instead call nested_svm_vmexit()
4705  * when an INIT signal is pending.
4706  */
4707 return !gif_set(svm) ||
4708 (vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT));
4709 }
4710
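/*
 * Added descriptive comment: for SEV-ES guests KVM cannot write CS:IP (the
 * VMSA is encrypted), so SIPI is completed via the GHCB AP reset hold
 * protocol in sev.c instead of the common SIPI delivery path.
 */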
4711 static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
4712 {
4713 if (!sev_es_guest(vcpu->kvm))
4714 return kvm_vcpu_deliver_sipi_vector(vcpu, vector);
4715
4716 sev_vcpu_deliver_sipi_vector(vcpu, vector);
4717 }
4718
4719 static void svm_vm_destroy(struct kvm *kvm)
4720 {
4721 avic_vm_destroy(kvm);
4722 sev_vm_destroy(kvm);
4723 }
4724
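/*
 * Added descriptive comment: if PAUSE filtering is disabled via the module
 * parameters, don't intercept PAUSE at all for this VM; AVIC-enabled VMs also
 * get their per-VM AVIC state set up here.
 */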
4725 static int svm_vm_init(struct kvm *kvm)
4726 {
4727 if (!pause_filter_count || !pause_filter_thresh)
4728 kvm->arch.pause_in_guest = true;
4729
4730 if (enable_apicv) {
4731 int ret = avic_vm_init(kvm);
4732 if (ret)
4733 return ret;
4734 }
4735
4736 return 0;
4737 }
4738
4739 static struct kvm_x86_ops svm_x86_ops __initdata = {
4740 .name = "kvm_amd",
4741
4742 .hardware_unsetup = svm_hardware_unsetup,
4743 .hardware_enable = svm_hardware_enable,
4744 .hardware_disable = svm_hardware_disable,
4745 .has_emulated_msr = svm_has_emulated_msr,
4746
4747 .vcpu_create = svm_vcpu_create,
4748 .vcpu_free = svm_vcpu_free,
4749 .vcpu_reset = svm_vcpu_reset,
4750
4751 .vm_size = sizeof(struct kvm_svm),
4752 .vm_init = svm_vm_init,
4753 .vm_destroy = svm_vm_destroy,
4754
4755 .prepare_switch_to_guest = svm_prepare_switch_to_guest,
4756 .vcpu_load = svm_vcpu_load,
4757 .vcpu_put = svm_vcpu_put,
4758 .vcpu_blocking = avic_vcpu_blocking,
4759 .vcpu_unblocking = avic_vcpu_unblocking,
4760
4761 .update_exception_bitmap = svm_update_exception_bitmap,
4762 .get_msr_feature = svm_get_msr_feature,
4763 .get_msr = svm_get_msr,
4764 .set_msr = svm_set_msr,
4765 .get_segment_base = svm_get_segment_base,
4766 .get_segment = svm_get_segment,
4767 .set_segment = svm_set_segment,
4768 .get_cpl = svm_get_cpl,
4769 .get_cs_db_l_bits = svm_get_cs_db_l_bits,
4770 .set_cr0 = svm_set_cr0,
4771 .post_set_cr3 = sev_post_set_cr3,
4772 .is_valid_cr4 = svm_is_valid_cr4,
4773 .set_cr4 = svm_set_cr4,
4774 .set_efer = svm_set_efer,
4775 .get_idt = svm_get_idt,
4776 .set_idt = svm_set_idt,
4777 .get_gdt = svm_get_gdt,
4778 .set_gdt = svm_set_gdt,
4779 .set_dr7 = svm_set_dr7,
4780 .sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
4781 .cache_reg = svm_cache_reg,
4782 .get_rflags = svm_get_rflags,
4783 .set_rflags = svm_set_rflags,
4784 .get_if_flag = svm_get_if_flag,
4785
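/*
 * Added descriptive comment: flush_tlb_all/current/guest all map to
 * svm_flush_tlb_current(), which flushes by forcing a new or flushed ASID at
 * the next VMRUN; only the single-GVA flush uses INVLPGA directly.
 */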
4786 .flush_tlb_all = svm_flush_tlb_current,
4787 .flush_tlb_current = svm_flush_tlb_current,
4788 .flush_tlb_gva = svm_flush_tlb_gva,
4789 .flush_tlb_guest = svm_flush_tlb_current,
4790
4791 .vcpu_pre_run = svm_vcpu_pre_run,
4792 .vcpu_run = svm_vcpu_run,
4793 .handle_exit = svm_handle_exit,
4794 .skip_emulated_instruction = svm_skip_emulated_instruction,
4795 .update_emulated_instruction = NULL,
4796 .set_interrupt_shadow = svm_set_interrupt_shadow,
4797 .get_interrupt_shadow = svm_get_interrupt_shadow,
4798 .patch_hypercall = svm_patch_hypercall,
4799 .inject_irq = svm_inject_irq,
4800 .inject_nmi = svm_inject_nmi,
4801 .queue_exception = svm_queue_exception,
4802 .cancel_injection = svm_cancel_injection,
4803 .interrupt_allowed = svm_interrupt_allowed,
4804 .nmi_allowed = svm_nmi_allowed,
4805 .get_nmi_mask = svm_get_nmi_mask,
4806 .set_nmi_mask = svm_set_nmi_mask,
4807 .enable_nmi_window = svm_enable_nmi_window,
4808 .enable_irq_window = svm_enable_irq_window,
4809 .update_cr8_intercept = svm_update_cr8_intercept,
4810 .set_virtual_apic_mode = avic_set_virtual_apic_mode,
4811 .refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl,
4812 .check_apicv_inhibit_reasons = avic_check_apicv_inhibit_reasons,
4813 .apicv_post_state_restore = avic_apicv_post_state_restore,
4814
4815 .get_exit_info = svm_get_exit_info,
4816
4817 .vcpu_after_set_cpuid = svm_vcpu_after_set_cpuid,
4818
4819 .has_wbinvd_exit = svm_has_wbinvd_exit,
4820
4821 .get_l2_tsc_offset = svm_get_l2_tsc_offset,
4822 .get_l2_tsc_multiplier = svm_get_l2_tsc_multiplier,
4823 .write_tsc_offset = svm_write_tsc_offset,
4824 .write_tsc_multiplier = svm_write_tsc_multiplier,
4825
4826 .load_mmu_pgd = svm_load_mmu_pgd,
4827
4828 .check_intercept = svm_check_intercept,
4829 .handle_exit_irqoff = svm_handle_exit_irqoff,
4830
4831 .request_immediate_exit = __kvm_request_immediate_exit,
4832
4833 .sched_in = svm_sched_in,
4834
4835 .nested_ops = &svm_nested_ops,
4836
4837 .deliver_interrupt = svm_deliver_interrupt,
4838 .pi_update_irte = avic_pi_update_irte,
4839 .setup_mce = svm_setup_mce,
4840
4841 .smi_allowed = svm_smi_allowed,
4842 .enter_smm = svm_enter_smm,
4843 .leave_smm = svm_leave_smm,
4844 .enable_smi_window = svm_enable_smi_window,
4845
4846 .mem_enc_ioctl = sev_mem_enc_ioctl,
4847 .mem_enc_register_region = sev_mem_enc_register_region,
4848 .mem_enc_unregister_region = sev_mem_enc_unregister_region,
4849 .guest_memory_reclaimed = sev_guest_memory_reclaimed,
4850
4851 .vm_copy_enc_context_from = sev_vm_copy_enc_context_from,
4852 .vm_move_enc_context_from = sev_vm_move_enc_context_from,
4853
4854 .can_emulate_instruction = svm_can_emulate_instruction,
4855
4856 .apic_init_signal_blocked = svm_apic_init_signal_blocked,
4857
4858 .msr_filter_changed = svm_msr_filter_changed,
4859 .complete_emulated_msr = svm_complete_emulated_msr,
4860
4861 .vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
4862 .vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
4863 };
4864
4865 /*
4866  * The default MMIO mask is a single bit (excluding the present bit),
4867  * which could conflict with the memory encryption bit. Check for
4868  * memory encryption support and override the default MMIO mask if
4869  * memory encryption is enabled.
4870  */
4871 static __init void svm_adjust_mmio_mask(void)
4872 {
4873 unsigned int enc_bit, mask_bit;
4874 u64 msr, mask;
4875
4876 /* If there is no memory encryption support, use the existing mask */
4877 if (cpuid_eax(0x80000000) < 0x8000001f)
4878 return;
4879
4880 /* If memory encryption is not enabled, use the existing mask */
4881 rdmsrl(MSR_AMD64_SYSCFG, msr);
4882 if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
4883 return;
4884
4885 enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
4886 mask_bit = boot_cpu_data.x86_phys_bits;
4887
4888 /* Increment the mask bit if it is the same as the encryption bit */
4889 if (enc_bit == mask_bit)
4890 mask_bit++;
4891
4892 /*
4893  * If the mask bit location is below 52, then some bits above the
4894  * physical addressing limit will always be reserved, so use the
4895  * rsvd_bits() function to generate the mask. This mask, along with
4896  * the present bit, will be used to generate a page fault with
4897  * PFER.RSV = 1.
4898  *
4899  * If the mask bit location is 52 (or above), then clear the mask.
4900  */
4901 mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
4902
4903 kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
4904 }
4905
4906 static __init void svm_set_cpu_caps(void)
4907 {
4908 kvm_set_cpu_caps();
4909
4910 kvm_caps.supported_xss = 0;
4911
4912 /* CPUID 0x80000001 and 0x8000000A (SVM features) */
4913 if (nested) {
4914 kvm_cpu_cap_set(X86_FEATURE_SVM);
4915 kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN);
4916
4917 if (nrips)
4918 kvm_cpu_cap_set(X86_FEATURE_NRIPS);
4919
4920 if (npt_enabled)
4921 kvm_cpu_cap_set(X86_FEATURE_NPT);
4922
4923 if (tsc_scaling)
4924 kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR);
4925
4926 if (vls)
4927 kvm_cpu_cap_set(X86_FEATURE_V_VMSAVE_VMLOAD);
4928 if (lbrv)
4929 kvm_cpu_cap_set(X86_FEATURE_LBRV);
4930
4931 if (boot_cpu_has(X86_FEATURE_PAUSEFILTER))
4932 kvm_cpu_cap_set(X86_FEATURE_PAUSEFILTER);
4933
4934 if (boot_cpu_has(X86_FEATURE_PFTHRESHOLD))
4935 kvm_cpu_cap_set(X86_FEATURE_PFTHRESHOLD);
4936
4937 if (vgif)
4938 kvm_cpu_cap_set(X86_FEATURE_VGIF);
4939
4940 /* Nested VM can receive #VMEXIT instead of triggering #GP */
4941 kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
4942 }
4943
4944 /* CPUID 0x80000008 */
4945 if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
4946 boot_cpu_has(X86_FEATURE_AMD_SSBD))
4947 kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
4948
4949 /* AMD PMU PERFCTR_CORE CPUID */
4950 if (enable_pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
4951 kvm_cpu_cap_set(X86_FEATURE_PERFCTR_CORE);
4952
4953 /* CPUID 0x8000001F (SME/SEV features) */
4954 sev_set_cpu_caps();
4955 }
4956
4957 static __init int svm_hardware_setup(void)
4958 {
4959 int cpu;
4960 struct page *iopm_pages;
4961 void *iopm_va;
4962 int r;
4963 unsigned int order = get_order(IOPM_SIZE);
4964
4965 /*
4966  * NX is required for shadow paging and for NPT if the NX huge pages
4967  * mitigation is enabled.
4968  */
4969 if (!boot_cpu_has(X86_FEATURE_NX)) {
4970 pr_err_ratelimited("NX (Execute Disable) not supported\n");
4971 return -EOPNOTSUPP;
4972 }
4973 kvm_enable_efer_bits(EFER_NX);
4974
4975 iopm_pages = alloc_pages(GFP_KERNEL, order);
4976
4977 if (!iopm_pages)
4978 return -ENOMEM;
4979
4980 iopm_va = page_address(iopm_pages);
4981 memset(iopm_va, 0xff, PAGE_SIZE * (1 << order));
4982 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
4983
4984 init_msrpm_offsets();
4985
4986 kvm_caps.supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS |
4987 XFEATURE_MASK_BNDCSR);
4988
4989 if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
4990 kvm_enable_efer_bits(EFER_FFXSR);
4991
4992 if (tsc_scaling) {
4993 if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
4994 tsc_scaling = false;
4995 } else {
4996 pr_info("TSC scaling supported\n");
4997 kvm_caps.has_tsc_control = true;
4998 }
4999 }
5000 kvm_caps.max_tsc_scaling_ratio = SVM_TSC_RATIO_MAX;
5001 kvm_caps.tsc_scaling_ratio_frac_bits = 32;
5002
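/*
 * Added descriptive comment: TSC_AUX is context switched via KVM's user
 * return MSR mechanism rather than through the VMCB/MSR save areas; remember
 * the returned slot index for use at run time.
 */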
5003 tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
5004
5005 /* Check for pause filtering support */
5006 if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
5007 pause_filter_count = 0;
5008 pause_filter_thresh = 0;
5009 } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
5010 pause_filter_thresh = 0;
5011 }
5012
5013 if (nested) {
5014 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
5015 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
5016 }
5017
5018 /*
5019  * The NPT format follows the host paging mode, and KVM's MMU does not
5020  * support building 2-level page tables.  Thus NPT can't be used on a
5021  * 32-bit kernel without PAE, where the host uses 2-level paging.
5022  */
5023 if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
5024 npt_enabled = false;
5025
5026 if (!boot_cpu_has(X86_FEATURE_NPT))
5027 npt_enabled = false;
5028
5029 /* Force the NPT root level to the host's paging level; 1GiB pages are supported. */
5030 kvm_configure_mmu(npt_enabled, get_npt_level(),
5031 get_npt_level(), PG_LEVEL_1G);
5032 pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
5033
5034 /* Tell the MMU which physical address bit is the SME encryption bit. */
5035 kvm_mmu_set_me_spte_mask(sme_me_mask, sme_me_mask);
5036
5037 svm_adjust_mmio_mask();
5038
5039 /*
5040  * Note, SEV setup consumes npt_enabled (SEV requires NPT), so it must
5041  * run after the NPT configuration above has been finalized.
5042  */
5043 sev_hardware_setup();
5044
5045 svm_hv_hardware_setup();
5046
5047 for_each_possible_cpu(cpu) {
5048 r = svm_cpu_init(cpu);
5049 if (r)
5050 goto err;
5051 }
5052
5053 if (nrips) {
5054 if (!boot_cpu_has(X86_FEATURE_NRIPS))
5055 nrips = false;
5056 }
5057
5058 enable_apicv = avic = avic && avic_hardware_setup(&svm_x86_ops);
5059
5060 if (!enable_apicv) {
5061 svm_x86_ops.vcpu_blocking = NULL;
5062 svm_x86_ops.vcpu_unblocking = NULL;
5063 svm_x86_ops.vcpu_get_apicv_inhibit_reasons = NULL;
5064 }
5065
5066 if (vls) {
5067 if (!npt_enabled ||
5068 !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
5069 !IS_ENABLED(CONFIG_X86_64)) {
5070 vls = false;
5071 } else {
5072 pr_info("Virtual VMLOAD VMSAVE supported\n");
5073 }
5074 }
5075
5076 if (boot_cpu_has(X86_FEATURE_SVME_ADDR_CHK))
5077 svm_gp_erratum_intercept = false;
5078
5079 if (vgif) {
5080 if (!boot_cpu_has(X86_FEATURE_VGIF))
5081 vgif = false;
5082 else
5083 pr_info("Virtual GIF supported\n");
5084 }
5085
5086 if (lbrv) {
5087 if (!boot_cpu_has(X86_FEATURE_LBRV))
5088 lbrv = false;
5089 else
5090 pr_info("LBR virtualization supported\n");
5091 }
5092
5093 if (!enable_pmu)
5094 pr_info("PMU virtualization is disabled\n");
5095
5096 svm_set_cpu_caps();
5097
5098 /*
5099  * It seems that on AMD processors the PTE accessed bit is set by the
5100  * CPU hardware before the NPF vmexit.  This is not expected behaviour
5101  * and our tests fail because of it.
5102  *
5103  * A workaround here is to disable support for
5104  * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled.  In this case
5105  * userspace can query support via the KVM_CAP_SMALLER_MAXPHYADDR
5106  * extension and decide how to handle it.
5107  *
5108  * If future AMD CPU models change the behaviour described above,
5109  * this variable can be changed accordingly.
5110  */
5111 allow_smaller_maxphyaddr = !npt_enabled;
5112
5113 return 0;
5114
5115 err:
5116 svm_hardware_unsetup();
5117 return r;
5118 }
5119
5120
5121 static struct kvm_x86_init_ops svm_init_ops __initdata = {
5122 .cpu_has_kvm_support = has_svm,
5123 .disabled_by_bios = is_disabled,
5124 .hardware_setup = svm_hardware_setup,
5125 .check_processor_compatibility = svm_check_processor_compat,
5126
5127 .runtime_ops = &svm_x86_ops,
5128 .pmu_ops = &amd_pmu_ops,
5129 };
5130
5131 static int __init svm_init(void)
5132 {
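/* Added descriptive comment: trigger the compile-time layout checks for the SVM structures. */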
5133 __unused_size_checks();
5134
5135 return kvm_init(&svm_init_ops, sizeof(struct vcpu_svm),
5136 __alignof__(struct vcpu_svm), THIS_MODULE);
5137 }
5138
5139 static void __exit svm_exit(void)
5140 {
5141 kvm_exit();
5142 }
5143
5144 module_init(svm_init)
5145 module_exit(svm_exit)