// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 */

#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_types.h>
#include <linux/hashtable.h>
#include <linux/amd-iommu.h>
#include <linux/kvm_host.h>

#include <asm/irq_remapping.h>

#include "trace.h"
#include "lapic.h"
#include "x86.h"
#include "irq.h"
#include "svm.h"

/* AVIC GATAG is encoded using VM and VCPU IDs */
#define AVIC_VCPU_ID_BITS		8
#define AVIC_VCPU_ID_MASK		((1 << AVIC_VCPU_ID_BITS) - 1)

#define AVIC_VM_ID_BITS			24
#define AVIC_VM_ID_NR			(1 << AVIC_VM_ID_BITS)
#define AVIC_VM_ID_MASK			((1 << AVIC_VM_ID_BITS) - 1)

#define AVIC_GATAG(x, y)		(((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
					 (y & AVIC_VCPU_ID_MASK))
#define AVIC_GATAG_TO_VMID(x)		((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
#define AVIC_GATAG_TO_VCPUID(x)		(x & AVIC_VCPU_ID_MASK)
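
/*
 * Illustrative note (added, not from the original source): AVIC_GATAG()
 * packs the 24-bit VM ID into bits 31:8 and the 8-bit vCPU ID into bits
 * 7:0 of the IOMMU GA tag.  For example, AVIC_GATAG(0x1234, 5) yields
 * 0x00123405, and AVIC_GATAG_TO_VMID()/AVIC_GATAG_TO_VCPUID() recover
 * 0x1234 and 5 from that value.
 */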

static bool force_avic;
module_param_unsafe(force_avic, bool, 0444);
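
/*
 * Added note: force_avic allows enabling AVIC even when the CPU does not
 * advertise X86_FEATURE_AVIC (see avic_hardware_setup() below); as an
 * unsafe module parameter, setting it taints the kernel.
 */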

/* Note:
 * This hash_table is used to map VM_ID to a struct kvm_svm,
 * when handling AMD IOMMU GALOG notification to schedule in
 * a particular vCPU.
 */
#define SVM_VM_DATA_HASH_BITS	8
static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
static u32 next_vm_id = 0;
static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
enum avic_modes avic_mode;

/*
 * This is a wrapper of struct amd_iommu_ir_data.
 */
struct amd_svm_iommu_ir {
	struct list_head node;	/* Used by SVM for per-vcpu ir_list */
	void *data;		/* Storing pointer to struct amd_ir_data */
};
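
/*
 * Added note: each vCPU keeps its amd_svm_iommu_ir entries on svm->ir_list,
 * protected by svm->ir_list_lock; see svm_ir_list_add(), svm_ir_list_del()
 * and avic_update_iommu_vcpu_affinity() below.
 */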

static void avic_activate_vmcb(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb01.ptr;

	vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
	vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;

	vmcb->control.int_ctl |= AVIC_ENABLE_MASK;

	/* Note:
	 * KVM can support hybrid-AVIC mode, where KVM emulates x2APIC
	 * MSR accesses, while interrupt injection to a running vCPU
	 * can be achieved using the AVIC doorbell. The AVIC hardware
	 * still accelerates MMIO accesses, but this does no harm as the
	 * guest is not supposed to access xAPIC MMIO when it uses x2APIC.
	 */
	if (apic_x2apic_mode(svm->vcpu.arch.apic) &&
	    avic_mode == AVIC_MODE_X2) {
		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
		vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID;
		/* Disabling MSR intercept for x2APIC registers */
		svm_set_x2apic_msr_interception(svm, false);
	} else {
		/* For xAVIC and hybrid-xAVIC modes */
		vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID;
		/* Enabling MSR intercept for x2APIC registers */
		svm_set_x2apic_msr_interception(svm, true);
	}
}

static void avic_deactivate_vmcb(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb01.ptr;

	vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
	vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;

	/*
	 * If running nested and the guest uses its own MSR bitmap, there
	 * is no need to update L0's MSR bitmap.
	 */
	if (is_guest_mode(&svm->vcpu) &&
	    vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT))
		return;

	/* Enabling MSR intercept for x2APIC registers */
	svm_set_x2apic_msr_interception(svm, true);
}

/* Note:
 * This function is called from the IOMMU driver to notify
 * SVM to schedule in a particular vCPU of a particular VM.
 */
int avic_ga_log_notifier(u32 ga_tag)
{
	unsigned long flags;
	struct kvm_svm *kvm_svm;
	struct kvm_vcpu *vcpu = NULL;
	u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
	u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);

	pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
	trace_kvm_avic_ga_log(vm_id, vcpu_id);

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
	hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
		if (kvm_svm->avic_vm_id != vm_id)
			continue;
		vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
		break;
	}
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);

	/* Note:
	 * At this point, the IOMMU should have already set the pending
	 * bit in the vAPIC backing page. So, we just need to schedule
	 * in the vCPU.
	 */
	if (vcpu)
		kvm_vcpu_wake_up(vcpu);

	return 0;
}

void avic_vm_destroy(struct kvm *kvm)
{
	unsigned long flags;
	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);

	if (!enable_apicv)
		return;

	if (kvm_svm->avic_logical_id_table_page)
		__free_page(kvm_svm->avic_logical_id_table_page);
	if (kvm_svm->avic_physical_id_table_page)
		__free_page(kvm_svm->avic_physical_id_table_page);

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
	hash_del(&kvm_svm->hnode);
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
}

int avic_vm_init(struct kvm *kvm)
{
	unsigned long flags;
	int err = -ENOMEM;
	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
	struct kvm_svm *k2;
	struct page *p_page;
	struct page *l_page;
	u32 vm_id;

	if (!enable_apicv)
		return 0;

	/* Allocating physical APIC ID table (4KB) */
	p_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!p_page)
		goto free_avic;

	kvm_svm->avic_physical_id_table_page = p_page;

	/* Allocating logical APIC ID table (4KB) */
	l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!l_page)
		goto free_avic;

	kvm_svm->avic_logical_id_table_page = l_page;

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
 again:
	vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
	if (vm_id == 0) { /* id is 1-based, zero is not okay */
		next_vm_id_wrapped = 1;
		goto again;
	}
	/* Is it still in use? Only possible if wrapped at least once */
	if (next_vm_id_wrapped) {
		hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
			if (k2->avic_vm_id == vm_id)
				goto again;
		}
	}
	kvm_svm->avic_vm_id = vm_id;
	hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);

	return 0;

free_avic:
	avic_vm_destroy(kvm);
	return err;
}

void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
{
	struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
	phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
	phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
	phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));

	vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
	vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
	vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
	vmcb->control.avic_vapic_bar = APIC_DEFAULT_PHYS_BASE & VMCB_AVIC_APIC_BAR_MASK;

	if (kvm_apicv_activated(svm->vcpu.kvm))
		avic_activate_vmcb(svm);
	else
		avic_deactivate_vmcb(svm);
}

static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
				       unsigned int index)
{
	u64 *avic_physical_id_table;
	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);

	if ((avic_mode == AVIC_MODE_X1 && index > AVIC_MAX_PHYSICAL_ID) ||
	    (avic_mode == AVIC_MODE_X2 && index > X2AVIC_MAX_PHYSICAL_ID))
		return NULL;

	avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);

	return &avic_physical_id_table[index];
}

/*
 * Note:
 * AVIC hardware walks the nested page table to check permissions,
 * but does not use the SPA address specified in the leaf page
 * table entry since it uses the address in the AVIC_BACKING_PAGE
 * pointer field of the VMCB. Therefore, we set up the
 * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
 */
static int avic_alloc_access_page(struct kvm *kvm)
{
	void __user *ret;
	int r = 0;

	mutex_lock(&kvm->slots_lock);

	if (kvm->arch.apic_access_memslot_enabled)
		goto out;

	ret = __x86_set_memory_region(kvm,
				      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
				      APIC_DEFAULT_PHYS_BASE,
				      PAGE_SIZE);
	if (IS_ERR(ret)) {
		r = PTR_ERR(ret);
		goto out;
	}

	kvm->arch.apic_access_memslot_enabled = true;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static int avic_init_backing_page(struct kvm_vcpu *vcpu)
{
	u64 *entry, new_entry;
	int id = vcpu->vcpu_id;
	struct vcpu_svm *svm = to_svm(vcpu);

	if ((avic_mode == AVIC_MODE_X1 && id > AVIC_MAX_PHYSICAL_ID) ||
	    (avic_mode == AVIC_MODE_X2 && id > X2AVIC_MAX_PHYSICAL_ID))
		return -EINVAL;

	if (!vcpu->arch.apic->regs)
		return -EINVAL;

	if (kvm_apicv_activated(vcpu->kvm)) {
		int ret;

		ret = avic_alloc_access_page(vcpu->kvm);
		if (ret)
			return ret;
	}

	svm->avic_backing_page = virt_to_page(vcpu->arch.apic->regs);

	/* Setting AVIC backing page address in the phy APIC ID table */
	entry = avic_get_physical_id_entry(vcpu, id);
	if (!entry)
		return -EINVAL;

	new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
			      AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
			      AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
	WRITE_ONCE(*entry, new_entry);

	svm->avic_physical_id_cache = entry;

	return 0;
}

void avic_ring_doorbell(struct kvm_vcpu *vcpu)
{
	/*
	 * Note, the vCPU could get migrated to a different pCPU at any point,
	 * which could result in signalling the wrong/previous pCPU.  But if
	 * that happens the vCPU is guaranteed to do a VMRUN (after being
	 * migrated) and thus will process pending interrupts, i.e. a doorbell
	 * is not needed (and the spurious one is harmless).
	 */
	int cpu = READ_ONCE(vcpu->cpu);

	if (cpu != get_cpu()) {
		wrmsrl(MSR_AMD64_SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
		trace_kvm_avic_doorbell(vcpu->vcpu_id, kvm_cpu_get_apicid(cpu));
	}
	put_cpu();
}

/*
 * A fast-path version of avic_kick_target_vcpus(), which attempts to match
 * destination APIC ID to vCPU without looping through all vCPUs.
 */
static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source,
				       u32 icrl, u32 icrh, u32 index)
{
	u32 l1_physical_id, dest;
	struct kvm_vcpu *target_vcpu;
	int dest_mode = icrl & APIC_DEST_MASK;
	int shorthand = icrl & APIC_SHORT_MASK;
	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);

	if (shorthand != APIC_DEST_NOSHORT)
		return -EINVAL;

	if (apic_x2apic_mode(source))
		dest = icrh;
	else
		dest = GET_XAPIC_DEST_FIELD(icrh);

	if (dest_mode == APIC_DEST_PHYSICAL) {
		/* broadcast destination, APICv does not take part in it */
		if (apic_x2apic_mode(source) && dest == X2APIC_BROADCAST)
			return -EINVAL;
		if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST)
			return -EINVAL;

		l1_physical_id = dest;

		if (WARN_ON_ONCE(l1_physical_id != index))
			return -EINVAL;

	} else {
		u32 bitmap, cluster;
		int logid_index;

		if (apic_x2apic_mode(source)) {
			/* 16 bit dest mask, 16 bit cluster id */
			bitmap = dest & 0xFFFF;
			cluster = (dest >> 16) << 4;
		} else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) {
			/* 8 bit dest mask, no cluster id */
			bitmap = dest;
			cluster = 0;
		} else {
			/* 4 bit dest mask, 4 bit cluster id */
			bitmap = dest & 0xF;
			cluster = (dest >> 4) << 2;
		}

		if (unlikely(!bitmap))
			/* guest ICR points to no one, silently ignore it */
			return 0;

		if (!is_power_of_2(bitmap))
			/* multiple logical destinations, use slow path */
			return -EINVAL;

		logid_index = cluster + __ffs(bitmap);

		if (!apic_x2apic_mode(source)) {
			u32 *avic_logical_id_table =
				page_address(kvm_svm->avic_logical_id_table_page);

			u32 logid_entry = avic_logical_id_table[logid_index];

			if (WARN_ON_ONCE(index != logid_index))
				return -EINVAL;

			/* Nothing to do if the logical destination is invalid. */
			if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK)))
				return 0;

			l1_physical_id = logid_entry &
					 AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
		} else {
			/*
			 * For x2APIC, the logical APIC ID is derived from the
			 * x2APIC ID, so recompute the physical ID from ICRH.
			 */
			int cluster = (icrh & 0xffff0000) >> 16;
			int apic = ffs(icrh & 0xffff) - 1;

			/*
			 * If the x2APIC logical ID sub-field (i.e. icrh[15:0])
			 * contains anything but a single bit, the fast path
			 * cannot be used, as it is limited to a single vCPU.
			 */
			if (apic < 0 || icrh != (1 << apic))
				return -EINVAL;

			l1_physical_id = (cluster << 4) + apic;
		}
	}

	target_vcpu = kvm_get_vcpu_by_id(kvm, l1_physical_id);
	if (unlikely(!target_vcpu))
		/* destination out of range, ignore */
		return 0;

	target_vcpu->arch.apic->irr_pending = true;
	svm_complete_interrupt_delivery(target_vcpu,
					icrl & APIC_MODE_MASK,
					icrl & APIC_INT_LEVELTRIG,
					icrl & APIC_VECTOR_MASK);
	return 0;
}

static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
				   u32 icrl, u32 icrh, u32 index)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	if (!avic_kick_target_vcpus_fast(kvm, source, icrl, icrh, index))
		return;

	trace_kvm_avic_kick_vcpu_slowpath(icrh, icrl, index);

	/*
	 * Wake any target vCPUs that are blocking, i.e. waiting for a wake
	 * event.  There's no need to signal doorbells, as hardware has handled
	 * vCPUs that were in guest at the time of the IPI, and vCPUs that have
	 * since entered the guest will have processed pending IRQs at VMRUN.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		u32 dest;

		if (apic_x2apic_mode(vcpu->arch.apic))
			dest = icrh;
		else
			dest = GET_XAPIC_DEST_FIELD(icrh);

		if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK,
					dest, icrl & APIC_DEST_MASK)) {
			vcpu->arch.apic->irr_pending = true;
			svm_complete_interrupt_delivery(vcpu,
							icrl & APIC_MODE_MASK,
							icrl & APIC_INT_LEVELTRIG,
							icrl & APIC_VECTOR_MASK);
		}
	}
}

int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
	u32 icrl = svm->vmcb->control.exit_info_1;
	u32 id = svm->vmcb->control.exit_info_2 >> 32;
	u32 index = svm->vmcb->control.exit_info_2 & 0x1FF;
	struct kvm_lapic *apic = vcpu->arch.apic;

	trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);

	switch (id) {
	case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
		/*
		 * Emulate IPIs that are not handled by AVIC hardware, which
		 * only virtualizes Fixed, Edge-Triggered INTRs.  The exit is
		 * a trap, e.g. ICR holds the correct value and RIP has been
		 * advanced, KVM is responsible only for emulating the IPI.
		 * Sadly, hardware may sometimes leave the BUSY flag set, in
		 * which case KVM needs to emulate the ICR write as well in
		 * order to clear the BUSY flag.
		 */
		if (icrl & APIC_ICR_BUSY)
			kvm_apic_write_nodecode(vcpu, APIC_ICR);
		else
			kvm_apic_send_ipi(apic, icrl, icrh);
		break;
	case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING:
		/*
		 * At this point, we expect that the AVIC HW has already
		 * set the appropriate IRR bits on the valid target
		 * vcpus. So, we just need to kick the appropriate vcpu.
		 */
		avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh, index);
		break;
	case AVIC_IPI_FAILURE_INVALID_TARGET:
		break;
	case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
		WARN_ONCE(1, "Invalid backing page\n");
		break;
	default:
		pr_err("Unknown IPI interception\n");
	}

	return 1;
}

unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu)
{
	if (is_guest_mode(vcpu))
		return APICV_INHIBIT_REASON_NESTED;
	return 0;
}

static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
{
	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
	int index;
	u32 *logical_apic_id_table;
	int dlid = GET_APIC_LOGICAL_ID(ldr);

	if (!dlid)
		return NULL;

	if (flat) {
		index = ffs(dlid) - 1;
		if (index > 7)
			return NULL;
	} else {
		int cluster = (dlid & 0xf0) >> 4;
		int apic = ffs(dlid & 0x0f) - 1;

		if ((apic < 0) || (apic > 7) ||
		    (cluster >= 0xf))
			return NULL;
		index = (cluster << 2) + apic;
	}

	logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);

	return &logical_apic_id_table[index];
}

static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
{
	bool flat;
	u32 *entry, new_entry;

	flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
	entry = avic_get_logical_id_entry(vcpu, ldr, flat);
	if (!entry)
		return -EINVAL;

	new_entry = READ_ONCE(*entry);
	new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
	new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
	new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
	WRITE_ONCE(*entry, new_entry);

	return 0;
}

static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	bool flat = svm->dfr_reg == APIC_DFR_FLAT;
	u32 *entry;

	/* Note: x2AVIC does not use the logical APIC ID table */
	if (apic_x2apic_mode(vcpu->arch.apic))
		return;

	entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
	if (entry)
		clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
}

static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
{
	int ret = 0;
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
	u32 id = kvm_xapic_id(vcpu->arch.apic);

	/* AVIC does not support LDR update for x2APIC */
	if (apic_x2apic_mode(vcpu->arch.apic))
		return 0;

	if (ldr == svm->ldr_reg)
		return 0;

	avic_invalidate_logical_id_entry(vcpu);

	if (ldr)
		ret = avic_ldr_write(vcpu, id, ldr);

	if (!ret)
		svm->ldr_reg = ldr;

	return ret;
}

static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);

	if (svm->dfr_reg == dfr)
		return;

	avic_invalidate_logical_id_entry(vcpu);
	svm->dfr_reg = dfr;
}

static int avic_unaccel_trap_write(struct kvm_vcpu *vcpu)
{
	u32 offset = to_svm(vcpu)->vmcb->control.exit_info_1 &
				AVIC_UNACCEL_ACCESS_OFFSET_MASK;

	switch (offset) {
	case APIC_LDR:
		if (avic_handle_ldr_update(vcpu))
			return 0;
		break;
	case APIC_DFR:
		avic_handle_dfr_update(vcpu);
		break;
	default:
		break;
	}

	kvm_apic_write_nodecode(vcpu, offset);
	return 1;
}

static bool is_avic_unaccelerated_access_trap(u32 offset)
{
	bool ret = false;

	switch (offset) {
	case APIC_ID:
	case APIC_EOI:
	case APIC_RRR:
	case APIC_LDR:
	case APIC_DFR:
	case APIC_SPIV:
	case APIC_ESR:
	case APIC_ICR:
	case APIC_LVTT:
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVT0:
	case APIC_LVT1:
	case APIC_LVTERR:
	case APIC_TMICT:
	case APIC_TDCR:
		ret = true;
		break;
	default:
		break;
	}
	return ret;
}

int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int ret = 0;
	u32 offset = svm->vmcb->control.exit_info_1 &
		     AVIC_UNACCEL_ACCESS_OFFSET_MASK;
	u32 vector = svm->vmcb->control.exit_info_2 &
		     AVIC_UNACCEL_ACCESS_VECTOR_MASK;
	bool write = (svm->vmcb->control.exit_info_1 >> 32) &
		     AVIC_UNACCEL_ACCESS_WRITE_MASK;
	bool trap = is_avic_unaccelerated_access_trap(offset);

	trace_kvm_avic_unaccelerated_access(vcpu->vcpu_id, offset,
					    trap, write, vector);
	if (trap) {
		/* Handling Trap */
		WARN_ONCE(!write, "svm: Handling trap read.\n");
		ret = avic_unaccel_trap_write(vcpu);
	} else {
		/* Handling Fault */
		ret = kvm_emulate_instruction(vcpu, 0);
	}

	return ret;
}

int avic_init_vcpu(struct vcpu_svm *svm)
{
	int ret;
	struct kvm_vcpu *vcpu = &svm->vcpu;

	if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm))
		return 0;

	ret = avic_init_backing_page(vcpu);
	if (ret)
		return ret;

	INIT_LIST_HEAD(&svm->ir_list);
	spin_lock_init(&svm->ir_list_lock);
	svm->dfr_reg = APIC_DFR_FLAT;

	return ret;
}

void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu)
{
	avic_handle_dfr_update(vcpu);
	avic_handle_ldr_update(vcpu);
}

void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
	if (!lapic_in_kernel(vcpu) || avic_mode == AVIC_MODE_NONE)
		return;

	if (kvm_get_apic_mode(vcpu) == LAPIC_MODE_INVALID) {
		WARN_ONCE(true, "Invalid local APIC state (vcpu_id=%d)", vcpu->vcpu_id);
		return;
	}
	avic_refresh_apicv_exec_ctrl(vcpu);
}

static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!kvm_arch_has_assigned_device(vcpu->kvm))
		return 0;

	/*
	 * Here, we go through the per-vcpu ir_list to update all existing
	 * interrupt remapping table entries targeting this vcpu.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);

	if (list_empty(&svm->ir_list))
		goto out;

	list_for_each_entry(ir, &svm->ir_list, node) {
		if (activate)
			ret = amd_iommu_activate_guest_mode(ir->data);
		else
			ret = amd_iommu_deactivate_guest_mode(ir->data);
		if (ret)
			break;
	}
out:
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
	return ret;
}

static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
	unsigned long flags;
	struct amd_svm_iommu_ir *cur;

	spin_lock_irqsave(&svm->ir_list_lock, flags);
	list_for_each_entry(cur, &svm->ir_list, node) {
		if (cur->data != pi->ir_data)
			continue;
		list_del(&cur->node);
		kfree(cur);
		break;
	}
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}

static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;

	/*
	 * In some cases, the existing IRTE is updated and re-set,
	 * so we need to check here if it's already been added
	 * to the ir_list.
	 */
	if (pi->ir_data && (pi->prev_ga_tag != 0)) {
		struct kvm *kvm = svm->vcpu.kvm;
		u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
		struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
		struct vcpu_svm *prev_svm;

		if (!prev_vcpu) {
			ret = -EINVAL;
			goto out;
		}

		prev_svm = to_svm(prev_vcpu);
		svm_ir_list_del(prev_svm, pi);
	}

	/*
	 * Allocating a new amd_svm_iommu_ir, which will get
	 * added to the per-vcpu ir_list.
	 */
	ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
	if (!ir) {
		ret = -ENOMEM;
		goto out;
	}
	ir->data = pi->ir_data;

	spin_lock_irqsave(&svm->ir_list_lock, flags);
	list_add(&ir->node, &svm->ir_list);
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
out:
	return ret;
}

/*
 * Note:
 * The HW cannot support posting multicast/broadcast
 * interrupts to a vCPU. So, we still use legacy interrupt
 * remapping for these kinds of interrupts.
 *
 * For lowest-priority interrupts, we only support
 * those with a single CPU as the destination, e.g. the user
 * configures the interrupts via /proc/irq or uses
 * irqbalance to make the interrupts single-CPU.
 */
static int
get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
		 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
{
	struct kvm_lapic_irq irq;
	struct kvm_vcpu *vcpu = NULL;

	kvm_set_msi_irq(kvm, e, &irq);

	if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
	    !kvm_irq_is_postable(&irq)) {
		pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
			 __func__, irq.vector);
		return -1;
	}

	pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
		 irq.vector);
	*svm = to_svm(vcpu);
	vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
	vcpu_info->vector = irq.vector;

	return 0;
}

/*
 * avic_pi_update_irte - set IRTE for Posted-Interrupts
 *
 * @kvm: kvm
 * @host_irq: host irq of the interrupt
 * @guest_irq: gsi of the interrupt
 * @set: set or unset PI
 * returns 0 on success, < 0 on failure
 */
int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
			uint32_t guest_irq, bool set)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct kvm_irq_routing_table *irq_rt;
	int idx, ret = 0;

	if (!kvm_arch_has_assigned_device(kvm) ||
	    !irq_remapping_cap(IRQ_POSTING_CAP))
		return 0;

	pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
		 __func__, host_irq, guest_irq, set);

	idx = srcu_read_lock(&kvm->irq_srcu);
	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);

	if (guest_irq >= irq_rt->nr_rt_entries ||
	    hlist_empty(&irq_rt->map[guest_irq])) {
		pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
			     guest_irq, irq_rt->nr_rt_entries);
		goto out;
	}

	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
		struct vcpu_data vcpu_info;
		struct vcpu_svm *svm = NULL;

		if (e->type != KVM_IRQ_ROUTING_MSI)
			continue;

		/*
		 * Here, we set up with legacy mode in the following cases:
		 * 1. When we cannot target the interrupt to a specific vcpu.
		 * 2. Unsetting the posted interrupt.
		 * 3. APIC virtualization is disabled for the vcpu.
		 * 4. IRQ has an incompatible delivery mode (SMI, INIT, etc.)
		 */
		if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
		    kvm_vcpu_apicv_active(&svm->vcpu)) {
			struct amd_iommu_pi_data pi;

			/* Try to enable guest_mode in IRTE */
			pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
					    AVIC_HPA_MASK);
			pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
					       svm->vcpu.vcpu_id);
			pi.is_guest_mode = true;
			pi.vcpu_data = &vcpu_info;
			ret = irq_set_vcpu_affinity(host_irq, &pi);

			/*
			 * Here, we have successfully set up vcpu affinity in
			 * IOMMU guest mode. Now, we need to store the posted
			 * interrupt information in the per-vcpu ir_list so
			 * that we can reference it directly when we update
			 * vcpu scheduling information in the IOMMU IRTE.
			 */
			if (!ret && pi.is_guest_mode)
				svm_ir_list_add(svm, &pi);
		} else {
			/* Use legacy mode in IRTE */
			struct amd_iommu_pi_data pi;

			/*
			 * Here, pi is used to:
			 * - Tell the IOMMU to use legacy mode for this interrupt.
			 * - Retrieve ga_tag of prior interrupt remapping data.
			 */
			pi.prev_ga_tag = 0;
			pi.is_guest_mode = false;
			ret = irq_set_vcpu_affinity(host_irq, &pi);

			/*
			 * Check if the posted interrupt was previously
			 * set up with guest_mode by checking if the ga_tag
			 * was cached. If so, we need to clean up the per-vcpu
			 * ir_list.
			 */
			if (!ret && pi.prev_ga_tag) {
				int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
				struct kvm_vcpu *vcpu;

				vcpu = kvm_get_vcpu_by_id(kvm, id);
				if (vcpu)
					svm_ir_list_del(to_svm(vcpu), &pi);
			}
		}

		if (!ret && svm) {
			trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
						 e->gsi, vcpu_info.vector,
						 vcpu_info.pi_desc_addr, set);
		}

		if (ret < 0) {
			pr_err("%s: failed to update PI IRTE\n", __func__);
			goto out;
		}
	}

	ret = 0;
out:
	srcu_read_unlock(&kvm->irq_srcu, idx);
	return ret;
}

bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
{
	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
			  BIT(APICV_INHIBIT_REASON_ABSENT) |
			  BIT(APICV_INHIBIT_REASON_HYPERV) |
			  BIT(APICV_INHIBIT_REASON_NESTED) |
			  BIT(APICV_INHIBIT_REASON_IRQWIN) |
			  BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
			  BIT(APICV_INHIBIT_REASON_SEV) |
			  BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
			  BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);

	return supported & BIT(reason);
}

static inline int
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!kvm_arch_has_assigned_device(vcpu->kvm))
		return 0;

	/*
	 * Here, we go through the per-vcpu ir_list to update all existing
	 * interrupt remapping table entries targeting this vcpu.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);

	if (list_empty(&svm->ir_list))
		goto out;

	list_for_each_entry(ir, &svm->ir_list, node) {
		ret = amd_iommu_update_ga(cpu, r, ir->data);
		if (ret)
			break;
	}
out:
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
	return ret;
}

void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	u64 entry;
	int h_physical_id = kvm_cpu_get_apicid(cpu);
	struct vcpu_svm *svm = to_svm(vcpu);

	lockdep_assert_preemption_disabled();

	if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
		return;

	/*
	 * No need to update anything if the vCPU is blocking, i.e. if the vCPU
	 * is being scheduled in after being preempted.  The CPU entries in the
	 * Physical APIC table and IRTE are consumed iff IsRun{ning} is '1'.
	 * If the vCPU was migrated, its new CPU value will be stuffed when the
	 * vCPU unblocks.
	 */
	if (kvm_vcpu_is_blocking(vcpu))
		return;

	entry = READ_ONCE(*(svm->avic_physical_id_cache));

	entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
	entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
	entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;

	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
}

void avic_vcpu_put(struct kvm_vcpu *vcpu)
{
	u64 entry;
	struct vcpu_svm *svm = to_svm(vcpu);

	lockdep_assert_preemption_disabled();

	entry = READ_ONCE(*(svm->avic_physical_id_cache));

	/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
	if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
		return;

	avic_update_iommu_vcpu_affinity(vcpu, -1, 0);

	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}

void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = svm->vmcb01.ptr;
	bool activated = kvm_vcpu_apicv_active(vcpu);

	if (!enable_apicv)
		return;

	if (activated) {
		/*
		 * During temporary AVIC deactivation, the guest could update
		 * the APIC ID, DFR and LDR registers, which would not be
		 * trapped by avic_unaccelerated_access_interception(). In this
		 * case, we need to check and update the AVIC logical APIC ID
		 * table accordingly before re-activating.
		 */
		avic_apicv_post_state_restore(vcpu);
		avic_activate_vmcb(svm);
	} else {
		avic_deactivate_vmcb(svm);
	}
	vmcb_mark_dirty(vmcb, VMCB_AVIC);

	if (activated)
		avic_vcpu_load(vcpu, vcpu->cpu);
	else
		avic_vcpu_put(vcpu);

	avic_set_pi_irte_mode(vcpu, activated);
}

void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_apicv_active(vcpu))
		return;

	/*
	 * Unload the AVIC when the vCPU is about to block, _before_
	 * the vCPU actually blocks.
	 *
	 * Any IRQs that arrive before IsRunning=0 will not cause an
	 * incomplete IPI vmexit on the source, therefore vIRR will also
	 * be checked by kvm_vcpu_check_block() before blocking.  The
	 * memory barrier implicit in set_current_state orders writing
	 * IsRunning=0 before reading the vIRR.  The processor needs a
	 * matching memory barrier on interrupt delivery between writing
	 * IRR and reading IsRunning; the lack of this barrier might be
	 * the cause of errata #1235.
	 */
	avic_vcpu_put(vcpu);
}

void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_apicv_active(vcpu))
		return;

	avic_vcpu_load(vcpu, vcpu->cpu);
}

/*
 * Note:
 * - The module param avic enables both xAPIC and x2APIC mode.
 * - The hypervisor can support both xAVIC and x2AVIC in the same guest.
 * - The mode can be switched at run-time.
 */
bool avic_hardware_setup(struct kvm_x86_ops *x86_ops)
{
	if (!npt_enabled)
		return false;

	if (boot_cpu_has(X86_FEATURE_AVIC)) {
		avic_mode = AVIC_MODE_X1;
		pr_info("AVIC enabled\n");
	} else if (force_avic) {
		/*
		 * Some older systems do not advertise AVIC support.
		 * See the Revision Guide for the specific AMD processor
		 * for more detail.
		 */
		avic_mode = AVIC_MODE_X1;
		pr_warn("AVIC is not supported in CPUID but force enabled");
		pr_warn("Your system might crash and burn");
	}

	/* AVIC is a prerequisite for x2AVIC. */
	if (boot_cpu_has(X86_FEATURE_X2AVIC)) {
		if (avic_mode == AVIC_MODE_X1) {
			avic_mode = AVIC_MODE_X2;
			pr_info("x2AVIC enabled\n");
		} else {
			pr_warn(FW_BUG "Cannot support x2AVIC because AVIC is disabled");
			pr_warn(FW_BUG "Try enabling AVIC using the force_avic option");
		}
	}

	if (avic_mode != AVIC_MODE_NONE)
		amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);

	return !!avic_mode;
}