// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * KVM guest paravirtual support: async page faults, steal time accounting,
 * PV EOI, PV IPIs, PV TLB flushing and PV spinlocks.
 */
#define pr_fmt(fmt) "kvm-guest: " fmt

#include <linux/context_tracking.h>
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kvm_para.h>
#include <linux/cpu.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hardirq.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/hash.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/kprobes.h>
#include <linux/nmi.h>
#include <linux/swait.h>
#include <linux/syscore_ops.h>
#include <linux/cc_platform.h>
#include <linux/efi.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/tlbflush.h>
#include <asm/apic.h>
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
#include <asm/tlb.h>
#include <asm/cpuidle_haltpoll.h>
#include <asm/ptrace.h>
#include <asm/reboot.h>
#include <asm/svm.h>
#include <asm/e820/api.h>

DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);

static int kvmapf = 1;

static int __init parse_no_kvmapf(char *arg)
{
        kvmapf = 0;
        return 0;
}

early_param("no-kvmapf", parse_no_kvmapf);

static int steal_acc = 1;
static int __init parse_no_stealacc(char *arg)
{
        steal_acc = 0;
        return 0;
}

early_param("no-steal-acc", parse_no_stealacc);

static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
static int has_steal_clock = 0;

static int has_guest_poll = 0;

/*
 * No need for any "IO delay" on KVM.
 */
static void kvm_io_delay(void)
{
}

#define KVM_TASK_SLEEP_HASHBITS 8
#define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS)

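/*
 * Tasks that take an async "page not present" fault are parked on a small
 * hash table keyed by the fault token until the host signals "page ready"
 * for that token (or ~0 to wake everything, e.g. on CPU offline).
 */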
struct kvm_task_sleep_node {
        struct hlist_node link;
        struct swait_queue_head wq;
        u32 token;
        int cpu;
};

static struct kvm_task_sleep_head {
        raw_spinlock_t lock;
        struct hlist_head list;
} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];

static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
                                                  u32 token)
{
        struct hlist_node *p;

        hlist_for_each(p, &b->list) {
                struct kvm_task_sleep_node *n =
                        hlist_entry(p, typeof(*n), link);
                if (n->token == token)
                        return n;
        }

        return NULL;
}

static bool kvm_async_pf_queue_task(u32 token, struct kvm_task_sleep_node *n)
{
        u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
        struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
        struct kvm_task_sleep_node *e;

        raw_spin_lock(&b->lock);
        e = _find_apf_task(b, token);
        if (e) {
                /* A dummy entry exists: the wakeup arrived before the fault. */
                hlist_del(&e->link);
                raw_spin_unlock(&b->lock);
                kfree(e);
                return false;
        }

        n->token = token;
        n->cpu = smp_processor_id();
        init_swait_queue_head(&n->wq);
        hlist_add_head(&n->link, &b->list);
        raw_spin_unlock(&b->lock);
        return true;
}

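/*
 * kvm_async_pf_task_wait_schedule - Wait for an async #PF to be handled.
 * @token:      Token identifying the sleep node entry.
 *
 * Called with interrupts disabled; sleeps until the host reports the page
 * as ready (kvm_async_pf_task_wake() unhashes the node), re-enabling
 * interrupts only around schedule().
 */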
void kvm_async_pf_task_wait_schedule(u32 token)
{
        struct kvm_task_sleep_node n;
        DECLARE_SWAITQUEUE(wait);

        lockdep_assert_irqs_disabled();

        if (!kvm_async_pf_queue_task(token, &n))
                return;

        for (;;) {
                prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
                if (hlist_unhashed(&n.link))
                        break;

                local_irq_enable();
                schedule();
                local_irq_disable();
        }
        finish_swait(&n.wq, &wait);
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule);

static void apf_task_wake_one(struct kvm_task_sleep_node *n)
{
        hlist_del_init(&n->link);
        if (swq_has_sleeper(&n->wq))
                swake_up_one(&n->wq);
}

static void apf_task_wake_all(void)
{
        int i;

        for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
                struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
                struct kvm_task_sleep_node *n;
                struct hlist_node *p, *next;

                raw_spin_lock(&b->lock);
                hlist_for_each_safe(p, next, &b->list) {
                        n = hlist_entry(p, typeof(*n), link);
                        if (n->cpu == smp_processor_id())
                                apf_task_wake_one(n);
                }
                raw_spin_unlock(&b->lock);
        }
}

void kvm_async_pf_task_wake(u32 token)
{
        u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
        struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
        struct kvm_task_sleep_node *n, *dummy = NULL;

        if (token == ~0) {
                apf_task_wake_all();
                return;
        }

again:
        raw_spin_lock(&b->lock);
        n = _find_apf_task(b, token);
        if (!n) {
                /*
                 * Async #PF not yet handled, add a dummy entry for the token.
                 * The allocation must be done with the lock dropped, as the
                 * allocator is preemptible on PREEMPT_RT kernels.
                 */
                if (!dummy) {
                        raw_spin_unlock(&b->lock);
                        dummy = kzalloc(sizeof(*dummy), GFP_ATOMIC);

                        /*
                         * Continue looping on allocation failure, eventually
                         * the async #PF will be handled and allocating a new
                         * node will be unnecessary.
                         */
                        if (!dummy)
                                cpu_relax();

                        /*
                         * Recheck for async #PF completion before enqueueing
                         * the dummy token, as the fault may have been handled
                         * while the lock was dropped.
                         */
                        goto again;
                }
                dummy->token = token;
                dummy->cpu = smp_processor_id();
                init_swait_queue_head(&dummy->wq);
                hlist_add_head(&dummy->link, &b->list);
                dummy = NULL;
        } else {
                apf_task_wake_one(n);
        }
        raw_spin_unlock(&b->lock);

        /* A dummy token might have been allocated and ultimately not used. */
        kfree(dummy);
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);

noinstr u32 kvm_read_and_reset_apf_flags(void)
{
        u32 flags = 0;

        if (__this_cpu_read(apf_reason.enabled)) {
                flags = __this_cpu_read(apf_reason.flags);
                __this_cpu_write(apf_reason.flags, 0);
        }

        return flags;
}
EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);

noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
        u32 flags = kvm_read_and_reset_apf_flags();
        irqentry_state_t state;

        if (!flags)
                return false;

        state = irqentry_enter(regs);
        instrumentation_begin();

        /*
         * If the host managed to inject an async #PF into an interrupt
         * disabled region, then it must be a host bug: delivering an async
         * #PF requires that the guest had interrupts enabled. There is no
         * sane way to recover, so panic.
         */
        if (unlikely(!(regs->flags & X86_EFLAGS_IF)))
                panic("Host injected async #PF in interrupt disabled region\n");

        if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
                if (unlikely(!(user_mode(regs))))
                        panic("Host injected async #PF in kernel mode\n");
                /* Page is swapped out by the host. */
                kvm_async_pf_task_wait_schedule(token);
        } else {
                WARN_ONCE(1, "Unexpected async PF flags: %x\n", flags);
        }

        instrumentation_end();
        irqentry_exit(regs, state);
        return true;
}

DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
{
        struct pt_regs *old_regs = set_irq_regs(regs);
        u32 token;

        ack_APIC_irq();

        inc_irq_stat(irq_hv_callback_count);

        if (__this_cpu_read(apf_reason.enabled)) {
                token = __this_cpu_read(apf_reason.token);
                kvm_async_pf_task_wake(token);
                __this_cpu_write(apf_reason.token, 0);
                wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
        }

        set_irq_regs(old_regs);
}

static void __init paravirt_ops_setup(void)
{
        pv_info.name = "KVM";

        if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
                pv_ops.cpu.io_delay = kvm_io_delay;

#ifdef CONFIG_X86_IO_APIC
        no_timer_check = 1;
#endif
}

static void kvm_register_steal_time(void)
{
        int cpu = smp_processor_id();
        struct kvm_steal_time *st = &per_cpu(steal_time, cpu);

        if (!has_steal_clock)
                return;

        wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
        pr_debug("stealtime: cpu %d, msr %llx\n", cpu,
                 (unsigned long long) slow_virt_to_phys(st));
}

static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;

static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
{
        /*
         * This relies on __test_and_clear_bit to modify the memory
         * in a way that is atomic with respect to the local CPU.
         * The hypervisor only accesses this memory from the local CPU, so
         * there is no need for a lock or memory barriers.
         * An optimization barrier is implied by the APIC write.
         */
        if (__test_and_clear_bit(KVM_PV_EOI_BIT, this_cpu_ptr(&kvm_apic_eoi)))
                return;
        apic->native_eoi_write(APIC_EOI, APIC_EOI_ACK);
}

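/*
 * Per-CPU guest setup: register the async #PF shared area and interrupt
 * vector, the PV EOI word and the steal time area with the hypervisor via
 * their respective MSRs.
 */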
static void kvm_guest_cpu_init(void)
{
        if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
                u64 pa;

                WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));

                pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
                pa |= KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;

                if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
                        pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;

                wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);

                wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
                __this_cpu_write(apf_reason.enabled, 1);
                pr_debug("setup async PF for cpu %d\n", smp_processor_id());
        }

        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
                unsigned long pa;

                /* Size alignment is implied but just to make it explicit. */
                BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
                __this_cpu_write(kvm_apic_eoi, 0);
                pa = slow_virt_to_phys(this_cpu_ptr(&kvm_apic_eoi))
                        | KVM_MSR_ENABLED;
                wrmsrl(MSR_KVM_PV_EOI_EN, pa);
        }

        if (has_steal_clock)
                kvm_register_steal_time();
}

static void kvm_pv_disable_apf(void)
{
        if (!__this_cpu_read(apf_reason.enabled))
                return;

        wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
        __this_cpu_write(apf_reason.enabled, 0);

        pr_debug("disable async PF for cpu %d\n", smp_processor_id());
}

static void kvm_disable_steal_time(void)
{
        if (!has_steal_clock)
                return;

        wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
}

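/*
 * Steal time is published by the host in a seqcount-like record: an odd
 * version means an update is in progress, so retry until two reads of the
 * version match and the value is even.
 */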
static u64 kvm_steal_clock(int cpu)
{
        u64 steal;
        struct kvm_steal_time *src;
        int version;

        src = &per_cpu(steal_time, cpu);
        do {
                version = src->version;
                virt_rmb();
                steal = src->steal;
                virt_rmb();
        } while ((version & 1) || (version != src->version));

        return steal;
}

static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
{
        early_set_memory_decrypted((unsigned long) ptr, size);
}

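/*
 * Iterate through all possible CPUs and map the memory regions pointed to
 * by apf_reason, steal_time and kvm_apic_eoi as decrypted at once.
 *
 * Note: we iterate through all possible CPUs so that CPUs hotplugged later
 * will already have their per-cpu variables mapped as decrypted.
 */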
static void __init sev_map_percpu_data(void)
{
        int cpu;

        if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
                return;

        for_each_possible_cpu(cpu) {
                __set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason));
                __set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time));
                __set_percpu_decrypted(&per_cpu(kvm_apic_eoi, cpu), sizeof(kvm_apic_eoi));
        }
}

static void kvm_guest_cpu_offline(bool shutdown)
{
        kvm_disable_steal_time();
        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
                wrmsrl(MSR_KVM_PV_EOI_EN, 0);
        if (kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
                wrmsrl(MSR_KVM_MIGRATION_CONTROL, 0);
        kvm_pv_disable_apf();
        if (!shutdown)
                apf_task_wake_all();
        kvmclock_disable();
}

static int kvm_cpu_online(unsigned int cpu)
{
        unsigned long flags;

        local_irq_save(flags);
        kvm_guest_cpu_init();
        local_irq_restore(flags);
        return 0;
}

#ifdef CONFIG_SMP

static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);

static bool pv_tlb_flush_supported(void)
{
        return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
                !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
                kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) &&
                !boot_cpu_has(X86_FEATURE_MWAIT) &&
                (num_possible_cpus() != 1));
}

static bool pv_ipi_supported(void)
{
        return (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) &&
                (num_possible_cpus() != 1));
}

static bool pv_sched_yield_supported(void)
{
        return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
                !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
                kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) &&
                !boot_cpu_has(X86_FEATURE_MWAIT) &&
                (num_possible_cpus() != 1));
}

#define KVM_IPI_CLUSTER_SIZE    (2 * BITS_PER_LONG)

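/*
 * KVM_HC_SEND_IPI takes a bitmap of up to 128 destination APIC IDs encoded
 * relative to "min" (the lowest APIC ID in the current cluster). Whenever
 * the next APIC ID does not fit into the 128-bit window, the accumulated
 * bitmap is flushed with a hypercall and a new window is started.
 */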
static void __send_ipi_mask(const struct cpumask *mask, int vector)
{
        unsigned long flags;
        int cpu, apic_id, icr;
        int min = 0, max = 0;
#ifdef CONFIG_X86_64
        __uint128_t ipi_bitmap = 0;
#else
        u64 ipi_bitmap = 0;
#endif
        long ret;

        if (cpumask_empty(mask))
                return;

        local_irq_save(flags);

        switch (vector) {
        default:
                icr = APIC_DM_FIXED | vector;
                break;
        case NMI_VECTOR:
                icr = APIC_DM_NMI;
                break;
        }

        for_each_cpu(cpu, mask) {
                apic_id = per_cpu(x86_cpu_to_apicid, cpu);
                if (!ipi_bitmap) {
                        min = max = apic_id;
                } else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) {
                        ipi_bitmap <<= min - apic_id;
                        min = apic_id;
                } else if (apic_id > min && apic_id < min + KVM_IPI_CLUSTER_SIZE) {
                        max = apic_id < max ? max : apic_id;
                } else {
                        ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
                                (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
                        WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
                                  ret);
                        min = max = apic_id;
                        ipi_bitmap = 0;
                }
                __set_bit(apic_id - min, (unsigned long *)&ipi_bitmap);
        }

        if (ipi_bitmap) {
                ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
                        (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
                WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
                          ret);
        }

        local_irq_restore(flags);
}

static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
{
        __send_ipi_mask(mask, vector);
}

static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
{
        unsigned int this_cpu = smp_processor_id();
        struct cpumask *new_mask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
        const struct cpumask *local_mask;

        cpumask_copy(new_mask, mask);
        cpumask_clear_cpu(this_cpu, new_mask);
        local_mask = new_mask;
        __send_ipi_mask(local_mask, vector);
}

0572
0573 static int __init setup_efi_kvm_sev_migration(void)
0574 {
0575 efi_char16_t efi_sev_live_migration_enabled[] = L"SevLiveMigrationEnabled";
0576 efi_guid_t efi_variable_guid = AMD_SEV_MEM_ENCRYPT_GUID;
0577 efi_status_t status;
0578 unsigned long size;
0579 bool enabled;
0580
0581 if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) ||
0582 !kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
0583 return 0;
0584
0585 if (!efi_enabled(EFI_BOOT))
0586 return 0;
0587
0588 if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
0589 pr_info("%s : EFI runtime services are not enabled\n", __func__);
0590 return 0;
0591 }
0592
0593 size = sizeof(enabled);
0594
0595
0596 status = efi.get_variable(efi_sev_live_migration_enabled,
0597 &efi_variable_guid, NULL, &size, &enabled);
0598
0599 if (status == EFI_NOT_FOUND) {
0600 pr_info("%s : EFI live migration variable not found\n", __func__);
0601 return 0;
0602 }
0603
0604 if (status != EFI_SUCCESS) {
0605 pr_info("%s : EFI variable retrieval failed\n", __func__);
0606 return 0;
0607 }
0608
0609 if (enabled == 0) {
0610 pr_info("%s: live migration disabled in EFI\n", __func__);
0611 return 0;
0612 }
0613
0614 pr_info("%s : live migration enabled in EFI\n", __func__);
0615 wrmsrl(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY);
0616
0617 return 1;
0618 }
0619
0620 late_initcall(setup_efi_kvm_sev_migration);
0621
0622
0623
0624
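/*
 * Set the PV IPI entry points in the APIC driver.
 */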
static void kvm_setup_pv_ipi(void)
{
        apic->send_IPI_mask = kvm_send_ipi_mask;
        apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
        pr_info("setup PV IPIs\n");
}

static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
{
        int cpu;

        native_send_call_func_ipi(mask);

        /* Make sure other vCPUs get a chance to run if they need to. */
        for_each_cpu(cpu, mask) {
                if (!idle_cpu(cpu) && vcpu_is_preempted(cpu)) {
                        kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
                        break;
                }
        }
}

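/*
 * PV TLB flush: instead of IPIing preempted vCPUs, set KVM_VCPU_FLUSH_TLB
 * in their steal time record so the host flushes their TLB on the next
 * VM entry, and only send the flush IPI to vCPUs that are actually running.
 */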
static void kvm_flush_tlb_multi(const struct cpumask *cpumask,
                                const struct flush_tlb_info *info)
{
        u8 state;
        int cpu;
        struct kvm_steal_time *src;
        struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);

        cpumask_copy(flushmask, cpumask);
        /*
         * We have to call flush only on online vCPUs, and
         * queue flush_on_enter for preempted vCPUs.
         */
        for_each_cpu(cpu, flushmask) {
                /*
                 * The local vCPU is never preempted, so we do not explicitly
                 * skip the check for the local vCPU - it will never be
                 * cleared from flushmask.
                 */
                src = &per_cpu(steal_time, cpu);
                state = READ_ONCE(src->preempted);
                if ((state & KVM_VCPU_PREEMPTED)) {
                        if (try_cmpxchg(&src->preempted, &state,
                                        state | KVM_VCPU_FLUSH_TLB))
                                __cpumask_clear_cpu(cpu, flushmask);
                }
        }

        native_flush_tlb_multi(flushmask, info);
}

static __init int kvm_alloc_cpumask(void)
{
        int cpu;

        if (!kvm_para_available() || nopv)
                return 0;

        if (pv_tlb_flush_supported() || pv_ipi_supported())
                for_each_possible_cpu(cpu) {
                        zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
                                GFP_KERNEL, cpu_to_node(cpu));
                }

        return 0;
}
arch_initcall(kvm_alloc_cpumask);

static void __init kvm_smp_prepare_boot_cpu(void)
{
        /*
         * Map the per-cpu variables as decrypted before kvm_guest_cpu_init()
         * shares the guest physical address with the hypervisor.
         */
        sev_map_percpu_data();

        kvm_guest_cpu_init();
        native_smp_prepare_boot_cpu();
        kvm_spinlock_init();
}

static int kvm_cpu_down_prepare(unsigned int cpu)
{
        unsigned long flags;

        local_irq_save(flags);
        kvm_guest_cpu_offline(false);
        local_irq_restore(flags);
        return 0;
}

#endif  /* CONFIG_SMP */

static int kvm_suspend(void)
{
        u64 val = 0;

        kvm_guest_cpu_offline(false);

#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
        if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
                rdmsrl(MSR_KVM_POLL_CONTROL, val);
        has_guest_poll = !(val & 1);
#endif
        return 0;
}

static void kvm_resume(void)
{
        kvm_cpu_online(raw_smp_processor_id());

#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
        if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL) && has_guest_poll)
                wrmsrl(MSR_KVM_POLL_CONTROL, 0);
#endif
}

static struct syscore_ops kvm_syscore_ops = {
        .suspend        = kvm_suspend,
        .resume         = kvm_resume,
};

static void kvm_pv_guest_cpu_reboot(void *unused)
{
        kvm_guest_cpu_offline(true);
}

static int kvm_pv_reboot_notify(struct notifier_block *nb,
                                unsigned long code, void *unused)
{
        if (code == SYS_RESTART)
                on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
        return NOTIFY_DONE;
}

static struct notifier_block kvm_pv_reboot_nb = {
        .notifier_call = kvm_pv_reboot_notify,
};

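/*
 * After a PV feature is registered, the host keeps writing to the registered
 * memory location. If the guest shuts down or kexecs into a new kernel, that
 * memory is no longer valid, so the PV features must also be torn down on
 * the crash/shutdown path.
 */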
#ifdef CONFIG_KEXEC_CORE
static void kvm_crash_shutdown(struct pt_regs *regs)
{
        kvm_guest_cpu_offline(true);
        native_machine_crash_shutdown(regs);
}
#endif

#if defined(CONFIG_X86_32) || !defined(CONFIG_SMP)
bool __kvm_vcpu_is_preempted(long cpu);

__visible bool __kvm_vcpu_is_preempted(long cpu)
{
        struct kvm_steal_time *src = &per_cpu(steal_time, cpu);

        return !!(src->preempted & KVM_VCPU_PREEMPTED);
}
PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);

#else

#include <asm/asm-offsets.h>

extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);

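/*
 * Hand-written assembly version for x86-64 which avoids the register
 * save/restore that a C implementation behind PV_CALLEE_SAVE_REGS_THUNK()
 * would incur: it only reads the per-CPU steal_time.preempted byte.
 */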
asm(
".pushsection .text;"
".global __raw_callee_save___kvm_vcpu_is_preempted;"
".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
"__raw_callee_save___kvm_vcpu_is_preempted:"
ASM_ENDBR
"movq __per_cpu_offset(,%rdi,8), %rax;"
"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
"setne %al;"
ASM_RET
".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
".popsection");

#endif

static void __init kvm_guest_init(void)
{
        int i;

        paravirt_ops_setup();
        register_reboot_notifier(&kvm_pv_reboot_nb);
        for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
                raw_spin_lock_init(&async_pf_sleepers[i].lock);

        if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
                has_steal_clock = 1;
                static_call_update(pv_steal_clock, kvm_steal_clock);

                pv_ops.lock.vcpu_is_preempted =
                        PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
        }

        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
                apic_set_eoi_write(kvm_guest_apic_eoi_write);

        if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
                static_branch_enable(&kvm_async_pf_enabled);
                alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_kvm_asyncpf_interrupt);
        }

#ifdef CONFIG_SMP
        if (pv_tlb_flush_supported()) {
                pv_ops.mmu.flush_tlb_multi = kvm_flush_tlb_multi;
                pv_ops.mmu.tlb_remove_table = tlb_remove_table;
                pr_info("KVM setup pv remote TLB flush\n");
        }

        smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
        if (pv_sched_yield_supported()) {
                smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
                pr_info("setup PV sched yield\n");
        }
        if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
                                      kvm_cpu_online, kvm_cpu_down_prepare) < 0)
                pr_err("failed to install cpu hotplug callbacks\n");
#else
        sev_map_percpu_data();
        kvm_guest_cpu_init();
#endif

#ifdef CONFIG_KEXEC_CORE
        machine_ops.crash_shutdown = kvm_crash_shutdown;
#endif

        register_syscore_ops(&kvm_syscore_ops);

        /*
         * Hard lockup detection is enabled by default. Disable it, as guests
         * can get false positives on such detectors in many cases (for
         * example, when the host is overcommitted).
         */
        hardlockup_detector_disable();
}

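/*
 * KVM exposes its paravirt interface via hypervisor CPUID leaves starting
 * at the base returned by hypervisor_cpuid_base(KVM_SIGNATURE, 0); the
 * result is cached in a static variable since it cannot change at runtime.
 */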
static noinline uint32_t __kvm_cpuid_base(void)
{
        if (boot_cpu_data.cpuid_level < 0)
                return 0;       /* So we don't blow up on old processors */

        if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
                return hypervisor_cpuid_base(KVM_SIGNATURE, 0);

        return 0;
}

static inline uint32_t kvm_cpuid_base(void)
{
        static int kvm_cpuid_base = -1;

        if (kvm_cpuid_base == -1)
                kvm_cpuid_base = __kvm_cpuid_base();

        return kvm_cpuid_base;
}

bool kvm_para_available(void)
{
        return kvm_cpuid_base() != 0;
}
EXPORT_SYMBOL_GPL(kvm_para_available);

unsigned int kvm_arch_para_features(void)
{
        return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES);
}

unsigned int kvm_arch_para_hints(void)
{
        return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES);
}
EXPORT_SYMBOL_GPL(kvm_arch_para_hints);

static uint32_t __init kvm_detect(void)
{
        return kvm_cpuid_base();
}

static void __init kvm_apic_init(void)
{
#ifdef CONFIG_SMP
        if (pv_ipi_supported())
                kvm_setup_pv_ipi();
#endif
}

static bool __init kvm_msi_ext_dest_id(void)
{
        return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID);
}

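/*
 * Notify the host of a change in a page range's encryption status via the
 * KVM_HC_MAP_GPA_RANGE hypercall, so that shared (decrypted) pages can be
 * tracked, e.g. for SEV live migration.
 */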
static void kvm_sev_hc_page_enc_status(unsigned long pfn, int npages, bool enc)
{
        kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, pfn << PAGE_SHIFT, npages,
                           KVM_MAP_GPA_RANGE_ENC_STAT(enc) | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
}

static void __init kvm_init_platform(void)
{
        if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
            kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) {
                unsigned long nr_pages;
                int i;

                pv_ops.mmu.notify_page_enc_status_changed =
                        kvm_sev_hc_page_enc_status;

                /*
                 * Reset the host's shared pages list related to kernel
                 * specific page encryption status settings before we load a
                 * new kernel by kexec. Reset the page encryption status
                 * during early boot instead of just before kexec to avoid SMP
                 * races during kvm_pv_guest_cpu_reboot().
                 * NOTE: We cannot reset the complete shared pages list
                 * here as we need to retain the UEFI/OVMF firmware
                 * specific settings.
                 */
                for (i = 0; i < e820_table->nr_entries; i++) {
                        struct e820_entry *entry = &e820_table->entries[i];

                        if (entry->type != E820_TYPE_RAM)
                                continue;

                        nr_pages = DIV_ROUND_UP(entry->size, PAGE_SIZE);

                        kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, entry->addr,
                                           nr_pages,
                                           KVM_MAP_GPA_RANGE_ENCRYPTED | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
                }

                /*
                 * Ensure that the .bss..decrypted section is marked as
                 * decrypted in the shared pages list.
                 */
                nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
                                        PAGE_SIZE);
                early_set_mem_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
                                                nr_pages, 0);

                /*
                 * If not booted using EFI, enable live migration support.
                 */
                if (!efi_enabled(EFI_BOOT))
                        wrmsrl(MSR_KVM_MIGRATION_CONTROL,
                               KVM_MIGRATION_READY);
        }
        kvmclock_init();
        x86_platform.apic_post_init = kvm_apic_init;
}

#if defined(CONFIG_AMD_MEM_ENCRYPT)
static void kvm_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs)
{
        /* RAX and CPL are already in the GHCB */
        ghcb_set_rbx(ghcb, regs->bx);
        ghcb_set_rcx(ghcb, regs->cx);
        ghcb_set_rdx(ghcb, regs->dx);
        ghcb_set_rsi(ghcb, regs->si);
}

static bool kvm_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
{
        /* No checking of the return state needed */
        return true;
}
#endif

const __initconst struct hypervisor_x86 x86_hyper_kvm = {
        .name                           = "KVM",
        .detect                         = kvm_detect,
        .type                           = X86_HYPER_KVM,
        .init.guest_late_init           = kvm_guest_init,
        .init.x2apic_available          = kvm_para_available,
        .init.msi_ext_dest_id           = kvm_msi_ext_dest_id,
        .init.init_platform             = kvm_init_platform,
#if defined(CONFIG_AMD_MEM_ENCRYPT)
        .runtime.sev_es_hcall_prepare   = kvm_sev_es_hcall_prepare,
        .runtime.sev_es_hcall_finish    = kvm_sev_es_hcall_finish,
#endif
};

static __init int activate_jump_labels(void)
{
        if (has_steal_clock) {
                static_key_slow_inc(&paravirt_steal_enabled);
                if (steal_acc)
                        static_key_slow_inc(&paravirt_steal_rq_enabled);
        }

        return 0;
}
arch_initcall(activate_jump_labels);

#ifdef CONFIG_PARAVIRT_SPINLOCKS

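/* Kick a halted vCPU by its APIC ID; used as the PV qspinlock "kick" hook. */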
static void kvm_kick_cpu(int cpu)
{
        int apicid;
        unsigned long flags = 0;

        apicid = per_cpu(x86_cpu_to_apicid, cpu);
        kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
}

#include <asm/qspinlock.h>

static void kvm_wait(u8 *ptr, u8 val)
{
        if (in_nmi())
                return;

        /*
         * Halt until it is our turn and we are kicked. Note that safe_halt()
         * is used for the interrupts-enabled case to avoid a hang if the
         * lock byte is overwritten by a spurious interrupt while interrupts
         * are disabled.
         */
        if (irqs_disabled()) {
                if (READ_ONCE(*ptr) == val)
                        halt();
        } else {
                local_irq_disable();

                /* safe_halt() will re-enable interrupts. */
                if (READ_ONCE(*ptr) == val)
                        safe_halt();
                else
                        local_irq_enable();
        }
}

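/*
 * Set up pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
 */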
void __init kvm_spinlock_init(void)
{
        /*
         * In case the host doesn't support KVM_FEATURE_PV_UNHALT there is
         * still an advantage in keeping virt_spin_lock_key enabled:
         * virt_spin_lock() is preferred over the native qspinlock when the
         * vCPU is preempted.
         */
        if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) {
                pr_info("PV spinlocks disabled, no host support\n");
                return;
        }

        /*
         * Disable PV spinlocks and use the native qspinlock when dedicated
         * pCPUs are available.
         */
        if (kvm_para_has_hint(KVM_HINTS_REALTIME)) {
                pr_info("PV spinlocks disabled with KVM_HINTS_REALTIME hints\n");
                goto out;
        }

        if (num_possible_cpus() == 1) {
                pr_info("PV spinlocks disabled, single CPU\n");
                goto out;
        }

        if (nopvspin) {
                pr_info("PV spinlocks disabled, forced by \"nopvspin\" parameter\n");
                goto out;
        }

        pr_info("PV spinlocks enabled\n");

        __pv_init_lock_hash();
        pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
        pv_ops.lock.queued_spin_unlock =
                PV_CALLEE_SAVE(__pv_queued_spin_unlock);
        pv_ops.lock.wait = kvm_wait;
        pv_ops.lock.kick = kvm_kick_cpu;

        /*
         * When PV spinlocks are enabled, which is preferred over
         * virt_spin_lock(), virt_spin_lock_key's value is meaningless.
         * Just disable it anyway.
         */
out:
        static_branch_disable(&virt_spin_lock_key);
}

#endif  /* CONFIG_PARAVIRT_SPINLOCKS */

#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL

static void kvm_disable_host_haltpoll(void *i)
{
        wrmsrl(MSR_KVM_POLL_CONTROL, 0);
}

static void kvm_enable_host_haltpoll(void *i)
{
        wrmsrl(MSR_KVM_POLL_CONTROL, 1);
}

void arch_haltpoll_enable(unsigned int cpu)
{
        if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
                pr_err_once("host does not support poll control\n");
                pr_err_once("host upgrade recommended\n");
                return;
        }

        /* Enabling guest halt poll disables host halt poll. */
        smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1);
}
EXPORT_SYMBOL_GPL(arch_haltpoll_enable);

void arch_haltpoll_disable(unsigned int cpu)
{
        if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
                return;

        /* Disabling guest halt poll re-enables host halt poll. */
        smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1);
}
EXPORT_SYMBOL_GPL(arch_haltpoll_disable);
#endif  /* CONFIG_ARCH_CPUIDLE_HALTPOLL */