#include <linux/cpu.h>
#include <linux/kvm_host.h>
#include <linux/preempt.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/sizes.h>
#include <linux/cma.h>
#include <linux/bitops.h>

#include <asm/cputable.h>
#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/dbell.h>
#include <asm/cputhreads.h>
#include <asm/io.h>
#include <asm/opal.h>
#include <asm/smp.h>

#define KVM_CMA_CHUNK_ORDER	18

#include "book3s_xics.h"
#include "book3s_xive.h"

/*
 * Hash page table allocations must be aligned to this boundary (256kB),
 * which must be a power of 2.
 */
#define HPT_ALIGN_PAGES		((1 << 18) >> PAGE_SHIFT) /* 256k */

/*
 * By default we reserve 5% of memory for hash page table allocation.
 */
static unsigned long kvm_cma_resv_ratio = 5;

static struct cma *kvm_cma;

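/* Parse the "kvm_cma_resv_ratio=" early command-line parameter. */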
static int __init early_parse_kvm_cma_resv(char *p)
{
	pr_debug("%s(%s)\n", __func__, p);
	if (!p)
		return -EINVAL;
	return kstrtoul(p, 0, &kvm_cma_resv_ratio);
}
early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);

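/*
 * Allocate nr_pages of physically contiguous memory from the KVM CMA
 * region for a guest hashed page table, aligned to HPT_ALIGN_PAGES.
 */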
struct page *kvm_alloc_hpt_cma(unsigned long nr_pages)
{
	VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);

	return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES),
			 false);
}
EXPORT_SYMBOL_GPL(kvm_alloc_hpt_cma);

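/* Return an HPT allocation made by kvm_alloc_hpt_cma() to the CMA region. */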
void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages)
{
	cma_release(kvm_cma, page, nr_pages);
}
EXPORT_SYMBOL_GPL(kvm_free_hpt_cma);

/**
 * kvm_cma_reserve() - reserve area for kvm hash pagetable
 *
 * This function reserves memory from the early allocator.  It should be
 * called by arch-specific code once the memblock allocator has been
 * activated and all other subsystems have already allocated/reserved
 * memory.
 */
void __init kvm_cma_reserve(void)
{
	unsigned long align_size;
	phys_addr_t selected_size;

	/*
	 * We need the CMA reservation only when we are in HV mode.
	 */
	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return;

	selected_size = PAGE_ALIGN(memblock_phys_mem_size() * kvm_cma_resv_ratio / 100);
	if (selected_size) {
		pr_info("%s: reserving %ld MiB for global area\n", __func__,
			(unsigned long)selected_size / SZ_1M);
		align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
		cma_declare_contiguous(0, selected_size, 0, align_size,
			KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, "kvm_cma",
			&kvm_cma);
	}
}

/*
 * Real-mode H_CONFER implementation.
 * We check whether all the other vcpus in this virtual core are either
 * ceded or also conferring.  If so, we return H_TOO_HARD to pop up to
 * the virtual-mode H_CONFER handler and give the target vcpu a chance
 * to run.
 */
long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
			    unsigned int yield_count)
{
	struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
	int ptid = local_paca->kvm_hstate.ptid;
	int threads_running;
	int threads_ceded;
	int threads_conferring;
	u64 stop = get_tb() + 10 * tb_ticks_per_usec;
	int rv = H_SUCCESS; /* => don't yield */

	set_bit(ptid, &vc->conferring_threads);
	while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
		threads_running = VCORE_ENTRY_MAP(vc);
		threads_ceded = vc->napping_threads;
		threads_conferring = vc->conferring_threads;
		if ((threads_ceded | threads_conferring) == threads_running) {
			rv = H_TOO_HARD; /* => do yield */
			break;
		}
	}
	clear_bit(ptid, &vc->conferring_threads);
	return rv;
}

/*
 * When running HV-mode KVM we need to block certain operations while KVM
 * VMs exist in the system.  We use a counter of VMs to track this.
 *
 * One of the operations we need to block is onlining of secondaries, so we
 * protect hv_vm_count with cpus_read_lock/unlock().
 */
static atomic_t hv_vm_count;

void kvm_hv_vm_activated(void)
{
	cpus_read_lock();
	atomic_inc(&hv_vm_count);
	cpus_read_unlock();
}
EXPORT_SYMBOL_GPL(kvm_hv_vm_activated);

void kvm_hv_vm_deactivated(void)
{
	cpus_read_lock();
	atomic_dec(&hv_vm_count);
	cpus_read_unlock();
}
EXPORT_SYMBOL_GPL(kvm_hv_vm_deactivated);

bool kvm_hv_mode_active(void)
{
	return atomic_read(&hv_vm_count) != 0;
}

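/*
 * Table of real-mode hcall handlers, indexed by hcall number / 4;
 * a zero entry means the hcall is not handled in real mode.
 */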
extern int hcall_real_table[], hcall_real_table_end[];

int kvmppc_hcall_impl_hv_realmode(unsigned long cmd)
{
	cmd /= 4;
	if (cmd < hcall_real_table_end - hcall_real_table &&
	    hcall_real_table[cmd])
		return 1;

	return 0;
}
EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);

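/*
 * H_RANDOM support: report whether the platform provides a hardware
 * random number source, and fetch a random value into the guest's r4.
 */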
int kvmppc_hwrng_present(void)
{
	return ppc_md.get_random_seed != NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);

long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
	if (ppc_md.get_random_seed &&
	    ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
		return H_SUCCESS;

	return H_HARDWARE;
}

/*
 * Send an interrupt or message to another CPU.
 * The caller needs to include any barrier needed to order writes
 * to memory vs. the IPI/message.
 */
void kvmhv_rm_send_ipi(int cpu)
{
	void __iomem *xics_phys;
	unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);

	/* On POWER9 we can use msgsnd for any destination cpu. */
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		msg |= get_hard_smp_processor_id(cpu);
		__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
		return;
	}

	/* On POWER8, use msgsnd for IPIs to threads in the same core. */
	if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
	    cpu_first_thread_sibling(cpu) ==
	    cpu_first_thread_sibling(raw_smp_processor_id())) {
		msg |= cpu_thread_in_core(cpu);
		__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
		return;
	}

	/* We should never get here when the host is using XIVE. */
	if (WARN_ON_ONCE(xics_on_xive()))
		return;

	/* Else poke the target with an IPI on the XICS interrupt controller. */
	xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys;
	if (xics_phys)
		__raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR);
	else
		opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
}

/*
 * The following functions are used by the guest entry/exit paths,
 * including the real-mode code in book3s_hv_rmhandlers.S.
 */
static void kvmhv_interrupt_vcore(struct kvmppc_vcore *vc, int active)
{
	int cpu = vc->pcpu;

	/* Order setting of exit map vs. msgsnd/IPI */
	smp_mb();
	for (; active; active >>= 1, ++cpu)
		if (active & 1)
			kvmhv_rm_send_ipi(cpu);
}

void kvmhv_commence_exit(int trap)
{
	struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
	int ptid = local_paca->kvm_hstate.ptid;
	struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
	int me, ee, i;

	/* Set our bit in the threads-exiting-guest map in the 0xff00
	   bits of vcore->entry_exit_map */
	me = 0x100 << ptid;
	do {
		ee = vc->entry_exit_map;
	} while (cmpxchg(&vc->entry_exit_map, ee, ee | me) != ee);

	/* Are we the first here? */
	if ((ee >> 8) != 0)
		return;

	/*
	 * Trigger the other threads in this vcore to exit the guest.
	 * If this is a hypervisor decrementer interrupt then they
	 * will already be on their way out of the guest.
	 */
	if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
		kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));

	/*
	 * If we are doing dynamic micro-threading, interrupt the other
	 * subcores to pull them out of their guests too.
	 */
	if (!sip)
		return;

	for (i = 0; i < MAX_SUBCORES; ++i) {
		vc = sip->vc[i];
		if (!vc)
			break;
		do {
			ee = vc->entry_exit_map;
			/* Already asked to exit? */
			if ((ee >> 8) != 0)
				break;
		} while (cmpxchg(&vc->entry_exit_map, ee,
				 ee | VCORE_EXIT_REQ) != ee);
		if ((ee >> 8) == 0)
			kvmhv_interrupt_vcore(vc, ee);
	}
}

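/* Host real-mode operations state, used by the real-mode XICS interrupt code. */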
struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);

#ifdef CONFIG_KVM_XICS
static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap,
					 u32 xisr)
{
	int i;

	/*
	 * We access the mapped array here without a lock.  That is safe
	 * because we never reduce the number of entries in the array and
	 * we never change the v_hwirq field of an entry once it is set.
	 *
	 * We have also carefully ordered the stores in the writer and the
	 * loads here in the reader, so that if we find a matching hwirq
	 * here, the associated fields of the entry are valid.
	 */
	for (i = 0; i < pimap->n_mapped; i++) {
		if (xisr == pimap->mapped[i].r_hwirq) {
			/*
			 * Order subsequent reads in the caller to serialize
			 * with the writer.
			 */
			smp_rmb();
			return &pimap->mapped[i];
		}
	}
	return NULL;
}

/*
 * If we have an interrupt that's not an IPI, check if we have a
 * passthrough adapter and if so, check if this external interrupt
 * is for the adapter.
 * We will attempt to deliver the IRQ directly to the target VCPU's
 * ICP (the virtual ICP chosen based on the interrupt's affinity).
 *
 * If the delivery fails or if this is not for a passthrough adapter,
 * return to the host to handle this interrupt.  We earlier saved a copy
 * of the XIRR in the PACA; it will be picked up by the host ICP driver.
 */
static int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again)
{
	struct kvmppc_passthru_irqmap *pimap;
	struct kvmppc_irq_map *irq_map;
	struct kvm_vcpu *vcpu;

	vcpu = local_paca->kvm_hstate.kvm_vcpu;
	if (!vcpu)
		return 1;
	pimap = kvmppc_get_passthru_irqmap(vcpu->kvm);
	if (!pimap)
		return 1;
	irq_map = get_irqmap(pimap, xisr);
	if (!irq_map)
		return 1;

	/* We're handling this interrupt, generic code doesn't need to */
	local_paca->kvm_hstate.saved_xirr = 0;

	return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap, again);
}

#else
static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again)
{
	return 1;
}
#endif

/*
 * Determine what sort of external interrupt is pending (if any).
 * Returns:
 *	0 if no interrupt is pending
 *	1 if an interrupt is pending that needs to be handled by the host
 *	-1 if a guest wakeup IPI was found (which has now been cleared)
 * Other values may be passed through from the passthrough delivery path.
 */
static long kvmppc_read_one_intr(bool *again);

long kvmppc_read_intr(void)
{
	long ret = 0;
	long rc;
	bool again;

	if (xive_enabled())
		return 1;

	do {
		again = false;
		rc = kvmppc_read_one_intr(&again);
		if (rc && (ret == 0 || rc > ret))
			ret = rc;
	} while (again);
	return ret;
}

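/*
 * Read one pending interrupt from the XICS ICP in real mode.
 * Host IPIs and read errors are left for the host; a guest wakeup IPI
 * is cleared and EOIed here; anything else is checked against the
 * passthrough map.
 */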
static long kvmppc_read_one_intr(bool *again)
{
	void __iomem *xics_phys;
	u32 h_xirr;
	__be32 xirr;
	u32 xisr;
	u8 host_ipi;
	int64_t rc;

	if (xive_enabled())
		return 1;

	/* see if a host IPI is pending */
	host_ipi = local_paca->kvm_hstate.host_ipi;
	if (host_ipi)
		return 1;

	/* Now read the interrupt from the ICP */
	xics_phys = local_paca->kvm_hstate.xics_phys;
	rc = 0;
	if (!xics_phys)
		rc = opal_int_get_xirr(&xirr, false);
	else
		xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
	if (rc < 0)
		return 1;

	/*
	 * Save XIRR for later.  Note that xirr is the value read from the
	 * XIRR register (big-endian), while h_xirr is the host-endian
	 * version.
	 */
	h_xirr = be32_to_cpu(xirr);
	local_paca->kvm_hstate.saved_xirr = h_xirr;
	xisr = h_xirr & 0xffffff;
	/*
	 * Ensure that the store/load complete to guarantee all side
	 * effects of loading from XIRR have completed.
	 */
	smp_mb();

	/* if nothing pending in the ICP */
	if (!xisr)
		return 0;

	/*
	 * We found something in the ICP...
	 *
	 * If it is an IPI, clear the MFRR and EOI it.
	 */
	if (xisr == XICS_IPI) {
		rc = 0;
		if (xics_phys) {
			__raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
			__raw_rm_writel(xirr, xics_phys + XICS_XIRR);
		} else {
			opal_int_set_mfrr(hard_smp_processor_id(), 0xff);
			rc = opal_int_eoi(h_xirr);
		}
		/* If rc > 0, there is another interrupt pending */
		*again = rc > 0;

		/*
		 * Need to ensure side effects of above stores
		 * complete before proceeding.
		 */
		smp_mb();

		/*
		 * We need to re-check the host IPI flag now in case it got
		 * set in the meantime.  If it's clear, we bounce the
		 * interrupt to the guest.
		 */
		host_ipi = local_paca->kvm_hstate.host_ipi;
		if (unlikely(host_ipi != 0)) {
			/*
			 * We raced with the host; we need to resend
			 * that IPI.
			 */
			if (xics_phys)
				__raw_rm_writeb(IPI_PRIORITY,
						xics_phys + XICS_MFRR);
			else
				opal_int_set_mfrr(hard_smp_processor_id(),
						  IPI_PRIORITY);
			/* Let side effects complete */
			smp_mb();
			return 1;
		}

		/* OK, it's an IPI for us */
		local_paca->kvm_hstate.saved_xirr = 0;
		return -1;
	}

	return kvmppc_check_passthru(xisr, xirr, again);
}

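/* Bring a ceded vcpu back out of ceded state and cancel its dec timer. */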
static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
{
	vcpu->arch.ceded = 0;
	if (vcpu->arch.timer_running) {
		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
		vcpu->arch.timer_running = 0;
	}
}

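/*
 * Set the guest MSR, forcing ME on and HV off, and bring the vcpu
 * out of ceded state.
 */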
void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
{
	/* The guest must always run with ME enabled and HV disabled. */
	msr = (msr | MSR_ME) & ~MSR_HV;

	/*
	 * Check for the illegal transactional state bit combination
	 * and if we find it, force the TS field to a safe state.
	 */
	if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
		msr &= ~MSR_TS_MASK;
	vcpu->arch.shregs.msr = msr;
	kvmppc_end_cede(vcpu);
}
EXPORT_SYMBOL_GPL(kvmppc_set_msr_hv);

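/*
 * Deliver an interrupt to the guest: save the old PC and MSR in
 * SRR0/SRR1 and redirect the guest to the interrupt vector with the
 * new MSR.
 */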
static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
{
	unsigned long msr, pc, new_msr, new_pc;

	msr = kvmppc_get_msr(vcpu);
	pc = kvmppc_get_pc(vcpu);
	new_msr = vcpu->arch.intr_msr;
	new_pc = vec;

	/* If transactional, change to suspend mode on IRQ delivery */
	if (MSR_TM_TRANSACTIONAL(msr))
		new_msr |= MSR_TS_S;
	else
		new_msr |= msr & MSR_TS_MASK;

	/*
	 * Perform the MSR and PC adjustment for LPCR[AIL]=3 if it applies:
	 * AIL does not apply to system reset or machine check interrupts,
	 * and relocation-on delivery requires that the guest was running
	 * with both MSR[IR] and MSR[DR] set.
	 */
	if (vec != BOOK3S_INTERRUPT_SYSTEM_RESET &&
	    vec != BOOK3S_INTERRUPT_MACHINE_CHECK &&
	    (vcpu->arch.vcore->lpcr & LPCR_AIL) == LPCR_AIL_3 &&
	    (msr & (MSR_IR|MSR_DR)) == (MSR_IR|MSR_DR)) {
		new_msr |= MSR_IR | MSR_DR;
		new_pc += 0xC000000000004000ULL;
	}

	kvmppc_set_srr0(vcpu, pc);
	kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags);
	kvmppc_set_pc(vcpu, new_pc);
	vcpu->arch.shregs.msr = new_msr;
}

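/* As inject_interrupt(), but also bring the vcpu out of ceded state. */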
void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
{
	inject_interrupt(vcpu, vec, srr1_flags);
	kvmppc_end_cede(vcpu);
}
EXPORT_SYMBOL_GPL(kvmppc_inject_interrupt_hv);

/*
 * Inject any pending external or decrementer interrupt, and any pending
 * doorbell, into the guest just before entry.  The WARN below reflects
 * that this path is not expected to run on ISA v3.0 (POWER9) CPUs.
 */
void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
{
	int ext;
	unsigned long lpcr;

	WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));

	/* Insert EXTERNAL bit into LPCR at the MER bit position */
	ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1;
	lpcr = mfspr(SPRN_LPCR);
	lpcr |= ext << LPCR_MER_SH;
	mtspr(SPRN_LPCR, lpcr);
	isync();

	if (vcpu->arch.shregs.msr & MSR_EE) {
		if (ext) {
			inject_interrupt(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0);
		} else {
			long int dec = mfspr(SPRN_DEC);
			/* Without LPCR[LD], the decrementer is 32 bits */
			if (!(lpcr & LPCR_LD))
				dec = (int) dec;
			if (dec < 0)
				inject_interrupt(vcpu,
					BOOK3S_INTERRUPT_DECREMENTER, 0);
		}
	}

	if (vcpu->arch.doorbell_request) {
		mtspr(SPRN_DPDES, 1);
		vcpu->arch.vcore->dpdes = 1;
		smp_wmb();
		vcpu->arch.doorbell_request = 0;
	}
}

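/* Flush all sets of the guest's TLB on this CPU with a loop of tlbiel. */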
static void flush_guest_tlb(struct kvm *kvm)
{
	unsigned long rb, set;

	rb = PPC_BIT(52);	/* IS = 2 */
	for (set = 0; set < kvm->arch.tlb_sets; ++set) {
		/* R=0 PRS=0 RIC=0 */
		asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
			     : : "r" (rb), "i" (0), "i" (0), "i" (0),
			     "r" (0) : "memory");
		rb += PPC_BIT(51);	/* increment set number */
	}
	asm volatile("ptesync": : :"memory");
}

void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu)
{
	if (cpumask_test_cpu(pcpu, &kvm->arch.need_tlb_flush)) {
		flush_guest_tlb(kvm);

		/* Clear the bit after the TLB flush */
		cpumask_clear_cpu(pcpu, &kvm->arch.need_tlb_flush);
	}
}
EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush);