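/*
 * KVM/MIPS MMU handling in the KVM subsystem for MIPS: construction and
 * maintenance of the guest physical address (GPA) page tables.
 */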
#include <linux/highmem.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
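/*
 * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels
 * for which pages need to be cached.
 */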
#if defined(__PAGETABLE_PMD_FOLDED)
#define KVM_MMU_CACHE_MIN_PAGES 1
#else
#define KVM_MMU_CACHE_MIN_PAGES 2
#endif

void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}
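/**
 * kvm_pgd_init() - Initialise KVM GPA page directory.
 * @page:	Pointer to page directory (PGD) for KVM GPA.
 *
 * Initialise a KVM GPA page directory with pointers to the invalid table,
 * i.e. representing no mappings. The loop is unrolled to write eight entries
 * per iteration.
 */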
static void kvm_pgd_init(void *page)
{
	unsigned long *p, *end;
	unsigned long entry;

#ifdef __PAGETABLE_PMD_FOLDED
	entry = (unsigned long)invalid_pte_table;
#else
	entry = (unsigned long)invalid_pmd_table;
#endif

	p = (unsigned long *)page;
	end = p + PTRS_PER_PGD;

	do {
		p[0] = entry;
		p[1] = entry;
		p[2] = entry;
		p[3] = entry;
		p[4] = entry;
		p += 8;
		p[-3] = entry;
		p[-2] = entry;
		p[-1] = entry;
	} while (p != end);
}
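/**
 * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory.
 *
 * Allocate a blank KVM GPA page directory (PGD) for representing guest
 * physical address mappings.
 *
 * Returns:	Pointer to new KVM GPA page directory.
 *		NULL on allocation failure.
 */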
pgd_t *kvm_pgd_alloc(void)
{
	pgd_t *ret;

	ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_TABLE_ORDER);
	if (ret)
		kvm_pgd_init(ret);

	return ret;
}
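/**
 * kvm_mips_walk_pgd() - Walk page table with optional allocation.
 * @pgd:	Page directory pointer.
 * @cache:	MMU page cache to allocate new page tables from, or NULL.
 * @addr:	Address to index page table using.
 *
 * Walk the page tables pointed to by @pgd to find the PTE corresponding to
 * the address @addr. If page tables don't exist for @addr, they will be
 * created from the MMU cache if @cache is not NULL.
 *
 * Returns:	Pointer to pte_t corresponding to @addr.
 *		NULL if a page table doesn't exist for @addr and !@cache.
 */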
static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
				unsigned long addr)
{
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd += pgd_index(addr);
	if (pgd_none(*pgd)) {
		/* Not used on MIPS yet, should never happen */
		BUG();
		return NULL;
	}
	p4d = p4d_offset(pgd, addr);
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud)) {
		pmd_t *new_pmd;

		if (!cache)
			return NULL;
		new_pmd = kvm_mmu_memory_cache_alloc(cache);
		pmd_init((unsigned long)new_pmd,
			 (unsigned long)invalid_pte_table);
		pud_populate(NULL, pud, new_pmd);
	}
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		pte_t *new_pte;

		if (!cache)
			return NULL;
		new_pte = kvm_mmu_memory_cache_alloc(cache);
		clear_page(new_pte);
		pmd_populate_kernel(NULL, pmd, new_pte);
	}
	return pte_offset_kernel(pmd, addr);
}
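/* Caller must hold kvm->mmu_lock */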
static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm,
				   struct kvm_mmu_memory_cache *cache,
				   unsigned long addr)
{
	return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr);
}
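/*
 * kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}.
 * Flush a range of guest physical address space from the VM's GPA page
 * tables.
 */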
static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	int i_min = pte_index(start_gpa);
	int i_max = pte_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
	int i;

	for (i = i_min; i <= i_max; ++i) {
		if (!pte_present(pte[i]))
			continue;

		set_pte(pte + i, __pte(0));
	}
	return safe_to_remove;
}

static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	pte_t *pte;
	unsigned long end = ~0ul;
	int i_min = pmd_index(start_gpa);
	int i_max = pmd_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
		if (!pmd_present(pmd[i]))
			continue;

		pte = pte_offset_kernel(pmd + i, 0);
		if (i == i_max)
			end = end_gpa;

		if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) {
			pmd_clear(pmd + i);
			pte_free_kernel(NULL, pte);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	pmd_t *pmd;
	unsigned long end = ~0ul;
	int i_min = pud_index(start_gpa);
	int i_max = pud_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
		if (!pud_present(pud[i]))
			continue;

		pmd = pmd_offset(pud + i, 0);
		if (i == i_max)
			end = end_gpa;

		if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) {
			pud_clear(pud + i);
			pmd_free(NULL, pmd);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	p4d_t *p4d;
	pud_t *pud;
	unsigned long end = ~0ul;
	int i_min = pgd_index(start_gpa);
	int i_max = pgd_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
		if (!pgd_present(pgd[i]))
			continue;

		p4d = p4d_offset(pgd, 0);
		pud = pud_offset(p4d + i, 0);
		if (i == i_max)
			end = end_gpa;

		if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) {
			pgd_clear(pgd + i);
			pud_free(NULL, pud);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}
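/**
 * kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses.
 * @kvm:	KVM pointer.
 * @start_gfn:	Guest frame number of first page in GPA range to flush.
 * @end_gfn:	Guest frame number of last page in GPA range to flush.
 *
 * Flushes a range of GPA mappings from the GPA page tables.
 *
 * The caller must hold the @kvm->mmu_lock spinlock.
 *
 * Returns:	Whether it's safe to remove the top level page directory
 *		because all lower levels have been removed.
 */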
bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
{
	return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd,
				      start_gfn << PAGE_SHIFT,
				      end_gfn << PAGE_SHIFT);
}
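/*
 * BUILD_PTE_RANGE_OP() - Generate page table walkers for a PTE operation.
 *
 * Expands to kvm_mips_<name>_{pte,pmd,pud,pgd}() helpers which walk the GPA
 * page tables over a range of addresses and apply @op to each present PTE,
 * returning 1 if any PTE was modified.
 */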
#define BUILD_PTE_RANGE_OP(name, op)					\
static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start,	\
				 unsigned long end)			\
{									\
	int ret = 0;							\
	int i_min = pte_index(start);					\
	int i_max = pte_index(end);					\
	int i;								\
	pte_t old, new;							\
									\
	for (i = i_min; i <= i_max; ++i) {				\
		if (!pte_present(pte[i]))				\
			continue;					\
									\
		old = pte[i];						\
		new = op(old);						\
		if (pte_val(new) == pte_val(old))			\
			continue;					\
		set_pte(pte + i, new);					\
		ret = 1;						\
	}								\
	return ret;							\
}									\
									\
static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start,	\
				 unsigned long end)			\
{									\
	int ret = 0;							\
	pte_t *pte;							\
	unsigned long cur_end = ~0ul;					\
	int i_min = pmd_index(start);					\
	int i_max = pmd_index(end);					\
	int i;								\
									\
	for (i = i_min; i <= i_max; ++i, start = 0) {			\
		if (!pmd_present(pmd[i]))				\
			continue;					\
									\
		pte = pte_offset_kernel(pmd + i, 0);			\
		if (i == i_max)						\
			cur_end = end;					\
									\
		ret |= kvm_mips_##name##_pte(pte, start, cur_end);	\
	}								\
	return ret;							\
}									\
									\
static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start,	\
				 unsigned long end)			\
{									\
	int ret = 0;							\
	pmd_t *pmd;							\
	unsigned long cur_end = ~0ul;					\
	int i_min = pud_index(start);					\
	int i_max = pud_index(end);					\
	int i;								\
									\
	for (i = i_min; i <= i_max; ++i, start = 0) {			\
		if (!pud_present(pud[i]))				\
			continue;					\
									\
		pmd = pmd_offset(pud + i, 0);				\
		if (i == i_max)						\
			cur_end = end;					\
									\
		ret |= kvm_mips_##name##_pmd(pmd, start, cur_end);	\
	}								\
	return ret;							\
}									\
									\
static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start,	\
				 unsigned long end)			\
{									\
	int ret = 0;							\
	p4d_t *p4d;							\
	pud_t *pud;							\
	unsigned long cur_end = ~0ul;					\
	int i_min = pgd_index(start);					\
	int i_max = pgd_index(end);					\
	int i;								\
									\
	for (i = i_min; i <= i_max; ++i, start = 0) {			\
		if (!pgd_present(pgd[i]))				\
			continue;					\
									\
		p4d = p4d_offset(pgd, 0);				\
		pud = pud_offset(p4d + i, 0);				\
		if (i == i_max)						\
			cur_end = end;					\
									\
		ret |= kvm_mips_##name##_pud(pud, start, cur_end);	\
	}								\
	return ret;							\
}
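/*
 * kvm_mips_mkclean_gpa_pt.
 * Mark a range of guest physical address space clean (writes fault) in the
 * VM's GPA page table to allow dirty page tracking.
 */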
BUILD_PTE_RANGE_OP(mkclean, pte_mkclean)
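/**
 * kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean.
 * @kvm:	KVM pointer.
 * @start_gfn:	Guest frame number of first page in GPA range to flush.
 * @end_gfn:	Guest frame number of last page in GPA range to flush.
 *
 * Make a range of GPA mappings clean so that guest writes will fault and
 * trigger dirty page logging.
 *
 * The caller must hold the @kvm->mmu_lock spinlock.
 *
 * Returns:	Whether any GPA mappings were modified, which would require
 *		derived mappings (GVA page tables & TLB entries) to be
 *		invalidated.
 */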
int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
{
	return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd,
				    start_gfn << PAGE_SHIFT,
				    end_gfn << PAGE_SHIFT);
}
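/**
 * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
 * @kvm:	The KVM pointer
 * @slot:	The memory slot associated with mask
 * @gfn_offset:	The gfn offset in memory slot
 * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
 *		slot to be write protected
 *
 * Walks bits set in mask and write protects the associated pte's. Caller must
 * acquire @kvm->mmu_lock.
 */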
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	gfn_t base_gfn = slot->base_gfn + gfn_offset;
	gfn_t start = base_gfn + __ffs(mask);
	gfn_t end = base_gfn + __fls(mask);

	kvm_mips_mkclean_gpa_pt(kvm, start, end);
}
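/*
 * kvm_mips_mkold_gpa_pt.
 * Mark a range of guest physical address space old (all accesses fault) in
 * the VM's GPA page table to allow detection of commonly used pages.
 */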
BUILD_PTE_RANGE_OP(mkold, pte_mkold)

static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn,
				 gfn_t end_gfn)
{
	return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd,
				  start_gfn << PAGE_SHIFT,
				  end_gfn << PAGE_SHIFT);
}

bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
{
	kvm_mips_flush_gpa_pt(kvm, range->start, range->end);
	return true;
}

bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
	gpa_t gpa = range->start << PAGE_SHIFT;
	pte_t hva_pte = range->pte;
	pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
	pte_t old_pte;

	if (!gpa_pte)
		return false;

	/* Mapping may need adjusting depending on memslot flags */
	old_pte = *gpa_pte;
	if (range->slot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte))
		hva_pte = pte_mkclean(hva_pte);
	else if (range->slot->flags & KVM_MEM_READONLY)
		hva_pte = pte_wrprotect(hva_pte);

	set_pte(gpa_pte, hva_pte);

	/* Replacing an absent or old page doesn't need flushes */
	if (!pte_present(old_pte) || !pte_young(old_pte))
		return false;

	/* Pages swapped, aged, moved, or cleaned require flushes */
	return !pte_present(hva_pte) ||
	       !pte_young(hva_pte) ||
	       pte_pfn(old_pte) != pte_pfn(hva_pte) ||
	       (pte_dirty(old_pte) && !pte_dirty(hva_pte));
}

bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
	return kvm_mips_mkold_gpa_pt(kvm, range->start, range->end);
}

bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
	gpa_t gpa = range->start << PAGE_SHIFT;
	pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);

	if (!gpa_pte)
		return false;
	return pte_young(*gpa_pte);
}
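/**
 * _kvm_mips_map_page_fast() - Fast path GPA fault handler.
 * @vcpu:		VCPU pointer.
 * @gpa:		Guest physical address of fault.
 * @write_fault:	Whether the fault was due to a write.
 * @out_entry:		New PTE for @gpa (written on success unless NULL).
 * @out_buddy:		New PTE for @gpa's buddy (written on success unless
 *			NULL).
 *
 * Perform fast path GPA fault handling, doing all that can be done without
 * calling into KVM. This handles marking old pages young (for idle page
 * tracking), and dirtying of clean pages (for dirty page logging).
 *
 * Returns:	0 on success, in which case we can update derived mappings and
 *		resume guest execution.
 *		-EFAULT on failure due to absent GPA mapping or write to
 *		read-only page, in which case KVM must be consulted.
 */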
static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa,
				   bool write_fault,
				   pte_t *out_entry, pte_t *out_buddy)
{
	struct kvm *kvm = vcpu->kvm;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	pte_t *ptep;
	kvm_pfn_t pfn = 0;
	bool pfn_valid = false;
	int ret = 0;

	spin_lock(&kvm->mmu_lock);

	/* Fast path - just check GPA page table for an existing entry */
	ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
	if (!ptep || !pte_present(*ptep)) {
		ret = -EFAULT;
		goto out;
	}

	/* Track access to pages marked old */
	if (!pte_young(*ptep)) {
		set_pte(ptep, pte_mkyoung(*ptep));
		pfn = pte_pfn(*ptep);
		pfn_valid = true;
		/* call kvm_set_pfn_accessed() after unlock */
	}
	if (write_fault && !pte_dirty(*ptep)) {
		if (!pte_write(*ptep)) {
			ret = -EFAULT;
			goto out;
		}

		/* Track dirtying of writeable pages */
		set_pte(ptep, pte_mkdirty(*ptep));
		pfn = pte_pfn(*ptep);
		mark_page_dirty(kvm, gfn);
		kvm_set_pfn_dirty(pfn);
	}

	if (out_entry)
		*out_entry = *ptep;
	if (out_buddy)
		*out_buddy = *ptep_buddy(ptep);

out:
	spin_unlock(&kvm->mmu_lock);
	if (pfn_valid)
		kvm_set_pfn_accessed(pfn);
	return ret;
}
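/**
 * kvm_mips_map_page() - Map a guest physical page.
 * @vcpu:		VCPU pointer.
 * @gpa:		Guest physical address of fault.
 * @write_fault:	Whether the fault was due to a write.
 * @out_entry:		New PTE for @gpa (written on success unless NULL).
 * @out_buddy:		New PTE for @gpa's buddy (written on success unless
 *			NULL).
 *
 * Handle GPA faults by creating a new GPA mapping (or updating an existing
 * one).
 *
 * This takes care of marking pages young or dirty (idle/dirty page tracking),
 * asking KVM for the corresponding PFN, and creating a mapping in the GPA
 * page tables. Derived mappings (GVA page tables and TLBs) must be handled by
 * the caller.
 *
 * Returns:	0 on success, in which case the caller may use the @out_entry
 *		and @out_buddy PTEs to update derived mappings and resume guest
 *		execution.
 *		-EFAULT if there is no memory region at @gpa or a write was
 *		attempted to a read-only memory region. This is usually handled
 *		as an MMIO access.
 */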
static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
			     bool write_fault,
			     pte_t *out_entry, pte_t *out_buddy)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int srcu_idx, err;
	kvm_pfn_t pfn;
	pte_t *ptep, entry, old_pte;
	bool writeable;
	unsigned long prot_bits;
	unsigned long mmu_seq;

	/* Try the fast path to handle old / clean pages */
	srcu_idx = srcu_read_lock(&kvm->srcu);
	err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry,
				      out_buddy);
	if (!err)
		goto out;

	/* We need a minimum of cached pages ready for page table creation */
	err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
	if (err)
		goto out;

retry:
	/*
	 * Used to check for invalidations in progress, of the pfn that is
	 * returned by gfn_to_pfn_prot() below.
	 */
	mmu_seq = kvm->mmu_invalidate_seq;
	/*
	 * Ensure the read of mmu_invalidate_seq isn't reordered with PTE reads
	 * in gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't
	 * risk the page we get a reference to getting unmapped before we have a
	 * chance to grab the mmu_lock without mmu_invalidate_retry() noticing.
	 */
	smp_rmb();

	/* Slow path - ask KVM core whether we can access this GPA */
	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable);
	if (is_error_noslot_pfn(pfn)) {
		err = -EFAULT;
		goto out;
	}

	spin_lock(&kvm->mmu_lock);
	/* Check if an invalidation has taken place since we got pfn */
	if (mmu_invalidate_retry(kvm, mmu_seq)) {
		/*
		 * This can happen when mappings are changed asynchronously, but
		 * also synchronously if a COW is triggered by
		 * gfn_to_pfn_prot().
		 */
		spin_unlock(&kvm->mmu_lock);
		kvm_release_pfn_clean(pfn);
		goto retry;
	}

	/* Ensure page tables are allocated */
	ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa);

	/* Set up the PTE */
	prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default;
	if (writeable) {
		prot_bits |= _PAGE_WRITE;
		if (write_fault) {
			prot_bits |= __WRITEABLE;
			mark_page_dirty(kvm, gfn);
			kvm_set_pfn_dirty(pfn);
		}
	}
	entry = pfn_pte(pfn, __pgprot(prot_bits));

	/* Write the PTE */
	old_pte = *ptep;
	set_pte(ptep, entry);

	err = 0;
	if (out_entry)
		*out_entry = *ptep;
	if (out_buddy)
		*out_buddy = *ptep_buddy(ptep);

	spin_unlock(&kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	kvm_set_pfn_accessed(pfn);
out:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return err;
}

int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr,
				      struct kvm_vcpu *vcpu,
				      bool write_fault)
{
	int ret;

	ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL);
	if (ret)
		return ret;

	/* Invalidate this entry in the TLB */
	return kvm_vz_host_tlb_inv(vcpu, badvaddr);
}
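/**
 * kvm_mips_migrate_count() - Migrate timer.
 * @vcpu:	Virtual CPU.
 *
 * Migrate the CP0_Count hrtimer to the current CPU by cancelling and
 * restarting it if it was running prior to being cancelled.
 *
 * Must be called when the VCPU is migrated to a different CPU, so that the
 * timer can interrupt the guest at the new CPU, and the timer irq can be
 * delivered to the VCPU.
 */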
static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu)
{
	if (hrtimer_cancel(&vcpu->arch.comparecount_timer))
		hrtimer_restart(&vcpu->arch.comparecount_timer);
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	unsigned long flags;

	kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);

	local_irq_save(flags);

	vcpu->cpu = cpu;
	if (vcpu->arch.last_sched_cpu != cpu) {
		kvm_debug("[%d->%d]KVM VCPU[%d] switch\n",
			  vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
		/*
		 * Migrate the timer interrupt to the current CPU so that it
		 * always interrupts the guest and synchronously triggers a
		 * guest timer interrupt.
		 */
		kvm_mips_migrate_count(vcpu);
	}

	/* restore guest state to registers */
	kvm_mips_callbacks->vcpu_load(vcpu, cpu);

	local_irq_restore(flags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	unsigned long flags;
	int cpu;

	local_irq_save(flags);

	cpu = smp_processor_id();
	vcpu->arch.last_sched_cpu = cpu;
	vcpu->cpu = -1;

	/* save guest state in registers */
	kvm_mips_callbacks->vcpu_put(vcpu, cpu);

	local_irq_restore(flags);
}