/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * KVM/MIPS MMU handling in the KVM module.
 *
 * Copyright (C) 2012  MIPS Technologies, Inc.  All rights reserved.
 * Authors: Sanjay Lal <sanjayl@kymasys.com>
 */

#include <linux/highmem.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>

/*
 * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels
 * for which pages need to be cached.
 */
#if defined(__PAGETABLE_PMD_FOLDED)
#define KVM_MMU_CACHE_MIN_PAGES 1
#else
#define KVM_MMU_CACHE_MIN_PAGES 2
#endif

void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}

/**
 * kvm_pgd_init() - Initialise KVM GPA page directory.
 * @page:	Pointer to page directory (PGD) for KVM GPA.
 *
 * Initialise a KVM GPA page directory with pointers to the invalid table, i.e.
 * representing no mappings. This is similar to pgd_init(), however it
 * initialises all the page directory pointers, not just the ones corresponding
 * to the userland address space (since it is for the guest physical address
 * space rather than a virtual address space).
 */
static void kvm_pgd_init(void *page)
{
	unsigned long *p, *end;
	unsigned long entry;

#ifdef __PAGETABLE_PMD_FOLDED
	entry = (unsigned long)invalid_pte_table;
#else
	entry = (unsigned long)invalid_pmd_table;
#endif

	p = (unsigned long *)page;
	end = p + PTRS_PER_PGD;

	do {
		p[0] = entry;
		p[1] = entry;
		p[2] = entry;
		p[3] = entry;
		p[4] = entry;
		p += 8;
		p[-3] = entry;
		p[-2] = entry;
		p[-1] = entry;
	} while (p != end);
}
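
/*
 * For illustration only (not part of the original file): the unrolled loop in
 * kvm_pgd_init() above is equivalent to the simpler loop sketched below, which
 * points every top-level slot at the shared invalid table. The helper name is
 * hypothetical.
 */
#if 0
static void kvm_pgd_init_sketch(void *page)
{
	unsigned long *p = (unsigned long *)page;
	unsigned long entry;
	int i;

#ifdef __PAGETABLE_PMD_FOLDED
	entry = (unsigned long)invalid_pte_table;
#else
	entry = (unsigned long)invalid_pmd_table;
#endif

	/* Fill all PTRS_PER_PGD entries, not just the userland half */
	for (i = 0; i < PTRS_PER_PGD; i++)
		p[i] = entry;
}
#endif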

/**
 * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory.
 *
 * Allocate a blank KVM GPA page directory (PGD) for representing guest physical
 * to host physical page mappings.
 *
 * Returns:	Pointer to new KVM GPA page directory.
 *		NULL on allocation failure.
 */
pgd_t *kvm_pgd_alloc(void)
{
	pgd_t *ret;

	ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_TABLE_ORDER);
	if (ret)
		kvm_pgd_init(ret);

	return ret;
}
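
/*
 * Usage sketch (hypothetical, for illustration): a VM init path would allocate
 * the GPA page directory once per VM and release the pages again at teardown
 * with the matching order. The helper names below are assumptions, not code
 * from this file.
 */
#if 0
static int example_alloc_gpa_pgd(struct kvm *kvm)
{
	kvm->arch.gpa_mm.pgd = kvm_pgd_alloc();
	if (!kvm->arch.gpa_mm.pgd)
		return -ENOMEM;
	return 0;
}

static void example_free_gpa_pgd(struct kvm *kvm)
{
	/* Mappings are assumed to have been flushed already */
	free_pages((unsigned long)kvm->arch.gpa_mm.pgd, PGD_TABLE_ORDER);
}
#endif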

/**
 * kvm_mips_walk_pgd() - Walk page table with optional allocation.
 * @pgd:	Page directory pointer.
 * @addr:	Address to index page table using.
 * @cache:	MMU page cache to allocate new page tables from, or NULL.
 *
 * Walk the page tables pointed to by @pgd to find the PTE corresponding to the
 * address @addr. If page tables don't exist for @addr, they will be created
 * from the MMU cache if @cache is not NULL.
 *
 * Returns:	Pointer to pte_t corresponding to @addr.
 *		NULL if a page table doesn't exist for @addr and !@cache.
 *		NULL if a page table allocation failed.
 */
static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
				unsigned long addr)
{
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd += pgd_index(addr);
	if (pgd_none(*pgd)) {
		/* Not used on MIPS yet */
		BUG();
		return NULL;
	}
	p4d = p4d_offset(pgd, addr);
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud)) {
		pmd_t *new_pmd;

		if (!cache)
			return NULL;
		new_pmd = kvm_mmu_memory_cache_alloc(cache);
		pmd_init((unsigned long)new_pmd,
			 (unsigned long)invalid_pte_table);
		pud_populate(NULL, pud, new_pmd);
	}
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		pte_t *new_pte;

		if (!cache)
			return NULL;
		new_pte = kvm_mmu_memory_cache_alloc(cache);
		clear_page(new_pte);
		pmd_populate_kernel(NULL, pmd, new_pte);
	}
	return pte_offset_kernel(pmd, addr);
}
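
/*
 * Illustration of the two calling modes of kvm_mips_walk_pgd() (hypothetical
 * helper, sketch only): with @cache == NULL the walk is a pure lookup and
 * returns NULL when an intermediate table is missing; with a topped-up cache
 * the missing levels are allocated on the way down.
 */
#if 0
static pte_t *example_walk(struct kvm_vcpu *vcpu, unsigned long gpa,
			   bool may_alloc)
{
	struct kvm_mmu_memory_cache *cache = NULL;

	if (may_alloc) {
		/* Cache enough pages for the levels that may be missing */
		if (kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache,
					       KVM_MMU_CACHE_MIN_PAGES))
			return NULL;
		cache = &vcpu->arch.mmu_page_cache;
	}

	return kvm_mips_walk_pgd(vcpu->kvm->arch.gpa_mm.pgd, cache, gpa);
}
#endif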

/* Caller must hold kvm->mmu_lock */
static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm,
				   struct kvm_mmu_memory_cache *cache,
				   unsigned long addr)
{
	return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr);
}

/*
 * kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}.
 * Flush a range of guest physical address space from the VM's GPA page tables.
 */

static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	int i_min = pte_index(start_gpa);
	int i_max = pte_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
	int i;

	for (i = i_min; i <= i_max; ++i) {
		if (!pte_present(pte[i]))
			continue;

		set_pte(pte + i, __pte(0));
	}
	return safe_to_remove;
}

static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	pte_t *pte;
	unsigned long end = ~0ul;
	int i_min = pmd_index(start_gpa);
	int i_max = pmd_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
		if (!pmd_present(pmd[i]))
			continue;

		pte = pte_offset_kernel(pmd + i, 0);
		if (i == i_max)
			end = end_gpa;

		if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) {
			pmd_clear(pmd + i);
			pte_free_kernel(NULL, pte);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	pmd_t *pmd;
	unsigned long end = ~0ul;
	int i_min = pud_index(start_gpa);
	int i_max = pud_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
		if (!pud_present(pud[i]))
			continue;

		pmd = pmd_offset(pud + i, 0);
		if (i == i_max)
			end = end_gpa;

		if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) {
			pud_clear(pud + i);
			pmd_free(NULL, pmd);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	p4d_t *p4d;
	pud_t *pud;
	unsigned long end = ~0ul;
	int i_min = pgd_index(start_gpa);
	int i_max = pgd_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
		if (!pgd_present(pgd[i]))
			continue;

		p4d = p4d_offset(pgd, 0);
		pud = pud_offset(p4d + i, 0);
		if (i == i_max)
			end = end_gpa;

		if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) {
			pgd_clear(pgd + i);
			pud_free(NULL, pud);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

/**
 * kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses.
 * @kvm:	KVM pointer.
 * @start_gfn:	Guest frame number of first page in GPA range to flush.
 * @end_gfn:	Guest frame number of last page in GPA range to flush.
 *
 * Flushes a range of GPA mappings from the GPA page tables.
 *
 * The caller must hold the @kvm->mmu_lock spinlock.
 *
 * Returns:	Whether it's safe to remove the top level page directory because
 *		all lower levels have been removed.
 */
bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
{
	return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd,
				      start_gfn << PAGE_SHIFT,
				      end_gfn << PAGE_SHIFT);
}
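
/*
 * Usage sketch (hypothetical, for illustration): flushing every GPA mapping,
 * e.g. when tearing down the shadow page tables. Passing ~0 as the end GFN
 * covers the whole guest physical address space; mmu_lock must be held as
 * documented above.
 */
#if 0
static void example_flush_whole_gpa_space(struct kvm *kvm)
{
	spin_lock(&kvm->mmu_lock);
	kvm_mips_flush_gpa_pt(kvm, 0, ~0);
	spin_unlock(&kvm->mmu_lock);
}
#endif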

#define BUILD_PTE_RANGE_OP(name, op)	\
static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start,	\
				 unsigned long end)	\
{	\
	int ret = 0;	\
	int i_min = pte_index(start);	\
	int i_max = pte_index(end);	\
	int i;	\
	pte_t old, new;	\
	\
	for (i = i_min; i <= i_max; ++i) {	\
		if (!pte_present(pte[i]))	\
			continue;	\
	\
		old = pte[i];	\
		new = op(old);	\
		if (pte_val(new) == pte_val(old))	\
			continue;	\
		set_pte(pte + i, new);	\
		ret = 1;	\
	}	\
	return ret;	\
}	\
	\
/* returns true if anything was done */	\
static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start,	\
				 unsigned long end)	\
{	\
	int ret = 0;	\
	pte_t *pte;	\
	unsigned long cur_end = ~0ul;	\
	int i_min = pmd_index(start);	\
	int i_max = pmd_index(end);	\
	int i;	\
	\
	for (i = i_min; i <= i_max; ++i, start = 0) {	\
		if (!pmd_present(pmd[i]))	\
			continue;	\
	\
		pte = pte_offset_kernel(pmd + i, 0);	\
		if (i == i_max)	\
			cur_end = end;	\
	\
		ret |= kvm_mips_##name##_pte(pte, start, cur_end);	\
	}	\
	return ret;	\
}	\
	\
static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start,	\
				 unsigned long end)	\
{	\
	int ret = 0;	\
	pmd_t *pmd;	\
	unsigned long cur_end = ~0ul;	\
	int i_min = pud_index(start);	\
	int i_max = pud_index(end);	\
	int i;	\
	\
	for (i = i_min; i <= i_max; ++i, start = 0) {	\
		if (!pud_present(pud[i]))	\
			continue;	\
	\
		pmd = pmd_offset(pud + i, 0);	\
		if (i == i_max)	\
			cur_end = end;	\
	\
		ret |= kvm_mips_##name##_pmd(pmd, start, cur_end);	\
	}	\
	return ret;	\
}	\
	\
static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start,	\
				 unsigned long end)	\
{	\
	int ret = 0;	\
	p4d_t *p4d;	\
	pud_t *pud;	\
	unsigned long cur_end = ~0ul;	\
	int i_min = pgd_index(start);	\
	int i_max = pgd_index(end);	\
	int i;	\
	\
	for (i = i_min; i <= i_max; ++i, start = 0) {	\
		if (!pgd_present(pgd[i]))	\
			continue;	\
	\
		p4d = p4d_offset(pgd, 0);	\
		pud = pud_offset(p4d + i, 0);	\
		if (i == i_max)	\
			cur_end = end;	\
	\
		ret |= kvm_mips_##name##_pud(pud, start, cur_end);	\
	}	\
	return ret;	\
}
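
/*
 * BUILD_PTE_RANGE_OP() stamps out a pgd/pud/pmd/pte walker for one PTE
 * transformation; the generated kvm_mips_<name>_pgd() returns nonzero if any
 * PTE was changed. Sketch of a hypothetical extra instantiation (not in this
 * file): any function mapping pte_t to pte_t can be plugged in as the op.
 */
#if 0
BUILD_PTE_RANGE_OP(wrprotect, pte_wrprotect)

static int example_wrprotect_gpa_range(struct kvm *kvm, gfn_t start_gfn,
				       gfn_t end_gfn)
{
	return kvm_mips_wrprotect_pgd(kvm->arch.gpa_mm.pgd,
				      start_gfn << PAGE_SHIFT,
				      end_gfn << PAGE_SHIFT);
}
#endif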

/*
 * kvm_mips_mkclean_gpa_pt.
 * Mark a range of guest physical address space clean (writes fault) in the VM's
 * GPA page table to allow dirty page tracking.
 */

BUILD_PTE_RANGE_OP(mkclean, pte_mkclean)

/**
 * kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean.
 * @kvm:	KVM pointer.
 * @start_gfn:	Guest frame number of first page in GPA range to flush.
 * @end_gfn:	Guest frame number of last page in GPA range to flush.
 *
 * Make a range of GPA mappings clean so that guest writes will fault and
 * trigger dirty page logging.
 *
 * The caller must hold the @kvm->mmu_lock spinlock.
 *
 * Returns:	Whether any GPA mappings were modified, which would require
 *		derived mappings (GVA page tables & TLB entries) to be
 *		invalidated.
 */
int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
{
	return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd,
				    start_gfn << PAGE_SHIFT,
				    end_gfn << PAGE_SHIFT);
}

/**
 * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
 * @kvm:	The KVM pointer
 * @slot:	The memory slot associated with mask
 * @gfn_offset:	The gfn offset in memory slot
 * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
 *		slot to be write protected
 *
 * Walks the bits set in @mask and write protects the associated PTEs. The
 * caller must acquire @kvm->mmu_lock.
 */
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	gfn_t base_gfn = slot->base_gfn + gfn_offset;
	gfn_t start = base_gfn + __ffs(mask);
	gfn_t end = base_gfn + __fls(mask);

	kvm_mips_mkclean_gpa_pt(kvm, start, end);
}
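
/*
 * Worked example (illustrative): with gfn_offset == 0 and mask == 0x38
 * (bits 3..5 set), __ffs(mask) == 3 and __fls(mask) == 5, so GFNs
 * base_gfn + 3 through base_gfn + 5 are made clean. Every GFN between the
 * first and last set bit is write protected, even if its bit in @mask
 * happens to be clear.
 */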

/*
 * kvm_mips_mkold_gpa_pt.
 * Mark a range of guest physical address space old (all accesses fault) in the
 * VM's GPA page table to allow detection of commonly used pages.
 */

BUILD_PTE_RANGE_OP(mkold, pte_mkold)

static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn,
				 gfn_t end_gfn)
{
	return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd,
				  start_gfn << PAGE_SHIFT,
				  end_gfn << PAGE_SHIFT);
}

bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
{
	kvm_mips_flush_gpa_pt(kvm, range->start, range->end);
	return true;
}

bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
	gpa_t gpa = range->start << PAGE_SHIFT;
	pte_t hva_pte = range->pte;
	pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
	pte_t old_pte;

	if (!gpa_pte)
		return false;

	/* Mapping may need adjusting depending on memslot flags */
	old_pte = *gpa_pte;
	if (range->slot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte))
		hva_pte = pte_mkclean(hva_pte);
	else if (range->slot->flags & KVM_MEM_READONLY)
		hva_pte = pte_wrprotect(hva_pte);

	set_pte(gpa_pte, hva_pte);

	/* Replacing an absent or old page doesn't need flushes */
	if (!pte_present(old_pte) || !pte_young(old_pte))
		return false;

	/* Pages swapped, aged, moved, or cleaned require flushes */
	return !pte_present(hva_pte) ||
	       !pte_young(hva_pte) ||
	       pte_pfn(old_pte) != pte_pfn(hva_pte) ||
	       (pte_dirty(old_pte) && !pte_dirty(hva_pte));
}
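
/*
 * Example of the flush decision above (illustrative): if the old PTE was
 * present, young and dirty, and the new PTE maps a different PFN (page
 * migration) or is no longer dirty (e.g. dirty logging was just enabled),
 * kvm_set_spte_gfn() returns true so derived GVA/TLB mappings get invalidated.
 * Replacing a PTE that was never marked young needs no flush, since no TLB
 * entry can have been derived from it.
 */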

bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
	return kvm_mips_mkold_gpa_pt(kvm, range->start, range->end);
}

bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
	gpa_t gpa = range->start << PAGE_SHIFT;
	pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);

	if (!gpa_pte)
		return false;
	return pte_young(*gpa_pte);
}

/**
 * _kvm_mips_map_page_fast() - Fast path GPA fault handler.
 * @vcpu:		VCPU pointer.
 * @gpa:		Guest physical address of fault.
 * @write_fault:	Whether the fault was due to a write.
 * @out_entry:		New PTE for @gpa (written on success unless NULL).
 * @out_buddy:		New PTE for @gpa's buddy (written on success unless
 *			NULL).
 *
 * Perform fast path GPA fault handling, doing all that can be done without
 * calling into KVM. This handles marking old pages young (for idle page
 * tracking), and dirtying of clean pages (for dirty page logging).
 *
 * Returns:	0 on success, in which case we can update derived mappings and
 *		resume guest execution.
 *		-EFAULT on failure due to absent GPA mapping or write to
 *		read-only page, in which case KVM must be consulted.
 */
static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa,
				   bool write_fault,
				   pte_t *out_entry, pte_t *out_buddy)
{
	struct kvm *kvm = vcpu->kvm;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	pte_t *ptep;
	kvm_pfn_t pfn = 0;	/* silence bogus GCC warning */
	bool pfn_valid = false;
	int ret = 0;

	spin_lock(&kvm->mmu_lock);

	/* Fast path - just check GPA page table for an existing entry */
	ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
	if (!ptep || !pte_present(*ptep)) {
		ret = -EFAULT;
		goto out;
	}

	/* Track access to pages marked old */
	if (!pte_young(*ptep)) {
		set_pte(ptep, pte_mkyoung(*ptep));
		pfn = pte_pfn(*ptep);
		pfn_valid = true;
		/* call kvm_set_pfn_accessed() after unlock */
	}
	if (write_fault && !pte_dirty(*ptep)) {
		if (!pte_write(*ptep)) {
			ret = -EFAULT;
			goto out;
		}

		/* Track dirtying of writeable pages */
		set_pte(ptep, pte_mkdirty(*ptep));
		pfn = pte_pfn(*ptep);
		mark_page_dirty(kvm, gfn);
		kvm_set_pfn_dirty(pfn);
	}

	if (out_entry)
		*out_entry = *ptep;
	if (out_buddy)
		*out_buddy = *ptep_buddy(ptep);

out:
	spin_unlock(&kvm->mmu_lock);
	if (pfn_valid)
		kvm_set_pfn_accessed(pfn);
	return ret;
}

/**
 * kvm_mips_map_page() - Map a guest physical page.
 * @vcpu:		VCPU pointer.
 * @gpa:		Guest physical address of fault.
 * @write_fault:	Whether the fault was due to a write.
 * @out_entry:		New PTE for @gpa (written on success unless NULL).
 * @out_buddy:		New PTE for @gpa's buddy (written on success unless
 *			NULL).
 *
 * Handle GPA faults by creating a new GPA mapping (or updating an existing
 * one).
 *
 * This takes care of marking pages young or dirty (idle/dirty page tracking),
 * asking KVM for the corresponding PFN, and creating a mapping in the GPA page
 * tables. Derived mappings (GVA page tables and TLBs) must be handled by the
 * caller.
 *
 * Returns:	0 on success, in which case the caller may use the @out_entry
 *		and @out_buddy PTEs to update derived mappings and resume guest
 *		execution.
 *		-EFAULT if there is no memory region at @gpa or a write was
 *		attempted to a read-only memory region. This is usually handled
 *		as an MMIO access.
 */
static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
			     bool write_fault,
			     pte_t *out_entry, pte_t *out_buddy)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int srcu_idx, err;
	kvm_pfn_t pfn;
	pte_t *ptep, entry, old_pte;
	bool writeable;
	unsigned long prot_bits;
	unsigned long mmu_seq;

	/* Try the fast path to handle old / clean pages */
	srcu_idx = srcu_read_lock(&kvm->srcu);
	err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry,
				      out_buddy);
	if (!err)
		goto out;

	/* We need a minimum of cached pages ready for page table creation */
	err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
	if (err)
		goto out;

retry:
	/*
	 * Used to check for invalidations in progress, of the pfn that is
	 * returned by gfn_to_pfn_prot below.
	 */
	mmu_seq = kvm->mmu_invalidate_seq;
	/*
	 * Ensure the read of mmu_invalidate_seq isn't reordered with PTE reads
	 * in gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't
	 * risk the page we get a reference to getting unmapped before we have a
	 * chance to grab the mmu_lock without mmu_invalidate_retry() noticing.
	 *
	 * This smp_rmb() pairs with the effective smp_wmb() of the combination
	 * of the pte_unmap_unlock() after the PTE is zapped, and the
	 * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before
	 * mmu_invalidate_seq is incremented.
	 */
	smp_rmb();

	/* Slow path - ask KVM core whether we can access this GPA */
	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable);
	if (is_error_noslot_pfn(pfn)) {
		err = -EFAULT;
		goto out;
	}

	spin_lock(&kvm->mmu_lock);
	/* Check if an invalidation has taken place since we got pfn */
	if (mmu_invalidate_retry(kvm, mmu_seq)) {
		/*
		 * This can happen when mappings are changed asynchronously, but
		 * also synchronously if a COW is triggered by
		 * gfn_to_pfn_prot().
		 */
		spin_unlock(&kvm->mmu_lock);
		kvm_release_pfn_clean(pfn);
		goto retry;
	}

	/* Ensure page tables are allocated */
	ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa);

	/* Set up the PTE */
	prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default;
	if (writeable) {
		prot_bits |= _PAGE_WRITE;
		if (write_fault) {
			prot_bits |= __WRITEABLE;
			mark_page_dirty(kvm, gfn);
			kvm_set_pfn_dirty(pfn);
		}
	}
	entry = pfn_pte(pfn, __pgprot(prot_bits));

	/* Write the PTE */
	old_pte = *ptep;
	set_pte(ptep, entry);

	err = 0;
	if (out_entry)
		*out_entry = *ptep;
	if (out_buddy)
		*out_buddy = *ptep_buddy(ptep);

	spin_unlock(&kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	kvm_set_pfn_accessed(pfn);
out:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return err;
}
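
/*
 * The retry logic in kvm_mips_map_page() follows the usual KVM pattern for
 * racing against MMU notifier invalidations; distilled into a simplified
 * sketch (illustration only, error handling omitted):
 */
#if 0
	do {
		mmu_seq = kvm->mmu_invalidate_seq;
		smp_rmb();		/* order against PTE reads in GUP */
		pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable);
		spin_lock(&kvm->mmu_lock);
		if (!mmu_invalidate_retry(kvm, mmu_seq))
			break;		/* pfn still valid, keep mmu_lock held */
		spin_unlock(&kvm->mmu_lock);
		kvm_release_pfn_clean(pfn);	/* raced with invalidation */
	} while (1);
#endif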

int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr,
				      struct kvm_vcpu *vcpu,
				      bool write_fault)
{
	int ret;

	ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL);
	if (ret)
		return ret;

	/* Invalidate this entry in the TLB */
	return kvm_vz_host_tlb_inv(vcpu, badvaddr);
}

/**
 * kvm_mips_migrate_count() - Migrate timer.
 * @vcpu:	Virtual CPU.
 *
 * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it
 * if it was running prior to being cancelled.
 *
 * Must be called when the VCPU is migrated to a different CPU to ensure that
 * timer expiry during guest execution interrupts the guest and causes the
 * interrupt to be delivered in a timely manner.
 */
static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu)
{
	if (hrtimer_cancel(&vcpu->arch.comparecount_timer))
		hrtimer_restart(&vcpu->arch.comparecount_timer);
}

/* Restore ASID once we are scheduled back after preemption */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	unsigned long flags;

	kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);

	local_irq_save(flags);

	vcpu->cpu = cpu;
	if (vcpu->arch.last_sched_cpu != cpu) {
		kvm_debug("[%d->%d]KVM VCPU[%d] switch\n",
			  vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
		/*
		 * Migrate the timer interrupt to the current CPU so that it
		 * always interrupts the guest and synchronously triggers a
		 * guest timer interrupt.
		 */
		kvm_mips_migrate_count(vcpu);
	}

	/* restore guest state to registers */
	kvm_mips_callbacks->vcpu_load(vcpu, cpu);

	local_irq_restore(flags);
}

/* ASID can change if another task is scheduled during preemption */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	unsigned long flags;
	int cpu;

	local_irq_save(flags);

	cpu = smp_processor_id();
	vcpu->arch.last_sched_cpu = cpu;
	vcpu->cpu = -1;

	/* save guest state in registers */
	kvm_mips_callbacks->vcpu_put(vcpu, cpu);

	local_irq_restore(flags);
}