0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  *
0004  * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
0005  */
0006 
0007 #include <linux/types.h>
0008 #include <linux/string.h>
0009 #include <linux/kvm.h>
0010 #include <linux/kvm_host.h>
0011 #include <linux/hugetlb.h>
0012 #include <linux/module.h>
0013 #include <linux/log2.h>
0014 #include <linux/sizes.h>
0015 
0016 #include <asm/trace.h>
0017 #include <asm/kvm_ppc.h>
0018 #include <asm/kvm_book3s.h>
0019 #include <asm/book3s/64/mmu-hash.h>
0020 #include <asm/hvcall.h>
0021 #include <asm/synch.h>
0022 #include <asm/ppc-opcode.h>
0023 #include <asm/pte-walk.h>
0024 
0025 /* Translate address of a vmalloc'd thing to a linear map address */
0026 static void *real_vmalloc_addr(void *addr)
0027 {
0028     return __va(ppc_find_vmap_phys((unsigned long)addr));
0029 }
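/*
 * Note (added for clarity): real-mode code cannot safely walk the vmalloc
 * mapping, so the physical page backing the vmalloc address is looked up
 * and then accessed through the kernel linear mapping (__va) instead.
 */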
0030 
0031 /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
0032 static int global_invalidates(struct kvm *kvm)
0033 {
0034     int global;
0035     int cpu;
0036 
0037     /*
0038      * If there is only one vcore, and it's currently running,
0039      * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
0040      * we can use tlbiel as long as we mark all other physical
0041      * cores as potentially having stale TLB entries for this lpid.
0042      * Otherwise, don't use tlbiel.
0043      */
0044     if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
0045         global = 0;
0046     else
0047         global = 1;
0048 
0049     /* LPID has been switched to host if in virt mode so can't do local */
0050     if (!global && (mfmsr() & (MSR_IR|MSR_DR)))
0051         global = 1;
0052 
0053     if (!global) {
0054         /* any other core might now have stale TLB entries... */
0055         smp_wmb();
0056         cpumask_setall(&kvm->arch.need_tlb_flush);
0057         cpu = local_paca->kvm_hstate.kvm_vcore->pcpu;
0058         cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
0059     }
0060 
0061     return global;
0062 }
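/*
 * Typical call pattern, shown for illustration (see the H_REMOVE and
 * H_PROTECT paths below):
 *
 *	rb = compute_tlbie_rb(v, r, pte_index);
 *	do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
 *
 * When 0 (local) is returned, every other physical core has been marked in
 * kvm->arch.need_tlb_flush and is expected to flush its TLB for this LPID
 * before it next runs the guest.
 */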
0063 
0064 /*
0065  * Add this HPTE into the chain for the real page.
0066  * Must be called with the chain locked; it unlocks the chain.
0067  */
0068 void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
0069                  unsigned long *rmap, long pte_index, int realmode)
0070 {
0071     struct revmap_entry *head, *tail;
0072     unsigned long i;
0073 
0074     if (*rmap & KVMPPC_RMAP_PRESENT) {
0075         i = *rmap & KVMPPC_RMAP_INDEX;
0076         head = &kvm->arch.hpt.rev[i];
0077         if (realmode)
0078             head = real_vmalloc_addr(head);
0079         tail = &kvm->arch.hpt.rev[head->back];
0080         if (realmode)
0081             tail = real_vmalloc_addr(tail);
0082         rev->forw = i;
0083         rev->back = head->back;
0084         tail->forw = pte_index;
0085         head->back = pte_index;
0086     } else {
0087         rev->forw = rev->back = pte_index;
0088         *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
0089             pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT;
0090     }
0091     unlock_rmap(rmap);
0092 }
0093 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
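/*
 * Illustrative sketch, added for clarity and not part of the original file:
 * the revmap chain is a circular doubly-linked list threaded through HPTE
 * indices rather than pointers.  The helper below models the insertion done
 * by kvmppc_add_revmap_chain() with a plain array standing in for
 * kvm->arch.hpt.rev; the names are hypothetical.
 */
struct idx_link { unsigned long forw, back; };

static inline void idx_link_insert(struct idx_link *rev, unsigned long head_idx,
				   unsigned long new_idx)
{
	struct idx_link *head = &rev[head_idx];
	struct idx_link *tail = &rev[head->back];

	rev[new_idx].forw = head_idx;	/* new entry points forward at the head */
	rev[new_idx].back = head->back;	/* ... and back at the old tail */
	tail->forw = new_idx;		/* old tail now points at the new entry */
	head->back = new_idx;		/* which becomes the new tail */
}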
0094 
0095 /* Update the dirty bitmap of a memslot */
0096 void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot,
0097                  unsigned long gfn, unsigned long psize)
0098 {
0099     unsigned long npages;
0100 
0101     if (!psize || !memslot->dirty_bitmap)
0102         return;
0103     npages = (psize + PAGE_SIZE - 1) / PAGE_SIZE;
0104     gfn -= memslot->base_gfn;
0105     set_dirty_bits_atomic(memslot->dirty_bitmap, gfn, npages);
0106 }
0107 EXPORT_SYMBOL_GPL(kvmppc_update_dirty_map);
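/*
 * Worked example (added for illustration; the helper name is hypothetical):
 * the round-up means a 64K guest page dirties 16 bits of the bitmap when the
 * host PAGE_SIZE is 4K, and any non-zero psize sets at least one bit.
 */
static inline unsigned long dirty_bits_for_psize(unsigned long psize)
{
	return (psize + PAGE_SIZE - 1) / PAGE_SIZE;	/* e.g. SZ_64K with 4K pages -> 16 */
}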
0108 
0109 static void kvmppc_set_dirty_from_hpte(struct kvm *kvm,
0110                 unsigned long hpte_v, unsigned long hpte_gr)
0111 {
0112     struct kvm_memory_slot *memslot;
0113     unsigned long gfn;
0114     unsigned long psize;
0115 
0116     psize = kvmppc_actual_pgsz(hpte_v, hpte_gr);
0117     gfn = hpte_rpn(hpte_gr, psize);
0118     memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
0119     if (memslot && memslot->dirty_bitmap)
0120         kvmppc_update_dirty_map(memslot, gfn, psize);
0121 }
0122 
0123 /* Returns a pointer to the revmap entry for the page mapped by a HPTE */
0124 static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
0125                       unsigned long hpte_gr,
0126                       struct kvm_memory_slot **memslotp,
0127                       unsigned long *gfnp)
0128 {
0129     struct kvm_memory_slot *memslot;
0130     unsigned long *rmap;
0131     unsigned long gfn;
0132 
0133     gfn = hpte_rpn(hpte_gr, kvmppc_actual_pgsz(hpte_v, hpte_gr));
0134     memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
0135     if (memslotp)
0136         *memslotp = memslot;
0137     if (gfnp)
0138         *gfnp = gfn;
0139     if (!memslot)
0140         return NULL;
0141 
0142     rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
0143     return rmap;
0144 }
0145 
0146 /* Remove this HPTE from the chain for a real page */
0147 static void remove_revmap_chain(struct kvm *kvm, long pte_index,
0148                 struct revmap_entry *rev,
0149                 unsigned long hpte_v, unsigned long hpte_r)
0150 {
0151     struct revmap_entry *next, *prev;
0152     unsigned long ptel, head;
0153     unsigned long *rmap;
0154     unsigned long rcbits;
0155     struct kvm_memory_slot *memslot;
0156     unsigned long gfn;
0157 
0158     rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
0159     ptel = rev->guest_rpte |= rcbits;
0160     rmap = revmap_for_hpte(kvm, hpte_v, ptel, &memslot, &gfn);
0161     if (!rmap)
0162         return;
0163     lock_rmap(rmap);
0164 
0165     head = *rmap & KVMPPC_RMAP_INDEX;
0166     next = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->forw]);
0167     prev = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->back]);
0168     next->back = rev->back;
0169     prev->forw = rev->forw;
0170     if (head == pte_index) {
0171         head = rev->forw;
0172         if (head == pte_index)
0173             *rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
0174         else
0175             *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
0176     }
0177     *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
0178     if (rcbits & HPTE_R_C)
0179         kvmppc_update_dirty_map(memslot, gfn,
0180                     kvmppc_actual_pgsz(hpte_v, hpte_r));
0181     unlock_rmap(rmap);
0182 }
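/*
 * Note (added): the R/C bits harvested here are parked in the rmap word above
 * KVMPPC_RMAP_RC_SHIFT.  A later kvmppc_do_h_enter() for the same page reads
 * them back ("Only set R/C in real HPTE if already set in *rmap"), and a set
 * Change bit is also folded into the memslot dirty bitmap right away.
 */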
0183 
0184 long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
0185                long pte_index, unsigned long pteh, unsigned long ptel,
0186                pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
0187 {
0188     unsigned long i, pa, gpa, gfn, psize;
0189     unsigned long slot_fn, hva;
0190     __be64 *hpte;
0191     struct revmap_entry *rev;
0192     unsigned long g_ptel;
0193     struct kvm_memory_slot *memslot;
0194     unsigned hpage_shift;
0195     bool is_ci;
0196     unsigned long *rmap;
0197     pte_t *ptep;
0198     unsigned int writing;
0199     unsigned long mmu_seq;
0200     unsigned long rcbits;
0201 
0202     if (kvm_is_radix(kvm))
0203         return H_FUNCTION;
0204     /*
0205      * The HPTE gets used by compute_tlbie_rb() to set TLBIE bits, so
0206      * these functions should work together -- we must ensure a guest cannot
0207      * cause problems with the TLBIE that KVM executes.
0208      */
0209     if ((pteh >> HPTE_V_SSIZE_SHIFT) & 0x2) {
0210         /* B=0b1x is a reserved value, disallow it. */
0211         return H_PARAMETER;
0212     }
0213     psize = kvmppc_actual_pgsz(pteh, ptel);
0214     if (!psize)
0215         return H_PARAMETER;
0216     writing = hpte_is_writable(ptel);
0217     pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
0218     ptel &= ~HPTE_GR_RESERVED;
0219     g_ptel = ptel;
0220 
0221     /* used later to detect if we might have been invalidated */
0222     mmu_seq = kvm->mmu_invalidate_seq;
0223     smp_rmb();
0224 
0225     /* Find the memslot (if any) for this address */
0226     gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
0227     gfn = gpa >> PAGE_SHIFT;
0228     memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
0229     pa = 0;
0230     is_ci = false;
0231     rmap = NULL;
0232     if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
0233         /* Emulated MMIO - mark this with key=31 */
0234         pteh |= HPTE_V_ABSENT;
0235         ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
0236         goto do_insert;
0237     }
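    /*
     * Note (added): "key=31" means both HPTE_R_KEY_HI and HPTE_R_KEY_LO are
     * fully set; is_mmio_hpte() below recognises exactly this combination of
     * HPTE_V_ABSENT plus an all-ones key, which is why the non-MMIO absent
     * path later clears the key bits instead.
     */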
0238 
0239     /* Check if the requested page fits entirely in the memslot. */
0240     if (!slot_is_aligned(memslot, psize))
0241         return H_PARAMETER;
0242     slot_fn = gfn - memslot->base_gfn;
0243     rmap = &memslot->arch.rmap[slot_fn];
0244 
0245     /* Translate to host virtual address */
0246     hva = __gfn_to_hva_memslot(memslot, gfn);
0247 
0248     arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
0249     ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &hpage_shift);
0250     if (ptep) {
0251         pte_t pte;
0252         unsigned int host_pte_size;
0253 
0254         if (hpage_shift)
0255             host_pte_size = 1ul << hpage_shift;
0256         else
0257             host_pte_size = PAGE_SIZE;
0258         /*
0259          * We should always find the guest page size
0260          * to be <= the host page size, if the host is using hugepages
0261          */
0262         if (host_pte_size < psize) {
0263             arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
0264             return H_PARAMETER;
0265         }
0266         pte = kvmppc_read_update_linux_pte(ptep, writing);
0267         if (pte_present(pte) && !pte_protnone(pte)) {
0268             if (writing && !__pte_write(pte))
0269                 /* make the actual HPTE be read-only */
0270                 ptel = hpte_make_readonly(ptel);
0271             is_ci = pte_ci(pte);
0272             pa = pte_pfn(pte) << PAGE_SHIFT;
0273             pa |= hva & (host_pte_size - 1);
0274             pa |= gpa & ~PAGE_MASK;
0275         }
0276     }
0277     arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
0278 
0279     ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1);
0280     ptel |= pa;
0281 
0282     if (pa)
0283         pteh |= HPTE_V_VALID;
0284     else {
0285         pteh |= HPTE_V_ABSENT;
0286         ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
0287     }
0288 
0289     /* If we had a host pte mapping then check WIMG */
0290     if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
0291         if (is_ci)
0292             return H_PARAMETER;
0293         /*
0294          * Allow guest to map emulated device memory as
0295          * uncacheable, but actually make it cacheable.
0296          */
0297         ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
0298         ptel |= HPTE_R_M;
0299     }
0300 
0301     /* Find and lock the HPTEG slot to use */
0302  do_insert:
0303     if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
0304         return H_PARAMETER;
0305     if (likely((flags & H_EXACT) == 0)) {
0306         pte_index &= ~7UL;
0307         hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
0308         for (i = 0; i < 8; ++i) {
0309             if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
0310                 try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
0311                       HPTE_V_ABSENT))
0312                 break;
0313             hpte += 2;
0314         }
0315         if (i == 8) {
0316             /*
0317              * Since try_lock_hpte doesn't retry (not even stdcx.
0318              * failures), it could be that there is a free slot
0319              * but we transiently failed to lock it.  Try again,
0320              * actually locking each slot and checking it.
0321              */
0322             hpte -= 16;
0323             for (i = 0; i < 8; ++i) {
0324                 u64 pte;
0325                 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
0326                     cpu_relax();
0327                 pte = be64_to_cpu(hpte[0]);
0328                 if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
0329                     break;
0330                 __unlock_hpte(hpte, pte);
0331                 hpte += 2;
0332             }
0333             if (i == 8)
0334                 return H_PTEG_FULL;
0335         }
0336         pte_index += i;
0337     } else {
0338         hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
0339         if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
0340                    HPTE_V_ABSENT)) {
0341             /* Lock the slot and check again */
0342             u64 pte;
0343 
0344             while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
0345                 cpu_relax();
0346             pte = be64_to_cpu(hpte[0]);
0347             if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
0348                 __unlock_hpte(hpte, pte);
0349                 return H_PTEG_FULL;
0350             }
0351         }
0352     }
0353 
0354     /* Save away the guest's idea of the second HPTE dword */
0355     rev = &kvm->arch.hpt.rev[pte_index];
0356     if (realmode)
0357         rev = real_vmalloc_addr(rev);
0358     if (rev) {
0359         rev->guest_rpte = g_ptel;
0360         note_hpte_modification(kvm, rev);
0361     }
0362 
0363     /* Link HPTE into reverse-map chain */
0364     if (pteh & HPTE_V_VALID) {
0365         if (realmode)
0366             rmap = real_vmalloc_addr(rmap);
0367         lock_rmap(rmap);
0368         /* Check for pending invalidations under the rmap chain lock */
0369         if (mmu_invalidate_retry(kvm, mmu_seq)) {
0370             /* inval in progress, write a non-present HPTE */
0371             pteh |= HPTE_V_ABSENT;
0372             pteh &= ~HPTE_V_VALID;
0373             ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
0374             unlock_rmap(rmap);
0375         } else {
0376             kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
0377                         realmode);
0378             /* Only set R/C in real HPTE if already set in *rmap */
0379             rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
0380             ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
0381         }
0382     }
0383 
0384     /* Convert to new format on P9 */
0385     if (cpu_has_feature(CPU_FTR_ARCH_300)) {
0386         ptel = hpte_old_to_new_r(pteh, ptel);
0387         pteh = hpte_old_to_new_v(pteh);
0388     }
0389     hpte[1] = cpu_to_be64(ptel);
0390 
0391     /* Write the first HPTE dword, unlocking the HPTE and making it valid */
0392     eieio();
0393     __unlock_hpte(hpte, pteh);
0394     asm volatile("ptesync" : : : "memory");
0395 
0396     *pte_idx_ret = pte_index;
0397     return H_SUCCESS;
0398 }
0399 EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);
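/*
 * Illustrative helpers, added for clarity (hypothetical names, not used by
 * the code above): each HPTE is two doublewords (16 bytes), which is why
 * pte_index is converted to a byte offset with "<< 4", and a PTEG is a group
 * of 8 HPTEs (128 bytes).  Without H_EXACT the low three bits of pte_index
 * are ignored and any free slot in that group may be chosen; the index
 * actually used is returned through *pte_idx_ret.
 */
static inline unsigned long hpte_byte_offset(unsigned long pte_index)
{
	return pte_index << 4;			/* 16 bytes per HPTE */
}

static inline unsigned long hpteg_byte_offset(unsigned long pte_index)
{
	return (pte_index & ~7UL) << 4;		/* 8 HPTEs per PTEG */
}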
0400 
0401 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
0402             long pte_index, unsigned long pteh, unsigned long ptel)
0403 {
0404     return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
0405                  vcpu->arch.pgdir, true,
0406                  &vcpu->arch.regs.gpr[4]);
0407 }
0408 EXPORT_SYMBOL_GPL(kvmppc_h_enter);
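/*
 * Note (added): per the hcall convention used here, return values are handed
 * back to the guest in r4 and up, so passing &vcpu->arch.regs.gpr[4] as
 * pte_idx_ret places the allocated HPTE index where the guest expects the
 * first H_ENTER return value.
 */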
0409 
0410 #ifdef __BIG_ENDIAN__
0411 #define LOCK_TOKEN  (*(u32 *)(&get_paca()->lock_token))
0412 #else
0413 #define LOCK_TOKEN  (*(u32 *)(&get_paca()->paca_index))
0414 #endif
0415 
0416 static inline int is_mmio_hpte(unsigned long v, unsigned long r)
0417 {
0418     return ((v & HPTE_V_ABSENT) &&
0419         (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
0420         (HPTE_R_KEY_HI | HPTE_R_KEY_LO));
0421 }
0422 
0423 static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
0424 {
0425 
0426     if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
0427         /* Radix flush for a hash guest */
0428 
0429         unsigned long rb, rs, prs, r, ric;
0430 
0431         rb = PPC_BIT(52); /* IS = 2 */
0432         rs = 0;  /* lpid = 0 */
0433         prs = 0; /* partition scoped */
0434         r = 1;   /* radix format */
0435         ric = 0; /* RIC_FLUSH_TLB */
0436 
0437         /*
0438          * Need the extra ptesync to make sure we don't
0439          * re-order the tlbie
0440          */
0441         asm volatile("ptesync": : :"memory");
0442         asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
0443                  : : "r"(rb), "i"(r), "i"(prs),
0444                    "i"(ric), "r"(rs) : "memory");
0445     }
0446 
0447     if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
0448         asm volatile("ptesync": : :"memory");
0449         asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
0450                  "r" (rb_value), "r" (lpid));
0451     }
0452 }
0453 
0454 static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
0455               long npages, int global, bool need_sync)
0456 {
0457     long i;
0458 
0459     /*
0460      * We use the POWER9 5-operand versions of tlbie and tlbiel here.
0461      * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores
0462      * the RS field, this is backwards-compatible with P7 and P8.
0463      */
0464     if (global) {
0465         if (need_sync)
0466             asm volatile("ptesync" : : : "memory");
0467         for (i = 0; i < npages; ++i) {
0468             asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
0469                      "r" (rbvalues[i]), "r" (kvm->arch.lpid));
0470         }
0471 
0472         fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
0473         asm volatile("eieio; tlbsync; ptesync" : : : "memory");
0474     } else {
0475         if (need_sync)
0476             asm volatile("ptesync" : : : "memory");
0477         for (i = 0; i < npages; ++i) {
0478             asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : :
0479                      "r" (rbvalues[i]), "r" (0));
0480         }
0481         asm volatile("ptesync" : : : "memory");
0482     }
0483 }
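/*
 * Note (added): callers batch invalidations where they can -- e.g.
 * kvmppc_h_bulk_remove() collects up to four rb values in tlbrb[] and issues
 * a single do_tlbies() call, so the ptesync / eieio; tlbsync; ptesync
 * sequence is paid once per batch rather than once per HPTE.
 */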
0484 
0485 long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
0486             unsigned long pte_index, unsigned long avpn,
0487             unsigned long *hpret)
0488 {
0489     __be64 *hpte;
0490     unsigned long v, r, rb;
0491     struct revmap_entry *rev;
0492     u64 pte, orig_pte, pte_r;
0493 
0494     if (kvm_is_radix(kvm))
0495         return H_FUNCTION;
0496     if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
0497         return H_PARAMETER;
0498     hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
0499     while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
0500         cpu_relax();
0501     pte = orig_pte = be64_to_cpu(hpte[0]);
0502     pte_r = be64_to_cpu(hpte[1]);
0503     if (cpu_has_feature(CPU_FTR_ARCH_300)) {
0504         pte = hpte_new_to_old_v(pte, pte_r);
0505         pte_r = hpte_new_to_old_r(pte_r);
0506     }
0507     if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
0508         ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
0509         ((flags & H_ANDCOND) && (pte & avpn) != 0)) {
0510         __unlock_hpte(hpte, orig_pte);
0511         return H_NOT_FOUND;
0512     }
0513 
0514     rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
0515     v = pte & ~HPTE_V_HVLOCK;
0516     if (v & HPTE_V_VALID) {
0517         hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
0518         rb = compute_tlbie_rb(v, pte_r, pte_index);
0519         do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
0520         /*
0521          * The reference (R) and change (C) bits in a HPT
0522          * entry can be set by hardware at any time up until
0523          * the HPTE is invalidated and the TLB invalidation
0524          * sequence has completed.  This means that when
0525          * removing a HPTE, we need to re-read the HPTE after
0526          * the invalidation sequence has completed in order to
0527          * obtain reliable values of R and C.
0528          */
0529         remove_revmap_chain(kvm, pte_index, rev, v,
0530                     be64_to_cpu(hpte[1]));
0531     }
0532     r = rev->guest_rpte & ~HPTE_GR_RESERVED;
0533     note_hpte_modification(kvm, rev);
0534     unlock_hpte(hpte, 0);
0535 
0536     if (is_mmio_hpte(v, pte_r))
0537         atomic64_inc(&kvm->arch.mmio_update);
0538 
0539     if (v & HPTE_V_ABSENT)
0540         v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
0541     hpret[0] = v;
0542     hpret[1] = r;
0543     return H_SUCCESS;
0544 }
0545 EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);
0546 
0547 long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
0548              unsigned long pte_index, unsigned long avpn)
0549 {
0550     return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
0551                   &vcpu->arch.regs.gpr[4]);
0552 }
0553 EXPORT_SYMBOL_GPL(kvmppc_h_remove);
0554 
0555 long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
0556 {
0557     struct kvm *kvm = vcpu->kvm;
0558     unsigned long *args = &vcpu->arch.regs.gpr[4];
0559     __be64 *hp, *hptes[4];
0560     unsigned long tlbrb[4];
0561     long int i, j, k, n, found, indexes[4];
0562     unsigned long flags, req, pte_index, rcbits;
0563     int global;
0564     long int ret = H_SUCCESS;
0565     struct revmap_entry *rev, *revs[4];
0566     u64 hp0, hp1;
0567 
0568     if (kvm_is_radix(kvm))
0569         return H_FUNCTION;
0570     global = global_invalidates(kvm);
0571     for (i = 0; i < 4 && ret == H_SUCCESS; ) {
0572         n = 0;
0573         for (; i < 4; ++i) {
0574             j = i * 2;
0575             pte_index = args[j];
0576             flags = pte_index >> 56;
0577             pte_index &= ((1ul << 56) - 1);
0578             req = flags >> 6;
0579             flags &= 3;
0580             if (req == 3) {     /* no more requests */
0581                 i = 4;
0582                 break;
0583             }
0584             if (req != 1 || flags == 3 ||
0585                 pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) {
0586                 /* parameter error */
0587                 args[j] = ((0xa0 | flags) << 56) + pte_index;
0588                 ret = H_PARAMETER;
0589                 break;
0590             }
0591             hp = (__be64 *) (kvm->arch.hpt.virt + (pte_index << 4));
0592             /* to avoid deadlock, don't spin except for first */
0593             if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
0594                 if (n)
0595                     break;
0596                 while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
0597                     cpu_relax();
0598             }
0599             found = 0;
0600             hp0 = be64_to_cpu(hp[0]);
0601             hp1 = be64_to_cpu(hp[1]);
0602             if (cpu_has_feature(CPU_FTR_ARCH_300)) {
0603                 hp0 = hpte_new_to_old_v(hp0, hp1);
0604                 hp1 = hpte_new_to_old_r(hp1);
0605             }
0606             if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
0607                 switch (flags & 3) {
0608                 case 0:     /* absolute */
0609                     found = 1;
0610                     break;
0611                 case 1:     /* andcond */
0612                     if (!(hp0 & args[j + 1]))
0613                         found = 1;
0614                     break;
0615                 case 2:     /* AVPN */
0616                     if ((hp0 & ~0x7fUL) == args[j + 1])
0617                         found = 1;
0618                     break;
0619                 }
0620             }
0621             if (!found) {
0622                 hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
0623                 args[j] = ((0x90 | flags) << 56) + pte_index;
0624                 continue;
0625             }
0626 
0627             args[j] = ((0x80 | flags) << 56) + pte_index;
0628             rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
0629             note_hpte_modification(kvm, rev);
0630 
0631             if (!(hp0 & HPTE_V_VALID)) {
0632                 /* insert R and C bits from PTE */
0633                 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
0634                 args[j] |= rcbits << (56 - 5);
0635                 hp[0] = 0;
0636                 if (is_mmio_hpte(hp0, hp1))
0637                     atomic64_inc(&kvm->arch.mmio_update);
0638                 continue;
0639             }
0640 
0641             /* leave it locked */
0642             hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
0643             tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index);
0644             indexes[n] = j;
0645             hptes[n] = hp;
0646             revs[n] = rev;
0647             ++n;
0648         }
0649 
0650         if (!n)
0651             break;
0652 
0653         /* Now that we've collected a batch, do the tlbies */
0654         do_tlbies(kvm, tlbrb, n, global, true);
0655 
0656         /* Read PTE low words after tlbie to get final R/C values */
0657         for (k = 0; k < n; ++k) {
0658             j = indexes[k];
0659             pte_index = args[j] & ((1ul << 56) - 1);
0660             hp = hptes[k];
0661             rev = revs[k];
0662             remove_revmap_chain(kvm, pte_index, rev,
0663                 be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
0664             rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
0665             args[j] |= rcbits << (56 - 5);
0666             __unlock_hpte(hp, 0);
0667         }
0668     }
0669 
0670     return ret;
0671 }
0672 EXPORT_SYMBOL_GPL(kvmppc_h_bulk_remove);
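/*
 * Illustrative decode of one H_BULK_REMOVE "translation specifier" word as it
 * is interpreted above (added sketch; the helper name and out-parameters are
 * hypothetical).  The top byte is control on input and status on output
 * (0x80 | flags = success, 0x90 | flags = not found, 0xa0 | flags = parameter
 * error, with the guest-view R/C bits or'ed in at "<< (56 - 5)").
 */
static inline void bulk_remove_decode(unsigned long ts, unsigned long *req,
				      unsigned long *flags, unsigned long *ptex)
{
	unsigned long ctrl = ts >> 56;		/* top byte: control/return */

	*req = ctrl >> 6;			/* 3 means "no more requests" */
	*flags = ctrl & 3;			/* 0 absolute, 1 andcond, 2 AVPN, 3 invalid */
	*ptex = ts & ((1ul << 56) - 1);		/* HPTE index */
}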
0673 
0674 long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
0675               unsigned long pte_index, unsigned long avpn)
0676 {
0677     struct kvm *kvm = vcpu->kvm;
0678     __be64 *hpte;
0679     struct revmap_entry *rev;
0680     unsigned long v, r, rb, mask, bits;
0681     u64 pte_v, pte_r;
0682 
0683     if (kvm_is_radix(kvm))
0684         return H_FUNCTION;
0685     if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
0686         return H_PARAMETER;
0687 
0688     hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
0689     while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
0690         cpu_relax();
0691     v = pte_v = be64_to_cpu(hpte[0]);
0692     if (cpu_has_feature(CPU_FTR_ARCH_300))
0693         v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1]));
0694     if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
0695         ((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) {
0696         __unlock_hpte(hpte, pte_v);
0697         return H_NOT_FOUND;
0698     }
0699 
0700     pte_r = be64_to_cpu(hpte[1]);
0701     bits = (flags << 55) & HPTE_R_PP0;
0702     bits |= (flags << 48) & HPTE_R_KEY_HI;
0703     bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
0704 
0705     /* Update guest view of 2nd HPTE dword */
0706     mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
0707         HPTE_R_KEY_HI | HPTE_R_KEY_LO;
0708     rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
0709     if (rev) {
0710         r = (rev->guest_rpte & ~mask) | bits;
0711         rev->guest_rpte = r;
0712         note_hpte_modification(kvm, rev);
0713     }
0714 
0715     /* Update HPTE */
0716     if (v & HPTE_V_VALID) {
0717         /*
0718          * If the page is valid, don't let it transition from
0719          * readonly to writable.  If it should be writable, we'll
0720          * take a trap and let the page fault code sort it out.
0721          */
0722         r = (pte_r & ~mask) | bits;
0723         if (hpte_is_writable(r) && !hpte_is_writable(pte_r))
0724             r = hpte_make_readonly(r);
0725         /* If the PTE is changing, invalidate it first */
0726         if (r != pte_r) {
0727             rb = compute_tlbie_rb(v, r, pte_index);
0728             hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) |
0729                           HPTE_V_ABSENT);
0730             do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
0731             /* Don't lose R/C bit updates done by hardware */
0732             r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C);
0733             hpte[1] = cpu_to_be64(r);
0734         }
0735     }
0736     unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK);
0737     asm volatile("ptesync" : : : "memory");
0738     if (is_mmio_hpte(v, pte_r))
0739         atomic64_inc(&kvm->arch.mmio_update);
0740 
0741     return H_SUCCESS;
0742 }
0743 EXPORT_SYMBOL_GPL(kvmppc_h_protect);
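/*
 * Note (added): the H_PROTECT flags argument carries the new protection bits
 * in (mostly) their final positions -- PP, N and the low key bits are used
 * as-is, while PP0 and the high key bits are shifted up (by 55 and 48 above)
 * because they live in the top byte of the second HPTE doubleword.
 */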
0744 
0745 long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
0746            unsigned long pte_index)
0747 {
0748     struct kvm *kvm = vcpu->kvm;
0749     __be64 *hpte;
0750     unsigned long v, r;
0751     int i, n = 1;
0752     struct revmap_entry *rev = NULL;
0753 
0754     if (kvm_is_radix(kvm))
0755         return H_FUNCTION;
0756     if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
0757         return H_PARAMETER;
0758     if (flags & H_READ_4) {
0759         pte_index &= ~3;
0760         n = 4;
0761     }
0762     rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
0763     for (i = 0; i < n; ++i, ++pte_index) {
0764         hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
0765         v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
0766         r = be64_to_cpu(hpte[1]);
0767         if (cpu_has_feature(CPU_FTR_ARCH_300)) {
0768             v = hpte_new_to_old_v(v, r);
0769             r = hpte_new_to_old_r(r);
0770         }
0771         if (v & HPTE_V_ABSENT) {
0772             v &= ~HPTE_V_ABSENT;
0773             v |= HPTE_V_VALID;
0774         }
0775         if (v & HPTE_V_VALID) {
0776             r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
0777             r &= ~HPTE_GR_RESERVED;
0778         }
0779         vcpu->arch.regs.gpr[4 + i * 2] = v;
0780         vcpu->arch.regs.gpr[5 + i * 2] = r;
0781     }
0782     return H_SUCCESS;
0783 }
0784 EXPORT_SYMBOL_GPL(kvmppc_h_read);
0785 
0786 long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
0787             unsigned long pte_index)
0788 {
0789     struct kvm *kvm = vcpu->kvm;
0790     __be64 *hpte;
0791     unsigned long v, r, gr;
0792     struct revmap_entry *rev;
0793     unsigned long *rmap;
0794     long ret = H_NOT_FOUND;
0795 
0796     if (kvm_is_radix(kvm))
0797         return H_FUNCTION;
0798     if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
0799         return H_PARAMETER;
0800 
0801     rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
0802     hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
0803     while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
0804         cpu_relax();
0805     v = be64_to_cpu(hpte[0]);
0806     r = be64_to_cpu(hpte[1]);
0807     if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
0808         goto out;
0809 
0810     gr = rev->guest_rpte;
0811     if (rev->guest_rpte & HPTE_R_R) {
0812         rev->guest_rpte &= ~HPTE_R_R;
0813         note_hpte_modification(kvm, rev);
0814     }
0815     if (v & HPTE_V_VALID) {
0816         gr |= r & (HPTE_R_R | HPTE_R_C);
0817         if (r & HPTE_R_R) {
0818             kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
0819             rmap = revmap_for_hpte(kvm, v, gr, NULL, NULL);
0820             if (rmap) {
0821                 lock_rmap(rmap);
0822                 *rmap |= KVMPPC_RMAP_REFERENCED;
0823                 unlock_rmap(rmap);
0824             }
0825         }
0826     }
0827     vcpu->arch.regs.gpr[4] = gr;
0828     ret = H_SUCCESS;
0829  out:
0830     unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
0831     return ret;
0832 }
0833 EXPORT_SYMBOL_GPL(kvmppc_h_clear_ref);
0834 
0835 long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
0836             unsigned long pte_index)
0837 {
0838     struct kvm *kvm = vcpu->kvm;
0839     __be64 *hpte;
0840     unsigned long v, r, gr;
0841     struct revmap_entry *rev;
0842     long ret = H_NOT_FOUND;
0843 
0844     if (kvm_is_radix(kvm))
0845         return H_FUNCTION;
0846     if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
0847         return H_PARAMETER;
0848 
0849     rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
0850     hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
0851     while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
0852         cpu_relax();
0853     v = be64_to_cpu(hpte[0]);
0854     r = be64_to_cpu(hpte[1]);
0855     if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
0856         goto out;
0857 
0858     gr = rev->guest_rpte;
0859     if (gr & HPTE_R_C) {
0860         rev->guest_rpte &= ~HPTE_R_C;
0861         note_hpte_modification(kvm, rev);
0862     }
0863     if (v & HPTE_V_VALID) {
0864         /* need to make it temporarily absent so C is stable */
0865         hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
0866         kvmppc_invalidate_hpte(kvm, hpte, pte_index);
0867         r = be64_to_cpu(hpte[1]);
0868         gr |= r & (HPTE_R_R | HPTE_R_C);
0869         if (r & HPTE_R_C) {
0870             hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
0871             eieio();
0872             kvmppc_set_dirty_from_hpte(kvm, v, gr);
0873         }
0874     }
0875     vcpu->arch.regs.gpr[4] = gr;
0876     ret = H_SUCCESS;
0877  out:
0878     unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
0879     return ret;
0880 }
0881 EXPORT_SYMBOL_GPL(kvmppc_h_clear_mod);
0882 
0883 static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
0884               unsigned long gpa, int writing, unsigned long *hpa,
0885               struct kvm_memory_slot **memslot_p)
0886 {
0887     struct kvm *kvm = vcpu->kvm;
0888     struct kvm_memory_slot *memslot;
0889     unsigned long gfn, hva, pa, psize = PAGE_SHIFT;
0890     unsigned int shift;
0891     pte_t *ptep, pte;
0892 
0893     /* Find the memslot for this address */
0894     gfn = gpa >> PAGE_SHIFT;
0895     memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
0896     if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
0897         return H_PARAMETER;
0898 
0899     /* Translate to host virtual address */
0900     hva = __gfn_to_hva_memslot(memslot, gfn);
0901 
0902     /* Try to find the host pte for that virtual address */
0903     ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
0904     if (!ptep)
0905         return H_TOO_HARD;
0906     pte = kvmppc_read_update_linux_pte(ptep, writing);
0907     if (!pte_present(pte))
0908         return H_TOO_HARD;
0909 
0910     /* Convert to a physical address */
0911     if (shift)
0912         psize = 1UL << shift;
0913     pa = pte_pfn(pte) << PAGE_SHIFT;
0914     pa |= hva & (psize - 1);
0915     pa |= gpa & ~PAGE_MASK;
0916 
0917     if (hpa)
0918         *hpa = pa;
0919     if (memslot_p)
0920         *memslot_p = memslot;
0921 
0922     return H_SUCCESS;
0923 }
0924 
0925 static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu,
0926                        unsigned long dest)
0927 {
0928     struct kvm_memory_slot *memslot;
0929     struct kvm *kvm = vcpu->kvm;
0930     unsigned long pa, mmu_seq;
0931     long ret = H_SUCCESS;
0932     int i;
0933 
0934     /* Used later to detect if we might have been invalidated */
0935     mmu_seq = kvm->mmu_invalidate_seq;
0936     smp_rmb();
0937 
0938     arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
0939 
0940     ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &pa, &memslot);
0941     if (ret != H_SUCCESS)
0942         goto out_unlock;
0943 
0944     /* Zero the page */
0945     for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES)
0946         dcbz((void *)pa);
0947     kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE);
0948 
0949 out_unlock:
0950     arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
0951     return ret;
0952 }
0953 
0954 static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu,
0955                        unsigned long dest, unsigned long src)
0956 {
0957     unsigned long dest_pa, src_pa, mmu_seq;
0958     struct kvm_memory_slot *dest_memslot;
0959     struct kvm *kvm = vcpu->kvm;
0960     long ret = H_SUCCESS;
0961 
0962     /* Used later to detect if we might have been invalidated */
0963     mmu_seq = kvm->mmu_invalidate_seq;
0964     smp_rmb();
0965 
0966     arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
0967     ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &dest_pa, &dest_memslot);
0968     if (ret != H_SUCCESS)
0969         goto out_unlock;
0970 
0971     ret = kvmppc_get_hpa(vcpu, mmu_seq, src, 0, &src_pa, NULL);
0972     if (ret != H_SUCCESS)
0973         goto out_unlock;
0974 
0975     /* Copy the page */
0976     memcpy((void *)dest_pa, (void *)src_pa, SZ_4K);
0977 
0978     kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE);
0979 
0980 out_unlock:
0981     arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
0982     return ret;
0983 }
0984 
0985 long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
0986                unsigned long dest, unsigned long src)
0987 {
0988     struct kvm *kvm = vcpu->kvm;
0989     u64 pg_mask = SZ_4K - 1;    /* 4K page size */
0990     long ret = H_SUCCESS;
0991 
0992     /* Don't handle radix mode here, go up to the virtual mode handler */
0993     if (kvm_is_radix(kvm))
0994         return H_TOO_HARD;
0995 
0996     /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */
0997     if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
0998               H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
0999         return H_PARAMETER;
1000 
1001     /* dest (and src if copy_page flag set) must be page aligned */
1002     if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
1003         return H_PARAMETER;
1004 
1005     /* zero and/or copy the page as determined by the flags */
1006     if (flags & H_COPY_PAGE)
1007         ret = kvmppc_do_h_page_init_copy(vcpu, dest, src);
1008     else if (flags & H_ZERO_PAGE)
1009         ret = kvmppc_do_h_page_init_zero(vcpu, dest);
1010 
1011     /* We can ignore the other flags */
1012 
1013     return ret;
1014 }
1015 
1016 void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
1017             unsigned long pte_index)
1018 {
1019     unsigned long rb;
1020     u64 hp0, hp1;
1021 
1022     hptep[0] &= ~cpu_to_be64(HPTE_V_VALID);
1023     hp0 = be64_to_cpu(hptep[0]);
1024     hp1 = be64_to_cpu(hptep[1]);
1025     if (cpu_has_feature(CPU_FTR_ARCH_300)) {
1026         hp0 = hpte_new_to_old_v(hp0, hp1);
1027         hp1 = hpte_new_to_old_r(hp1);
1028     }
1029     rb = compute_tlbie_rb(hp0, hp1, pte_index);
1030     do_tlbies(kvm, &rb, 1, 1, true);
1031 }
1032 EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
1033 
1034 void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
1035                unsigned long pte_index)
1036 {
1037     unsigned long rb;
1038     unsigned char rbyte;
1039     u64 hp0, hp1;
1040 
1041     hp0 = be64_to_cpu(hptep[0]);
1042     hp1 = be64_to_cpu(hptep[1]);
1043     if (cpu_has_feature(CPU_FTR_ARCH_300)) {
1044         hp0 = hpte_new_to_old_v(hp0, hp1);
1045         hp1 = hpte_new_to_old_r(hp1);
1046     }
1047     rb = compute_tlbie_rb(hp0, hp1, pte_index);
1048     rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8;
1049     /* modify only the second-last byte, which contains the ref bit */
1050     *((char *)hptep + 14) = rbyte;
1051     do_tlbies(kvm, &rb, 1, 1, false);
1052 }
1053 EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
1054 
1055 static int slb_base_page_shift[4] = {
1056     24, /* 16M */
1057     16, /* 64k */
1058     34, /* 16G */
1059     20, /* 1M, unsupported */
1060 };
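/*
 * Note (added): this table is indexed by the 2-bit LP field taken from the
 * SLB VSID word ((slb_v & SLB_VSID_LP) >> 4) and is only consulted when
 * SLB_VSID_L indicates a large base page; otherwise the base page shift
 * defaults to 12 (4K).
 */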
1061 
1062 static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu,
1063         unsigned long eaddr, unsigned long slb_v, long mmio_update)
1064 {
1065     struct mmio_hpte_cache_entry *entry = NULL;
1066     unsigned int pshift;
1067     unsigned int i;
1068 
1069     for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) {
1070         entry = &vcpu->arch.mmio_cache.entry[i];
1071         if (entry->mmio_update == mmio_update) {
1072             pshift = entry->slb_base_pshift;
1073             if ((entry->eaddr >> pshift) == (eaddr >> pshift) &&
1074                 entry->slb_v == slb_v)
1075                 return entry;
1076         }
1077     }
1078     return NULL;
1079 }
1080 
1081 static struct mmio_hpte_cache_entry *
1082             next_mmio_cache_entry(struct kvm_vcpu *vcpu)
1083 {
1084     unsigned int index = vcpu->arch.mmio_cache.index;
1085 
1086     vcpu->arch.mmio_cache.index++;
1087     if (vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE)
1088         vcpu->arch.mmio_cache.index = 0;
1089 
1090     return &vcpu->arch.mmio_cache.entry[index];
1091 }
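/*
 * Note (added): the cache is a small ring of MMIO_HPTE_CACHE_SIZE entries
 * recycled round-robin.  Entries go stale implicitly: whenever an MMIO HPTE
 * is removed or changed, kvm->arch.mmio_update is bumped, and
 * mmio_cache_search() only returns entries whose recorded mmio_update still
 * matches the current value.
 */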
1092 
1093 /*
1094  * When called from virtual mode, this function should be protected by
1095  * preempt_disable(); otherwise, holding HPTE_V_HVLOCK can lead to deadlock.
1096  */
1097 long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
1098                   unsigned long valid)
1099 {
1100     unsigned int i;
1101     unsigned int pshift;
1102     unsigned long somask;
1103     unsigned long vsid, hash;
1104     unsigned long avpn;
1105     __be64 *hpte;
1106     unsigned long mask, val;
1107     unsigned long v, r, orig_v;
1108 
1109     /* Get page shift, work out hash and AVPN etc. */
1110     mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
1111     val = 0;
1112     pshift = 12;
1113     if (slb_v & SLB_VSID_L) {
1114         mask |= HPTE_V_LARGE;
1115         val |= HPTE_V_LARGE;
1116         pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
1117     }
1118     if (slb_v & SLB_VSID_B_1T) {
1119         somask = (1UL << 40) - 1;
1120         vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
1121         vsid ^= vsid << 25;
1122     } else {
1123         somask = (1UL << 28) - 1;
1124         vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
1125     }
1126     hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(&kvm->arch.hpt);
1127     avpn = slb_v & ~(somask >> 16); /* also includes B */
1128     avpn |= (eaddr & somask) >> 16;
1129 
1130     if (pshift >= 24)
1131         avpn &= ~((1UL << (pshift - 16)) - 1);
1132     else
1133         avpn &= ~0x7fUL;
1134     val |= avpn;
1135 
1136     for (;;) {
1137         hpte = (__be64 *)(kvm->arch.hpt.virt + (hash << 7));
1138 
1139         for (i = 0; i < 16; i += 2) {
1140             /* Read the PTE racily */
1141             v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
1142             if (cpu_has_feature(CPU_FTR_ARCH_300))
1143                 v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1]));
1144 
1145             /* Check valid/absent, hash, segment size and AVPN */
1146             if (!(v & valid) || (v & mask) != val)
1147                 continue;
1148 
1149             /* Lock the PTE and read it under the lock */
1150             while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
1151                 cpu_relax();
1152             v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
1153             r = be64_to_cpu(hpte[i+1]);
1154             if (cpu_has_feature(CPU_FTR_ARCH_300)) {
1155                 v = hpte_new_to_old_v(v, r);
1156                 r = hpte_new_to_old_r(r);
1157             }
1158 
1159             /*
1160              * Check the HPTE again, including base page size
1161              */
1162             if ((v & valid) && (v & mask) == val &&
1163                 kvmppc_hpte_base_page_shift(v, r) == pshift)
1164                 /* Return with the HPTE still locked */
1165                 return (hash << 3) + (i >> 1);
1166 
1167             __unlock_hpte(&hpte[i], orig_v);
1168         }
1169 
1170         if (val & HPTE_V_SECONDARY)
1171             break;
1172         val |= HPTE_V_SECONDARY;
1173         hash = hash ^ kvmppc_hpt_mask(&kvm->arch.hpt);
1174     }
1175     return -1;
1176 }
1177 EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);
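/*
 * Illustrative sketch of the hashing used above (added; the helper name is
 * hypothetical): the primary hash mixes the VSID with the page-aligned offset
 * within the segment, masked to the table size; the secondary bucket is
 * reached by complementing the hash within the table (hash ^ hpt_mask) and
 * searching with HPTE_V_SECONDARY set.  Each bucket (PTEG) is 8 HPTEs of
 * 16 bytes, hence the "hash << 7" byte offset.
 */
static inline unsigned long hpt_primary_hash(unsigned long vsid,
					     unsigned long seg_offset,
					     unsigned int pshift,
					     unsigned long hpt_mask)
{
	return (vsid ^ (seg_offset >> pshift)) & hpt_mask;
}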
1178 
1179 /*
1180  * Called in real mode to check whether an HPTE not found fault
1181  * is due to accessing a paged-out page or an emulated MMIO page,
1182  * or if a protection fault is due to accessing a page that the
1183  * guest wanted read/write access to but which we made read-only.
1184  * Returns a possibly modified status (DSISR) value if none of the above
1185  * apply (i.e. the interrupt should be passed to the guest),
1186  * -1 to pass the fault up to host kernel mode code, -2 to do that
1187  * and also load the instruction word (for MMIO emulation),
1188  * or 0 if we should make the guest retry the access.
1189  */
1190 long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
1191               unsigned long slb_v, unsigned int status, bool data)
1192 {
1193     struct kvm *kvm = vcpu->kvm;
1194     long int index;
1195     unsigned long v, r, gr, orig_v;
1196     __be64 *hpte;
1197     unsigned long valid;
1198     struct revmap_entry *rev;
1199     unsigned long pp, key;
1200     struct mmio_hpte_cache_entry *cache_entry = NULL;
1201     long mmio_update = 0;
1202 
1203     /* For protection fault, expect to find a valid HPTE */
1204     valid = HPTE_V_VALID;
1205     if (status & DSISR_NOHPTE) {
1206         valid |= HPTE_V_ABSENT;
1207         mmio_update = atomic64_read(&kvm->arch.mmio_update);
1208         cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update);
1209     }
1210     if (cache_entry) {
1211         index = cache_entry->pte_index;
1212         v = cache_entry->hpte_v;
1213         r = cache_entry->hpte_r;
1214         gr = cache_entry->rpte;
1215     } else {
1216         index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
1217         if (index < 0) {
1218             if (status & DSISR_NOHPTE)
1219                 return status;  /* there really was no HPTE */
1220             return 0;   /* for prot fault, HPTE disappeared */
1221         }
1222         hpte = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
1223         v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
1224         r = be64_to_cpu(hpte[1]);
1225         if (cpu_has_feature(CPU_FTR_ARCH_300)) {
1226             v = hpte_new_to_old_v(v, r);
1227             r = hpte_new_to_old_r(r);
1228         }
1229         rev = real_vmalloc_addr(&kvm->arch.hpt.rev[index]);
1230         gr = rev->guest_rpte;
1231 
1232         unlock_hpte(hpte, orig_v);
1233     }
1234 
1235     /* For not found, if the HPTE is valid by now, retry the instruction */
1236     if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
1237         return 0;
1238 
1239     /* Check access permissions to the page */
1240     pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
1241     key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
1242     status &= ~DSISR_NOHPTE;    /* DSISR_NOHPTE == SRR1_ISI_NOPT */
1243     if (!data) {
1244         if (gr & (HPTE_R_N | HPTE_R_G))
1245             return status | SRR1_ISI_N_G_OR_CIP;
1246         if (!hpte_read_permission(pp, slb_v & key))
1247             return status | SRR1_ISI_PROT;
1248     } else if (status & DSISR_ISSTORE) {
1249         /* check write permission */
1250         if (!hpte_write_permission(pp, slb_v & key))
1251             return status | DSISR_PROTFAULT;
1252     } else {
1253         if (!hpte_read_permission(pp, slb_v & key))
1254             return status | DSISR_PROTFAULT;
1255     }
1256 
1257     /* Check storage key, if applicable */
1258     if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
1259         unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
1260         if (status & DSISR_ISSTORE)
1261             perm >>= 1;
1262         if (perm & 1)
1263             return status | DSISR_KEYFAULT;
1264     }
1265 
1266     /* Save HPTE info for virtual-mode handler */
1267     vcpu->arch.pgfault_addr = addr;
1268     vcpu->arch.pgfault_index = index;
1269     vcpu->arch.pgfault_hpte[0] = v;
1270     vcpu->arch.pgfault_hpte[1] = r;
1271     vcpu->arch.pgfault_cache = cache_entry;
1272 
1273     /* Check the storage key to see if it is possibly emulated MMIO */
1274     if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
1275         (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) {
1276         if (!cache_entry) {
1277             unsigned int pshift = 12;
1278             unsigned int pshift_index;
1279 
1280             if (slb_v & SLB_VSID_L) {
1281                 pshift_index = ((slb_v & SLB_VSID_LP) >> 4);
1282                 pshift = slb_base_page_shift[pshift_index];
1283             }
1284             cache_entry = next_mmio_cache_entry(vcpu);
1285             cache_entry->eaddr = addr;
1286             cache_entry->slb_base_pshift = pshift;
1287             cache_entry->pte_index = index;
1288             cache_entry->hpte_v = v;
1289             cache_entry->hpte_r = r;
1290             cache_entry->rpte = gr;
1291             cache_entry->slb_v = slb_v;
1292             cache_entry->mmio_update = mmio_update;
1293         }
1294         if (data && (vcpu->arch.shregs.msr & MSR_IR))
1295             return -2;  /* MMIO emulation - load instr word */
1296     }
1297 
1298     return -1;      /* send fault up to host kernel mode */
1299 }
1300 EXPORT_SYMBOL_GPL(kvmppc_hpte_hv_fault);