// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * This file contains the routines for flushing entries from the
 * TLB and MMU hash table.
 *
 *  Derived from arch/ppc64/mm/init.c:
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
 *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
 *    Copyright (C) 1996 Paul Mackerras
 *
 *  Derived from "arch/i386/mm/init.c"
 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Dave Engebretsen <engebret@us.ibm.com>
 *      Rework for PPC64 port.
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/bug.h>
#include <asm/pte-walk.h>

#include <trace/events/thp.h>

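/*
 * Per-CPU batch of pending hash/TLB invalidations: hpte_need_flush()
 * queues (vpn, pte) pairs here and __flush_tlb_pending() issues them
 * in one go once the batch is full or the batching section ends.
 */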
DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);

/*
 * A linux PTE was changed and the corresponding hash table entry
 * needs to be flushed. This function will either perform the flush
 * immediately or will batch it up if the current CPU has an active
 * batch on it.
 */
void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
             pte_t *ptep, unsigned long pte, int huge)
{
    unsigned long vpn;
    struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
    unsigned long vsid;
    unsigned int psize;
    int ssize;
    real_pte_t rpte;
    int i, offset;

    i = batch->index;

    /*
     * Get page size (maybe move back to caller).
     *
     * NOTE: when using special 64K mappings in 4K environment like
     * for SPEs, we obtain the page size from the slice, which thus
     * must still exist (and thus the VMA not reused) at the time
     * of this call
     */
    if (huge) {
#ifdef CONFIG_HUGETLB_PAGE
        psize = get_slice_psize(mm, addr);
        /* Mask the address for the correct page size */
        addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
        if (unlikely(psize == MMU_PAGE_16G))
            offset = PTRS_PER_PUD;
        else
            offset = PTRS_PER_PMD;
#else
        BUG();
        psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
#endif
    } else {
        psize = pte_pagesize_index(mm, addr, pte);
        /*
         * Mask the address for the standard page size.  If we
         * have a 64k page kernel, but the hardware does not
         * support 64k pages, this might be different from the
         * hardware page size encoded in the slice table.
         */
        addr &= PAGE_MASK;
        offset = PTRS_PER_PTE;
    }
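
    /*
     * offset is handed to __real_pte() below: on configurations that
     * store the second half of the PTE (the hash slot bits) separately,
     * it is the distance, in pte_t entries, from ptep to that second
     * half for this page size.
     */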

    /* Build full vaddr */
    if (!is_kernel_addr(addr)) {
        ssize = user_segment_size(addr);
        vsid = get_user_vsid(&mm->context, addr, ssize);
    } else {
        vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
        ssize = mmu_kernel_ssize;
    }
    WARN_ON(vsid == 0);
    vpn = hpt_vpn(addr, vsid, ssize);
    rpte = __real_pte(__pte(pte), ptep, offset);

    /*
     * Check if we have an active batch on this CPU. If not, just
     * flush now and return.
     */
    if (!batch->active) {
        flush_hash_page(vpn, rpte, psize, ssize, mm_is_thread_local(mm));
        put_cpu_var(ppc64_tlb_batch);
        return;
    }

    /*
     * This can happen when we are in the middle of a TLB batch and
     * we encounter memory pressure (eg copy_page_range when it tries
     * to allocate a new pte). If we have to reclaim memory and end
     * up scanning and resetting referenced bits then our batch context
     * will change mid stream.
     *
     * We also need to ensure only one page size is present in a given
     * batch
     */
    if (i != 0 && (mm != batch->mm || batch->psize != psize ||
               batch->ssize != ssize)) {
        __flush_tlb_pending(batch);
        i = 0;
    }
    if (i == 0) {
        batch->mm = mm;
        batch->psize = psize;
        batch->ssize = ssize;
    }
    batch->pte[i] = rpte;
    batch->vpn[i] = vpn;
    batch->index = ++i;
    if (i >= PPC64_TLB_BATCH_NR)
        __flush_tlb_pending(batch);
    put_cpu_var(ppc64_tlb_batch);
}
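
/*
 * Illustrative calling pattern (a sketch, not the exact caller): the
 * Linux PTE update paths invoke this once they know the old PTE had
 * been inserted into the hash table, roughly:
 *
 *	old = pte_update(mm, addr, ptep, clr, set, huge);
 *	if (old & H_PAGE_HASHPTE)
 *		hpte_need_flush(mm, addr, ptep, old, huge);
 */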

/*
 * This function is called when terminating an mmu batch or when a batch
 * is full. It will perform the flush of all the entries currently stored
 * in a batch.
 *
 * Must be called from within some kind of spinlock/non-preempt region...
 */
void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
{
    int i, local;

    i = batch->index;
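    /* A thread-local mm lets us use the cheaper, non-broadcast invalidation. */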
    local = mm_is_thread_local(batch->mm);
    if (i == 1)
        flush_hash_page(batch->vpn[0], batch->pte[0],
                batch->psize, batch->ssize, local);
    else
        flush_hash_range(i, local);
    batch->index = 0;
}

void hash__tlb_flush(struct mmu_gather *tlb)
{
    struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);

    /*
     * If there's a TLB batch pending, then we must flush it because the
     * pages are going to be freed and we really don't want to have a CPU
     * access a freed page because it has a stale TLB
     */
    if (tlbbatch->index)
        __flush_tlb_pending(tlbbatch);

    put_cpu_var(ppc64_tlb_batch);
}

/**
 * __flush_hash_table_range - Flush all HPTEs for a given address range
 *                            from the hash table (and the TLB). But keeps
 *                            the linux PTEs intact.
 *
 * @start : starting address
 * @end   : ending address (not included in the flush)
 *
 * This function is mostly to be used by some IO hotplug code in order
 * to remove all hash entries from a given address range used to map IO
 * space on a removed PCI-PCI bridge without tearing down the full mapping
 * since 64K pages may overlap with other bridges when using 64K pages
 * with 4K HW pages on IO space.
 *
 * Because of that usage pattern, it is implemented for small size rather
 * than speed.
 */
void __flush_hash_table_range(unsigned long start, unsigned long end)
{
    int hugepage_shift;
    unsigned long flags;

    start = ALIGN_DOWN(start, PAGE_SIZE);
    end = ALIGN(end, PAGE_SIZE);

    /*
     * Note: Normally, we should only ever use a batch within a
     * PTE locked section. This violates the rule, but will work
     * since we don't actually modify the PTEs, we just flush the
     * hash while leaving the PTEs intact (including their reference
     * to being hashed). This is not the most performance oriented
     * way to do things but is fine for our needs here.
     */
    local_irq_save(flags);
    arch_enter_lazy_mmu_mode();
    for (; start < end; start += PAGE_SIZE) {
        pte_t *ptep = find_init_mm_pte(start, &hugepage_shift);
        unsigned long pte;

        if (ptep == NULL)
            continue;
        pte = pte_val(*ptep);
        if (!(pte & H_PAGE_HASHPTE))
            continue;
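        /*
         * hugepage_shift is non-zero for huge mappings; hpte_need_flush()
         * only cares whether it is set.
         */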
        hpte_need_flush(&init_mm, start, ptep, pte, hugepage_shift);
    }
    arch_leave_lazy_mmu_mode();
    local_irq_restore(flags);
}

void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
{
    pte_t *pte;
    pte_t *start_pte;
    unsigned long flags;

    addr = ALIGN_DOWN(addr, PMD_SIZE);
    /*
     * Note: Normally, we should only ever use a batch within a
     * PTE locked section. This violates the rule, but will work
     * since we don't actually modify the PTEs, we just flush the
     * hash while leaving the PTEs intact (including their reference
     * to being hashed). This is not the most performance oriented
     * way to do things but is fine for our needs here.
     */
    local_irq_save(flags);
    arch_enter_lazy_mmu_mode();
    start_pte = pte_offset_map(pmd, addr);
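    /* Walk every PTE under this PMD and queue a flush for each hashed entry. */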
    for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) {
        unsigned long pteval = pte_val(*pte);
        if (pteval & H_PAGE_HASHPTE)
            hpte_need_flush(mm, addr, pte, pteval, 0);
        addr += PAGE_SIZE;
    }
    arch_leave_lazy_mmu_mode();
    local_irq_restore(flags);
}