// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * This file contains the routines for handling the MMU on those
 * PowerPC implementations where the MMU is not using the hash
 * table, such as 8xx, 4xx, BookE, etc.
 *
 * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
 *                IBM Corp.
 *
 *  Derived from previous arch/powerpc/mm/mmu_context.c
 *  and arch/powerpc/include/asm/mmu_context.h
 *
 * TODO:
 *
 *   - The global context lock will not scale very well
 *   - The maps should be dynamically allocated to allow for processors
 *     that support more PID bits at runtime
 *   - Implement flush_tlb_mm() by making the context stale and picking
 *     a new one
 *   - More aggressively clear stale map bits and maybe find some way to
 *     also clear mm->cpu_vm_mask bits when processes are migrated
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/memblock.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/slab.h>

#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/smp.h>
#include <asm/kup.h>

#include <mm/mmu_decl.h>

/*
 * Room for two PTE table pointers, usually the kernel and current user
 * pointer to their respective root page table (pgdir).
 */
void *abatron_pteptrs[2];
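/*
 * Slot 1 is refreshed with the incoming mm's pgd in switch_mmu_context()
 * when CONFIG_BDI_SWITCH is enabled, so an attached hardware debugger can
 * locate the current user page tables.
 */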

/*
 * The MPC8xx has only 16 contexts. We rotate through them on each task switch.
 * A better way would be to keep track of tasks that own contexts, and implement
 * an LRU usage. That way very active tasks don't always have to pay the TLB
 * reload overhead. The kernel pages are mapped shared, so the kernel can run on
 * behalf of any task that makes a kernel entry. Shared does not mean they are
 * not protected, just that the ASID comparison is not performed. -- Dan
 *
 * The IBM4xx has 256 contexts, so we can just rotate through these as a way of
 * "switching" contexts. If the TID of the TLB is zero, the PID/TID comparison
 * is disabled, so we can use a TID of zero to represent all kernel pages as
 * shared among all contexts. -- Dan
 *
 * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We should
 * normally never have to steal though the facility is present if needed.
 * -- BenH
 */
#define FIRST_CONTEXT 1
#if defined(CONFIG_PPC_8xx)
#define LAST_CONTEXT 16
#elif defined(CONFIG_PPC_47x)
#define LAST_CONTEXT 65535
#else
#define LAST_CONTEXT 255
#endif

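/*
 * Context allocator state, guarded by context_lock:
 *
 *   next_context     - next context number to try when allocating
 *   nr_free_contexts - number of context numbers not currently assigned
 *   context_map      - bitmap of context numbers currently in use
 *   context_mm       - reverse map from context number to owning mm
 *   stale_map[cpu]   - per-CPU bitmap of contexts whose TLB entries on
 *                      that CPU may be stale and need flushing before
 *                      the context is used there again
 */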
static unsigned int next_context, nr_free_contexts;
static unsigned long *context_map;
static unsigned long *stale_map[NR_CPUS];
static struct mm_struct **context_mm;
static DEFINE_RAW_SPINLOCK(context_lock);

#define CTX_MAP_SIZE    \
    (sizeof(unsigned long) * (LAST_CONTEXT / BITS_PER_LONG + 1))
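/*
 * CTX_MAP_SIZE is the size in bytes of a bitmap with one bit per context
 * number 0..LAST_CONTEXT, rounded up to whole longs: with LAST_CONTEXT
 * of 65535 (47x) that is 8KB, while with LAST_CONTEXT of 16 (8xx) a
 * single long is enough.
 */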

/* Steal a context from a task that has one at the moment.
 *
 * This is used when we are running out of available PID numbers
 * on the processors.
 *
 * This isn't an LRU system, it just frees up each context in
 * turn (sort-of pseudo-random replacement :).  This would be the
 * place to implement an LRU scheme if anyone was motivated to do it.
 *  -- paulus
 *
 * For context stealing, we use a slightly different approach for
 * SMP and UP. Basically, the UP one is simpler and doesn't use
 * the stale map as we can just flush the local CPU
 *  -- benh
 */
static unsigned int steal_context_smp(unsigned int id)
{
    struct mm_struct *mm;
    unsigned int cpu, max, i;

    max = LAST_CONTEXT - FIRST_CONTEXT;

    /* Attempt to free next_context first and then loop until we manage */
    while (max--) {
        /* Pick up the victim mm */
        mm = context_mm[id];

        /* We have a candidate victim, check if it's active; on SMP
         * we cannot steal active contexts
         */
        if (mm->context.active) {
            id++;
            if (id > LAST_CONTEXT)
                id = FIRST_CONTEXT;
            continue;
        }

        /* Mark this mm as having no context anymore */
        mm->context.id = MMU_NO_CONTEXT;

        /* Mark it stale on all CPUs that used this mm. For threaded
         * implementations, we set it on all threads on each core
         * represented in the mask. A future implementation will use
         * a core map instead but this will do for now.
         */
        for_each_cpu(cpu, mm_cpumask(mm)) {
            for (i = cpu_first_thread_sibling(cpu);
                 i <= cpu_last_thread_sibling(cpu); i++) {
                if (stale_map[i])
                    __set_bit(id, stale_map[i]);
            }
            cpu = i - 1;
        }
        return id;
    }

    /* This will happen if you have more CPUs than available contexts,
     * all we can do here is wait a bit and try again
     */
    raw_spin_unlock(&context_lock);
    cpu_relax();
    raw_spin_lock(&context_lock);

    /* This will cause the caller to try again */
    return MMU_NO_CONTEXT;
}

static unsigned int steal_all_contexts(void)
{
    struct mm_struct *mm;
    int cpu = smp_processor_id();
    unsigned int id;

    for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
        /* Pick up the victim mm */
        mm = context_mm[id];

        /* Mark this mm as having no context anymore */
        mm->context.id = MMU_NO_CONTEXT;
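        /*
         * Every context except FIRST_CONTEXT is released here;
         * FIRST_CONTEXT stays set in context_map because it is
         * returned below and reused right away by the caller.
         */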
        if (id != FIRST_CONTEXT) {
            context_mm[id] = NULL;
            __clear_bit(id, context_map);
        }
        if (IS_ENABLED(CONFIG_SMP))
            __clear_bit(id, stale_map[cpu]);
    }

    /* Flush the TLB for all contexts (not to be used on SMP) */
    _tlbil_all();

    nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT;

    return FIRST_CONTEXT;
}

/* Note that this will also be called on SMP if all other CPUs are
 * offlined, which means that it may be called for cpu != 0. For
 * this to work, we somewhat assume that CPUs that are onlined
 * come up with a fully clean TLB (or are cleaned when offlined)
 */
static unsigned int steal_context_up(unsigned int id)
{
    struct mm_struct *mm;
    int cpu = smp_processor_id();

    /* Pick up the victim mm */
    mm = context_mm[id];

    /* Flush the TLB for that context */
    local_flush_tlb_mm(mm);

    /* Mark this mm as having no context anymore */
    mm->context.id = MMU_NO_CONTEXT;

    /* XXX This clear should ultimately be part of local_flush_tlb_mm */
    if (IS_ENABLED(CONFIG_SMP))
        __clear_bit(id, stale_map[cpu]);

    return id;
}

static void set_context(unsigned long id, pgd_t *pgd)
{
    if (IS_ENABLED(CONFIG_PPC_8xx)) {
        s16 offset = (s16)(__pa(swapper_pg_dir));

        /*
         * Register M_TWB will contain base address of level 1 table minus the
         * lower part of the kernel PGDIR base address, so that all accesses to
         * level 1 table are done relative to lower part of kernel PGDIR base
         * address.
         */
        mtspr(SPRN_M_TWB, __pa(pgd) - offset);

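        /*
         * Context numbers are handed out starting at FIRST_CONTEXT (1),
         * while the 8xx CASID field counts from 0, hence the "id - 1"
         * written below.
         */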
        /* Update context */
        mtspr(SPRN_M_CASID, id - 1);

        /* sync */
        mb();
    } else if (kuap_is_disabled()) {
        if (IS_ENABLED(CONFIG_40x))
            mb();   /* sync */

        mtspr(SPRN_PID, id);
        isync();
    }
}

void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
            struct task_struct *tsk)
{
    unsigned int id;
    unsigned int i, cpu = smp_processor_id();
    unsigned long *map;

    /* No lockless fast path .. yet */
    raw_spin_lock(&context_lock);

    if (IS_ENABLED(CONFIG_SMP)) {
        /* Mark us active and the previous one not anymore */
        next->context.active++;
        if (prev) {
            WARN_ON(prev->context.active < 1);
            prev->context.active--;
        }
    }

 again:

    /* If we already have a valid assigned context, skip all that */
    id = next->context.id;
    if (likely(id != MMU_NO_CONTEXT))
        goto ctxt_ok;

    /* We really don't have a context, let's try to acquire one */
    id = next_context;
    if (id > LAST_CONTEXT)
        id = FIRST_CONTEXT;
    map = context_map;

    /* No more free contexts, let's try to steal one */
    if (nr_free_contexts == 0) {
        if (num_online_cpus() > 1) {
            id = steal_context_smp(id);
            if (id == MMU_NO_CONTEXT)
                goto again;
            goto stolen;
        }
        if (IS_ENABLED(CONFIG_PPC_8xx))
            id = steal_all_contexts();
        else
            id = steal_context_up(id);
        goto stolen;
    }
    nr_free_contexts--;

    /* We know there's at least one free context, try to find it */
    while (__test_and_set_bit(id, map)) {
        id = find_next_zero_bit(map, LAST_CONTEXT+1, id);
        if (id > LAST_CONTEXT)
            id = FIRST_CONTEXT;
    }
 stolen:
    next_context = id + 1;
    context_mm[id] = next;
    next->context.id = id;

 ctxt_ok:

    /* If that context got marked stale on this CPU, then flush the
     * local TLB for it and unmark it before we use it
     */
    if (IS_ENABLED(CONFIG_SMP) && test_bit(id, stale_map[cpu])) {
        local_flush_tlb_mm(next);

        /* XXX This clear should ultimately be part of local_flush_tlb_mm */
        for (i = cpu_first_thread_sibling(cpu);
             i <= cpu_last_thread_sibling(cpu); i++) {
            if (stale_map[i])
                __clear_bit(id, stale_map[i]);
        }
    }

    /* Flick the MMU and release lock */
    if (IS_ENABLED(CONFIG_BDI_SWITCH))
        abatron_pteptrs[1] = next->pgd;
    set_context(id, next->pgd);
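    /*
     * With KUAP enabled on 40x/BookE, set_context() above skipped the
     * SPRN_PID update; record the new context in thread.pid so the KUAP
     * handling code can load the PID register itself.
     */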
#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
    tsk->thread.pid = id;
#endif
    raw_spin_unlock(&context_lock);
}

/*
 * Set up the context for a new address space.
 */
int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
    mm->context.id = MMU_NO_CONTEXT;
    mm->context.active = 0;
    pte_frag_set(&mm->context, NULL);
    return 0;
}

/*
 * We're finished using the context for an address space.
 */
void destroy_context(struct mm_struct *mm)
{
    unsigned long flags;
    unsigned int id;

    if (mm->context.id == MMU_NO_CONTEXT)
        return;

    WARN_ON(mm->context.active != 0);

    raw_spin_lock_irqsave(&context_lock, flags);
    id = mm->context.id;
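    /* Re-check under the lock; the early test at function entry was an
     * unlocked fast path only
     */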
    if (id != MMU_NO_CONTEXT) {
        __clear_bit(id, context_map);
        mm->context.id = MMU_NO_CONTEXT;
        context_mm[id] = NULL;
        nr_free_contexts++;
    }
    raw_spin_unlock_irqrestore(&context_lock, flags);
}

static int mmu_ctx_cpu_prepare(unsigned int cpu)
{
    /* We don't touch the boot CPU map, it's allocated at boot and kept
     * around forever
     */
    if (cpu == boot_cpuid)
        return 0;

    stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
    return 0;
}

static int mmu_ctx_cpu_dead(unsigned int cpu)
{
#ifdef CONFIG_HOTPLUG_CPU
    if (cpu == boot_cpuid)
        return 0;

    kfree(stale_map[cpu]);
    stale_map[cpu] = NULL;

    /* We also clear the cpu_vm_mask bits of CPUs going away */
    clear_tasks_mm_cpumask(cpu);
#endif
    return 0;
}

/*
 * Initialize the context management stuff.
 */
void __init mmu_context_init(void)
{
    /* Mark init_mm as being active on all possible CPUs since
     * we'll get called with prev == init_mm the first time
     * we schedule on a given CPU
     */
    init_mm.context.active = NR_CPUS;

    /*
     * Allocate the maps used by context management
     */
    context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
    if (!context_map)
        panic("%s: Failed to allocate %zu bytes\n", __func__,
              CTX_MAP_SIZE);
    context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1),
                    SMP_CACHE_BYTES);
    if (!context_mm)
        panic("%s: Failed to allocate %zu bytes\n", __func__,
              sizeof(void *) * (LAST_CONTEXT + 1));
    if (IS_ENABLED(CONFIG_SMP)) {
        stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
        if (!stale_map[boot_cpuid])
            panic("%s: Failed to allocate %zu bytes\n", __func__,
                  CTX_MAP_SIZE);

        cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
                      "powerpc/mmu/ctx:prepare",
                      mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
    }

    printk(KERN_INFO
           "MMU: Allocated %zu bytes of context maps for %d contexts\n",
           2 * CTX_MAP_SIZE + (sizeof(void *) * (LAST_CONTEXT + 1)),
           LAST_CONTEXT - FIRST_CONTEXT + 1);

    /*
     * Some processors have too few contexts to reserve one for
     * init_mm, and require using context 0 for a normal task.
     * Other processors reserve the use of context zero for the kernel.
     * This code assumes FIRST_CONTEXT < 32.
     */
    context_map[0] = (1 << FIRST_CONTEXT) - 1;
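    /*
     * With FIRST_CONTEXT == 1 the expression above is 0x1, i.e. only
     * context 0 is marked busy, so it is never handed out to a user mm.
     */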
    next_context = FIRST_CONTEXT;
    nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
}