/*
 *  This file contains ioremap and related functions for 64-bit machines.
 *
 *  Derived from arch/ppc64/mm/init.c
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
 *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
 *    Copyright (C) 1996 Paul Mackerras
 *
 *  Derived from "arch/i386/mm/init.c"
 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Dave Engebretsen <engebret@us.ibm.com>
 *      Rework for PPC64 port.
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 *
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/stddef.h>
#include <linux/vmalloc.h>
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>

#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/tlb.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/firmware.h>
#include <asm/dma.h>

#include "mmu_decl.h"

#ifdef CONFIG_PPC_STD_MMU_64
#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
#error TASK_SIZE_USER64 exceeds user VSID range
#endif
#endif
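
/*
 * Illustration (added, values assumed): with the usual hash MMU constants
 * SID_SHIFT = 28 (256MB segments) and ESID_BITS = 18, each context can
 * cover 1UL << (18 + 28) = 2^46 bytes = 64TB of user address space, so
 * this build-time check fails if TASK_SIZE_USER64 is raised beyond what
 * the per-context VSID space can map.
 */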

#ifdef CONFIG_PPC_BOOK3S_64
/*
 * partition table and process table for ISA 3.0
 */
struct prtb_entry *process_tb;
struct patb_entry *partition_tb;
/*
 * page table size
 */
unsigned long __pte_index_size;
EXPORT_SYMBOL(__pte_index_size);
unsigned long __pmd_index_size;
EXPORT_SYMBOL(__pmd_index_size);
unsigned long __pud_index_size;
EXPORT_SYMBOL(__pud_index_size);
unsigned long __pgd_index_size;
EXPORT_SYMBOL(__pgd_index_size);
unsigned long __pmd_cache_index;
EXPORT_SYMBOL(__pmd_cache_index);
unsigned long __pte_table_size;
EXPORT_SYMBOL(__pte_table_size);
unsigned long __pmd_table_size;
EXPORT_SYMBOL(__pmd_table_size);
unsigned long __pud_table_size;
EXPORT_SYMBOL(__pud_table_size);
unsigned long __pgd_table_size;
EXPORT_SYMBOL(__pgd_table_size);
unsigned long __pmd_val_bits;
EXPORT_SYMBOL(__pmd_val_bits);
unsigned long __pud_val_bits;
EXPORT_SYMBOL(__pud_val_bits);
unsigned long __pgd_val_bits;
EXPORT_SYMBOL(__pgd_val_bits);
unsigned long __kernel_virt_start;
EXPORT_SYMBOL(__kernel_virt_start);
unsigned long __kernel_virt_size;
EXPORT_SYMBOL(__kernel_virt_size);
unsigned long __vmalloc_start;
EXPORT_SYMBOL(__vmalloc_start);
unsigned long __vmalloc_end;
EXPORT_SYMBOL(__vmalloc_end);
struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long __pte_frag_nr;
EXPORT_SYMBOL(__pte_frag_nr);
unsigned long __pte_frag_size_shift;
EXPORT_SYMBOL(__pte_frag_size_shift);
unsigned long ioremap_bot;
#else /* !CONFIG_PPC_BOOK3S_64 */
unsigned long ioremap_bot = IOREMAP_BASE;
#endif

/**
 * __ioremap_at - Low level function to establish the page tables
 *                for an IO mapping
 */
void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
                unsigned long flags)
{
    unsigned long i;

    /* Make sure we have the base flags */
    if ((flags & _PAGE_PRESENT) == 0)
        flags |= pgprot_val(PAGE_KERNEL);

    /* We don't support the 4K PFN hack with ioremap */
    if (flags & H_PAGE_4K_PFN)
        return NULL;

    WARN_ON(pa & ~PAGE_MASK);
    WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
    WARN_ON(size & ~PAGE_MASK);

    for (i = 0; i < size; i += PAGE_SIZE)
        if (map_kernel_page((unsigned long)ea+i, pa+i, flags))
            return NULL;

    return (void __iomem *)ea;
}

/**
 * __iounmap_at - Low level function to tear down the page tables
 *                for an IO mapping. This is used for mappings that
 *                are manipulated manually, like partial unmapping of
 *                PCI IOs or ISA space.
 */
void __iounmap_at(void *ea, unsigned long size)
{
    WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
    WARN_ON(size & ~PAGE_MASK);

    unmap_kernel_range((unsigned long)ea, size);
}
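
/*
 * Illustrative sketch (added, not part of the original file): __ioremap_at()
 * and __iounmap_at() are the low-level pair used when the caller manages the
 * virtual range itself, e.g. PCI code mapping an IO window at a fixed
 * effective address. The function and error handling below are made up for
 * illustration; a real caller reserves the range and knows the bus address.
 */
#if 0	/* example only, not compiled */
static int example_map_io_window(phys_addr_t phys, void *ea, unsigned long size)
{
    void __iomem *va;

    /* phys, ea and size must all be page aligned */
    va = __ioremap_at(phys, ea, size,
              pgprot_val(pgprot_noncached(__pgprot(0))));
    if (!va)
        return -ENOMEM;

    /* ... access the window through va ... */

    /* Tear down the page tables once the window is no longer needed */
    __iounmap_at(ea, size);
    return 0;
}
#endif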

void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
                unsigned long flags, void *caller)
{
    phys_addr_t paligned;
    void __iomem *ret;

    /*
     * Choose an address to map it to. Once the slab allocator (and with
     * it the vmalloc area) is up, we allocate the virtual range between
     * ioremap_bot and IOREMAP_END from the vmap area. Before that, we
     * hand out bolted addresses going up from ioremap_bot.
     */
    paligned = addr & PAGE_MASK;
    size = PAGE_ALIGN(addr + size) - paligned;

    if ((size == 0) || (paligned == 0))
        return NULL;

    if (slab_is_available()) {
        struct vm_struct *area;

        area = __get_vm_area_caller(size, VM_IOREMAP,
                        ioremap_bot, IOREMAP_END,
                        caller);
        if (area == NULL)
            return NULL;

        area->phys_addr = paligned;
        ret = __ioremap_at(paligned, area->addr, size, flags);
        if (!ret)
            vunmap(area->addr);
    } else {
        ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
        if (ret)
            ioremap_bot += size;
    }

    if (ret)
        ret += addr & ~PAGE_MASK;
    return ret;
}
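
/*
 * Worked example (added for illustration, values assumed): with 64K pages,
 * a caller passing addr = 0x3fe01004 and size = 0x20 gets
 * paligned = 0x3fe00000 and size = PAGE_ALIGN(0x3fe01024) - 0x3fe00000
 * = 0x10000, i.e. the single page covering the request; the returned
 * cookie is the mapped base plus the in-page offset
 * (addr & ~PAGE_MASK = 0x1004).
 */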

void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
             unsigned long flags)
{
    return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
}

void __iomem * ioremap(phys_addr_t addr, unsigned long size)
{
    unsigned long flags = pgprot_val(pgprot_noncached(__pgprot(0)));
    void *caller = __builtin_return_address(0);

    if (ppc_md.ioremap)
        return ppc_md.ioremap(addr, size, flags, caller);
    return __ioremap_caller(addr, size, flags, caller);
}

void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
{
    unsigned long flags = pgprot_val(pgprot_noncached_wc(__pgprot(0)));
    void *caller = __builtin_return_address(0);

    if (ppc_md.ioremap)
        return ppc_md.ioremap(addr, size, flags, caller);
    return __ioremap_caller(addr, size, flags, caller);
}

void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
                 unsigned long flags)
{
    void *caller = __builtin_return_address(0);

    /* writable implies dirty for kernel addresses */
    if (flags & _PAGE_WRITE)
        flags |= _PAGE_DIRTY;

    /* we don't want to let _PAGE_EXEC leak out */
    flags &= ~_PAGE_EXEC;
    /*
     * Force kernel mapping.
     */
#if defined(CONFIG_PPC_BOOK3S_64)
    flags |= _PAGE_PRIVILEGED;
#else
    flags &= ~_PAGE_USER;
#endif

#ifdef _PAGE_BAP_SR
    /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format,
     * which means we just cleared supervisor access... oops ;-) This
     * restores it.
     */
    flags |= _PAGE_BAP_SR;
#endif

    if (ppc_md.ioremap)
        return ppc_md.ioremap(addr, size, flags, caller);
    return __ioremap_caller(addr, size, flags, caller);
}

/*
 * Unmap an IO region and remove it from the vmalloc'd area.
 * Access to IO memory should be serialized by the driver.
 */
void __iounmap(volatile void __iomem *token)
{
    void *addr;

    if (!slab_is_available())
        return;

    addr = (void *) ((unsigned long __force)
             PCI_FIX_ADDR(token) & PAGE_MASK);
    if ((unsigned long)addr < ioremap_bot) {
        printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
               " at 0x%p\n", addr);
        return;
    }
    vunmap(addr);
}

void iounmap(volatile void __iomem *token)
{
    if (ppc_md.iounmap)
        ppc_md.iounmap(token);
    else
        __iounmap(token);
}
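
/*
 * Illustrative sketch (added, not part of the original file): a typical
 * driver maps a device's MMIO region with ioremap(), accesses it through
 * the readl()/writel() accessors, and releases it with iounmap(). The
 * function name and register offset below are made up.
 */
#if 0	/* example only, not compiled */
static int example_probe_mmio(phys_addr_t regs_phys, unsigned long regs_size)
{
    void __iomem *regs;
    u32 id;

    regs = ioremap(regs_phys, regs_size);	/* cache-inhibited, guarded */
    if (!regs)
        return -ENOMEM;

    id = readl(regs + 0x0);			/* hypothetical ID register */
    pr_info("device id: %#x\n", id);

    iounmap(regs);
    return 0;
}
#endif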

EXPORT_SYMBOL(ioremap);
EXPORT_SYMBOL(ioremap_wc);
EXPORT_SYMBOL(ioremap_prot);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(__ioremap_at);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(__iounmap);
EXPORT_SYMBOL(__iounmap_at);

#ifndef __PAGETABLE_PUD_FOLDED
/* 4 level page table */
struct page *pgd_page(pgd_t pgd)
{
    if (pgd_huge(pgd))
        return pte_page(pgd_pte(pgd));
    return virt_to_page(pgd_page_vaddr(pgd));
}
#endif

struct page *pud_page(pud_t pud)
{
    if (pud_huge(pud))
        return pte_page(pud_pte(pud));
    return virt_to_page(pud_page_vaddr(pud));
}

/*
 * For a hugepage we have the pfn in the pmd and use PTE_RPN_SHIFT bits
 * for flags. For a PTE page, we have a PTE_FRAG_SIZE (4K) aligned
 * virtual address.
 */
struct page *pmd_page(pmd_t pmd)
{
    if (pmd_trans_huge(pmd) || pmd_huge(pmd))
        return pte_page(pmd_pte(pmd));
    return virt_to_page(pmd_page_vaddr(pmd));
}

#ifdef CONFIG_PPC_64K_PAGES
static pte_t *get_from_cache(struct mm_struct *mm)
{
    void *pte_frag, *ret;

    spin_lock(&mm->page_table_lock);
    ret = mm->context.pte_frag;
    if (ret) {
        pte_frag = ret + PTE_FRAG_SIZE;
        /*
         * If we have taken up all the fragments mark PTE page NULL
         */
        if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
            pte_frag = NULL;
        mm->context.pte_frag = pte_frag;
    }
    spin_unlock(&mm->page_table_lock);
    return (pte_t *)ret;
}

static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
{
    void *ret = NULL;
    struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
    if (!page)
        return NULL;
    if (!kernel && !pgtable_page_ctor(page)) {
        __free_page(page);
        return NULL;
    }

    ret = page_address(page);
    spin_lock(&mm->page_table_lock);
    /*
     * If we find pte_frag already set (another thread raced and
     * installed its page), return our freshly allocated page with a
     * single fragment count instead of caching it.
     */
    if (likely(!mm->context.pte_frag)) {
        set_page_count(page, PTE_FRAG_NR);
        mm->context.pte_frag = ret + PTE_FRAG_SIZE;
    }
    spin_unlock(&mm->page_table_lock);

    return (pte_t *)ret;
}

pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
{
    pte_t *pte;

    pte = get_from_cache(mm);
    if (pte)
        return pte;

    return __alloc_for_cache(mm, kernel);
}
#endif /* CONFIG_PPC_64K_PAGES */
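
/*
 * Illustration (added, not in the original source): with 64K kernel pages
 * each page-table page is carved into PTE_FRAG_NR fragments of
 * PTE_FRAG_SIZE bytes. __alloc_for_cache() hands out the first fragment
 * at page_address(page) and caches the next one in mm->context.pte_frag;
 * get_from_cache() then returns successive fragments in PTE_FRAG_SIZE
 * steps until the cursor wraps to a page boundary
 * (((unsigned long)pte_frag & ~PAGE_MASK) == 0), at which point the cache
 * is cleared and the next allocation grabs a fresh page.
 */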

void pte_fragment_free(unsigned long *table, int kernel)
{
    struct page *page = virt_to_page(table);
    if (put_page_testzero(page)) {
        if (!kernel)
            pgtable_page_dtor(page);
        free_hot_cold_page(page, 0);
    }
}

#ifdef CONFIG_SMP
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
    unsigned long pgf = (unsigned long)table;

    BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
    pgf |= shift;
    tlb_remove_table(tlb, (void *)pgf);
}

void __tlb_remove_table(void *_table)
{
    void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
    unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;

    if (!shift)
        /* PTE page needs special handling */
        pte_fragment_free(table, 0);
    else {
        BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
        kmem_cache_free(PGT_CACHE(shift), table);
    }
}
#else
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
    if (!shift) {
        /* PTE page needs special handling */
        pte_fragment_free(table, 0);
    } else {
        BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
        kmem_cache_free(PGT_CACHE(shift), table);
    }
}
#endif
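
/*
 * Illustration (added, not in the original source): on SMP the page-table
 * shift is smuggled through tlb_remove_table() in the low bits of the
 * table pointer, which works because page-table pages are aligned well
 * beyond MAX_PGTABLE_INDEX_SIZE. For example, a table with an assumed
 * shift of 9 would be encoded as pgf = table | 9 and decoded in
 * __tlb_remove_table() as table = pgf & ~MAX_PGTABLE_INDEX_SIZE and
 * shift = pgf & MAX_PGTABLE_INDEX_SIZE; the value 9 is illustrative only.
 */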

#ifdef CONFIG_PPC_BOOK3S_64
void __init mmu_partition_table_init(void)
{
    unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;

    BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
    partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
                        MEMBLOCK_ALLOC_ANYWHERE));

    /* Initialize the Partition Table with no entries */
    memset((void *)partition_tb, 0, patb_size);

    /*
     * Update the partition table control register with the table's
     * physical base and its (64K) size encoded as PATB_SIZE_SHIFT - 12.
     */
    mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
}

void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
                   unsigned long dw1)
{
    partition_tb[lpid].patb0 = cpu_to_be64(dw0);
    partition_tb[lpid].patb1 = cpu_to_be64(dw1);

    /* Global flush of TLBs and partition table caches for this lpid */
    asm volatile("ptesync" : : : "memory");
    asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
             "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
    asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
#endif /* CONFIG_PPC_BOOK3S_64 */