0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  *  arch/sparc64/mm/init.c
0004  *
0005  *  Copyright (C) 1996-1999 David S. Miller (davem@caip.rutgers.edu)
0006  *  Copyright (C) 1997-1999 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
0007  */
0008  
0009 #include <linux/extable.h>
0010 #include <linux/kernel.h>
0011 #include <linux/sched.h>
0012 #include <linux/string.h>
0013 #include <linux/init.h>
0014 #include <linux/memblock.h>
0015 #include <linux/mm.h>
0016 #include <linux/hugetlb.h>
0017 #include <linux/initrd.h>
0018 #include <linux/swap.h>
0019 #include <linux/pagemap.h>
0020 #include <linux/poison.h>
0021 #include <linux/fs.h>
0022 #include <linux/seq_file.h>
0023 #include <linux/kprobes.h>
0024 #include <linux/cache.h>
0025 #include <linux/sort.h>
0026 #include <linux/ioport.h>
0027 #include <linux/percpu.h>
0028 #include <linux/mmzone.h>
0029 #include <linux/gfp.h>
0030 #include <linux/bootmem_info.h>
0031 
0032 #include <asm/head.h>
0033 #include <asm/page.h>
0034 #include <asm/pgalloc.h>
0035 #include <asm/oplib.h>
0036 #include <asm/iommu.h>
0037 #include <asm/io.h>
0038 #include <linux/uaccess.h>
0039 #include <asm/mmu_context.h>
0040 #include <asm/tlbflush.h>
0041 #include <asm/dma.h>
0042 #include <asm/starfire.h>
0043 #include <asm/tlb.h>
0044 #include <asm/spitfire.h>
0045 #include <asm/sections.h>
0046 #include <asm/tsb.h>
0047 #include <asm/hypervisor.h>
0048 #include <asm/prom.h>
0049 #include <asm/mdesc.h>
0050 #include <asm/cpudata.h>
0051 #include <asm/setup.h>
0052 #include <asm/irq.h>
0053 
0054 #include "init_64.h"
0055 
0056 unsigned long kern_linear_pte_xor[4] __read_mostly;
0057 static unsigned long page_cache4v_flag;
0058 
0059 /* A bitmap, two bits for every 256MB of physical memory.  These two
0060  * bits determine what page size we use for kernel linear
0061  * translations.  They form an index into kern_linear_pte_xor[].  The
0062  * value in the indexed slot is XOR'd with the TLB miss virtual
0063  * address to form the resulting TTE.  The mapping is:
0064  *
0065  *  0   ==> 4MB
0066  *  1   ==> 256MB
0067  *  2   ==> 2GB
0068  *  3   ==> 16GB
0069  *
0070  * All sun4v chips support 256MB pages.  Only SPARC-T4 and later
0071  * support 2GB pages, and hopefully future cpus will support the 16GB
0072  * pages as well.  For slots 2 and 3, we fall back to encoding a 256MB
0073  * TTE xor if the cpu does not support these larger page sizes.
0074  *
0075  * It would be nice to determine this from the machine description
0076  * 'cpu' properties, but we need to have this table setup before the
0077  * MDESC is initialized.
0078  */
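/* Editorial sketch, not part of the original file: how a two-bit slot
 * value selects one of the XOR values above and combines with a TLB-miss
 * virtual address.  The bitmap lookup itself is elided; 'slot' stands for
 * the two bits recorded for the 256MB region containing the address.
 */
static inline unsigned long example_linear_tte(unsigned long vaddr,
                                               unsigned int slot)
{
    /* slot: 0 => 4MB, 1 => 256MB, 2 => 2GB, 3 => 16GB */
    return vaddr ^ kern_linear_pte_xor[slot & 0x3];
}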
0079 
0080 #ifndef CONFIG_DEBUG_PAGEALLOC
0081 /* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
0082  * Space is allocated for this right after the trap table in
0083  * arch/sparc64/kernel/head.S
0084  */
0085 extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
0086 #endif
0087 extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
0088 
0089 static unsigned long cpu_pgsz_mask;
0090 
0091 #define MAX_BANKS   1024
0092 
0093 static struct linux_prom64_registers pavail[MAX_BANKS];
0094 static int pavail_ents;
0095 
0096 u64 numa_latency[MAX_NUMNODES][MAX_NUMNODES];
0097 
0098 static int cmp_p64(const void *a, const void *b)
0099 {
0100     const struct linux_prom64_registers *x = a, *y = b;
0101 
0102     if (x->phys_addr > y->phys_addr)
0103         return 1;
0104     if (x->phys_addr < y->phys_addr)
0105         return -1;
0106     return 0;
0107 }
0108 
0109 static void __init read_obp_memory(const char *property,
0110                    struct linux_prom64_registers *regs,
0111                    int *num_ents)
0112 {
0113     phandle node = prom_finddevice("/memory");
0114     int prop_size = prom_getproplen(node, property);
0115     int ents, ret, i;
0116 
0117     ents = prop_size / sizeof(struct linux_prom64_registers);
0118     if (ents > MAX_BANKS) {
0119         prom_printf("The machine has more %s property entries than "
0120                 "this kernel can support (%d).\n",
0121                 property, MAX_BANKS);
0122         prom_halt();
0123     }
0124 
0125     ret = prom_getproperty(node, property, (char *) regs, prop_size);
0126     if (ret == -1) {
0127         prom_printf("Couldn't get %s property from /memory.\n",
0128                 property);
0129         prom_halt();
0130     }
0131 
0132     /* Sanitize what we got from the firmware, by page aligning
0133      * everything.
0134      */
0135     for (i = 0; i < ents; i++) {
0136         unsigned long base, size;
0137 
0138         base = regs[i].phys_addr;
0139         size = regs[i].reg_size;
0140 
0141         size &= PAGE_MASK;
0142         if (base & ~PAGE_MASK) {
0143             unsigned long new_base = PAGE_ALIGN(base);
0144 
0145             size -= new_base - base;
0146             if ((long) size < 0L)
0147                 size = 0UL;
0148             base = new_base;
0149         }
0150         if (size == 0UL) {
0151             /* If it is empty, simply get rid of it.
0152              * This simplifies the logic of the other
0153              * functions that process these arrays.
0154              */
0155             memmove(&regs[i], &regs[i + 1],
0156                 (ents - i - 1) * sizeof(regs[0]));
0157             i--;
0158             ents--;
0159             continue;
0160         }
0161         regs[i].phys_addr = base;
0162         regs[i].reg_size = size;
0163     }
0164 
0165     *num_ents = ents;
0166 
0167     sort(regs, ents, sizeof(struct linux_prom64_registers),
0168          cmp_p64, NULL);
0169 }
0170 
0171 /* Kernel physical address base and size in bytes.  */
0172 unsigned long kern_base __read_mostly;
0173 unsigned long kern_size __read_mostly;
0174 
0175 /* Initial ramdisk setup */
0176 extern unsigned long sparc_ramdisk_image64;
0177 extern unsigned int sparc_ramdisk_image;
0178 extern unsigned int sparc_ramdisk_size;
0179 
0180 struct page *mem_map_zero __read_mostly;
0181 EXPORT_SYMBOL(mem_map_zero);
0182 
0183 unsigned int sparc64_highest_unlocked_tlb_ent __read_mostly;
0184 
0185 unsigned long sparc64_kern_pri_context __read_mostly;
0186 unsigned long sparc64_kern_pri_nuc_bits __read_mostly;
0187 unsigned long sparc64_kern_sec_context __read_mostly;
0188 
0189 int num_kernel_image_mappings;
0190 
0191 #ifdef CONFIG_DEBUG_DCFLUSH
0192 atomic_t dcpage_flushes = ATOMIC_INIT(0);
0193 #ifdef CONFIG_SMP
0194 atomic_t dcpage_flushes_xcall = ATOMIC_INIT(0);
0195 #endif
0196 #endif
0197 
0198 inline void flush_dcache_page_impl(struct page *page)
0199 {
0200     BUG_ON(tlb_type == hypervisor);
0201 #ifdef CONFIG_DEBUG_DCFLUSH
0202     atomic_inc(&dcpage_flushes);
0203 #endif
0204 
0205 #ifdef DCACHE_ALIASING_POSSIBLE
0206     __flush_dcache_page(page_address(page),
0207                 ((tlb_type == spitfire) &&
0208                  page_mapping_file(page) != NULL));
0209 #else
0210     if (page_mapping_file(page) != NULL &&
0211         tlb_type == spitfire)
0212         __flush_icache_page(__pa(page_address(page)));
0213 #endif
0214 }
0215 
0216 #define PG_dcache_dirty     PG_arch_1
0217 #define PG_dcache_cpu_shift 32UL
0218 #define PG_dcache_cpu_mask  \
0219     ((1UL<<ilog2(roundup_pow_of_two(NR_CPUS)))-1UL)
0220 
0221 #define dcache_dirty_cpu(page) \
0222     (((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask)
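/* Editorial sketch, not part of the original file: the dirty-tracking
 * state the macros above pack into page->flags.
 *
 *   bit PG_dcache_dirty (PG_arch_1)            -- page has dirty D-cache lines
 *   bits [32, 32 + ilog2(NR_CPUS rounded up))  -- cpu that dirtied the page
 *
 * dcache_dirty_cpu() simply shifts page->flags right by PG_dcache_cpu_shift
 * and masks with PG_dcache_cpu_mask to recover that cpu number.
 */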
0223 
0224 static inline void set_dcache_dirty(struct page *page, int this_cpu)
0225 {
0226     unsigned long mask = this_cpu;
0227     unsigned long non_cpu_bits;
0228 
0229     non_cpu_bits = ~(PG_dcache_cpu_mask << PG_dcache_cpu_shift);
0230     mask = (mask << PG_dcache_cpu_shift) | (1UL << PG_dcache_dirty);
0231 
0232     __asm__ __volatile__("1:\n\t"
0233                  "ldx   [%2], %%g7\n\t"
0234                  "and   %%g7, %1, %%g1\n\t"
0235                  "or    %%g1, %0, %%g1\n\t"
0236                  "casx  [%2], %%g7, %%g1\n\t"
0237                  "cmp   %%g7, %%g1\n\t"
0238                  "bne,pn    %%xcc, 1b\n\t"
0239                  " nop"
0240                  : /* no outputs */
0241                  : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags)
0242                  : "g1", "g7");
0243 }
0244 
0245 static inline void clear_dcache_dirty_cpu(struct page *page, unsigned long cpu)
0246 {
0247     unsigned long mask = (1UL << PG_dcache_dirty);
0248 
0249     __asm__ __volatile__("! test_and_clear_dcache_dirty\n"
0250                  "1:\n\t"
0251                  "ldx   [%2], %%g7\n\t"
0252                  "srlx  %%g7, %4, %%g1\n\t"
0253                  "and   %%g1, %3, %%g1\n\t"
0254                  "cmp   %%g1, %0\n\t"
0255                  "bne,pn    %%icc, 2f\n\t"
0256                  " andn %%g7, %1, %%g1\n\t"
0257                  "casx  [%2], %%g7, %%g1\n\t"
0258                  "cmp   %%g7, %%g1\n\t"
0259                  "bne,pn    %%xcc, 1b\n\t"
0260                  " nop\n"
0261                  "2:"
0262                  : /* no outputs */
0263                  : "r" (cpu), "r" (mask), "r" (&page->flags),
0264                    "i" (PG_dcache_cpu_mask),
0265                    "i" (PG_dcache_cpu_shift)
0266                  : "g1", "g7");
0267 }
0268 
0269 static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte)
0270 {
0271     unsigned long tsb_addr = (unsigned long) ent;
0272 
0273     if (tlb_type == cheetah_plus || tlb_type == hypervisor)
0274         tsb_addr = __pa(tsb_addr);
0275 
0276     __tsb_insert(tsb_addr, tag, pte);
0277 }
0278 
0279 unsigned long _PAGE_ALL_SZ_BITS __read_mostly;
0280 
0281 static void flush_dcache(unsigned long pfn)
0282 {
0283     struct page *page;
0284 
0285     page = pfn_to_page(pfn);
0286     if (page) {
0287         unsigned long pg_flags;
0288 
0289         pg_flags = page->flags;
0290         if (pg_flags & (1UL << PG_dcache_dirty)) {
0291             int cpu = ((pg_flags >> PG_dcache_cpu_shift) &
0292                    PG_dcache_cpu_mask);
0293             int this_cpu = get_cpu();
0294 
0295             /* This is just to optimize away some function calls
0296              * in the SMP case.
0297              */
0298             if (cpu == this_cpu)
0299                 flush_dcache_page_impl(page);
0300             else
0301                 smp_flush_dcache_page_impl(page, cpu);
0302 
0303             clear_dcache_dirty_cpu(page, cpu);
0304 
0305             put_cpu();
0306         }
0307     }
0308 }
0309 
0310 /* mm->context.lock must be held */
0311 static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_index,
0312                     unsigned long tsb_hash_shift, unsigned long address,
0313                     unsigned long tte)
0314 {
0315     struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb;
0316     unsigned long tag;
0317 
0318     if (unlikely(!tsb))
0319         return;
0320 
0321     tsb += ((address >> tsb_hash_shift) &
0322         (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
0323     tag = (address >> 22UL);
0324     tsb_insert(tsb, tag, tte);
0325 }
0326 
0327 #ifdef CONFIG_HUGETLB_PAGE
0328 static int __init hugetlbpage_init(void)
0329 {
0330     hugetlb_add_hstate(HPAGE_64K_SHIFT - PAGE_SHIFT);
0331     hugetlb_add_hstate(HPAGE_SHIFT - PAGE_SHIFT);
0332     hugetlb_add_hstate(HPAGE_256MB_SHIFT - PAGE_SHIFT);
0333     hugetlb_add_hstate(HPAGE_2GB_SHIFT - PAGE_SHIFT);
0334 
0335     return 0;
0336 }
0337 
0338 arch_initcall(hugetlbpage_init);
0339 
0340 static void __init pud_huge_patch(void)
0341 {
0342     struct pud_huge_patch_entry *p;
0343     unsigned long addr;
0344 
0345     p = &__pud_huge_patch;
0346     addr = p->addr;
0347     *(unsigned int *)addr = p->insn;
0348 
0349     __asm__ __volatile__("flush %0" : : "r" (addr));
0350 }
0351 
0352 bool __init arch_hugetlb_valid_size(unsigned long size)
0353 {
0354     unsigned int hugepage_shift = ilog2(size);
0355     unsigned short hv_pgsz_idx;
0356     unsigned int hv_pgsz_mask;
0357 
0358     switch (hugepage_shift) {
0359     case HPAGE_16GB_SHIFT:
0360         hv_pgsz_mask = HV_PGSZ_MASK_16GB;
0361         hv_pgsz_idx = HV_PGSZ_IDX_16GB;
0362         pud_huge_patch();
0363         break;
0364     case HPAGE_2GB_SHIFT:
0365         hv_pgsz_mask = HV_PGSZ_MASK_2GB;
0366         hv_pgsz_idx = HV_PGSZ_IDX_2GB;
0367         break;
0368     case HPAGE_256MB_SHIFT:
0369         hv_pgsz_mask = HV_PGSZ_MASK_256MB;
0370         hv_pgsz_idx = HV_PGSZ_IDX_256MB;
0371         break;
0372     case HPAGE_SHIFT:
0373         hv_pgsz_mask = HV_PGSZ_MASK_4MB;
0374         hv_pgsz_idx = HV_PGSZ_IDX_4MB;
0375         break;
0376     case HPAGE_64K_SHIFT:
0377         hv_pgsz_mask = HV_PGSZ_MASK_64K;
0378         hv_pgsz_idx = HV_PGSZ_IDX_64K;
0379         break;
0380     default:
0381         hv_pgsz_mask = 0;
0382     }
0383 
0384     if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U)
0385         return false;
0386 
0387     return true;
0388 }
0389 #endif  /* CONFIG_HUGETLB_PAGE */
0390 
0391 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
0392 {
0393     struct mm_struct *mm;
0394     unsigned long flags;
0395     bool is_huge_tsb;
0396     pte_t pte = *ptep;
0397 
0398     if (tlb_type != hypervisor) {
0399         unsigned long pfn = pte_pfn(pte);
0400 
0401         if (pfn_valid(pfn))
0402             flush_dcache(pfn);
0403     }
0404 
0405     mm = vma->vm_mm;
0406 
0407     /* Don't insert a non-valid PTE into the TSB, we'll deadlock.  */
0408     if (!pte_accessible(mm, pte))
0409         return;
0410 
0411     spin_lock_irqsave(&mm->context.lock, flags);
0412 
0413     is_huge_tsb = false;
0414 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
0415     if (mm->context.hugetlb_pte_count || mm->context.thp_pte_count) {
0416         unsigned long hugepage_size = PAGE_SIZE;
0417 
0418         if (is_vm_hugetlb_page(vma))
0419             hugepage_size = huge_page_size(hstate_vma(vma));
0420 
0421         if (hugepage_size >= PUD_SIZE) {
0422             unsigned long mask = 0x1ffc00000UL;
0423 
0424             /* Transfer bits [32:22] from address to resolve
0425              * at 4M granularity.
0426              */
0427             pte_val(pte) &= ~mask;
0428             pte_val(pte) |= (address & mask);
0429         } else if (hugepage_size >= PMD_SIZE) {
0430             /* We are fabricating 8MB pages using 4MB
0431              * real hw pages.
0432              */
0433             pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
0434         }
0435 
0436         if (hugepage_size >= PMD_SIZE) {
0437             __update_mmu_tsb_insert(mm, MM_TSB_HUGE,
0438                 REAL_HPAGE_SHIFT, address, pte_val(pte));
0439             is_huge_tsb = true;
0440         }
0441     }
0442 #endif
0443     if (!is_huge_tsb)
0444         __update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT,
0445                     address, pte_val(pte));
0446 
0447     spin_unlock_irqrestore(&mm->context.lock, flags);
0448 }
0449 
0450 void flush_dcache_page(struct page *page)
0451 {
0452     struct address_space *mapping;
0453     int this_cpu;
0454 
0455     if (tlb_type == hypervisor)
0456         return;
0457 
0458     /* Do not bother with the expensive D-cache flush if it
0459      * is merely the zero page.  The 'bigcore' testcase in GDB
0460      * causes this case to run millions of times.
0461      */
0462     if (page == ZERO_PAGE(0))
0463         return;
0464 
0465     this_cpu = get_cpu();
0466 
0467     mapping = page_mapping_file(page);
0468     if (mapping && !mapping_mapped(mapping)) {
0469         int dirty = test_bit(PG_dcache_dirty, &page->flags);
0470         if (dirty) {
0471             int dirty_cpu = dcache_dirty_cpu(page);
0472 
0473             if (dirty_cpu == this_cpu)
0474                 goto out;
0475             smp_flush_dcache_page_impl(page, dirty_cpu);
0476         }
0477         set_dcache_dirty(page, this_cpu);
0478     } else {
0479         /* We could delay the flush for the !page_mapping
0480          * case too.  But that case is for exec env/arg
0481          * pages and those are 99% certain to get
0482          * faulted into the tlb (and thus flushed) anyway.
0483          */
0484         flush_dcache_page_impl(page);
0485     }
0486 
0487 out:
0488     put_cpu();
0489 }
0490 EXPORT_SYMBOL(flush_dcache_page);
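/* Editorial note, not part of the original file: flush_dcache_page() defers
 * the flush when the page belongs to a file mapping with no userspace
 * mappings yet -- it only records PG_dcache_dirty plus the dirtying cpu via
 * set_dcache_dirty().  The deferred flush is carried out later by
 * flush_dcache() above, when the page is faulted in, or performed
 * immediately here if the mapping is already mapped or absent.
 */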
0491 
0492 void __kprobes flush_icache_range(unsigned long start, unsigned long end)
0493 {
0494     /* Cheetah and Hypervisor platform cpus have coherent I-cache. */
0495     if (tlb_type == spitfire) {
0496         unsigned long kaddr;
0497 
0498         /* This code only runs on Spitfire cpus so this is
0499          * why we can assume _PAGE_PADDR_4U.
0500          */
0501         for (kaddr = start; kaddr < end; kaddr += PAGE_SIZE) {
0502             unsigned long paddr, mask = _PAGE_PADDR_4U;
0503 
0504             if (kaddr >= PAGE_OFFSET)
0505                 paddr = kaddr & mask;
0506             else {
0507                 pte_t *ptep = virt_to_kpte(kaddr);
0508 
0509                 paddr = pte_val(*ptep) & mask;
0510             }
0511             __flush_icache_page(paddr);
0512         }
0513     }
0514 }
0515 EXPORT_SYMBOL(flush_icache_range);
0516 
0517 void mmu_info(struct seq_file *m)
0518 {
0519     static const char *pgsz_strings[] = {
0520         "8K", "64K", "512K", "4MB", "32MB",
0521         "256MB", "2GB", "16GB",
0522     };
0523     int i, printed;
0524 
0525     if (tlb_type == cheetah)
0526         seq_printf(m, "MMU Type\t: Cheetah\n");
0527     else if (tlb_type == cheetah_plus)
0528         seq_printf(m, "MMU Type\t: Cheetah+\n");
0529     else if (tlb_type == spitfire)
0530         seq_printf(m, "MMU Type\t: Spitfire\n");
0531     else if (tlb_type == hypervisor)
0532         seq_printf(m, "MMU Type\t: Hypervisor (sun4v)\n");
0533     else
0534         seq_printf(m, "MMU Type\t: ???\n");
0535 
0536     seq_printf(m, "MMU PGSZs\t: ");
0537     printed = 0;
0538     for (i = 0; i < ARRAY_SIZE(pgsz_strings); i++) {
0539         if (cpu_pgsz_mask & (1UL << i)) {
0540             seq_printf(m, "%s%s",
0541                    printed ? "," : "", pgsz_strings[i]);
0542             printed++;
0543         }
0544     }
0545     seq_putc(m, '\n');
0546 
0547 #ifdef CONFIG_DEBUG_DCFLUSH
0548     seq_printf(m, "DCPageFlushes\t: %d\n",
0549            atomic_read(&dcpage_flushes));
0550 #ifdef CONFIG_SMP
0551     seq_printf(m, "DCPageFlushesXC\t: %d\n",
0552            atomic_read(&dcpage_flushes_xcall));
0553 #endif /* CONFIG_SMP */
0554 #endif /* CONFIG_DEBUG_DCFLUSH */
0555 }
0556 
0557 struct linux_prom_translation prom_trans[512] __read_mostly;
0558 unsigned int prom_trans_ents __read_mostly;
0559 
0560 unsigned long kern_locked_tte_data;
0561 
0562 /* The obp translations are saved based on 8k pagesize, since obp can
0563  * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS ->
0564  * HI_OBP_ADDRESS range are handled in ktlb.S.
0565  */
0566 static inline int in_obp_range(unsigned long vaddr)
0567 {
0568     return (vaddr >= LOW_OBP_ADDRESS &&
0569         vaddr < HI_OBP_ADDRESS);
0570 }
0571 
0572 static int cmp_ptrans(const void *a, const void *b)
0573 {
0574     const struct linux_prom_translation *x = a, *y = b;
0575 
0576     if (x->virt > y->virt)
0577         return 1;
0578     if (x->virt < y->virt)
0579         return -1;
0580     return 0;
0581 }
0582 
0583 /* Read OBP translations property into 'prom_trans[]'.  */
0584 static void __init read_obp_translations(void)
0585 {
0586     int n, node, ents, first, last, i;
0587 
0588     node = prom_finddevice("/virtual-memory");
0589     n = prom_getproplen(node, "translations");
0590     if (unlikely(n == 0 || n == -1)) {
0591         prom_printf("prom_mappings: Couldn't get size.\n");
0592         prom_halt();
0593     }
0594     if (unlikely(n > sizeof(prom_trans))) {
0595         prom_printf("prom_mappings: Size %d is too big.\n", n);
0596         prom_halt();
0597     }
0598 
0599     if ((n = prom_getproperty(node, "translations",
0600                   (char *)&prom_trans[0],
0601                   sizeof(prom_trans))) == -1) {
0602         prom_printf("prom_mappings: Couldn't get property.\n");
0603         prom_halt();
0604     }
0605 
0606     n = n / sizeof(struct linux_prom_translation);
0607 
0608     ents = n;
0609 
0610     sort(prom_trans, ents, sizeof(struct linux_prom_translation),
0611          cmp_ptrans, NULL);
0612 
0613     /* Now kick out all the non-OBP entries.  */
0614     for (i = 0; i < ents; i++) {
0615         if (in_obp_range(prom_trans[i].virt))
0616             break;
0617     }
0618     first = i;
0619     for (; i < ents; i++) {
0620         if (!in_obp_range(prom_trans[i].virt))
0621             break;
0622     }
0623     last = i;
0624 
0625     for (i = 0; i < (last - first); i++) {
0626         struct linux_prom_translation *src = &prom_trans[i + first];
0627         struct linux_prom_translation *dest = &prom_trans[i];
0628 
0629         *dest = *src;
0630     }
0631     for (; i < ents; i++) {
0632         struct linux_prom_translation *dest = &prom_trans[i];
0633         dest->virt = dest->size = dest->data = 0x0UL;
0634     }
0635 
0636     prom_trans_ents = last - first;
0637 
0638     if (tlb_type == spitfire) {
0639         /* Clear diag TTE bits. */
0640         for (i = 0; i < prom_trans_ents; i++)
0641             prom_trans[i].data &= ~0x0003fe0000000000UL;
0642     }
0643 
0644     /* Force execute bit on.  */
0645     for (i = 0; i < prom_trans_ents; i++)
0646         prom_trans[i].data |= (tlb_type == hypervisor ?
0647                        _PAGE_EXEC_4V : _PAGE_EXEC_4U);
0648 }
0649 
0650 static void __init hypervisor_tlb_lock(unsigned long vaddr,
0651                        unsigned long pte,
0652                        unsigned long mmu)
0653 {
0654     unsigned long ret = sun4v_mmu_map_perm_addr(vaddr, 0, pte, mmu);
0655 
0656     if (ret != 0) {
0657         prom_printf("hypervisor_tlb_lock[%lx:%x:%lx:%lx]: "
0658                 "errors with %lx\n", vaddr, 0, pte, mmu, ret);
0659         prom_halt();
0660     }
0661 }
0662 
0663 static unsigned long kern_large_tte(unsigned long paddr);
0664 
0665 static void __init remap_kernel(void)
0666 {
0667     unsigned long phys_page, tte_vaddr, tte_data;
0668     int i, tlb_ent = sparc64_highest_locked_tlbent();
0669 
0670     tte_vaddr = (unsigned long) KERNBASE;
0671     phys_page = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB;
0672     tte_data = kern_large_tte(phys_page);
0673 
0674     kern_locked_tte_data = tte_data;
0675 
0676     /* Now lock us into the TLBs via Hypervisor or OBP. */
0677     if (tlb_type == hypervisor) {
0678         for (i = 0; i < num_kernel_image_mappings; i++) {
0679             hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU);
0680             hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU);
0681             tte_vaddr += 0x400000;
0682             tte_data += 0x400000;
0683         }
0684     } else {
0685         for (i = 0; i < num_kernel_image_mappings; i++) {
0686             prom_dtlb_load(tlb_ent - i, tte_data, tte_vaddr);
0687             prom_itlb_load(tlb_ent - i, tte_data, tte_vaddr);
0688             tte_vaddr += 0x400000;
0689             tte_data += 0x400000;
0690         }
0691         sparc64_highest_unlocked_tlb_ent = tlb_ent - i;
0692     }
0693     if (tlb_type == cheetah_plus) {
0694         sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 |
0695                         CTX_CHEETAH_PLUS_NUC);
0696         sparc64_kern_pri_nuc_bits = CTX_CHEETAH_PLUS_NUC;
0697         sparc64_kern_sec_context = CTX_CHEETAH_PLUS_CTX0;
0698     }
0699 }
0700 
0701 
0702 static void __init inherit_prom_mappings(void)
0703 {
0704     /* Now fixup OBP's idea about where we really are mapped. */
0705     printk("Remapping the kernel... ");
0706     remap_kernel();
0707     printk("done.\n");
0708 }
0709 
0710 void prom_world(int enter)
0711 {
0712     /*
0713      * No need to change the address space any more, just flush
0714      * the register windows
0715      */
0716     __asm__ __volatile__("flushw");
0717 }
0718 
0719 void __flush_dcache_range(unsigned long start, unsigned long end)
0720 {
0721     unsigned long va;
0722 
0723     if (tlb_type == spitfire) {
0724         int n = 0;
0725 
0726         for (va = start; va < end; va += 32) {
0727             spitfire_put_dcache_tag(va & 0x3fe0, 0x0);
0728             if (++n >= 512)
0729                 break;
0730         }
0731     } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
0732         start = __pa(start);
0733         end = __pa(end);
0734         for (va = start; va < end; va += 32)
0735             __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
0736                          "membar #Sync"
0737                          : /* no outputs */
0738                          : "r" (va),
0739                            "i" (ASI_DCACHE_INVALIDATE));
0740     }
0741 }
0742 EXPORT_SYMBOL(__flush_dcache_range);
0743 
0744 /* get_new_mmu_context() uses "cache + 1".  */
0745 DEFINE_SPINLOCK(ctx_alloc_lock);
0746 unsigned long tlb_context_cache = CTX_FIRST_VERSION;
0747 #define MAX_CTX_NR  (1UL << CTX_NR_BITS)
0748 #define CTX_BMAP_SLOTS  BITS_TO_LONGS(MAX_CTX_NR)
0749 DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
0750 DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0};
0751 
0752 static void mmu_context_wrap(void)
0753 {
0754     unsigned long old_ver = tlb_context_cache & CTX_VERSION_MASK;
0755     unsigned long new_ver, new_ctx, old_ctx;
0756     struct mm_struct *mm;
0757     int cpu;
0758 
0759     bitmap_zero(mmu_context_bmap, 1 << CTX_NR_BITS);
0760 
0761     /* Reserve kernel context */
0762     set_bit(0, mmu_context_bmap);
0763 
0764     new_ver = (tlb_context_cache & CTX_VERSION_MASK) + CTX_FIRST_VERSION;
0765     if (unlikely(new_ver == 0))
0766         new_ver = CTX_FIRST_VERSION;
0767     tlb_context_cache = new_ver;
0768 
0769     /*
0770      * Make sure that any new mm added into per_cpu_secondary_mm is
0771      * going to go through the get_new_mmu_context() path.
0772      */
0773     mb();
0774 
0775     /*
0776      * Update the version to current on those CPUs that had valid secondary
0777      * contexts.
0778      */
0779     for_each_online_cpu(cpu) {
0780         /*
0781          * If a new mm is stored after we took this mm from the array,
0782          * it will go into get_new_mmu_context() path, because we
0783          * already bumped the version in tlb_context_cache.
0784          */
0785         mm = per_cpu(per_cpu_secondary_mm, cpu);
0786 
0787         if (unlikely(!mm || mm == &init_mm))
0788             continue;
0789 
0790         old_ctx = mm->context.sparc64_ctx_val;
0791         if (likely((old_ctx & CTX_VERSION_MASK) == old_ver)) {
0792             new_ctx = (old_ctx & ~CTX_VERSION_MASK) | new_ver;
0793             set_bit(new_ctx & CTX_NR_MASK, mmu_context_bmap);
0794             mm->context.sparc64_ctx_val = new_ctx;
0795         }
0796     }
0797 }
0798 
0799 /* Caller does TLB context flushing on local CPU if necessary.
0800  * The caller also ensures that CTX_VALID(mm->context) is false.
0801  *
0802  * We must be careful about boundary cases so that we never
0803  * let the user have CTX 0 (nucleus), nor ever use a CTX
0804  * version of zero (which would keep NO_CONTEXT from being
0805  * caught by the version mis-match tests in mmu_context.h).
0806  *
0807  * Always invoked with interrupts disabled.
0808  */
0809 void get_new_mmu_context(struct mm_struct *mm)
0810 {
0811     unsigned long ctx, new_ctx;
0812     unsigned long orig_pgsz_bits;
0813 
0814     spin_lock(&ctx_alloc_lock);
0815 retry:
0816     /* A wrap might have happened; test again whether our context became valid. */
0817     if (unlikely(CTX_VALID(mm->context)))
0818         goto out;
0819     orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
0820     ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
0821     new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
0822     if (new_ctx >= (1 << CTX_NR_BITS)) {
0823         new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
0824         if (new_ctx >= ctx) {
0825             mmu_context_wrap();
0826             goto retry;
0827         }
0828     }
0829     if (mm->context.sparc64_ctx_val)
0830         cpumask_clear(mm_cpumask(mm));
0831     mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63));
0832     new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
0833     tlb_context_cache = new_ctx;
0834     mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
0835 out:
0836     spin_unlock(&ctx_alloc_lock);
0837 }
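/* Editorial sketch, not part of the original file: a sparc64 context value
 * is a version field (CTX_VERSION_MASK, upper bits) concatenated with a
 * context number (CTX_NR_MASK, lower CTX_NR_BITS bits):
 *
 *   new_ctx = (tlb_context_cache & CTX_VERSION_MASK) | (nr & CTX_NR_MASK);
 *
 * CTX_VALID() in mmu_context.h compares the version field against
 * tlb_context_cache, which is what forces every mm with a stale version
 * back through this allocator after mmu_context_wrap().
 */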
0838 
0839 static int numa_enabled = 1;
0840 static int numa_debug;
0841 
0842 static int __init early_numa(char *p)
0843 {
0844     if (!p)
0845         return 0;
0846 
0847     if (strstr(p, "off"))
0848         numa_enabled = 0;
0849 
0850     if (strstr(p, "debug"))
0851         numa_debug = 1;
0852 
0853     return 0;
0854 }
0855 early_param("numa", early_numa);
0856 
0857 #define numadbg(f, a...) \
0858 do {    if (numa_debug) \
0859         printk(KERN_INFO f, ## a); \
0860 } while (0)
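/* Editorial note, not part of the original file: numadbg() is used exactly
 * like printk() but only emits output when the kernel is booted with
 * "numa=debug" (see early_numa() above), e.g.
 *
 *   numadbg("bootmem_init_numa()\n");
 */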
0861 
0862 static void __init find_ramdisk(unsigned long phys_base)
0863 {
0864 #ifdef CONFIG_BLK_DEV_INITRD
0865     if (sparc_ramdisk_image || sparc_ramdisk_image64) {
0866         unsigned long ramdisk_image;
0867 
0868         /* Older versions of the bootloader only supported a
0869          * 32-bit physical address for the ramdisk image
0870          * location, stored at sparc_ramdisk_image.  Newer
0871          * SILO versions set sparc_ramdisk_image to zero and
0872          * provide a full 64-bit physical address at
0873          * sparc_ramdisk_image64.
0874          */
0875         ramdisk_image = sparc_ramdisk_image;
0876         if (!ramdisk_image)
0877             ramdisk_image = sparc_ramdisk_image64;
0878 
0879         /* Another bootloader quirk.  The bootloader normalizes
0880          * the physical address to KERNBASE, so we have to
0881          * factor that back out and add in the lowest valid
0882          * physical page address to get the true physical address.
0883          */
0884         ramdisk_image -= KERNBASE;
0885         ramdisk_image += phys_base;
0886 
0887         numadbg("Found ramdisk at physical address 0x%lx, size %u\n",
0888             ramdisk_image, sparc_ramdisk_size);
0889 
0890         initrd_start = ramdisk_image;
0891         initrd_end = ramdisk_image + sparc_ramdisk_size;
0892 
0893         memblock_reserve(initrd_start, sparc_ramdisk_size);
0894 
0895         initrd_start += PAGE_OFFSET;
0896         initrd_end += PAGE_OFFSET;
0897     }
0898 #endif
0899 }
0900 
0901 struct node_mem_mask {
0902     unsigned long mask;
0903     unsigned long match;
0904 };
0905 static struct node_mem_mask node_masks[MAX_NUMNODES];
0906 static int num_node_masks;
0907 
0908 #ifdef CONFIG_NUMA
0909 
0910 struct mdesc_mlgroup {
0911     u64 node;
0912     u64 latency;
0913     u64 match;
0914     u64 mask;
0915 };
0916 
0917 static struct mdesc_mlgroup *mlgroups;
0918 static int num_mlgroups;
0919 
0920 int numa_cpu_lookup_table[NR_CPUS];
0921 cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
0922 
0923 struct mdesc_mblock {
0924     u64 base;
0925     u64 size;
0926     u64 offset; /* RA-to-PA */
0927 };
0928 static struct mdesc_mblock *mblocks;
0929 static int num_mblocks;
0930 
0931 static struct mdesc_mblock * __init addr_to_mblock(unsigned long addr)
0932 {
0933     struct mdesc_mblock *m = NULL;
0934     int i;
0935 
0936     for (i = 0; i < num_mblocks; i++) {
0937         m = &mblocks[i];
0938 
0939         if (addr >= m->base &&
0940             addr < (m->base + m->size)) {
0941             break;
0942         }
0943     }
0944 
0945     return m;
0946 }
0947 
0948 static u64 __init memblock_nid_range_sun4u(u64 start, u64 end, int *nid)
0949 {
0950     int prev_nid, new_nid;
0951 
0952     prev_nid = NUMA_NO_NODE;
0953     for ( ; start < end; start += PAGE_SIZE) {
0954         for (new_nid = 0; new_nid < num_node_masks; new_nid++) {
0955             struct node_mem_mask *p = &node_masks[new_nid];
0956 
0957             if ((start & p->mask) == p->match) {
0958                 if (prev_nid == NUMA_NO_NODE)
0959                     prev_nid = new_nid;
0960                 break;
0961             }
0962         }
0963 
0964         if (new_nid == num_node_masks) {
0965             prev_nid = 0;
0966             WARN_ONCE(1, "addr[%Lx] doesn't match a NUMA node rule. Some memory will be owned by node 0.",
0967                   start);
0968             break;
0969         }
0970 
0971         if (prev_nid != new_nid)
0972             break;
0973     }
0974     *nid = prev_nid;
0975 
0976     return start > end ? end : start;
0977 }
0978 
0979 static u64 __init memblock_nid_range(u64 start, u64 end, int *nid)
0980 {
0981     u64 ret_end, pa_start, m_mask, m_match, m_end;
0982     struct mdesc_mblock *mblock;
0983     int _nid, i;
0984 
0985     if (tlb_type != hypervisor)
0986         return memblock_nid_range_sun4u(start, end, nid);
0987 
0988     mblock = addr_to_mblock(start);
0989     if (!mblock) {
0990         WARN_ONCE(1, "memblock_nid_range: Can't find mblock addr[%Lx]",
0991               start);
0992 
0993         _nid = 0;
0994         ret_end = end;
0995         goto done;
0996     }
0997 
0998     pa_start = start + mblock->offset;
0999     m_match = 0;
1000     m_mask = 0;
1001 
1002     for (_nid = 0; _nid < num_node_masks; _nid++) {
1003         struct node_mem_mask *const m = &node_masks[_nid];
1004 
1005         if ((pa_start & m->mask) == m->match) {
1006             m_match = m->match;
1007             m_mask = m->mask;
1008             break;
1009         }
1010     }
1011 
1012     if (num_node_masks == _nid) {
1013         /* We could not find a NUMA group, so default to 0, but let's
1014          * search for a latency group so we can calculate the correct
1015          * end address to return.
1016          */
1017         _nid = 0;
1018 
1019         for (i = 0; i < num_mlgroups; i++) {
1020             struct mdesc_mlgroup *const m = &mlgroups[i];
1021 
1022             if ((pa_start & m->mask) == m->match) {
1023                 m_match = m->match;
1024                 m_mask = m->mask;
1025                 break;
1026             }
1027         }
1028 
1029         if (i == num_mlgroups) {
1030             WARN_ONCE(1, "memblock_nid_range: Can't find latency group addr[%Lx]",
1031                   start);
1032 
1033             ret_end = end;
1034             goto done;
1035         }
1036     }
1037 
1038     /*
1039      * Each latency group has match and mask, and each memory block has an
1040      * offset.  An address belongs to a latency group if it satisfies
1041      * the following formula: ((addr + offset) & mask) == match
1042      * It is, however, slow to check every single page against a
1043      * particular latency group.  As an optimization we calculate the end
1044      * value using bit arithmetic.
1045      */
1046     m_end = m_match + (1ul << __ffs(m_mask)) - mblock->offset;
1047     m_end += pa_start & ~((1ul << fls64(m_mask)) - 1);
1048     ret_end = m_end > end ? end : m_end;
1049 
1050 done:
1051     *nid = _nid;
1052     return ret_end;
1053 }
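/* Editorial sketch, not part of the original file: the latency-group
 * membership test described in the comment above, written out as a
 * standalone predicate.  Names are illustrative only.
 */
static inline bool example_addr_in_group(u64 addr, u64 ra_to_pa_offset,
                                         u64 mask, u64 match)
{
    /* Apply the RA-to-PA offset, then compare the masked physical address. */
    return ((addr + ra_to_pa_offset) & mask) == match;
}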
1054 #endif
1055 
1056 /* This must be invoked after performing all of the necessary
1057  * memblock_set_node() calls for 'nid'.  We need to be able to get
1058  * correct data from get_pfn_range_for_nid().
1059  */
1060 static void __init allocate_node_data(int nid)
1061 {
1062     struct pglist_data *p;
1063     unsigned long start_pfn, end_pfn;
1064 #ifdef CONFIG_NUMA
1065 
1066     NODE_DATA(nid) = memblock_alloc_node(sizeof(struct pglist_data),
1067                          SMP_CACHE_BYTES, nid);
1068     if (!NODE_DATA(nid)) {
1069         prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid);
1070         prom_halt();
1071     }
1072 
1073     NODE_DATA(nid)->node_id = nid;
1074 #endif
1075 
1076     p = NODE_DATA(nid);
1077 
1078     get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
1079     p->node_start_pfn = start_pfn;
1080     p->node_spanned_pages = end_pfn - start_pfn;
1081 }
1082 
1083 static void init_node_masks_nonnuma(void)
1084 {
1085 #ifdef CONFIG_NUMA
1086     int i;
1087 #endif
1088 
1089     numadbg("Initializing tables for non-numa.\n");
1090 
1091     node_masks[0].mask = 0;
1092     node_masks[0].match = 0;
1093     num_node_masks = 1;
1094 
1095 #ifdef CONFIG_NUMA
1096     for (i = 0; i < NR_CPUS; i++)
1097         numa_cpu_lookup_table[i] = 0;
1098 
1099     cpumask_setall(&numa_cpumask_lookup_table[0]);
1100 #endif
1101 }
1102 
1103 #ifdef CONFIG_NUMA
1104 struct pglist_data *node_data[MAX_NUMNODES];
1105 
1106 EXPORT_SYMBOL(numa_cpu_lookup_table);
1107 EXPORT_SYMBOL(numa_cpumask_lookup_table);
1108 EXPORT_SYMBOL(node_data);
1109 
1110 static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio,
1111                    u32 cfg_handle)
1112 {
1113     u64 arc;
1114 
1115     mdesc_for_each_arc(arc, md, pio, MDESC_ARC_TYPE_FWD) {
1116         u64 target = mdesc_arc_target(md, arc);
1117         const u64 *val;
1118 
1119         val = mdesc_get_property(md, target,
1120                      "cfg-handle", NULL);
1121         if (val && *val == cfg_handle)
1122             return 0;
1123     }
1124     return -ENODEV;
1125 }
1126 
1127 static int scan_arcs_for_cfg_handle(struct mdesc_handle *md, u64 grp,
1128                     u32 cfg_handle)
1129 {
1130     u64 arc, candidate, best_latency = ~(u64)0;
1131 
1132     candidate = MDESC_NODE_NULL;
1133     mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
1134         u64 target = mdesc_arc_target(md, arc);
1135         const char *name = mdesc_node_name(md, target);
1136         const u64 *val;
1137 
1138         if (strcmp(name, "pio-latency-group"))
1139             continue;
1140 
1141         val = mdesc_get_property(md, target, "latency", NULL);
1142         if (!val)
1143             continue;
1144 
1145         if (*val < best_latency) {
1146             candidate = target;
1147             best_latency = *val;
1148         }
1149     }
1150 
1151     if (candidate == MDESC_NODE_NULL)
1152         return -ENODEV;
1153 
1154     return scan_pio_for_cfg_handle(md, candidate, cfg_handle);
1155 }
1156 
1157 int of_node_to_nid(struct device_node *dp)
1158 {
1159     const struct linux_prom64_registers *regs;
1160     struct mdesc_handle *md;
1161     u32 cfg_handle;
1162     int count, nid;
1163     u64 grp;
1164 
1165     /* This is the right thing to do on currently supported
1166      * SUN4U NUMA platforms as well, as the PCI controller does
1167      * not sit behind any particular memory controller.
1168      */
1169     if (!mlgroups)
1170         return -1;
1171 
1172     regs = of_get_property(dp, "reg", NULL);
1173     if (!regs)
1174         return -1;
1175 
1176     cfg_handle = (regs->phys_addr >> 32UL) & 0x0fffffff;
1177 
1178     md = mdesc_grab();
1179 
1180     count = 0;
1181     nid = NUMA_NO_NODE;
1182     mdesc_for_each_node_by_name(md, grp, "group") {
1183         if (!scan_arcs_for_cfg_handle(md, grp, cfg_handle)) {
1184             nid = count;
1185             break;
1186         }
1187         count++;
1188     }
1189 
1190     mdesc_release(md);
1191 
1192     return nid;
1193 }
1194 
1195 static void __init add_node_ranges(void)
1196 {
1197     phys_addr_t start, end;
1198     unsigned long prev_max;
1199     u64 i;
1200 
1201 memblock_resized:
1202     prev_max = memblock.memory.max;
1203 
1204     for_each_mem_range(i, &start, &end) {
1205         while (start < end) {
1206             unsigned long this_end;
1207             int nid;
1208 
1209             this_end = memblock_nid_range(start, end, &nid);
1210 
1211             numadbg("Setting memblock NUMA node nid[%d] "
1212                 "start[%llx] end[%lx]\n",
1213                 nid, start, this_end);
1214 
1215             memblock_set_node(start, this_end - start,
1216                       &memblock.memory, nid);
1217             if (memblock.memory.max != prev_max)
1218                 goto memblock_resized;
1219             start = this_end;
1220         }
1221     }
1222 }
1223 
1224 static int __init grab_mlgroups(struct mdesc_handle *md)
1225 {
1226     unsigned long paddr;
1227     int count = 0;
1228     u64 node;
1229 
1230     mdesc_for_each_node_by_name(md, node, "memory-latency-group")
1231         count++;
1232     if (!count)
1233         return -ENOENT;
1234 
1235     paddr = memblock_phys_alloc(count * sizeof(struct mdesc_mlgroup),
1236                     SMP_CACHE_BYTES);
1237     if (!paddr)
1238         return -ENOMEM;
1239 
1240     mlgroups = __va(paddr);
1241     num_mlgroups = count;
1242 
1243     count = 0;
1244     mdesc_for_each_node_by_name(md, node, "memory-latency-group") {
1245         struct mdesc_mlgroup *m = &mlgroups[count++];
1246         const u64 *val;
1247 
1248         m->node = node;
1249 
1250         val = mdesc_get_property(md, node, "latency", NULL);
1251         m->latency = *val;
1252         val = mdesc_get_property(md, node, "address-match", NULL);
1253         m->match = *val;
1254         val = mdesc_get_property(md, node, "address-mask", NULL);
1255         m->mask = *val;
1256 
1257         numadbg("MLGROUP[%d]: node[%llx] latency[%llx] "
1258             "match[%llx] mask[%llx]\n",
1259             count - 1, m->node, m->latency, m->match, m->mask);
1260     }
1261 
1262     return 0;
1263 }
1264 
1265 static int __init grab_mblocks(struct mdesc_handle *md)
1266 {
1267     unsigned long paddr;
1268     int count = 0;
1269     u64 node;
1270 
1271     mdesc_for_each_node_by_name(md, node, "mblock")
1272         count++;
1273     if (!count)
1274         return -ENOENT;
1275 
1276     paddr = memblock_phys_alloc(count * sizeof(struct mdesc_mblock),
1277                     SMP_CACHE_BYTES);
1278     if (!paddr)
1279         return -ENOMEM;
1280 
1281     mblocks = __va(paddr);
1282     num_mblocks = count;
1283 
1284     count = 0;
1285     mdesc_for_each_node_by_name(md, node, "mblock") {
1286         struct mdesc_mblock *m = &mblocks[count++];
1287         const u64 *val;
1288 
1289         val = mdesc_get_property(md, node, "base", NULL);
1290         m->base = *val;
1291         val = mdesc_get_property(md, node, "size", NULL);
1292         m->size = *val;
1293         val = mdesc_get_property(md, node,
1294                      "address-congruence-offset", NULL);
1295 
1296         /* The address-congruence-offset property is optional.
1297          * Explicitly zero it to identify this case.
1298          */
1299         if (val)
1300             m->offset = *val;
1301         else
1302             m->offset = 0UL;
1303 
1304         numadbg("MBLOCK[%d]: base[%llx] size[%llx] offset[%llx]\n",
1305             count - 1, m->base, m->size, m->offset);
1306     }
1307 
1308     return 0;
1309 }
1310 
1311 static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
1312                            u64 grp, cpumask_t *mask)
1313 {
1314     u64 arc;
1315 
1316     cpumask_clear(mask);
1317 
1318     mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) {
1319         u64 target = mdesc_arc_target(md, arc);
1320         const char *name = mdesc_node_name(md, target);
1321         const u64 *id;
1322 
1323         if (strcmp(name, "cpu"))
1324             continue;
1325         id = mdesc_get_property(md, target, "id", NULL);
1326         if (*id < nr_cpu_ids)
1327             cpumask_set_cpu(*id, mask);
1328     }
1329 }
1330 
1331 static struct mdesc_mlgroup * __init find_mlgroup(u64 node)
1332 {
1333     int i;
1334 
1335     for (i = 0; i < num_mlgroups; i++) {
1336         struct mdesc_mlgroup *m = &mlgroups[i];
1337         if (m->node == node)
1338             return m;
1339     }
1340     return NULL;
1341 }
1342 
1343 int __node_distance(int from, int to)
1344 {
1345     if ((from >= MAX_NUMNODES) || (to >= MAX_NUMNODES)) {
1346         pr_warn("Returning default NUMA distance value for %d->%d\n",
1347             from, to);
1348         return (from == to) ? LOCAL_DISTANCE : REMOTE_DISTANCE;
1349     }
1350     return numa_latency[from][to];
1351 }
1352 EXPORT_SYMBOL(__node_distance);
1353 
1354 static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
1355 {
1356     int i;
1357 
1358     for (i = 0; i < MAX_NUMNODES; i++) {
1359         struct node_mem_mask *n = &node_masks[i];
1360 
1361         if ((grp->mask == n->mask) && (grp->match == n->match))
1362             break;
1363     }
1364     return i;
1365 }
1366 
1367 static void __init find_numa_latencies_for_group(struct mdesc_handle *md,
1368                          u64 grp, int index)
1369 {
1370     u64 arc;
1371 
1372     mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
1373         int tnode;
1374         u64 target = mdesc_arc_target(md, arc);
1375         struct mdesc_mlgroup *m = find_mlgroup(target);
1376 
1377         if (!m)
1378             continue;
1379         tnode = find_best_numa_node_for_mlgroup(m);
1380         if (tnode == MAX_NUMNODES)
1381             continue;
1382         numa_latency[index][tnode] = m->latency;
1383     }
1384 }
1385 
1386 static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp,
1387                       int index)
1388 {
1389     struct mdesc_mlgroup *candidate = NULL;
1390     u64 arc, best_latency = ~(u64)0;
1391     struct node_mem_mask *n;
1392 
1393     mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
1394         u64 target = mdesc_arc_target(md, arc);
1395         struct mdesc_mlgroup *m = find_mlgroup(target);
1396         if (!m)
1397             continue;
1398         if (m->latency < best_latency) {
1399             candidate = m;
1400             best_latency = m->latency;
1401         }
1402     }
1403     if (!candidate)
1404         return -ENOENT;
1405 
1406     if (num_node_masks != index) {
1407         printk(KERN_ERR "Inconsistent NUMA state, "
1408                "index[%d] != num_node_masks[%d]\n",
1409                index, num_node_masks);
1410         return -EINVAL;
1411     }
1412 
1413     n = &node_masks[num_node_masks++];
1414 
1415     n->mask = candidate->mask;
1416     n->match = candidate->match;
1417 
1418     numadbg("NUMA NODE[%d]: mask[%lx] match[%lx] (latency[%llx])\n",
1419         index, n->mask, n->match, candidate->latency);
1420 
1421     return 0;
1422 }
1423 
1424 static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp,
1425                      int index)
1426 {
1427     cpumask_t mask;
1428     int cpu;
1429 
1430     numa_parse_mdesc_group_cpus(md, grp, &mask);
1431 
1432     for_each_cpu(cpu, &mask)
1433         numa_cpu_lookup_table[cpu] = index;
1434     cpumask_copy(&numa_cpumask_lookup_table[index], &mask);
1435 
1436     if (numa_debug) {
1437         printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index);
1438         for_each_cpu(cpu, &mask)
1439             printk("%d ", cpu);
1440         printk("]\n");
1441     }
1442 
1443     return numa_attach_mlgroup(md, grp, index);
1444 }
1445 
1446 static int __init numa_parse_mdesc(void)
1447 {
1448     struct mdesc_handle *md = mdesc_grab();
1449     int i, j, err, count;
1450     u64 node;
1451 
1452     node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
1453     if (node == MDESC_NODE_NULL) {
1454         mdesc_release(md);
1455         return -ENOENT;
1456     }
1457 
1458     err = grab_mblocks(md);
1459     if (err < 0)
1460         goto out;
1461 
1462     err = grab_mlgroups(md);
1463     if (err < 0)
1464         goto out;
1465 
1466     count = 0;
1467     mdesc_for_each_node_by_name(md, node, "group") {
1468         err = numa_parse_mdesc_group(md, node, count);
1469         if (err < 0)
1470             break;
1471         count++;
1472     }
1473 
1474     count = 0;
1475     mdesc_for_each_node_by_name(md, node, "group") {
1476         find_numa_latencies_for_group(md, node, count);
1477         count++;
1478     }
1479 
1480     /* Normalize numa latency matrix according to ACPI SLIT spec. */
1481     for (i = 0; i < MAX_NUMNODES; i++) {
1482         u64 self_latency = numa_latency[i][i];
1483 
1484         for (j = 0; j < MAX_NUMNODES; j++) {
1485             numa_latency[i][j] =
1486                 (numa_latency[i][j] * LOCAL_DISTANCE) /
1487                 self_latency;
1488         }
1489     }
1490 
1491     add_node_ranges();
1492 
1493     for (i = 0; i < num_node_masks; i++) {
1494         allocate_node_data(i);
1495         node_set_online(i);
1496     }
1497 
1498     err = 0;
1499 out:
1500     mdesc_release(md);
1501     return err;
1502 }
1503 
1504 static int __init numa_parse_jbus(void)
1505 {
1506     unsigned long cpu, index;
1507 
1508     /* NUMA node id is encoded in bits 36 and higher, and there is
1509      * a 1-to-1 mapping from CPU ID to NUMA node ID.
1510      */
1511     index = 0;
1512     for_each_present_cpu(cpu) {
1513         numa_cpu_lookup_table[cpu] = index;
1514         cpumask_copy(&numa_cpumask_lookup_table[index], cpumask_of(cpu));
1515         node_masks[index].mask = ~((1UL << 36UL) - 1UL);
1516         node_masks[index].match = cpu << 36UL;
1517 
1518         index++;
1519     }
1520     num_node_masks = index;
1521 
1522     add_node_ranges();
1523 
1524     for (index = 0; index < num_node_masks; index++) {
1525         allocate_node_data(index);
1526         node_set_online(index);
1527     }
1528 
1529     return 0;
1530 }
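/* Editorial note, not part of the original file: with the JBUS masks set up
 * above, a physical address resolves to the node whose CPU id sits in bits
 * 36 and up, i.e. for node N:
 *
 *   (addr & ~((1UL << 36UL) - 1UL)) == ((unsigned long)N << 36UL)
 *
 * which is exactly the (addr & mask) == match test applied by
 * memblock_nid_range_sun4u().
 */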
1531 
1532 static int __init numa_parse_sun4u(void)
1533 {
1534     if (tlb_type == cheetah || tlb_type == cheetah_plus) {
1535         unsigned long ver;
1536 
1537         __asm__ ("rdpr %%ver, %0" : "=r" (ver));
1538         if ((ver >> 32UL) == __JALAPENO_ID ||
1539             (ver >> 32UL) == __SERRANO_ID)
1540             return numa_parse_jbus();
1541     }
1542     return -1;
1543 }
1544 
1545 static int __init bootmem_init_numa(void)
1546 {
1547     int i, j;
1548     int err = -1;
1549 
1550     numadbg("bootmem_init_numa()\n");
1551 
1552     /* Some sane defaults for numa latency values */
1553     for (i = 0; i < MAX_NUMNODES; i++) {
1554         for (j = 0; j < MAX_NUMNODES; j++)
1555             numa_latency[i][j] = (i == j) ?
1556                 LOCAL_DISTANCE : REMOTE_DISTANCE;
1557     }
1558 
1559     if (numa_enabled) {
1560         if (tlb_type == hypervisor)
1561             err = numa_parse_mdesc();
1562         else
1563             err = numa_parse_sun4u();
1564     }
1565     return err;
1566 }
1567 
1568 #else
1569 
1570 static int bootmem_init_numa(void)
1571 {
1572     return -1;
1573 }
1574 
1575 #endif
1576 
1577 static void __init bootmem_init_nonnuma(void)
1578 {
1579     unsigned long top_of_ram = memblock_end_of_DRAM();
1580     unsigned long total_ram = memblock_phys_mem_size();
1581 
1582     numadbg("bootmem_init_nonnuma()\n");
1583 
1584     printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
1585            top_of_ram, total_ram);
1586     printk(KERN_INFO "Memory hole size: %ldMB\n",
1587            (top_of_ram - total_ram) >> 20);
1588 
1589     init_node_masks_nonnuma();
1590     memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
1591     allocate_node_data(0);
1592     node_set_online(0);
1593 }
1594 
1595 static unsigned long __init bootmem_init(unsigned long phys_base)
1596 {
1597     unsigned long end_pfn;
1598 
1599     end_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
1600     max_pfn = max_low_pfn = end_pfn;
1601     min_low_pfn = (phys_base >> PAGE_SHIFT);
1602 
1603     if (bootmem_init_numa() < 0)
1604         bootmem_init_nonnuma();
1605 
1606     /* Dump memblock with node info. */
1607     memblock_dump_all();
1608 
1609     /* XXX cpu notifier XXX */
1610 
1611     sparse_init();
1612 
1613     return end_pfn;
1614 }
1615 
1616 static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
1617 static int pall_ents __initdata;
1618 
1619 static unsigned long max_phys_bits = 40;
1620 
1621 bool kern_addr_valid(unsigned long addr)
1622 {
1623     pgd_t *pgd;
1624     p4d_t *p4d;
1625     pud_t *pud;
1626     pmd_t *pmd;
1627     pte_t *pte;
1628 
1629     if ((long)addr < 0L) {
1630         unsigned long pa = __pa(addr);
1631 
1632         if ((pa >> max_phys_bits) != 0UL)
1633             return false;
1634 
1635         return pfn_valid(pa >> PAGE_SHIFT);
1636     }
1637 
1638     if (addr >= (unsigned long) KERNBASE &&
1639         addr < (unsigned long)&_end)
1640         return true;
1641 
1642     pgd = pgd_offset_k(addr);
1643     if (pgd_none(*pgd))
1644         return false;
1645 
1646     p4d = p4d_offset(pgd, addr);
1647     if (p4d_none(*p4d))
1648         return false;
1649 
1650     pud = pud_offset(p4d, addr);
1651     if (pud_none(*pud))
1652         return false;
1653 
1654     if (pud_large(*pud))
1655         return pfn_valid(pud_pfn(*pud));
1656 
1657     pmd = pmd_offset(pud, addr);
1658     if (pmd_none(*pmd))
1659         return false;
1660 
1661     if (pmd_large(*pmd))
1662         return pfn_valid(pmd_pfn(*pmd));
1663 
1664     pte = pte_offset_kernel(pmd, addr);
1665     if (pte_none(*pte))
1666         return false;
1667 
1668     return pfn_valid(pte_pfn(*pte));
1669 }
1670 EXPORT_SYMBOL(kern_addr_valid);
1671 
1672 static unsigned long __ref kernel_map_hugepud(unsigned long vstart,
1673                           unsigned long vend,
1674                           pud_t *pud)
1675 {
1676     const unsigned long mask16gb = (1UL << 34) - 1UL;
1677     u64 pte_val = vstart;
1678 
1679     /* Each PUD is 8GB */
1680     if ((vstart & mask16gb) ||
1681         (vend - vstart <= mask16gb)) {
1682         pte_val ^= kern_linear_pte_xor[2];
1683         pud_val(*pud) = pte_val | _PAGE_PUD_HUGE;
1684 
1685         return vstart + PUD_SIZE;
1686     }
1687 
1688     pte_val ^= kern_linear_pte_xor[3];
1689     pte_val |= _PAGE_PUD_HUGE;
1690 
1691     vend = vstart + mask16gb + 1UL;
1692     while (vstart < vend) {
1693         pud_val(*pud) = pte_val;
1694 
1695         pte_val += PUD_SIZE;
1696         vstart += PUD_SIZE;
1697         pud++;
1698     }
1699     return vstart;
1700 }
1701 
1702 static bool kernel_can_map_hugepud(unsigned long vstart, unsigned long vend,
1703                    bool guard)
1704 {
1705     if (guard && !(vstart & ~PUD_MASK) && (vend - vstart) >= PUD_SIZE)
1706         return true;
1707 
1708     return false;
1709 }
1710 
1711 static unsigned long __ref kernel_map_hugepmd(unsigned long vstart,
1712                           unsigned long vend,
1713                           pmd_t *pmd)
1714 {
1715     const unsigned long mask256mb = (1UL << 28) - 1UL;
1716     const unsigned long mask2gb = (1UL << 31) - 1UL;
1717     u64 pte_val = vstart;
1718 
1719     /* Each PMD is 8MB */
1720     if ((vstart & mask256mb) ||
1721         (vend - vstart <= mask256mb)) {
1722         pte_val ^= kern_linear_pte_xor[0];
1723         pmd_val(*pmd) = pte_val | _PAGE_PMD_HUGE;
1724 
1725         return vstart + PMD_SIZE;
1726     }
1727 
1728     if ((vstart & mask2gb) ||
1729         (vend - vstart <= mask2gb)) {
1730         pte_val ^= kern_linear_pte_xor[1];
1731         pte_val |= _PAGE_PMD_HUGE;
1732         vend = vstart + mask256mb + 1UL;
1733     } else {
1734         pte_val ^= kern_linear_pte_xor[2];
1735         pte_val |= _PAGE_PMD_HUGE;
1736         vend = vstart + mask2gb + 1UL;
1737     }
1738 
1739     while (vstart < vend) {
1740         pmd_val(*pmd) = pte_val;
1741 
1742         pte_val += PMD_SIZE;
1743         vstart += PMD_SIZE;
1744         pmd++;
1745     }
1746 
1747     return vstart;
1748 }
1749 
1750 static bool kernel_can_map_hugepmd(unsigned long vstart, unsigned long vend,
1751                    bool guard)
1752 {
1753     if (guard && !(vstart & ~PMD_MASK) && (vend - vstart) >= PMD_SIZE)
1754         return true;
1755 
1756     return false;
1757 }
1758 
1759 static unsigned long __ref kernel_map_range(unsigned long pstart,
1760                         unsigned long pend, pgprot_t prot,
1761                         bool use_huge)
1762 {
1763     unsigned long vstart = PAGE_OFFSET + pstart;
1764     unsigned long vend = PAGE_OFFSET + pend;
1765     unsigned long alloc_bytes = 0UL;
1766 
1767     if ((vstart & ~PAGE_MASK) || (vend & ~PAGE_MASK)) {
1768         prom_printf("kernel_map: Unaligned physmem[%lx:%lx]\n",
1769                 vstart, vend);
1770         prom_halt();
1771     }
1772 
1773     while (vstart < vend) {
1774         unsigned long this_end, paddr = __pa(vstart);
1775         pgd_t *pgd = pgd_offset_k(vstart);
1776         p4d_t *p4d;
1777         pud_t *pud;
1778         pmd_t *pmd;
1779         pte_t *pte;
1780 
1781         if (pgd_none(*pgd)) {
1782             pud_t *new;
1783 
1784             new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE,
1785                           PAGE_SIZE);
1786             if (!new)
1787                 goto err_alloc;
1788             alloc_bytes += PAGE_SIZE;
1789             pgd_populate(&init_mm, pgd, new);
1790         }
1791 
1792         p4d = p4d_offset(pgd, vstart);
1793         if (p4d_none(*p4d)) {
1794             pud_t *new;
1795 
1796             new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE,
1797                           PAGE_SIZE);
1798             if (!new)
1799                 goto err_alloc;
1800             alloc_bytes += PAGE_SIZE;
1801             p4d_populate(&init_mm, p4d, new);
1802         }
1803 
1804         pud = pud_offset(p4d, vstart);
1805         if (pud_none(*pud)) {
1806             pmd_t *new;
1807 
1808             if (kernel_can_map_hugepud(vstart, vend, use_huge)) {
1809                 vstart = kernel_map_hugepud(vstart, vend, pud);
1810                 continue;
1811             }
1812             new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE,
1813                           PAGE_SIZE);
1814             if (!new)
1815                 goto err_alloc;
1816             alloc_bytes += PAGE_SIZE;
1817             pud_populate(&init_mm, pud, new);
1818         }
1819 
1820         pmd = pmd_offset(pud, vstart);
1821         if (pmd_none(*pmd)) {
1822             pte_t *new;
1823 
1824             if (kernel_can_map_hugepmd(vstart, vend, use_huge)) {
1825                 vstart = kernel_map_hugepmd(vstart, vend, pmd);
1826                 continue;
1827             }
1828             new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE,
1829                           PAGE_SIZE);
1830             if (!new)
1831                 goto err_alloc;
1832             alloc_bytes += PAGE_SIZE;
1833             pmd_populate_kernel(&init_mm, pmd, new);
1834         }
1835 
1836         pte = pte_offset_kernel(pmd, vstart);
1837         this_end = (vstart + PMD_SIZE) & PMD_MASK;
1838         if (this_end > vend)
1839             this_end = vend;
1840 
1841         while (vstart < this_end) {
1842             pte_val(*pte) = (paddr | pgprot_val(prot));
1843 
1844             vstart += PAGE_SIZE;
1845             paddr += PAGE_SIZE;
1846             pte++;
1847         }
1848     }
1849 
1850     return alloc_bytes;
1851 
1852 err_alloc:
1853     panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
1854           __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
1855     return -ENOMEM;
1856 }
1857 
1858 static void __init flush_all_kernel_tsbs(void)
1859 {
1860     int i;
1861 
1862     for (i = 0; i < KERNEL_TSB_NENTRIES; i++) {
1863         struct tsb *ent = &swapper_tsb[i];
1864 
1865         ent->tag = (1UL << TSB_TAG_INVALID_BIT);
1866     }
1867 #ifndef CONFIG_DEBUG_PAGEALLOC
1868     for (i = 0; i < KERNEL_TSB4M_NENTRIES; i++) {
1869         struct tsb *ent = &swapper_4m_tsb[i];
1870 
1871         ent->tag = (1UL << TSB_TAG_INVALID_BIT);
1872     }
1873 #endif
1874 }
1875 
1876 extern unsigned int kvmap_linear_patch[1];
1877 
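     /* Build the linear mapping page tables for every physical memory bank
      * in pall[], overwrite kvmap_linear_patch[0] with a nop, and invalidate
      * both kernel TSBs so the new linear mappings take effect.  Huge
      * mappings are disabled under CONFIG_DEBUG_PAGEALLOC so that
      * __kernel_map_pages() can later map and unmap individual base pages.
      */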
1878 static void __init kernel_physical_mapping_init(void)
1879 {
1880     unsigned long i, mem_alloced = 0UL;
1881     bool use_huge = true;
1882 
1883 #ifdef CONFIG_DEBUG_PAGEALLOC
1884     use_huge = false;
1885 #endif
1886     for (i = 0; i < pall_ents; i++) {
1887         unsigned long phys_start, phys_end;
1888 
1889         phys_start = pall[i].phys_addr;
1890         phys_end = phys_start + pall[i].reg_size;
1891 
1892         mem_alloced += kernel_map_range(phys_start, phys_end,
1893                         PAGE_KERNEL, use_huge);
1894     }
1895 
1896     printk("Allocated %ld bytes for kernel page tables.\n",
1897            mem_alloced);
1898 
1899     kvmap_linear_patch[0] = 0x01000000; /* nop */
1900     flushi(&kvmap_linear_patch[0]);
1901 
1902     flush_all_kernel_tsbs();
1903 
1904     __flush_tlb_all();
1905 }
1906 
1907 #ifdef CONFIG_DEBUG_PAGEALLOC
1908 void __kernel_map_pages(struct page *page, int numpages, int enable)
1909 {
1910     unsigned long phys_start = page_to_pfn(page) << PAGE_SHIFT;
1911     unsigned long phys_end = phys_start + (numpages * PAGE_SIZE);
1912 
1913     kernel_map_range(phys_start, phys_end,
1914              (enable ? PAGE_KERNEL : __pgprot(0)), false);
1915 
1916     flush_tsb_kernel_range(PAGE_OFFSET + phys_start,
1917                    PAGE_OFFSET + phys_end);
1918 
1919     /* We should perform an IPI and flush all TLBs,
1920      * but that can deadlock, so only flush the current cpu.
1921      */
1922     __flush_tlb_kernel_range(PAGE_OFFSET + phys_start,
1923                  PAGE_OFFSET + phys_end);
1924 }
1925 #endif
1926 
1927 unsigned long __init find_ecache_flush_span(unsigned long size)
1928 {
1929     int i;
1930 
1931     for (i = 0; i < pavail_ents; i++) {
1932         if (pavail[i].reg_size >= size)
1933             return pavail[i].phys_addr;
1934     }
1935 
1936     return ~0UL;
1937 }
1938 
1939 unsigned long PAGE_OFFSET;
1940 EXPORT_SYMBOL(PAGE_OFFSET);
1941 
1942 unsigned long VMALLOC_END   = 0x0000010000000000UL;
1943 EXPORT_SYMBOL(VMALLOC_END);
1944 
1945 unsigned long sparc64_va_hole_top =    0xfffff80000000000UL;
1946 unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL;
1947 
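     /* Pick the virtual address layout from the number of VA bits the CPU
      * implements: sparc64_va_hole_top/bottom delimit the unimplemented VA
      * hole, PAGE_OFFSET is placed at the top of the hole, and VMALLOC_END
      * at three quarters of the positive half.  For example, on Niagara
      * T1/T2 (hole bottom 0x0000800000000000) this yields a VMALLOC_END of
      * 0x0000600000000000.
      */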
1948 static void __init setup_page_offset(void)
1949 {
1950     if (tlb_type == cheetah || tlb_type == cheetah_plus) {
1951         /* Cheetah/Panther support a full 64-bit virtual
1952          * address, so we can use all that our page tables
1953          * support.
1954          */
1955         sparc64_va_hole_top =    0xfff0000000000000UL;
1956         sparc64_va_hole_bottom = 0x0010000000000000UL;
1957 
1958         max_phys_bits = 42;
1959     } else if (tlb_type == hypervisor) {
1960         switch (sun4v_chip_type) {
1961         case SUN4V_CHIP_NIAGARA1:
1962         case SUN4V_CHIP_NIAGARA2:
1963             /* T1 and T2 support 48-bit virtual addresses.  */
1964             sparc64_va_hole_top =    0xffff800000000000UL;
1965             sparc64_va_hole_bottom = 0x0000800000000000UL;
1966 
1967             max_phys_bits = 39;
1968             break;
1969         case SUN4V_CHIP_NIAGARA3:
1970             /* T3 supports 48-bit virtual addresses.  */
1971             sparc64_va_hole_top =    0xffff800000000000UL;
1972             sparc64_va_hole_bottom = 0x0000800000000000UL;
1973 
1974             max_phys_bits = 43;
1975             break;
1976         case SUN4V_CHIP_NIAGARA4:
1977         case SUN4V_CHIP_NIAGARA5:
1978         case SUN4V_CHIP_SPARC64X:
1979         case SUN4V_CHIP_SPARC_M6:
1980             /* T4 and later support 52-bit virtual addresses.  */
1981             sparc64_va_hole_top =    0xfff8000000000000UL;
1982             sparc64_va_hole_bottom = 0x0008000000000000UL;
1983             max_phys_bits = 47;
1984             break;
1985         case SUN4V_CHIP_SPARC_M7:
1986         case SUN4V_CHIP_SPARC_SN:
1987             /* M7 and later support 52-bit virtual addresses.  */
1988             sparc64_va_hole_top =    0xfff8000000000000UL;
1989             sparc64_va_hole_bottom = 0x0008000000000000UL;
1990             max_phys_bits = 49;
1991             break;
1992         case SUN4V_CHIP_SPARC_M8:
1993         default:
1994             /* M8 and later support 54-bit virtual addresses.
1995              * However, we restrict M8 and above to 53 VA bits
1996              * because a 4-level page table cannot support more
1997              * than 53 VA bits.
1998              */
1999             sparc64_va_hole_top =    0xfff0000000000000UL;
2000             sparc64_va_hole_bottom = 0x0010000000000000UL;
2001             max_phys_bits = 51;
2002             break;
2003         }
2004     }
2005 
2006     if (max_phys_bits > MAX_PHYS_ADDRESS_BITS) {
2007         prom_printf("MAX_PHYS_ADDRESS_BITS is too small, need %lu\n",
2008                 max_phys_bits);
2009         prom_halt();
2010     }
2011 
2012     PAGE_OFFSET = sparc64_va_hole_top;
2013     VMALLOC_END = ((sparc64_va_hole_bottom >> 1) +
2014                (sparc64_va_hole_bottom >> 2));
2015 
2016     pr_info("MM: PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n",
2017         PAGE_OFFSET, max_phys_bits);
2018     pr_info("MM: VMALLOC [0x%016lx --> 0x%016lx]\n",
2019         VMALLOC_START, VMALLOC_END);
2020     pr_info("MM: VMEMMAP [0x%016lx --> 0x%016lx]\n",
2021         VMEMMAP_BASE, VMEMMAP_BASE << 1);
2022 }
2023 
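     /* Rewrite the TSB access instructions recorded in the
      * __tsb_ldquad_phys_patch and __tsb_phys_patch tables so that TSB
      * loads use physical addressing, picking the sun4v or sun4u variant
      * of the quad load as appropriate and flushing each patched
      * instruction from the instruction cache.
      */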
2024 static void __init tsb_phys_patch(void)
2025 {
2026     struct tsb_ldquad_phys_patch_entry *pquad;
2027     struct tsb_phys_patch_entry *p;
2028 
2029     pquad = &__tsb_ldquad_phys_patch;
2030     while (pquad < &__tsb_ldquad_phys_patch_end) {
2031         unsigned long addr = pquad->addr;
2032 
2033         if (tlb_type == hypervisor)
2034             *(unsigned int *) addr = pquad->sun4v_insn;
2035         else
2036             *(unsigned int *) addr = pquad->sun4u_insn;
2037         wmb();
2038         __asm__ __volatile__("flush %0"
2039                      : /* no outputs */
2040                      : "r" (addr));
2041 
2042         pquad++;
2043     }
2044 
2045     p = &__tsb_phys_patch;
2046     while (p < &__tsb_phys_patch_end) {
2047         unsigned long addr = p->addr;
2048 
2049         *(unsigned int *) addr = p->insn;
2050         wmb();
2051         __asm__ __volatile__("flush %0"
2052                      : /* no outputs */
2053                      : "r" (addr));
2054 
2055         p++;
2056     }
2057 }
2058 
2059 /* Don't mark as init, we give this to the Hypervisor.  */
2060 #ifndef CONFIG_DEBUG_PAGEALLOC
2061 #define NUM_KTSB_DESCR  2
2062 #else
2063 #define NUM_KTSB_DESCR  1
2064 #endif
2065 static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR];
2066 
2067 /* The swapper TSBs are loaded with a base sequence of:
2068  *
2069  *  sethi   %uhi(SYMBOL), REG1
2070  *  sethi   %hi(SYMBOL), REG2
2071  *  or  REG1, %ulo(SYMBOL), REG1
2072  *  or  REG2, %lo(SYMBOL), REG2
2073  *  sllx    REG1, 32, REG1
2074  *  or  REG1, REG2, REG1
2075  *
2076  * When we use physical addressing for the TSB accesses, we patch the
2077  * first four instructions in the above sequence.
2078  */
2079 
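     /* For example, with pa == 0x000001fff0000000, high_bits is 0x1ff and
      * low_bits is 0xf0000000: the two sethi instructions get their 22-bit
      * immediates replaced with high_bits >> 10 and low_bits >> 10, and the
      * two following 'or' instructions receive the remaining low 10 bits,
      * so REG1 ends up holding the physical TSB base instead of the virtual
      * address of SYMBOL.
      */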
2080 static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa)
2081 {
2082     unsigned long high_bits, low_bits;
2083 
2084     high_bits = (pa >> 32) & 0xffffffff;
2085     low_bits = (pa >> 0) & 0xffffffff;
2086 
2087     while (start < end) {
2088         unsigned int *ia = (unsigned int *)(unsigned long)*start;
2089 
2090         ia[0] = (ia[0] & ~0x3fffff) | (high_bits >> 10);
2091         __asm__ __volatile__("flush %0" : : "r" (ia));
2092 
2093         ia[1] = (ia[1] & ~0x3fffff) | (low_bits >> 10);
2094         __asm__ __volatile__("flush %0" : : "r" (ia + 1));
2095 
2096         ia[2] = (ia[2] & ~0x1fff) | (high_bits & 0x3ff);
2097         __asm__ __volatile__("flush %0" : : "r" (ia + 2));
2098 
2099         ia[3] = (ia[3] & ~0x1fff) | (low_bits & 0x3ff);
2100         __asm__ __volatile__("flush %0" : : "r" (ia + 3));
2101 
2102         start++;
2103     }
2104 }
2105 
2106 static void ktsb_phys_patch(void)
2107 {
2108     extern unsigned int __swapper_tsb_phys_patch;
2109     extern unsigned int __swapper_tsb_phys_patch_end;
2110     unsigned long ktsb_pa;
2111 
2112     ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE);
2113     patch_one_ktsb_phys(&__swapper_tsb_phys_patch,
2114                 &__swapper_tsb_phys_patch_end, ktsb_pa);
2115 #ifndef CONFIG_DEBUG_PAGEALLOC
2116     {
2117     extern unsigned int __swapper_4m_tsb_phys_patch;
2118     extern unsigned int __swapper_4m_tsb_phys_patch_end;
2119     ktsb_pa = (kern_base +
2120            ((unsigned long)&swapper_4m_tsb[0] - KERNBASE));
2121     patch_one_ktsb_phys(&__swapper_4m_tsb_phys_patch,
2122                 &__swapper_4m_tsb_phys_patch_end, ktsb_pa);
2123     }
2124 #endif
2125 }
2126 
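     /* Fill in the hypervisor TSB descriptors: ktsb_descr[0] describes the
      * base page size kernel TSB (swapper_tsb) and, unless
      * CONFIG_DEBUG_PAGEALLOC is set, ktsb_descr[1] describes the 4MB TSB
      * used for the large linear-mapping page sizes, restricted by
      * cpu_pgsz_mask.  sun4v_ktsb_register() later hands this array to the
      * hypervisor via sun4v_mmu_tsb_ctx0().
      */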
2127 static void __init sun4v_ktsb_init(void)
2128 {
2129     unsigned long ktsb_pa;
2130 
2131     /* First KTSB for PAGE_SIZE mappings.  */
2132     ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE);
2133 
2134     switch (PAGE_SIZE) {
2135     case 8 * 1024:
2136     default:
2137         ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_8K;
2138         ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_8K;
2139         break;
2140 
2141     case 64 * 1024:
2142         ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_64K;
2143         ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_64K;
2144         break;
2145 
2146     case 512 * 1024:
2147         ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_512K;
2148         ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_512K;
2149         break;
2150 
2151     case 4 * 1024 * 1024:
2152         ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_4MB;
2153         ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_4MB;
2154         break;
2155     }
2156 
2157     ktsb_descr[0].assoc = 1;
2158     ktsb_descr[0].num_ttes = KERNEL_TSB_NENTRIES;
2159     ktsb_descr[0].ctx_idx = 0;
2160     ktsb_descr[0].tsb_base = ktsb_pa;
2161     ktsb_descr[0].resv = 0;
2162 
2163 #ifndef CONFIG_DEBUG_PAGEALLOC
2164     /* Second KTSB for 4MB/256MB/2GB/16GB mappings.  */
2165     ktsb_pa = (kern_base +
2166            ((unsigned long)&swapper_4m_tsb[0] - KERNBASE));
2167 
2168     ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB;
2169     ktsb_descr[1].pgsz_mask = ((HV_PGSZ_MASK_4MB |
2170                     HV_PGSZ_MASK_256MB |
2171                     HV_PGSZ_MASK_2GB |
2172                     HV_PGSZ_MASK_16GB) &
2173                    cpu_pgsz_mask);
2174     ktsb_descr[1].assoc = 1;
2175     ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES;
2176     ktsb_descr[1].ctx_idx = 0;
2177     ktsb_descr[1].tsb_base = ktsb_pa;
2178     ktsb_descr[1].resv = 0;
2179 #endif
2180 }
2181 
2182 void sun4v_ktsb_register(void)
2183 {
2184     unsigned long pa, ret;
2185 
2186     pa = kern_base + ((unsigned long)&ktsb_descr[0] - KERNBASE);
2187 
2188     ret = sun4v_mmu_tsb_ctx0(NUM_KTSB_DESCR, pa);
2189     if (ret != 0) {
2190         prom_printf("hypervisor_mmu_tsb_ctx0[%lx]: "
2191                 "errors with %lx\n", pa, ret);
2192         prom_halt();
2193     }
2194 }
2195 
2196 static void __init sun4u_linear_pte_xor_finalize(void)
2197 {
2198 #ifndef CONFIG_DEBUG_PAGEALLOC
2199     /* This is where we would add Panther support for
2200      * 32MB and 256MB pages.
2201      */
2202 #endif
2203 }
2204 
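     /* Finalize kern_linear_pte_xor[1..3] once the supported page sizes are
      * known from the machine description: each slot gets the 256MB, 2GB or
      * 16GB TTE encoding if cpu_pgsz_mask allows it, otherwise it falls back
      * to the next smaller slot.  The CV bit is left clear on M7/M8/SN where
      * TTE bit 9 selects MCD instead of cacheability.
      */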
2205 static void __init sun4v_linear_pte_xor_finalize(void)
2206 {
2207     unsigned long pagecv_flag;
2208 
2209     /* Bit 9 of the TTE is no longer the CV bit on M7 processors; it
2210      * instead enables MCD errors.  Do not set bit 9 on M7 processors.
2211      */
2212     switch (sun4v_chip_type) {
2213     case SUN4V_CHIP_SPARC_M7:
2214     case SUN4V_CHIP_SPARC_M8:
2215     case SUN4V_CHIP_SPARC_SN:
2216         pagecv_flag = 0x00;
2217         break;
2218     default:
2219         pagecv_flag = _PAGE_CV_4V;
2220         break;
2221     }
2222 #ifndef CONFIG_DEBUG_PAGEALLOC
2223     if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) {
2224         kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
2225             PAGE_OFFSET;
2226         kern_linear_pte_xor[1] |= (_PAGE_CP_4V | pagecv_flag |
2227                        _PAGE_P_4V | _PAGE_W_4V);
2228     } else {
2229         kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
2230     }
2231 
2232     if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) {
2233         kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^
2234             PAGE_OFFSET;
2235         kern_linear_pte_xor[2] |= (_PAGE_CP_4V | pagecv_flag |
2236                        _PAGE_P_4V | _PAGE_W_4V);
2237     } else {
2238         kern_linear_pte_xor[2] = kern_linear_pte_xor[1];
2239     }
2240 
2241     if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) {
2242         kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^
2243             PAGE_OFFSET;
2244         kern_linear_pte_xor[3] |= (_PAGE_CP_4V | pagecv_flag |
2245                        _PAGE_P_4V | _PAGE_W_4V);
2246     } else {
2247         kern_linear_pte_xor[3] = kern_linear_pte_xor[2];
2248     }
2249 #endif
2250 }
2251 
2252 /* paging_init() sets up the page tables */
2253 
2254 static unsigned long last_valid_pfn;
2255 
2256 static void sun4u_pgprot_init(void);
2257 static void sun4v_pgprot_init(void);
2258 
2259 #define _PAGE_CACHE_4U  (_PAGE_CP_4U | _PAGE_CV_4U)
2260 #define _PAGE_CACHE_4V  (_PAGE_CP_4V | _PAGE_CV_4V)
2261 #define __DIRTY_BITS_4U  (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
2262 #define __DIRTY_BITS_4V  (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V)
2263 #define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R)
2264 #define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R)
2265 
2266 /* We need to exclude reserved regions. This exclusion will include
2267  * vmlinux and initrd. To be more precise, the initrd size could be used to
2268  * compute a new lower limit because it is freed later during initialization.
2269  */
2270 static void __init reduce_memory(phys_addr_t limit_ram)
2271 {
2272     limit_ram += memblock_reserved_size();
2273     memblock_enforce_memory_limit(limit_ram);
2274 }
2275 
2276 void __init paging_init(void)
2277 {
2278     unsigned long end_pfn, shift, phys_base;
2279     unsigned long real_end, i;
2280 
2281     setup_page_offset();
2282 
2283     /* These build time checks make sure that the dcache_dirty_cpu()
2284      * page->flags usage will work.
2285      *
2286      * When a page gets marked as dcache-dirty, we store the
2287      * cpu number starting at bit 32 in the page->flags.  Also,
2288      * functions like clear_dcache_dirty_cpu use the cpu mask
2289      * in 13-bit signed-immediate instruction fields.
2290      */
2291 
2292     /*
2293      * Page flags must not reach into upper 32 bits that are used
2294      * for the cpu number
2295      */
2296     BUILD_BUG_ON(NR_PAGEFLAGS > 32);
2297 
2298     /*
2299      * The bit fields placed in the high range must not reach below
2300      * the 32 bit boundary. Otherwise we cannot place the cpu field
2301      * at the 32 bit boundary.
2302      */
2303     BUILD_BUG_ON(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH +
2304         ilog2(roundup_pow_of_two(NR_CPUS)) > 32);
2305 
2306     BUILD_BUG_ON(NR_CPUS > 4096);
2307 
2308     kern_base = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB;
2309     kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;
2310 
2311     /* Invalidate both kernel TSBs.  */
2312     memset(swapper_tsb, 0x40, sizeof(swapper_tsb));
2313 #ifndef CONFIG_DEBUG_PAGEALLOC
2314     memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
2315 #endif
2316 
2317     /* TTE.cv bit on sparc v9 occupies the same position as TTE.mcde
2318      * bit on M7 processor. This is a conflicting usage of the same
2319      * bit. Enabling TTE.cv on M7 would turn on Memory Corruption
2320      * Detection error on all pages and this will lead to problems
2321      * later. The kernel does not run with MCD enabled, so the rest
2322      * of the steps required to fully configure memory corruption
2323      * detection are not taken. We need to ensure TTE.mcde is not
2324      * set on M7 processors. Compute the value of the cacheability
2325      * flag for use later taking this into consideration.
2326      */
2327     switch (sun4v_chip_type) {
2328     case SUN4V_CHIP_SPARC_M7:
2329     case SUN4V_CHIP_SPARC_M8:
2330     case SUN4V_CHIP_SPARC_SN:
2331         page_cache4v_flag = _PAGE_CP_4V;
2332         break;
2333     default:
2334         page_cache4v_flag = _PAGE_CACHE_4V;
2335         break;
2336     }
2337 
2338     if (tlb_type == hypervisor)
2339         sun4v_pgprot_init();
2340     else
2341         sun4u_pgprot_init();
2342 
2343     if (tlb_type == cheetah_plus ||
2344         tlb_type == hypervisor) {
2345         tsb_phys_patch();
2346         ktsb_phys_patch();
2347     }
2348 
2349     if (tlb_type == hypervisor)
2350         sun4v_patch_tlb_handlers();
2351 
2352     /* Find available physical memory...
2353      *
2354      * Read it twice in order to work around a bug in openfirmware.
2355      * The call to grab this table itself can cause openfirmware to
2356      * allocate memory, which in turn can take away some space from
2357      * the list of available memory.  Reading it twice makes sure
2358      * we really do get the final value.
2359      */
2360     read_obp_translations();
2361     read_obp_memory("reg", &pall[0], &pall_ents);
2362     read_obp_memory("available", &pavail[0], &pavail_ents);
2363     read_obp_memory("available", &pavail[0], &pavail_ents);
2364 
2365     phys_base = 0xffffffffffffffffUL;
2366     for (i = 0; i < pavail_ents; i++) {
2367         phys_base = min(phys_base, pavail[i].phys_addr);
2368         memblock_add(pavail[i].phys_addr, pavail[i].reg_size);
2369     }
2370 
2371     memblock_reserve(kern_base, kern_size);
2372 
2373     find_ramdisk(phys_base);
2374 
2375     if (cmdline_memory_size)
2376         reduce_memory(cmdline_memory_size);
2377 
2378     memblock_allow_resize();
2379     memblock_dump_all();
2380 
2381     set_bit(0, mmu_context_bmap);
2382 
2383     shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE);
2384 
2385     real_end = (unsigned long)_end;
2386     num_kernel_image_mappings = DIV_ROUND_UP(real_end - KERNBASE, 1 << ILOG2_4MB);
2387     printk("Kernel: Using %d locked TLB entries for main kernel image.\n",
2388            num_kernel_image_mappings);
2389 
2390     /* Set kernel pgd to upper alias so physical page computations
2391      * work.
2392      */
2393     init_mm.pgd += ((shift) / (sizeof(pgd_t)));
2394     
2395     memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
2396 
2397     inherit_prom_mappings();
2398     
2399     /* Ok, we can use our TLB miss and window trap handlers safely.  */
2400     setup_tba();
2401 
2402     __flush_tlb_all();
2403 
2404     prom_build_devicetree();
2405     of_populate_present_mask();
2406 #ifndef CONFIG_SMP
2407     of_fill_in_cpu_data();
2408 #endif
2409 
2410     if (tlb_type == hypervisor) {
2411         sun4v_mdesc_init();
2412         mdesc_populate_present_mask(cpu_all_mask);
2413 #ifndef CONFIG_SMP
2414         mdesc_fill_in_cpu_data(cpu_all_mask);
2415 #endif
2416         mdesc_get_page_sizes(cpu_all_mask, &cpu_pgsz_mask);
2417 
2418         sun4v_linear_pte_xor_finalize();
2419 
2420         sun4v_ktsb_init();
2421         sun4v_ktsb_register();
2422     } else {
2423         unsigned long impl, ver;
2424 
2425         cpu_pgsz_mask = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K |
2426                  HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB);
2427 
2428         __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
2429         impl = ((ver >> 32) & 0xffff);
2430         if (impl == PANTHER_IMPL)
2431             cpu_pgsz_mask |= (HV_PGSZ_MASK_32MB |
2432                       HV_PGSZ_MASK_256MB);
2433 
2434         sun4u_linear_pte_xor_finalize();
2435     }
2436 
2437     /* Flush the TLBs and the 4M TSB so that the updated linear
2438      * pte XOR settings are realized for all mappings.
2439      */
2440     __flush_tlb_all();
2441 #ifndef CONFIG_DEBUG_PAGEALLOC
2442     memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
2443 #endif
2444     __flush_tlb_all();
2445 
2446     /* Setup bootmem... */
2447     last_valid_pfn = end_pfn = bootmem_init(phys_base);
2448 
2449     kernel_physical_mapping_init();
2450 
2451     {
2452         unsigned long max_zone_pfns[MAX_NR_ZONES];
2453 
2454         memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
2455 
2456         max_zone_pfns[ZONE_NORMAL] = end_pfn;
2457 
2458         free_area_init(max_zone_pfns);
2459     }
2460 
2461     printk("Booting Linux...\n");
2462 }
2463 
2464 int page_in_phys_avail(unsigned long paddr)
2465 {
2466     int i;
2467 
2468     paddr &= PAGE_MASK;
2469 
2470     for (i = 0; i < pavail_ents; i++) {
2471         unsigned long start, end;
2472 
2473         start = pavail[i].phys_addr;
2474         end = start + pavail[i].reg_size;
2475 
2476         if (paddr >= start && paddr < end)
2477             return 1;
2478     }
2479     if (paddr >= kern_base && paddr < (kern_base + kern_size))
2480         return 1;
2481 #ifdef CONFIG_BLK_DEV_INITRD
2482     if (paddr >= __pa(initrd_start) &&
2483         paddr < __pa(PAGE_ALIGN(initrd_end)))
2484         return 1;
2485 #endif
2486 
2487     return 0;
2488 }
2489 
2490 static void __init register_page_bootmem_info(void)
2491 {
2492 #ifdef CONFIG_NUMA
2493     int i;
2494 
2495     for_each_online_node(i)
2496         if (NODE_DATA(i)->node_spanned_pages)
2497             register_page_bootmem_info_node(NODE_DATA(i));
2498 #endif
2499 }
2500 void __init mem_init(void)
2501 {
2502     high_memory = __va(last_valid_pfn << PAGE_SHIFT);
2503 
2504     memblock_free_all();
2505 
2506     /*
2507      * Must be done after boot memory is put on freelist, because here we
2508      * might set fields in deferred struct pages that have not yet been
2509      * initialized, and memblock_free_all() initializes all the reserved
2510      * deferred pages for us.
2511      */
2512     register_page_bootmem_info();
2513 
2514     /*
2515      * Set up the zero page, mark it reserved, so that page count
2516      * is not manipulated when freeing the page from user ptes.
2517      */
2518     mem_map_zero = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0);
2519     if (mem_map_zero == NULL) {
2520         prom_printf("paging_init: Cannot alloc zero page.\n");
2521         prom_halt();
2522     }
2523     mark_page_reserved(mem_map_zero);
2524 
2525 
2526     if (tlb_type == cheetah || tlb_type == cheetah_plus)
2527         cheetah_ecache_flush_init();
2528 }
2529 
2530 void free_initmem(void)
2531 {
2532     unsigned long addr, initend;
2533     int do_free = 1;
2534 
2535     /* If the physical memory maps were trimmed by kernel command
2536      * line options, don't even try freeing this initmem stuff up.
2537      * The kernel image could have been in the trimmed out region
2538      * and if so the freeing below will free invalid page structs.
2539      */
2540     if (cmdline_memory_size)
2541         do_free = 0;
2542 
2543     /*
2544      * The init section is aligned to 8k in vmlinux.lds. Page align for >8k page sizes.
2545      */
2546     addr = PAGE_ALIGN((unsigned long)(__init_begin));
2547     initend = (unsigned long)(__init_end) & PAGE_MASK;
2548     for (; addr < initend; addr += PAGE_SIZE) {
2549         unsigned long page;
2550 
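             /* Translate the KERNBASE-based image address into its linear
              * mapping (PAGE_OFFSET) alias so that virt_to_page() below
              * resolves to the correct struct page.
              */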
2551         page = (addr +
2552             ((unsigned long) __va(kern_base)) -
2553             ((unsigned long) KERNBASE));
2554         memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
2555 
2556         if (do_free)
2557             free_reserved_page(virt_to_page(page));
2558     }
2559 }
2560 
2561 pgprot_t PAGE_KERNEL __read_mostly;
2562 EXPORT_SYMBOL(PAGE_KERNEL);
2563 
2564 pgprot_t PAGE_KERNEL_LOCKED __read_mostly;
2565 pgprot_t PAGE_COPY __read_mostly;
2566 
2567 pgprot_t PAGE_SHARED __read_mostly;
2568 EXPORT_SYMBOL(PAGE_SHARED);
2569 
2570 unsigned long pg_iobits __read_mostly;
2571 
2572 unsigned long _PAGE_IE __read_mostly;
2573 EXPORT_SYMBOL(_PAGE_IE);
2574 
2575 unsigned long _PAGE_E __read_mostly;
2576 EXPORT_SYMBOL(_PAGE_E);
2577 
2578 unsigned long _PAGE_CACHE __read_mostly;
2579 EXPORT_SYMBOL(_PAGE_CACHE);
2580 
2581 #ifdef CONFIG_SPARSEMEM_VMEMMAP
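     /* Populate the virtual memmap with huge PMD mappings: for each
      * PMD_SIZE piece of [vstart, vend) the intermediate page table levels
      * are instantiated and, if no mapping exists yet, a PMD_SIZE block is
      * allocated on the requested node and installed as a huge PMD using
      * the 4MB TTE size encoding for sun4u or sun4v as appropriate.
      */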
2582 int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
2583                    int node, struct vmem_altmap *altmap)
2584 {
2585     unsigned long pte_base;
2586 
2587     pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U |
2588             _PAGE_CP_4U | _PAGE_CV_4U |
2589             _PAGE_P_4U | _PAGE_W_4U);
2590     if (tlb_type == hypervisor)
2591         pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V |
2592                 page_cache4v_flag | _PAGE_P_4V | _PAGE_W_4V);
2593 
2594     pte_base |= _PAGE_PMD_HUGE;
2595 
2596     vstart = vstart & PMD_MASK;
2597     vend = ALIGN(vend, PMD_SIZE);
2598     for (; vstart < vend; vstart += PMD_SIZE) {
2599         pgd_t *pgd = vmemmap_pgd_populate(vstart, node);
2600         unsigned long pte;
2601         p4d_t *p4d;
2602         pud_t *pud;
2603         pmd_t *pmd;
2604 
2605         if (!pgd)
2606             return -ENOMEM;
2607 
2608         p4d = vmemmap_p4d_populate(pgd, vstart, node);
2609         if (!p4d)
2610             return -ENOMEM;
2611 
2612         pud = vmemmap_pud_populate(p4d, vstart, node);
2613         if (!pud)
2614             return -ENOMEM;
2615 
2616         pmd = pmd_offset(pud, vstart);
2617         pte = pmd_val(*pmd);
2618         if (!(pte & _PAGE_VALID)) {
2619             void *block = vmemmap_alloc_block(PMD_SIZE, node);
2620 
2621             if (!block)
2622                 return -ENOMEM;
2623 
2624             pmd_val(*pmd) = pte_base | __pa(block);
2625         }
2626     }
2627 
2628     return 0;
2629 }
2630 
2631 void vmemmap_free(unsigned long start, unsigned long end,
2632         struct vmem_altmap *altmap)
2633 {
2634 }
2635 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
2636 
2637 /* These are actually filled in at boot time by sun4{u,v}_pgprot_init() */
2638 static pgprot_t protection_map[16] __ro_after_init;
2639 
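     /* protection_map is indexed by the low four bits of vm_flags
      * (VM_READ, VM_WRITE, VM_EXEC, VM_SHARED).  Private writable entries
      * use the copy-on-write protections while the shared half uses
      * page_shared, and the readonly/copy/shared entries that lack VM_EXEC
      * have page_exec_bit masked out.  vm_get_page_prot() below performs
      * the lookup.
      */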
2640 static void prot_init_common(unsigned long page_none,
2641                  unsigned long page_shared,
2642                  unsigned long page_copy,
2643                  unsigned long page_readonly,
2644                  unsigned long page_exec_bit)
2645 {
2646     PAGE_COPY = __pgprot(page_copy);
2647     PAGE_SHARED = __pgprot(page_shared);
2648 
2649     protection_map[0x0] = __pgprot(page_none);
2650     protection_map[0x1] = __pgprot(page_readonly & ~page_exec_bit);
2651     protection_map[0x2] = __pgprot(page_copy & ~page_exec_bit);
2652     protection_map[0x3] = __pgprot(page_copy & ~page_exec_bit);
2653     protection_map[0x4] = __pgprot(page_readonly);
2654     protection_map[0x5] = __pgprot(page_readonly);
2655     protection_map[0x6] = __pgprot(page_copy);
2656     protection_map[0x7] = __pgprot(page_copy);
2657     protection_map[0x8] = __pgprot(page_none);
2658     protection_map[0x9] = __pgprot(page_readonly & ~page_exec_bit);
2659     protection_map[0xa] = __pgprot(page_shared & ~page_exec_bit);
2660     protection_map[0xb] = __pgprot(page_shared & ~page_exec_bit);
2661     protection_map[0xc] = __pgprot(page_readonly);
2662     protection_map[0xd] = __pgprot(page_readonly);
2663     protection_map[0xe] = __pgprot(page_shared);
2664     protection_map[0xf] = __pgprot(page_shared);
2665 }
2666 
2667 static void __init sun4u_pgprot_init(void)
2668 {
2669     unsigned long page_none, page_shared, page_copy, page_readonly;
2670     unsigned long page_exec_bit;
2671     int i;
2672 
2673     PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
2674                 _PAGE_CACHE_4U | _PAGE_P_4U |
2675                 __ACCESS_BITS_4U | __DIRTY_BITS_4U |
2676                 _PAGE_EXEC_4U);
2677     PAGE_KERNEL_LOCKED = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
2678                        _PAGE_CACHE_4U | _PAGE_P_4U |
2679                        __ACCESS_BITS_4U | __DIRTY_BITS_4U |
2680                        _PAGE_EXEC_4U | _PAGE_L_4U);
2681 
2682     _PAGE_IE = _PAGE_IE_4U;
2683     _PAGE_E = _PAGE_E_4U;
2684     _PAGE_CACHE = _PAGE_CACHE_4U;
2685 
2686     pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4U | __DIRTY_BITS_4U |
2687              __ACCESS_BITS_4U | _PAGE_E_4U);
2688 
2689 #ifdef CONFIG_DEBUG_PAGEALLOC
2690     kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET;
2691 #else
2692     kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^
2693         PAGE_OFFSET;
2694 #endif
2695     kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U |
2696                    _PAGE_P_4U | _PAGE_W_4U);
2697 
2698     for (i = 1; i < 4; i++)
2699         kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
2700 
2701     _PAGE_ALL_SZ_BITS =  (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U |
2702                   _PAGE_SZ64K_4U | _PAGE_SZ8K_4U |
2703                   _PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U);
2704 
2705 
2706     page_none = _PAGE_PRESENT_4U | _PAGE_ACCESSED_4U | _PAGE_CACHE_4U;
2707     page_shared = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
2708                __ACCESS_BITS_4U | _PAGE_WRITE_4U | _PAGE_EXEC_4U);
2709     page_copy   = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
2710                __ACCESS_BITS_4U | _PAGE_EXEC_4U);
2711     page_readonly   = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
2712                __ACCESS_BITS_4U | _PAGE_EXEC_4U);
2713 
2714     page_exec_bit = _PAGE_EXEC_4U;
2715 
2716     prot_init_common(page_none, page_shared, page_copy, page_readonly,
2717              page_exec_bit);
2718 }
2719 
2720 static void __init sun4v_pgprot_init(void)
2721 {
2722     unsigned long page_none, page_shared, page_copy, page_readonly;
2723     unsigned long page_exec_bit;
2724     int i;
2725 
2726     PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID |
2727                 page_cache4v_flag | _PAGE_P_4V |
2728                 __ACCESS_BITS_4V | __DIRTY_BITS_4V |
2729                 _PAGE_EXEC_4V);
2730     PAGE_KERNEL_LOCKED = PAGE_KERNEL;
2731 
2732     _PAGE_IE = _PAGE_IE_4V;
2733     _PAGE_E = _PAGE_E_4V;
2734     _PAGE_CACHE = page_cache4v_flag;
2735 
2736 #ifdef CONFIG_DEBUG_PAGEALLOC
2737     kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET;
2738 #else
2739     kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
2740         PAGE_OFFSET;
2741 #endif
2742     kern_linear_pte_xor[0] |= (page_cache4v_flag | _PAGE_P_4V |
2743                    _PAGE_W_4V);
2744 
2745     for (i = 1; i < 4; i++)
2746         kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
2747 
2748     pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V |
2749              __ACCESS_BITS_4V | _PAGE_E_4V);
2750 
2751     _PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V |
2752                  _PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V |
2753                  _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V |
2754                  _PAGE_SZ64K_4V | _PAGE_SZ8K_4V);
2755 
2756     page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | page_cache4v_flag;
2757     page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
2758                __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V);
2759     page_copy   = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
2760                __ACCESS_BITS_4V | _PAGE_EXEC_4V);
2761     page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
2762              __ACCESS_BITS_4V | _PAGE_EXEC_4V);
2763 
2764     page_exec_bit = _PAGE_EXEC_4V;
2765 
2766     prot_init_common(page_none, page_shared, page_copy, page_readonly,
2767              page_exec_bit);
2768 }
2769 
2770 unsigned long pte_sz_bits(unsigned long sz)
2771 {
2772     if (tlb_type == hypervisor) {
2773         switch (sz) {
2774         case 8 * 1024:
2775         default:
2776             return _PAGE_SZ8K_4V;
2777         case 64 * 1024:
2778             return _PAGE_SZ64K_4V;
2779         case 512 * 1024:
2780             return _PAGE_SZ512K_4V;
2781         case 4 * 1024 * 1024:
2782             return _PAGE_SZ4MB_4V;
2783         }
2784     } else {
2785         switch (sz) {
2786         case 8 * 1024:
2787         default:
2788             return _PAGE_SZ8K_4U;
2789         case 64 * 1024:
2790             return _PAGE_SZ64K_4U;
2791         case 512 * 1024:
2792             return _PAGE_SZ512K_4U;
2793         case 4 * 1024 * 1024:
2794             return _PAGE_SZ4MB_4U;
2795         }
2796     }
2797 }
2798 
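     /* Build a PTE for an I/O mapping: the protections are made
      * non-cacheable, the space identifier is stored in the upper half of
      * the PTE (bits 32 and up), and the page size field from
      * pte_sz_bits() is ORed in.
      */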
2799 pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space, unsigned long page_size)
2800 {
2801     pte_t pte;
2802 
2803     pte_val(pte)  = page | pgprot_val(pgprot_noncached(prot));
2804     pte_val(pte) |= (((unsigned long)space) << 32);
2805     pte_val(pte) |= pte_sz_bits(page_size);
2806 
2807     return pte;
2808 }
2809 
2810 static unsigned long kern_large_tte(unsigned long paddr)
2811 {
2812     unsigned long val;
2813 
2814     val = (_PAGE_VALID | _PAGE_SZ4MB_4U |
2815            _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_P_4U |
2816            _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U);
2817     if (tlb_type == hypervisor)
2818         val = (_PAGE_VALID | _PAGE_SZ4MB_4V |
2819                page_cache4v_flag | _PAGE_P_4V |
2820                _PAGE_EXEC_4V | _PAGE_W_4V);
2821 
2822     return val | paddr;
2823 }
2824 
2825 /* If not locked, zap it. */
2826 void __flush_tlb_all(void)
2827 {
2828     unsigned long pstate;
2829     int i;
2830 
2831     __asm__ __volatile__("flushw\n\t"
2832                  "rdpr  %%pstate, %0\n\t"
2833                  "wrpr  %0, %1, %%pstate"
2834                  : "=r" (pstate)
2835                  : "i" (PSTATE_IE));
2836     if (tlb_type == hypervisor) {
2837         sun4v_mmu_demap_all();
2838     } else if (tlb_type == spitfire) {
2839         for (i = 0; i < 64; i++) {
2840             /* Spitfire Errata #32 workaround */
2841             /* NOTE: Always runs on spitfire, so no
2842              *       cheetah+ page size encodings.
2843              */
2844             __asm__ __volatile__("stxa  %0, [%1] %2\n\t"
2845                          "flush %%g6"
2846                          : /* No outputs */
2847                          : "r" (0),
2848                          "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
2849 
2850             if (!(spitfire_get_dtlb_data(i) & _PAGE_L_4U)) {
2851                 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
2852                              "membar #Sync"
2853                              : /* no outputs */
2854                              : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
2855                 spitfire_put_dtlb_data(i, 0x0UL);
2856             }
2857 
2858             /* Spitfire Errata #32 workaround */
2859             /* NOTE: Always runs on spitfire, so no
2860              *       cheetah+ page size encodings.
2861              */
2862             __asm__ __volatile__("stxa  %0, [%1] %2\n\t"
2863                          "flush %%g6"
2864                          : /* No outputs */
2865                          : "r" (0),
2866                          "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
2867 
2868             if (!(spitfire_get_itlb_data(i) & _PAGE_L_4U)) {
2869                 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
2870                              "membar #Sync"
2871                              : /* no outputs */
2872                              : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
2873                 spitfire_put_itlb_data(i, 0x0UL);
2874             }
2875         }
2876     } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
2877         cheetah_flush_dtlb_all();
2878         cheetah_flush_itlb_all();
2879     }
2880     __asm__ __volatile__("wrpr  %0, 0, %%pstate"
2881                  : : "r" (pstate));
2882 }
2883 
2884 pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
2885 {
2886     struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
2887     pte_t *pte = NULL;
2888 
2889     if (page)
2890         pte = (pte_t *) page_address(page);
2891 
2892     return pte;
2893 }
2894 
2895 pgtable_t pte_alloc_one(struct mm_struct *mm)
2896 {
2897     struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
2898     if (!page)
2899         return NULL;
2900     if (!pgtable_pte_page_ctor(page)) {
2901         __free_page(page);
2902         return NULL;
2903     }
2904     return (pte_t *) page_address(page);
2905 }
2906 
2907 void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
2908 {
2909     free_page((unsigned long)pte);
2910 }
2911 
2912 static void __pte_free(pgtable_t pte)
2913 {
2914     struct page *page = virt_to_page(pte);
2915 
2916     pgtable_pte_page_dtor(page);
2917     __free_page(page);
2918 }
2919 
2920 void pte_free(struct mm_struct *mm, pgtable_t pte)
2921 {
2922     __pte_free(pte);
2923 }
2924 
2925 void pgtable_free(void *table, bool is_page)
2926 {
2927     if (is_page)
2928         __pte_free(table);
2929     else
2930         kmem_cache_free(pgtable_cache, table);
2931 }
2932 
2933 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
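     /* Preload the huge-page TSB when a huge PMD mapping is established:
      * only valid, young, large PMDs are inserted, and the
      * REAL_HPAGE_SHIFT bit of the faulting address selects which 4MB
      * half of the fabricated 8MB page the TSB entry describes.
      */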
2934 void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
2935               pmd_t *pmd)
2936 {
2937     unsigned long pte, flags;
2938     struct mm_struct *mm;
2939     pmd_t entry = *pmd;
2940 
2941     if (!pmd_large(entry) || !pmd_young(entry))
2942         return;
2943 
2944     pte = pmd_val(entry);
2945 
2946     /* Don't insert a non-valid PMD into the TSB, we'll deadlock.  */
2947     if (!(pte & _PAGE_VALID))
2948         return;
2949 
2950     /* We are fabricating 8MB pages using 4MB real hw pages.  */
2951     pte |= (addr & (1UL << REAL_HPAGE_SHIFT));
2952 
2953     mm = vma->vm_mm;
2954 
2955     spin_lock_irqsave(&mm->context.lock, flags);
2956 
2957     if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL)
2958         __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
2959                     addr, pte);
2960 
2961     spin_unlock_irqrestore(&mm->context.lock, flags);
2962 }
2963 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
2964 
2965 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
2966 static void context_reload(void *__data)
2967 {
2968     struct mm_struct *mm = __data;
2969 
2970     if (mm == current->mm)
2971         load_secondary_context(mm);
2972 }
2973 
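     /* Make sure the current mm is ready for huge pages: grow the
      * MM_TSB_HUGE TSB if it does not exist yet, switch to it, and on
      * cheetah_plus reprogram the context register page size fields so the
      * second half of the D-TLB is used for huge pages, reloading the
      * secondary context on every cpu running this mm.  In atomic context
      * the fault is fixed up through the exception table instead.
      */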
2974 void hugetlb_setup(struct pt_regs *regs)
2975 {
2976     struct mm_struct *mm = current->mm;
2977     struct tsb_config *tp;
2978 
2979     if (faulthandler_disabled() || !mm) {
2980         const struct exception_table_entry *entry;
2981 
2982         entry = search_exception_tables(regs->tpc);
2983         if (entry) {
2984             regs->tpc = entry->fixup;
2985             regs->tnpc = regs->tpc + 4;
2986             return;
2987         }
2988         pr_alert("Unexpected HugeTLB setup in atomic context.\n");
2989         die_if_kernel("HugeTSB in atomic", regs);
2990     }
2991 
2992     tp = &mm->context.tsb_block[MM_TSB_HUGE];
2993     if (likely(tp->tsb == NULL))
2994         tsb_grow(mm, MM_TSB_HUGE, 0);
2995 
2996     tsb_context_switch(mm);
2997     smp_tsb_sync(mm);
2998 
2999     /* On UltraSPARC-III+ and later, configure the second half of
3000      * the Data-TLB for huge pages.
3001      */
3002     if (tlb_type == cheetah_plus) {
3003         bool need_context_reload = false;
3004         unsigned long ctx;
3005 
3006         spin_lock_irq(&ctx_alloc_lock);
3007         ctx = mm->context.sparc64_ctx_val;
3008         ctx &= ~CTX_PGSZ_MASK;
3009         ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
3010         ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
3011 
3012         if (ctx != mm->context.sparc64_ctx_val) {
3013             /* When changing the page size fields, we
3014              * must perform a context flush so that no
3015              * stale entries match.  This flush must
3016              * occur with the original context register
3017              * settings.
3018              */
3019             do_flush_tlb_mm(mm);
3020 
3021             /* Reload the context register of all processors
3022              * also executing in this address space.
3023              */
3024             mm->context.sparc64_ctx_val = ctx;
3025             need_context_reload = true;
3026         }
3027         spin_unlock_irq(&ctx_alloc_lock);
3028 
3029         if (need_context_reload)
3030             on_each_cpu(context_reload, mm, 0);
3031     }
3032 }
3033 #endif
3034 
3035 static struct resource code_resource = {
3036     .name   = "Kernel code",
3037     .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
3038 };
3039 
3040 static struct resource data_resource = {
3041     .name   = "Kernel data",
3042     .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
3043 };
3044 
3045 static struct resource bss_resource = {
3046     .name   = "Kernel bss",
3047     .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
3048 };
3049 
3050 static inline resource_size_t compute_kern_paddr(void *addr)
3051 {
3052     return (resource_size_t) (addr - KERNBASE + kern_base);
3053 }
3054 
3055 static void __init kernel_lds_init(void)
3056 {
3057     code_resource.start = compute_kern_paddr(_text);
3058     code_resource.end   = compute_kern_paddr(_etext - 1);
3059     data_resource.start = compute_kern_paddr(_etext);
3060     data_resource.end   = compute_kern_paddr(_edata - 1);
3061     bss_resource.start  = compute_kern_paddr(__bss_start);
3062     bss_resource.end    = compute_kern_paddr(_end - 1);
3063 }
3064 
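     /* Register a "System RAM" resource for every available physical
      * memory bank and nest the kernel code, data and bss resources inside
      * the bank that contains them.
      */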
3065 static int __init report_memory(void)
3066 {
3067     int i;
3068     struct resource *res;
3069 
3070     kernel_lds_init();
3071 
3072     for (i = 0; i < pavail_ents; i++) {
3073         res = kzalloc(sizeof(struct resource), GFP_KERNEL);
3074 
3075         if (!res) {
3076             pr_warn("Failed to allocate resource.\n");
3077             break;
3078         }
3079 
3080         res->name = "System RAM";
3081         res->start = pavail[i].phys_addr;
3082         res->end = pavail[i].phys_addr + pavail[i].reg_size - 1;
3083         res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
3084 
3085         if (insert_resource(&iomem_resource, res) < 0) {
3086             pr_warn("Resource insertion failed.\n");
3087             break;
3088         }
3089 
3090         insert_resource(res, &code_resource);
3091         insert_resource(res, &data_resource);
3092         insert_resource(res, &bss_resource);
3093     }
3094 
3095     return 0;
3096 }
3097 arch_initcall(report_memory);
3098 
3099 #ifdef CONFIG_SMP
3100 #define do_flush_tlb_kernel_range   smp_flush_tlb_kernel_range
3101 #else
3102 #define do_flush_tlb_kernel_range   __flush_tlb_kernel_range
3103 #endif
3104 
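     /* Flush a kernel virtual range from the TSB and TLBs, splitting the
      * request around [LOW_OBP_ADDRESS, HI_OBP_ADDRESS) so that the
      * firmware (OBP) translations in that window are never demapped.
      */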
3105 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
3106 {
3107     if (start < HI_OBP_ADDRESS && end > LOW_OBP_ADDRESS) {
3108         if (start < LOW_OBP_ADDRESS) {
3109             flush_tsb_kernel_range(start, LOW_OBP_ADDRESS);
3110             do_flush_tlb_kernel_range(start, LOW_OBP_ADDRESS);
3111         }
3112         if (end > HI_OBP_ADDRESS) {
3113             flush_tsb_kernel_range(HI_OBP_ADDRESS, end);
3114             do_flush_tlb_kernel_range(HI_OBP_ADDRESS, end);
3115         }
3116     } else {
3117         flush_tsb_kernel_range(start, end);
3118         do_flush_tlb_kernel_range(start, end);
3119     }
3120 }
3121 
3122 void copy_user_highpage(struct page *to, struct page *from,
3123     unsigned long vaddr, struct vm_area_struct *vma)
3124 {
3125     char *vfrom, *vto;
3126 
3127     vfrom = kmap_atomic(from);
3128     vto = kmap_atomic(to);
3129     copy_user_page(vto, vfrom, vaddr, to);
3130     kunmap_atomic(vto);
3131     kunmap_atomic(vfrom);
3132 
3133     /* If this page has ADI enabled, copy over any ADI tags
3134      * as well
3135      */
3136     if (vma->vm_flags & VM_SPARC_ADI) {
3137         unsigned long pfrom, pto, i, adi_tag;
3138 
3139         pfrom = page_to_phys(from);
3140         pto = page_to_phys(to);
3141 
3142         for (i = pfrom; i < (pfrom + PAGE_SIZE); i += adi_blksize()) {
3143             asm volatile("ldxa [%1] %2, %0\n\t"
3144                     : "=r" (adi_tag)
3145                     :  "r" (i), "i" (ASI_MCD_REAL));
3146             asm volatile("stxa %0, [%1] %2\n\t"
3147                     :
3148                     : "r" (adi_tag), "r" (pto),
3149                       "i" (ASI_MCD_REAL));
3150             pto += adi_blksize();
3151         }
3152         asm volatile("membar #Sync\n\t");
3153     }
3154 }
3155 EXPORT_SYMBOL(copy_user_highpage);
3156 
3157 void copy_highpage(struct page *to, struct page *from)
3158 {
3159     char *vfrom, *vto;
3160 
3161     vfrom = kmap_atomic(from);
3162     vto = kmap_atomic(to);
3163     copy_page(vto, vfrom);
3164     kunmap_atomic(vto);
3165     kunmap_atomic(vfrom);
3166 
3167     /* If this platform is ADI enabled, copy any ADI tags
3168      * as well
3169      */
3170     if (adi_capable()) {
3171         unsigned long pfrom, pto, i, adi_tag;
3172 
3173         pfrom = page_to_phys(from);
3174         pto = page_to_phys(to);
3175 
3176         for (i = pfrom; i < (pfrom + PAGE_SIZE); i += adi_blksize()) {
3177             asm volatile("ldxa [%1] %2, %0\n\t"
3178                     : "=r" (adi_tag)
3179                     :  "r" (i), "i" (ASI_MCD_REAL));
3180             asm volatile("stxa %0, [%1] %2\n\t"
3181                     :
3182                     : "r" (adi_tag), "r" (pto),
3183                       "i" (ASI_MCD_REAL));
3184             pto += adi_blksize();
3185         }
3186         asm volatile("membar #Sync\n\t");
3187     }
3188 }
3189 EXPORT_SYMBOL(copy_highpage);
3190 
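     /* Translate vm_flags into page protections via protection_map; for
      * ADI-enabled mappings (VM_SPARC_ADI) the MCD bit is also set so that
      * memory corruption detection applies to the page.
      */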
3191 pgprot_t vm_get_page_prot(unsigned long vm_flags)
3192 {
3193     unsigned long prot = pgprot_val(protection_map[vm_flags &
3194                     (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
3195 
3196     if (vm_flags & VM_SPARC_ADI)
3197         prot |= _PAGE_MCD_4V;
3198 
3199     return __pgprot(prot);
3200 }
3201 EXPORT_SYMBOL(vm_get_page_prot);