// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/memblock.h>
#include <linux/proc_fs.h>
#include <linux/memory_hotplug.h>
#include <linux/initrd.h>
#include <linux/cpumask.h>
#include <linux/gfp.h>

#include <asm/asm.h>
#include <asm/bios_ebda.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820/api.h>
#include <asm/apic.h>
#include <asm/bugs.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/olpc_ofw.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/paravirt.h>
#include <asm/setup.h>
#include <asm/set_memory.h>
#include <asm/page_types.h>
#include <asm/cpu_entry_area.h>
#include <asm/init.h>
#include <asm/pgtable_areas.h>
#include <asm/numa.h>

#include "mm_internal.h"

unsigned long highstart_pfn, highend_pfn;

bool __read_mostly __vmalloc_start_set = false;

/*
 * Create a page middle directory table and put a pointer to it in the
 * given page global directory entry. In a non-PAE build this simply
 * returns the pgd entry itself, since the middle layer is folded.
 */
static pmd_t * __init one_md_table_init(pgd_t *pgd)
{
    p4d_t *p4d;
    pud_t *pud;
    pmd_t *pmd_table;

#ifdef CONFIG_X86_PAE
    if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
        pmd_table = (pmd_t *)alloc_low_page();
        paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
        set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
        p4d = p4d_offset(pgd, 0);
        pud = pud_offset(p4d, 0);
        BUG_ON(pmd_table != pmd_offset(pud, 0));

        return pmd_table;
    }
#endif
    p4d = p4d_offset(pgd, 0);
    pud = pud_offset(p4d, 0);
    pmd_table = pmd_offset(pud, 0);

    return pmd_table;
}

/*
 * Create a page table and place a pointer to it in a middle page
 * directory entry:
 */
static pte_t * __init one_page_table_init(pmd_t *pmd)
{
    if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
        pte_t *page_table = (pte_t *)alloc_low_page();

        paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
        set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
        BUG_ON(page_table != pte_offset_kernel(pmd, 0));
    }

    return pte_offset_kernel(pmd, 0);
}

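/*
 * Return the pmd entry in swapper_pg_dir that covers @vaddr, creating the
 * intermediate pmd table first if it is not present yet.
 */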
pmd_t * __init populate_extra_pmd(unsigned long vaddr)
{
    int pgd_idx = pgd_index(vaddr);
    int pmd_idx = pmd_index(vaddr);

    return one_md_table_init(swapper_pg_dir + pgd_idx) + pmd_idx;
}

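/*
 * Likewise, return the pte entry in swapper_pg_dir that covers @vaddr,
 * allocating the pmd and pte tables on the way if necessary.
 */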
pte_t * __init populate_extra_pte(unsigned long vaddr)
{
    int pte_idx = pte_index(vaddr);
    pmd_t *pmd;

    pmd = populate_extra_pmd(vaddr);
    return one_page_table_init(pmd) + pte_idx;
}

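/*
 * Count how many pmd entries in [start, end) fall inside the kmap fixmap
 * window; page_table_range_init() pre-allocates that many pte pages as one
 * contiguous block so page_table_kmap_check() can keep the tables linear.
 */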
static unsigned long __init
page_table_range_init_count(unsigned long start, unsigned long end)
{
    unsigned long count = 0;
#ifdef CONFIG_HIGHMEM
    int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
    int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
    int pgd_idx, pmd_idx;
    unsigned long vaddr;

    if (pmd_idx_kmap_begin == pmd_idx_kmap_end)
        return 0;

    vaddr = start;
    pgd_idx = pgd_index(vaddr);
    pmd_idx = pmd_index(vaddr);

    for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) {
        for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
                            pmd_idx++) {
            if ((vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin &&
                (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end)
                count++;
            vaddr += PMD_SIZE;
        }
        pmd_idx = 0;
    }
#endif
    return count;
}

static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
                       unsigned long vaddr, pte_t *lastpte,
                       void **adr)
{
#ifdef CONFIG_HIGHMEM
    /*
     * Something (the early fixmap) may already have put a pte
     * page here, which causes the page table allocation
     * to become nonlinear. Attempt to fix it, and if it
     * is still nonlinear then we have to BUG().
     */
    int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
    int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;

    if (pmd_idx_kmap_begin != pmd_idx_kmap_end
        && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
        && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) {
        pte_t *newpte;
        int i;

        BUG_ON(after_bootmem);
        newpte = *adr;
        for (i = 0; i < PTRS_PER_PTE; i++)
            set_pte(newpte + i, pte[i]);
        *adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE);

        paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
        set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
        BUG_ON(newpte != pte_offset_kernel(pmd, 0));
        __flush_tlb_all();

        paravirt_release_pte(__pa(pte) >> PAGE_SHIFT);
        pte = newpte;
    }
    BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1)
           && vaddr > fix_to_virt(FIX_KMAP_END)
           && lastpte && lastpte + PTRS_PER_PTE != pte);
#endif
    return pte;
}

/*
 * This function initializes a range of kernel virtual memory, creating
 * new bootmem page tables wherever they are missing in the given range.
 *
 * NOTE: The page tables are allocated contiguously in physical memory,
 * so we can cache the location of the first one and move along without
 * checking the pgd every time.
 */
static void __init
page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
{
    int pgd_idx, pmd_idx;
    unsigned long vaddr;
    pgd_t *pgd;
    pmd_t *pmd;
    pte_t *pte = NULL;
    unsigned long count = page_table_range_init_count(start, end);
    void *adr = NULL;

    if (count)
        adr = alloc_low_pages(count);

    vaddr = start;
    pgd_idx = pgd_index(vaddr);
    pmd_idx = pmd_index(vaddr);
    pgd = pgd_base + pgd_idx;

    for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
        pmd = one_md_table_init(pgd);
        pmd = pmd + pmd_index(vaddr);
        for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
                            pmd++, pmd_idx++) {
            pte = page_table_kmap_check(one_page_table_init(pmd),
                            pmd, vaddr, pte, &adr);

            vaddr += PMD_SIZE;
        }
        pmd_idx = 0;
    }
}

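/*
 * Nonzero if @addr lies inside the kernel image (from _text up to the end
 * of the init sections); such addresses get executable mappings below.
 */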
static inline int is_x86_32_kernel_text(unsigned long addr)
{
    if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
        return 1;
    return 0;
}

/*
 * This maps the physical memory to kernel virtual address space, a total
 * of max_low_pfn pages, by creating page tables starting from address
 * PAGE_OFFSET:
 */
unsigned long __init
kernel_physical_mapping_init(unsigned long start,
                 unsigned long end,
                 unsigned long page_size_mask,
                 pgprot_t prot)
{
    int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
    unsigned long last_map_addr = end;
    unsigned long start_pfn, end_pfn;
    pgd_t *pgd_base = swapper_pg_dir;
    int pgd_idx, pmd_idx, pte_ofs;
    unsigned long pfn;
    pgd_t *pgd;
    pmd_t *pmd;
    pte_t *pte;
    unsigned pages_2m, pages_4k;
    int mapping_iter;

    start_pfn = start >> PAGE_SHIFT;
    end_pfn = end >> PAGE_SHIFT;

    /*
     * The first iteration sets up the identity mapping with large or small
     * pages depending on use_pse, keeping the other attributes the same as
     * those set by the early code in head_32.S.
     *
     * The second iteration sets the attributes (NX, GLOBAL, ...) that are
     * actually desired for the kernel identity mapping.
     *
     * This two-pass mechanism conforms to the TLB application note, which
     * says:
     *
     *     "Software should not write to a paging-structure entry in a way
     *      that would change, for any linear address, both the page size
     *      and either the page frame or attributes."
     */
    mapping_iter = 1;

    if (!boot_cpu_has(X86_FEATURE_PSE))
        use_pse = 0;

repeat:
    pages_2m = pages_4k = 0;
    pfn = start_pfn;
    pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
    pgd = pgd_base + pgd_idx;
    for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
        pmd = one_md_table_init(pgd);

        if (pfn >= end_pfn)
            continue;
#ifdef CONFIG_X86_PAE
        pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
        pmd += pmd_idx;
#else
        pmd_idx = 0;
#endif
        for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn;
             pmd++, pmd_idx++) {
            unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;

            /*
             * Map with big pages if possible, otherwise
             * create normal page tables:
             */
            if (use_pse) {
                unsigned int addr2;
                pgprot_t prot = PAGE_KERNEL_LARGE;
                /*
                 * The first pass uses the same initial
                 * identity mapping attributes, plus _PAGE_PSE.
                 */
                pgprot_t init_prot =
                    __pgprot(PTE_IDENT_ATTR |
                         _PAGE_PSE);

                pfn &= PMD_MASK >> PAGE_SHIFT;
                addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
                    PAGE_OFFSET + PAGE_SIZE-1;

                if (is_x86_32_kernel_text(addr) ||
                    is_x86_32_kernel_text(addr2))
                    prot = PAGE_KERNEL_LARGE_EXEC;

                pages_2m++;
                if (mapping_iter == 1)
                    set_pmd(pmd, pfn_pmd(pfn, init_prot));
                else
                    set_pmd(pmd, pfn_pmd(pfn, prot));

                pfn += PTRS_PER_PTE;
                continue;
            }
            pte = one_page_table_init(pmd);

            pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
            pte += pte_ofs;
            for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
                 pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
                pgprot_t prot = PAGE_KERNEL;
                /*
                 * The first pass uses the same initial
                 * identity mapping attributes.
                 */
                pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);

                if (is_x86_32_kernel_text(addr))
                    prot = PAGE_KERNEL_EXEC;

                pages_4k++;
                if (mapping_iter == 1) {
                    set_pte(pte, pfn_pte(pfn, init_prot));
                    last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
                } else
                    set_pte(pte, pfn_pte(pfn, prot));
            }
        }
    }
    if (mapping_iter == 1) {
        /*
         * update direct mapping page count only in the first
         * iteration.
         */
        update_page_count(PG_LEVEL_2M, pages_2m);
        update_page_count(PG_LEVEL_4K, pages_4k);

        /*
         * Do a local, global TLB flush, which will flush the previous
         * mappings present in both the small and large page TLBs.
         */
        __flush_tlb_all();

        /*
         * Second iteration will set the actual desired PTE attributes.
         */
        mapping_iter = 2;
        goto repeat;
    }
    return last_map_addr;
}

#ifdef CONFIG_HIGHMEM
static void __init permanent_kmaps_init(pgd_t *pgd_base)
{
    unsigned long vaddr = PKMAP_BASE;

    page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);

    pkmap_page_table = virt_to_kpte(vaddr);
}

void __init add_highpages_with_active_regions(int nid,
             unsigned long start_pfn, unsigned long end_pfn)
{
    phys_addr_t start, end;
    u64 i;

    for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &start, &end, NULL) {
        unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
                        start_pfn, end_pfn);
        unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
                          start_pfn, end_pfn);
        for ( ; pfn < e_pfn; pfn++)
            if (pfn_valid(pfn))
                free_highmem_page(pfn_to_page(pfn));
    }
}
#else
static inline void permanent_kmaps_init(pgd_t *pgd_base)
{
}
#endif /* CONFIG_HIGHMEM */

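/*
 * Keep the boot-time initial_page_table in sync with swapper_pg_dir: copy
 * over the kernel-space pgd entries and the low identity-mapping entries.
 */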
void __init sync_initial_page_table(void)
{
    clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
            swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
            KERNEL_PGD_PTRS);

    /*
     * sync back low identity map too.  It is used for example
     * in the 32-bit EFI stub.
     */
    clone_pgd_range(initial_page_table,
            swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
            min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
}

void __init native_pagetable_init(void)
{
    unsigned long pfn, va;
    pgd_t *pgd, *base = swapper_pg_dir;
    p4d_t *p4d;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;

    /*
     * Remove any mappings which extend past the end of physical
     * memory from the boot-time page table.
     * In the virtual address space there should be at least two pages
     * between VMALLOC_END and pkmap or fixmap, according to the
     * definition of VMALLOC_END, and max_low_pfn is set to the physical
     * address of VMALLOC_END. If the initial memory mapping did its job
     * right, we should find a pte in use near max_low_pfn, or a pmd that
     * is not present.
     */
    for (pfn = max_low_pfn; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
        va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
        pgd = base + pgd_index(va);
        if (!pgd_present(*pgd))
            break;

        p4d = p4d_offset(pgd, va);
        pud = pud_offset(p4d, va);
        pmd = pmd_offset(pud, va);
        if (!pmd_present(*pmd))
            break;

        /* should not be large page here */
        if (pmd_large(*pmd)) {
            pr_warn("try to clear pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx, but pmd is big page and is not using pte !\n",
                pfn, pmd, __pa(pmd));
            BUG_ON(1);
        }

        pte = pte_offset_kernel(pmd, va);
        if (!pte_present(*pte))
            break;

        printk(KERN_DEBUG "clearing pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx pte: %p pte phys: %lx\n",
                pfn, pmd, __pa(pmd), pte, __pa(pte));
        pte_clear(NULL, va, pte);
    }
    paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
    paging_init();
}

/*
 * Build a proper pagetable for the kernel mappings.  Up until this
 * point, we've been running on some set of pagetables constructed by
 * the boot process.
 *
 * If we're booting on native hardware, this will be a pagetable
 * constructed in arch/x86/kernel/head_32.S.  The root of the
 * pagetable will be swapper_pg_dir.
 *
 * If we're booting paravirtualized under a hypervisor, then there are
 * more options: we may already be running PAE, and the pagetable may
 * or may not be based in swapper_pg_dir.  In any case,
 * paravirt_pagetable_init() will set up swapper_pg_dir
 * appropriately for the rest of the initialization to work.
 *
 * In general, pagetable_init() assumes that the pagetable may already
 * be partially populated, and so it avoids stomping on any existing
 * mappings.
 */
void __init early_ioremap_page_table_range_init(void)
{
    pgd_t *pgd_base = swapper_pg_dir;
    unsigned long vaddr, end;

    /*
     * Fixed mappings, only the page table structure has to be
     * created - mappings will be set by set_fixmap():
     */
    vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
    end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
    page_table_range_init(vaddr, end, pgd_base);
    early_ioremap_reset();
}

static void __init pagetable_init(void)
{
    pgd_t *pgd_base = swapper_pg_dir;

    permanent_kmaps_init(pgd_base);
}

#define DEFAULT_PTE_MASK ~(_PAGE_NX | _PAGE_GLOBAL)
/* Bits supported by the hardware: */
pteval_t __supported_pte_mask __read_mostly = DEFAULT_PTE_MASK;
/* Bits allowed in normal kernel mappings: */
pteval_t __default_kernel_pte_mask __read_mostly = DEFAULT_PTE_MASK;
EXPORT_SYMBOL_GPL(__supported_pte_mask);
/* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */
EXPORT_SYMBOL(__default_kernel_pte_mask);

/* user-defined highmem size */
static unsigned int highmem_pages = -1;

/*
 * highmem=size forces highmem to be exactly 'size' bytes.
 * This works even on boxes that have no highmem otherwise.
 * This also works to reduce highmem size on bigger boxes.
 */
static int __init parse_highmem(char *arg)
{
    if (!arg)
        return -EINVAL;

    highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
    return 0;
}
early_param("highmem", parse_highmem);

#define MSG_HIGHMEM_TOO_BIG \
    "highmem size (%luMB) is bigger than pages available (%luMB)!\n"

#define MSG_LOWMEM_TOO_SMALL \
    "highmem size (%luMB) results in <64MB lowmem, ignoring it!\n"
/*
 * All of RAM fits into lowmem - but if the user wants highmem
 * artificially, via the highmem=x boot parameter, then create
 * it:
 */
static void __init lowmem_pfn_init(void)
{
    /* max_low_pfn is 0, we already have early_res support */
    max_low_pfn = max_pfn;

    if (highmem_pages == -1)
        highmem_pages = 0;
#ifdef CONFIG_HIGHMEM
    if (highmem_pages >= max_pfn) {
        printk(KERN_ERR MSG_HIGHMEM_TOO_BIG,
            pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
        highmem_pages = 0;
    }
    if (highmem_pages) {
        if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) {
            printk(KERN_ERR MSG_LOWMEM_TOO_SMALL,
                pages_to_mb(highmem_pages));
            highmem_pages = 0;
        }
        max_low_pfn -= highmem_pages;
    }
#else
    if (highmem_pages)
        printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
#endif
}

#define MSG_HIGHMEM_TOO_SMALL \
    "only %luMB highmem pages available, ignoring highmem size of %luMB!\n"

#define MSG_HIGHMEM_TRIMMED \
    "Warning: only 4GB will be used. Use a HIGHMEM64G enabled kernel!\n"
/*
 * We have more RAM than fits into lowmem - we try to put it into
 * highmem, also taking the highmem=x boot parameter into account:
 */
static void __init highmem_pfn_init(void)
{
    max_low_pfn = MAXMEM_PFN;

    if (highmem_pages == -1)
        highmem_pages = max_pfn - MAXMEM_PFN;

    if (highmem_pages + MAXMEM_PFN < max_pfn)
        max_pfn = MAXMEM_PFN + highmem_pages;

    if (highmem_pages + MAXMEM_PFN > max_pfn) {
        printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL,
            pages_to_mb(max_pfn - MAXMEM_PFN),
            pages_to_mb(highmem_pages));
        highmem_pages = 0;
    }
#ifndef CONFIG_HIGHMEM
    /* Maximum memory usable is what is directly addressable */
    printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20);
    if (max_pfn > MAX_NONPAE_PFN)
        printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
    else
        printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
    max_pfn = MAXMEM_PFN;
#else /* !CONFIG_HIGHMEM */
#ifndef CONFIG_HIGHMEM64G
    if (max_pfn > MAX_NONPAE_PFN) {
        max_pfn = MAX_NONPAE_PFN;
        printk(KERN_WARNING MSG_HIGHMEM_TRIMMED);
    }
#endif /* !CONFIG_HIGHMEM64G */
#endif /* !CONFIG_HIGHMEM */
}

/*
 * Determine low and high memory ranges:
 */
void __init find_low_pfn_range(void)
{
    /* it could update max_pfn */

    if (max_pfn <= MAXMEM_PFN)
        lowmem_pfn_init();
    else
        highmem_pfn_init();
}

#ifndef CONFIG_NUMA
void __init initmem_init(void)
{
#ifdef CONFIG_HIGHMEM
    highstart_pfn = highend_pfn = max_pfn;
    if (max_pfn > max_low_pfn)
        highstart_pfn = max_low_pfn;
    printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
        pages_to_mb(highend_pfn - highstart_pfn));
    high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
    high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif

    memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);

#ifdef CONFIG_FLATMEM
    max_mapnr = IS_ENABLED(CONFIG_HIGHMEM) ? highend_pfn : max_low_pfn;
#endif
    __vmalloc_start_set = true;

    printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
            pages_to_mb(max_low_pfn));

    setup_bootmem_allocator();
}
#endif /* !CONFIG_NUMA */

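/* Report how much low RAM exists and how much of it has been mapped so far. */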
void __init setup_bootmem_allocator(void)
{
    printk(KERN_INFO "  mapped low ram: 0 - %08lx\n",
         max_pfn_mapped<<PAGE_SHIFT);
    printk(KERN_INFO "  low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
}

/*
 * paging_init() sets up the page tables - note that the first 8MB are
 * already mapped by head.S.
 *
 * This routine also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
void __init paging_init(void)
{
    pagetable_init();

    __flush_tlb_all();

    /*
     * NOTE: at this point the bootmem allocator is fully available.
     */
    olpc_dt_build_devicetree();
    sparse_init();
    zone_sizes_init();
}

/*
 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 * and also on some strange 486's. All 586+'s are OK. This used to involve
 * black magic jumps to work around some nasty CPU bugs, but fortunately the
 * switch to using exceptions got rid of all that.
 */
static void __init test_wp_bit(void)
{
    char z = 0;

    printk(KERN_INFO "Checking if this processor honours the WP bit even in supervisor mode...");

    __set_fixmap(FIX_WP_TEST, __pa_symbol(empty_zero_page), PAGE_KERNEL_RO);

    if (copy_to_kernel_nofault((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) {
        clear_fixmap(FIX_WP_TEST);
        printk(KERN_CONT "Ok.\n");
        return;
    }

    printk(KERN_CONT "No.\n");
    panic("Linux doesn't support CPUs with broken WP.");
}

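/*
 * Hand all remaining low and high memory over to the page allocator and
 * perform the final consistency checks on the 32-bit address-space layout.
 */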
void __init mem_init(void)
{
    pci_iommu_alloc();

#ifdef CONFIG_FLATMEM
    BUG_ON(!mem_map);
#endif
    /*
     * With CONFIG_DEBUG_PAGEALLOC, initialization of highmem pages has to
     * be done before memblock_free_all(). Memblock uses free low memory
     * for temporary data (see find_range_array()) and for this purpose can
     * use pages that were already passed to the buddy allocator and are
     * hence marked as not accessible in the page tables when compiled with
     * CONFIG_DEBUG_PAGEALLOC. Otherwise the order of initialization is not
     * important here.
     */
    set_highmem_pages_init();

    /* this will put all low memory onto the freelists */
    memblock_free_all();

    after_bootmem = 1;
    x86_init.hyper.init_after_bootmem();

    /*
     * Check boundaries twice: Some fundamental inconsistencies can
     * be detected at build time already.
     */
#define __FIXADDR_TOP (-PAGE_SIZE)
#ifdef CONFIG_HIGHMEM
    BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE  > FIXADDR_START);
    BUILD_BUG_ON(VMALLOC_END            > PKMAP_BASE);
#endif
#define high_memory (-128UL << 20)
    BUILD_BUG_ON(VMALLOC_START          >= VMALLOC_END);
#undef high_memory
#undef __FIXADDR_TOP

#ifdef CONFIG_HIGHMEM
    BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE    > FIXADDR_START);
    BUG_ON(VMALLOC_END              > PKMAP_BASE);
#endif
    BUG_ON(VMALLOC_START                >= VMALLOC_END);
    BUG_ON((unsigned long)high_memory       > VMALLOC_START);

    test_wp_bit();
}

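/* Set once mark_rodata_ro() has made the kernel text and rodata read-only. */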
int kernel_set_to_readonly __read_mostly;

static void mark_nxdata_nx(void)
{
    /*
     * When this is called, init has already been executed and released,
     * so everything past _etext should be NX.
     */
    unsigned long start = PFN_ALIGN(_etext);
    /*
     * This comes from the is_x86_32_kernel_text() upper limit, rounded up
     * to a huge-page boundary because large pages may have been used there:
     */
    unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;

    if (__supported_pte_mask & _PAGE_NX)
        printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
    set_memory_nx(start, size >> PAGE_SHIFT);
}

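/*
 * Write-protect the kernel text and read-only data, then mark the rest of
 * the kernel data non-executable via mark_nxdata_nx().
 */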
void mark_rodata_ro(void)
{
    unsigned long start = PFN_ALIGN(_text);
    unsigned long size = (unsigned long)__end_rodata - start;

    set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
    pr_info("Write protecting kernel text and read-only data: %luk\n",
        size >> 10);

    kernel_set_to_readonly = 1;

#ifdef CONFIG_CPA_DEBUG
    pr_info("Testing CPA: Reverting %lx-%lx\n", start, start + size);
    set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);

    pr_info("Testing CPA: write protecting again\n");
    set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
#endif
    mark_nxdata_nx();
    if (__supported_pte_mask & _PAGE_NX)
        debug_checkwx();
}