#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/page_ext.h>
#include <linux/memory.h>
#include <linux/vmalloc.h>
#include <linux/kmemleak.h>
#include <linux/page_owner.h>
#include <linux/page_idle.h>

/*
 * struct page extension
 *
 * This is the feature to manage memory for extended data per page.
 *
 * Until now, we had to modify struct page itself to store extra data per
 * page. This requires rebuilding the kernel, which is a time-consuming
 * process, and sometimes a rebuild is impossible due to third-party module
 * dependencies. Finally, enlarging struct page could cause unwanted changes
 * in system behaviour.
 *
 * This feature is intended to overcome the above problems. It allocates
 * memory for the extended per-page data in a separate place rather than in
 * struct page itself. This memory can be accessed through the accessor
 * functions provided by this code. During the boot process, it checks
 * whether allocating a huge chunk of memory is needed or not. If not, it
 * avoids allocating memory at all. Thanks to this, the feature can be
 * included in the kernel by default, avoiding rebuilds and the problems
 * mentioned above.
 *
 * To make this work well, there are two callbacks for clients. One is the
 * need callback, which is mandatory if the user wants to avoid useless
 * memory allocation at boot time. The other is the optional init callback,
 * which is used to do proper initialization after memory is allocated.
 *
 * The need callback is used to decide whether the extended memory
 * allocation is needed or not. Sometimes users want to deactivate some
 * features for a given boot and the extra memory would be unnecessary. In
 * that case, to avoid allocating a huge chunk of memory, each client
 * expresses its need for extra memory through the need callback. If any of
 * the need callbacks returns true, someone needs extra memory and the page
 * extension core should allocate memory for page extension. If none of the
 * need callbacks returns true, no memory is needed for this boot and the
 * page extension core can skip the allocation. As a result, no memory is
 * wasted.
 *
 * When a need callback returns true, page_ext checks whether extra memory
 * was requested through the size field in struct page_ext_operations. If it
 * is non-zero, extra space is allocated for each page_ext entry and the
 * offset of that space is returned to the user through the offset field in
 * struct page_ext_operations.
 *
 * The init callback is used to do proper initialization after page
 * extension is completely initialized. On sparse memory systems, the extra
 * memory is allocated some time later than the memmap, so the lifetime of
 * memory for page extension isn't the same as that of the memmap for
 * struct page. Therefore, clients can't store extra data until page
 * extension is initialized, even though pages are already allocated and in
 * use. This could leave the per-page extra data in an inconsistent state,
 * so clients can use this callback to initialize that state correctly.
 */
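
/*
 * A minimal sketch of a hypothetical client, not part of the kernel: the
 * config symbol CONFIG_PAGE_EXT_EXAMPLE and the names example_per_page,
 * example_need, example_init and example_ops are illustrative only. The
 * client asks for extra per-page space via ->size and, once booted, finds
 * that space ->offset bytes past each struct page_ext entry.
 */
#ifdef CONFIG_PAGE_EXT_EXAMPLE
struct example_per_page {
	unsigned long counter;	/* 8 extra bytes tracked for every page */
};

static bool example_need(void)
{
	/* A real client would typically check a boot parameter here. */
	return true;
}

static void example_init(void)
{
	/* Called once page extension storage is fully set up. */
}

struct page_ext_operations example_ops = {
	.size = sizeof(struct example_per_page),
	.need = example_need,
	.init = example_init,
};
/* A real client would also be listed in page_ext_ops[] below. */
#endif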

static struct page_ext_operations *page_ext_ops[] = {
	&debug_guardpage_ops,
#ifdef CONFIG_PAGE_POISONING
	&page_poisoning_ops,
#endif
#ifdef CONFIG_PAGE_OWNER
	&page_owner_ops,
#endif
#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
	&page_idle_ops,
#endif
};

static unsigned long total_usage;
static unsigned long extra_mem;

static bool __init invoke_need_callbacks(void)
{
	int i;
	int entries = ARRAY_SIZE(page_ext_ops);
	bool need = false;

	for (i = 0; i < entries; i++) {
		if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
			page_ext_ops[i]->offset = sizeof(struct page_ext) +
						extra_mem;
			extra_mem += page_ext_ops[i]->size;
			need = true;
		}
	}

	return need;
}
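
/*
 * Worked example (illustrative, assuming exactly two clients whose need
 * callbacks return true): if the first has .size = 8 and the second has
 * .size = 16, the loop above sets their offsets to sizeof(struct page_ext)
 * and sizeof(struct page_ext) + 8 respectively, and extra_mem becomes 24,
 * so each page_ext entry occupies sizeof(struct page_ext) + 24 bytes.
 */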

static void __init invoke_init_callbacks(void)
{
	int i;
	int entries = ARRAY_SIZE(page_ext_ops);

	for (i = 0; i < entries; i++) {
		if (page_ext_ops[i]->init)
			page_ext_ops[i]->init();
	}
}

static unsigned long get_entry_size(void)
{
	return sizeof(struct page_ext) + extra_mem;
}

static inline struct page_ext *get_entry(void *base, unsigned long index)
{
	return base + get_entry_size() * index;
}
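
/*
 * A minimal sketch, not part of the kernel, of how the hypothetical client
 * above would reach its per-page data: look up the page_ext entry for the
 * page, then step example_ops.offset bytes into it. get_example_data() is
 * an illustrative name only.
 */
#ifdef CONFIG_PAGE_EXT_EXAMPLE
static inline struct example_per_page *get_example_data(struct page *page)
{
	struct page_ext *page_ext = lookup_page_ext(page);

	/* page_ext may not be allocated yet during early boot/hotplug. */
	if (unlikely(!page_ext))
		return NULL;

	return (void *)page_ext + example_ops.offset;
}
#endif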

#if !defined(CONFIG_SPARSEMEM)

void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
{
	pgdat->node_page_ext = NULL;
}

struct page_ext *lookup_page_ext(struct page *page)
{
	unsigned long pfn = page_to_pfn(page);
	unsigned long index;
	struct page_ext *base;

	base = NODE_DATA(page_to_nid(page))->node_page_ext;
#if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING)
	/*
	 * The sanity checks the page allocator does upon freeing a
	 * page can reach here before the page_ext arrays are
	 * allocated when feeding a range of pages to the allocator
	 * for the first time during bootup or memory hotplug.
	 *
	 * This check is also necessary for ensuring page poisoning
	 * works as expected when enabled.
	 */
	if (unlikely(!base))
		return NULL;
#endif
	index = pfn - round_down(node_start_pfn(page_to_nid(page)),
					MAX_ORDER_NR_PAGES);
	return get_entry(base, index);
}

static int __init alloc_node_page_ext(int nid)
{
	struct page_ext *base;
	unsigned long table_size;
	unsigned long nr_pages;

	nr_pages = NODE_DATA(nid)->node_spanned_pages;
	if (!nr_pages)
		return 0;

	/*
	 * Need extra space if the node range is not aligned to
	 * MAX_ORDER_NR_PAGES. When the page allocator's buddy algorithm
	 * checks a buddy's status, the range could fall outside the exact
	 * node range.
	 */
	if (!IS_ALIGNED(node_start_pfn(nid), MAX_ORDER_NR_PAGES) ||
		!IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
		nr_pages += MAX_ORDER_NR_PAGES;

	table_size = get_entry_size() * nr_pages;

	base = memblock_virt_alloc_try_nid_nopanic(
			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
			BOOTMEM_ALLOC_ACCESSIBLE, nid);
	if (!base)
		return -ENOMEM;
	NODE_DATA(nid)->node_page_ext = base;
	total_usage += table_size;
	return 0;
}

void __init page_ext_init_flatmem(void)
{
	int nid, fail;

	if (!invoke_need_callbacks())
		return;

	for_each_online_node(nid) {
		fail = alloc_node_page_ext(nid);
		if (fail)
			goto fail;
	}
	pr_info("allocated %lu bytes of page_ext\n", total_usage);
	invoke_init_callbacks();
	return;

fail:
	pr_crit("allocation of page_ext failed.\n");
	panic("Out of memory");
}

#else /* CONFIG_SPARSEMEM */

struct page_ext *lookup_page_ext(struct page *page)
{
	unsigned long pfn = page_to_pfn(page);
	struct mem_section *section = __pfn_to_section(pfn);
#if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING)
	/*
	 * The sanity checks the page allocator does upon freeing a
	 * page can reach here before the page_ext arrays are
	 * allocated when feeding a range of pages to the allocator
	 * for the first time during bootup or memory hotplug.
	 *
	 * This check is also necessary for ensuring page poisoning
	 * works as expected when enabled.
	 */
	if (!section->page_ext)
		return NULL;
#endif
	return get_entry(section->page_ext, pfn);
}

static void *__meminit alloc_page_ext(size_t size, int nid)
{
	gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN;
	void *addr = NULL;

	addr = alloc_pages_exact_nid(nid, size, flags);
	if (addr) {
		kmemleak_alloc(addr, size, 1, flags);
		return addr;
	}

	if (node_state(nid, N_HIGH_MEMORY))
		addr = vzalloc_node(size, nid);
	else
		addr = vzalloc(size);

	return addr;
}

static int __meminit init_section_page_ext(unsigned long pfn, int nid)
{
	struct mem_section *section;
	struct page_ext *base;
	unsigned long table_size;

	section = __pfn_to_section(pfn);

	if (section->page_ext)
		return 0;

	table_size = get_entry_size() * PAGES_PER_SECTION;
	base = alloc_page_ext(table_size, nid);

	/*
	 * The value stored in section->page_ext is (base - pfn * entry_size)
	 * and it does not point to the memory block allocated above,
	 * which would cause kmemleak false positives.
	 */
	kmemleak_not_leak(base);

	if (!base) {
		pr_err("page ext allocation failure\n");
		return -ENOMEM;
	}

	/*
	 * The passed "pfn" may not be aligned to SECTION.  For the calculation
	 * we need to apply a mask.
	 */
	pfn &= PAGE_SECTION_MASK;
	section->page_ext = (void *)base - get_entry_size() * pfn;
	total_usage += table_size;
	return 0;
}
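
/*
 * Worked example (illustrative): for a section whose first pfn is S, the
 * assignment above stores base - S * entry_size in section->page_ext, so
 * lookup_page_ext() can index with the absolute pfn:
 *
 *   get_entry(section->page_ext, pfn)
 *	== (base - S * entry_size) + pfn * entry_size
 *	== base + (pfn - S) * entry_size
 *
 * which is exactly that pfn's entry in this section's table.
 */
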
#ifdef CONFIG_MEMORY_HOTPLUG
static void free_page_ext(void *addr)
{
	if (is_vmalloc_addr(addr)) {
		vfree(addr);
	} else {
		struct page *page = virt_to_page(addr);
		size_t table_size;

		table_size = get_entry_size() * PAGES_PER_SECTION;

		BUG_ON(PageReserved(page));
		free_pages_exact(addr, table_size);
	}
}

static void __free_page_ext(unsigned long pfn)
{
	struct mem_section *ms;
	struct page_ext *base;

	ms = __pfn_to_section(pfn);
	if (!ms || !ms->page_ext)
		return;
	base = get_entry(ms->page_ext, pfn);
	free_page_ext(base);
	ms->page_ext = NULL;
}

static int __meminit online_page_ext(unsigned long start_pfn,
				unsigned long nr_pages,
				int nid)
{
	unsigned long start, end, pfn;
	int fail = 0;

	start = SECTION_ALIGN_DOWN(start_pfn);
	end = SECTION_ALIGN_UP(start_pfn + nr_pages);

	if (nid == -1) {
		/*
		 * In this case, "nid" already exists and contains valid
		 * memory. "start_pfn" passed to us is a pfn which is an
		 * argument for online_pages(), so it should exist.
		 */
		nid = pfn_to_nid(start_pfn);
		VM_BUG_ON(!node_state(nid, N_ONLINE));
	}

	for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
		if (!pfn_present(pfn))
			continue;
		fail = init_section_page_ext(pfn, nid);
	}
	if (!fail)
		return 0;

	/* rollback */
	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
		__free_page_ext(pfn);

	return -ENOMEM;
}

static int __meminit offline_page_ext(unsigned long start_pfn,
				unsigned long nr_pages, int nid)
{
	unsigned long start, end, pfn;

	start = SECTION_ALIGN_DOWN(start_pfn);
	end = SECTION_ALIGN_UP(start_pfn + nr_pages);

	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
		__free_page_ext(pfn);
	return 0;
}

static int __meminit page_ext_callback(struct notifier_block *self,
				unsigned long action, void *arg)
{
	struct memory_notify *mn = arg;
	int ret = 0;

	switch (action) {
	case MEM_GOING_ONLINE:
		ret = online_page_ext(mn->start_pfn,
				mn->nr_pages, mn->status_change_nid);
		break;
	case MEM_OFFLINE:
		offline_page_ext(mn->start_pfn,
				mn->nr_pages, mn->status_change_nid);
		break;
	case MEM_CANCEL_ONLINE:
		offline_page_ext(mn->start_pfn,
				mn->nr_pages, mn->status_change_nid);
		break;
	case MEM_GOING_OFFLINE:
		break;
	case MEM_ONLINE:
	case MEM_CANCEL_OFFLINE:
		break;
	}

	return notifier_from_errno(ret);
}

#endif

void __init page_ext_init(void)
{
	unsigned long pfn;
	int nid;

	if (!invoke_need_callbacks())
		return;

	for_each_node_state(nid, N_MEMORY) {
		unsigned long start_pfn, end_pfn;

		start_pfn = node_start_pfn(nid);
		end_pfn = node_end_pfn(nid);
		/*
		 * start_pfn and end_pfn may not be aligned to SECTION, and
		 * page->flags of out-of-node pages is not initialized, so
		 * we scan [start_pfn, the largest section-aligned pfn <
		 * end_pfn) here.
		 */
		for (pfn = start_pfn; pfn < end_pfn;
			pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {

			if (!pfn_valid(pfn))
				continue;
			/*
			 * Nodes' pfn ranges can overlap. Some architectures
			 * have a node layout such as
			 * -------------pfn-------------->
			 * N0 | N1 | N2 | N0 | N1 | N2 | ...
			 *
			 * Take DEFERRED_STRUCT_PAGE_INIT into account.
			 */
			if (early_pfn_to_nid(pfn) != nid)
				continue;
			if (init_section_page_ext(pfn, nid))
				goto oom;
		}
	}
	hotplug_memory_notifier(page_ext_callback, 0);
	pr_info("allocated %lu bytes of page_ext\n", total_usage);
	invoke_init_callbacks();
	return;

oom:
	panic("Out of memory");
}

void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
{
}

#endif