// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/memblock.h>
#include <linux/page_ext.h>
#include <linux/memory.h>
#include <linux/vmalloc.h>
#include <linux/kmemleak.h>
#include <linux/page_owner.h>
#include <linux/page_idle.h>
#include <linux/page_table_check.h>

/*
 * struct page extension
 *
 * This feature manages memory for extended data per page.
 *
 * Without it, we would have to modify struct page itself to store extra data
 * per page. That requires rebuilding the kernel, which is time consuming, and
 * is sometimes impossible due to third party module dependencies. Moreover,
 * enlarging struct page could cause unwanted changes in system behaviour.
 *
 * This feature is intended to overcome the above problems. It allocates
 * memory for extended data per page in a separate place rather than in
 * struct page itself, and that memory is reached through the accessor
 * functions provided by this code. During boot it checks whether the large
 * allocation is needed at all; if not, it allocates nothing. Thanks to this,
 * the feature can be built into the kernel by default, avoiding rebuilds and
 * the problems described above.
 *
 * To make this work, there are two callbacks for clients. One is the need
 * callback, which is mandatory if the client wants to avoid a useless memory
 * allocation at boot time. The other, the init callback, is optional and is
 * used to perform proper initialization after the memory is allocated.
 *
 * The need callback decides whether the extended memory allocation is needed.
 * Some features may be deactivated for a given boot, in which case the extra
 * memory would be unnecessary. To avoid allocating a huge chunk of memory in
 * that case, each client expresses its need for extra memory through the need
 * callback. If any need callback returns true, someone needs the extra memory
 * and the page extension core allocates it. If none of the need callbacks
 * return true, the memory isn't needed for this boot and the core skips the
 * allocation entirely, so no memory is wasted.
 *
 * When a need callback returns true, page_ext checks whether the client
 * requests extra space through the size field of struct page_ext_operations.
 * If it is non-zero, that much extra space is reserved in each page_ext entry
 * and its location is returned to the client through the offset field of
 * struct page_ext_operations.
 *
 * The init callback performs proper initialization after page extension is
 * completely set up. On sparse memory systems the extra memory is allocated
 * some time after the memmap, i.e. the lifetime of the page extension memory
 * differs from that of the memmap for struct page. Clients therefore can't
 * store extra data until page extension is initialized, even though pages may
 * already be allocated and freely used. That could leave the per-page extra
 * data in an inconsistent state, so clients can use this callback to set it
 * up correctly.
 */
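
/*
 * A minimal sketch of how a hypothetical client would hook into page_ext,
 * assuming a feature called "page_foo" that wants one unsigned long of
 * per-page state. The names page_foo, CONFIG_PAGE_FOO, page_foo_ops,
 * need_page_foo, init_page_foo and get_page_foo are illustrative only and
 * are not part of this file; real clients such as page_owner follow the
 * same pattern.
 */
#if 0	/* illustrative example, not built */
struct page_foo {
    unsigned long state;
};

/* Called at boot: claim extra space only when the feature is in use. */
static bool need_page_foo(void)
{
    return IS_ENABLED(CONFIG_PAGE_FOO);
}

/* Called once page_ext storage exists: safe to touch entries from here on. */
static void init_page_foo(void)
{
}

static struct page_ext_operations page_foo_ops = {
    .size = sizeof(struct page_foo),    /* extra bytes per page_ext entry */
    .need = need_page_foo,              /* .offset is filled in by invoke_need_callbacks() */
    .init = init_page_foo,
};

/* Accessor: the client's data lives at its offset inside each page_ext entry. */
static struct page_foo *get_page_foo(struct page_ext *page_ext)
{
    return (void *)page_ext + page_foo_ops.offset;
}
#endif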

#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
static bool need_page_idle(void)
{
    return true;
}
static struct page_ext_operations page_idle_ops __initdata = {
    .need = need_page_idle,
};
#endif

static struct page_ext_operations *page_ext_ops[] __initdata = {
#ifdef CONFIG_PAGE_OWNER
    &page_owner_ops,
#endif
#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
    &page_idle_ops,
#endif
#ifdef CONFIG_PAGE_TABLE_CHECK
    &page_table_check_ops,
#endif
};

unsigned long page_ext_size = sizeof(struct page_ext);

static unsigned long total_usage;

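/*
 * Ask every registered client whether it needs extra per-page storage for
 * this boot. Each client that does is handed its offset into the page_ext
 * entry, and page_ext_size grows by that client's size. Returns true if at
 * least one client needs the storage to be allocated.
 */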
static bool __init invoke_need_callbacks(void)
{
    int i;
    int entries = ARRAY_SIZE(page_ext_ops);
    bool need = false;

    for (i = 0; i < entries; i++) {
        if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
            page_ext_ops[i]->offset = page_ext_size;
            page_ext_size += page_ext_ops[i]->size;
            need = true;
        }
    }

    return need;
}

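/*
 * Let every client initialize its per-page state now that the page_ext
 * storage is fully allocated.
 */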
static void __init invoke_init_callbacks(void)
{
    int i;
    int entries = ARRAY_SIZE(page_ext_ops);

    for (i = 0; i < entries; i++) {
        if (page_ext_ops[i]->init)
            page_ext_ops[i]->init();
    }
}

#ifndef CONFIG_SPARSEMEM
void __init page_ext_init_flatmem_late(void)
{
    invoke_init_callbacks();
}
#endif

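/*
 * Entries are page_ext_size bytes apart (struct page_ext plus all client
 * extensions), so the entry for a given index is found with plain pointer
 * arithmetic from the per-node or per-section base.
 */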
static inline struct page_ext *get_entry(void *base, unsigned long index)
{
    return base + page_ext_size * index;
}

#ifndef CONFIG_SPARSEMEM

void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
{
    pgdat->node_page_ext = NULL;
}

struct page_ext *lookup_page_ext(const struct page *page)
{
    unsigned long pfn = page_to_pfn(page);
    unsigned long index;
    struct page_ext *base;

    base = NODE_DATA(page_to_nid(page))->node_page_ext;
    /*
     * The sanity checks the page allocator does upon freeing a
     * page can reach here before the page_ext arrays are
     * allocated when feeding a range of pages to the allocator
     * for the first time during bootup or memory hotplug.
     */
    if (unlikely(!base))
        return NULL;
    index = pfn - round_down(node_start_pfn(page_to_nid(page)),
                    MAX_ORDER_NR_PAGES);
    return get_entry(base, index);
}

static int __init alloc_node_page_ext(int nid)
{
    struct page_ext *base;
    unsigned long table_size;
    unsigned long nr_pages;

    nr_pages = NODE_DATA(nid)->node_spanned_pages;
    if (!nr_pages)
        return 0;

    /*
     * Need extra space if node range is not aligned with
     * MAX_ORDER_NR_PAGES. When page allocator's buddy algorithm
     * checks buddy's status, range could be out of exact node range.
     */
    if (!IS_ALIGNED(node_start_pfn(nid), MAX_ORDER_NR_PAGES) ||
        !IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
        nr_pages += MAX_ORDER_NR_PAGES;

    table_size = page_ext_size * nr_pages;

    base = memblock_alloc_try_nid(
            table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
            MEMBLOCK_ALLOC_ACCESSIBLE, nid);
    if (!base)
        return -ENOMEM;
    NODE_DATA(nid)->node_page_ext = base;
    total_usage += table_size;
    return 0;
}

void __init page_ext_init_flatmem(void)
{
    int nid, fail;

    if (!invoke_need_callbacks())
        return;

    for_each_online_node(nid) {
        fail = alloc_node_page_ext(nid);
        if (fail)
            goto fail;
    }
    pr_info("allocated %ld bytes of page_ext\n", total_usage);
    return;

fail:
    pr_crit("allocation of page_ext failed.\n");
    panic("Out of memory");
}

#else /* CONFIG_SPARSEMEM */

struct page_ext *lookup_page_ext(const struct page *page)
{
    unsigned long pfn = page_to_pfn(page);
    struct mem_section *section = __pfn_to_section(pfn);
    /*
     * The sanity checks the page allocator does upon freeing a
     * page can reach here before the page_ext arrays are
     * allocated when feeding a range of pages to the allocator
     * for the first time during bootup or memory hotplug.
     */
    if (!section->page_ext)
        return NULL;
    return get_entry(section->page_ext, pfn);
}

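/*
 * Prefer node-local, physically contiguous pages for a section's table;
 * fall back to vzalloc_node() when that fails, e.g. due to fragmentation.
 */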
static void *__meminit alloc_page_ext(size_t size, int nid)
{
    gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN;
    void *addr = NULL;

    addr = alloc_pages_exact_nid(nid, size, flags);
    if (addr) {
        kmemleak_alloc(addr, size, 1, flags);
        return addr;
    }

    addr = vzalloc_node(size, nid);

    return addr;
}

static int __meminit init_section_page_ext(unsigned long pfn, int nid)
{
    struct mem_section *section;
    struct page_ext *base;
    unsigned long table_size;

    section = __pfn_to_section(pfn);

    if (section->page_ext)
        return 0;

    table_size = page_ext_size * PAGES_PER_SECTION;
    base = alloc_page_ext(table_size, nid);

    /*
     * The value stored in section->page_ext is (base - pfn)
     * and it does not point to the memory block allocated above,
     * causing kmemleak false positives.
     */
    kmemleak_not_leak(base);

    if (!base) {
        pr_err("page ext allocation failure\n");
        return -ENOMEM;
    }

    /*
     * The passed "pfn" may not be aligned to SECTION.  For the calculation
     * we need to apply a mask.
     */
    pfn &= PAGE_SECTION_MASK;
    section->page_ext = (void *)base - page_ext_size * pfn;
    total_usage += table_size;
    return 0;
}

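/*
 * Undo alloc_page_ext(): the address itself tells us which allocator
 * produced it.
 */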
static void free_page_ext(void *addr)
{
    if (is_vmalloc_addr(addr)) {
        vfree(addr);
    } else {
        struct page *page = virt_to_page(addr);
        size_t table_size;

        table_size = page_ext_size * PAGES_PER_SECTION;

        BUG_ON(PageReserved(page));
        kmemleak_free(addr);
        free_pages_exact(addr, table_size);
    }
}

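/* Free the page_ext table covering the section that contains @pfn. */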
static void __free_page_ext(unsigned long pfn)
{
    struct mem_section *ms;
    struct page_ext *base;

    ms = __pfn_to_section(pfn);
    if (!ms || !ms->page_ext)
        return;
    base = get_entry(ms->page_ext, pfn);
    free_page_ext(base);
    ms->page_ext = NULL;
}

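/*
 * Allocate page_ext storage for every section touched by a hot-added range.
 * If any section fails, roll back the ones allocated so far.
 */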
static int __meminit online_page_ext(unsigned long start_pfn,
                unsigned long nr_pages,
                int nid)
{
    unsigned long start, end, pfn;
    int fail = 0;

    start = SECTION_ALIGN_DOWN(start_pfn);
    end = SECTION_ALIGN_UP(start_pfn + nr_pages);

    if (nid == NUMA_NO_NODE) {
        /*
         * In this case, "nid" already exists and contains valid memory.
         * "start_pfn" passed to us is a pfn which is an argument for
         * online_pages(), and start_pfn should exist.
         */
        nid = pfn_to_nid(start_pfn);
        VM_BUG_ON(!node_online(nid));
    }

    for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION)
        fail = init_section_page_ext(pfn, nid);
    if (!fail)
        return 0;

    /* rollback */
    for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
        __free_page_ext(pfn);

    return -ENOMEM;
}

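/* Free the page_ext storage for every section touched by the range. */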
static int __meminit offline_page_ext(unsigned long start_pfn,
                unsigned long nr_pages, int nid)
{
    unsigned long start, end, pfn;

    start = SECTION_ALIGN_DOWN(start_pfn);
    end = SECTION_ALIGN_UP(start_pfn + nr_pages);

    for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
        __free_page_ext(pfn);
    return 0;
}

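/*
 * Memory hotplug notifier: allocate page_ext storage while a range is going
 * online (so it exists before the pages are exposed to the allocator), and
 * free it once the range is offline or the onlining is cancelled.
 */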
static int __meminit page_ext_callback(struct notifier_block *self,
                   unsigned long action, void *arg)
{
    struct memory_notify *mn = arg;
    int ret = 0;

    switch (action) {
    case MEM_GOING_ONLINE:
        ret = online_page_ext(mn->start_pfn,
                   mn->nr_pages, mn->status_change_nid);
        break;
    case MEM_OFFLINE:
        offline_page_ext(mn->start_pfn,
                mn->nr_pages, mn->status_change_nid);
        break;
    case MEM_CANCEL_ONLINE:
        offline_page_ext(mn->start_pfn,
                mn->nr_pages, mn->status_change_nid);
        break;
    case MEM_GOING_OFFLINE:
        break;
    case MEM_ONLINE:
    case MEM_CANCEL_OFFLINE:
        break;
    }

    return notifier_from_errno(ret);
}

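/*
 * Boot-time initialization for SPARSEMEM: if any client needs page_ext,
 * allocate storage for every present section of every memory node, register
 * the hotplug notifier and finally let the clients initialize their state.
 */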
void __init page_ext_init(void)
{
    unsigned long pfn;
    int nid;

    if (!invoke_need_callbacks())
        return;

    for_each_node_state(nid, N_MEMORY) {
        unsigned long start_pfn, end_pfn;

        start_pfn = node_start_pfn(nid);
        end_pfn = node_end_pfn(nid);
        /*
         * start_pfn and end_pfn may not be aligned to SECTION and the
         * page->flags of out-of-node pages are not initialized. So we
         * scan [start_pfn, the biggest section's pfn < end_pfn) here.
         */
        for (pfn = start_pfn; pfn < end_pfn;
            pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {

            if (!pfn_valid(pfn))
                continue;
            /*
             * Nodes' pfns can overlap. Some architectures can have a
             * node layout such as
             * -------------pfn-------------->
             * N0 | N1 | N2 | N0 | N1 | N2|....
             */
            if (pfn_to_nid(pfn) != nid)
                continue;
            if (init_section_page_ext(pfn, nid))
                goto oom;
            cond_resched();
        }
    }
    hotplug_memory_notifier(page_ext_callback, 0);
    pr_info("allocated %ld bytes of page_ext\n", total_usage);
    invoke_init_callbacks();
    return;

oom:
    panic("Out of memory");
}

void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
{
}

#endif