/*
 * sparse memory mappings.
 */
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>

#include "internal.h"
#include <asm/dma.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>

/*
 * Permanent SPARSEMEM data:
 *
 * 1) mem_section   - memory sections, mem_map's for valid memory
 */
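/*
 * With CONFIG_SPARSEMEM_EXTREME this is a two-level table: an array of
 * root pointers, each root holding SECTIONS_PER_ROOT mem_sections and
 * allocated on demand (see sparse_index_init() below).  Otherwise the
 * whole table is allocated statically.
 */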
#ifdef CONFIG_SPARSEMEM_EXTREME
struct mem_section *mem_section[NR_SECTION_ROOTS]
    ____cacheline_internodealigned_in_smp;
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
    ____cacheline_internodealigned_in_smp;
#endif
EXPORT_SYMBOL(mem_section);

#ifdef NODE_NOT_IN_PAGE_FLAGS
/*
 * If we did not store the node number in the page then we have to
 * do a lookup in the section_to_node_table in order to find which
 * node the page belongs to.
 */
#if MAX_NUMNODES <= 256
static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#else
static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#endif

int page_to_nid(const struct page *page)
{
    return section_to_node_table[page_to_section(page)];
}
EXPORT_SYMBOL(page_to_nid);

static void set_section_nid(unsigned long section_nr, int nid)
{
    section_to_node_table[section_nr] = nid;
}
#else /* !NODE_NOT_IN_PAGE_FLAGS */
static inline void set_section_nid(unsigned long section_nr, int nid)
{
}
#endif

#ifdef CONFIG_SPARSEMEM_EXTREME
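/*
 * Allocate one root's worth of mem_section structures for @nid: use the
 * slab allocator once it is up, otherwise fall back to memblock.
 */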
static noinline struct mem_section __ref *sparse_index_alloc(int nid)
{
    struct mem_section *section = NULL;
    unsigned long array_size = SECTIONS_PER_ROOT *
                   sizeof(struct mem_section);

    if (slab_is_available()) {
        if (node_state(nid, N_HIGH_MEMORY))
            section = kzalloc_node(array_size, GFP_KERNEL, nid);
        else
            section = kzalloc(array_size, GFP_KERNEL);
    } else {
        section = memblock_virt_alloc_node(array_size, nid);
    }

    return section;
}

static int __meminit sparse_index_init(unsigned long section_nr, int nid)
{
    unsigned long root = SECTION_NR_TO_ROOT(section_nr);
    struct mem_section *section;

    if (mem_section[root])
        return -EEXIST;

    section = sparse_index_alloc(nid);
    if (!section)
        return -ENOMEM;

    mem_section[root] = section;

    return 0;
}
#else /* !SPARSEMEM_EXTREME */
static inline int sparse_index_init(unsigned long section_nr, int nid)
{
    return 0;
}
#endif

#ifdef CONFIG_SPARSEMEM_EXTREME
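/*
 * Map a mem_section pointer back to its section number.  With
 * SPARSEMEM_EXTREME the roots are separate allocations, so scan them to
 * find the one that contains @ms.
 */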
int __section_nr(struct mem_section* ms)
{
    unsigned long root_nr;
    struct mem_section* root;

    for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
        root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
        if (!root)
            continue;

        if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
             break;
    }

    VM_BUG_ON(root_nr == NR_SECTION_ROOTS);

    return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
}
#else
int __section_nr(struct mem_section* ms)
{
    return (int)(ms - mem_section[0]);
}
#endif

/*
 * During early boot, before section_mem_map is used for an actual
 * mem_map, we use section_mem_map to store the section's NUMA
 * node.  This keeps us from having to use another data structure.  The
 * node information is cleared just before we store the real mem_map.
 */
static inline unsigned long sparse_encode_early_nid(int nid)
{
    return (nid << SECTION_NID_SHIFT);
}

static inline int sparse_early_nid(struct mem_section *section)
{
    return (section->section_mem_map >> SECTION_NID_SHIFT);
}

/* Validate the physical addressing limitations of the model */
void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
                        unsigned long *end_pfn)
{
    unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);

    /*
     * Sanity checks - do not allow an architecture to pass
     * in larger pfns than the maximum scope of sparsemem:
     */
    if (*start_pfn > max_sparsemem_pfn) {
        mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
            "Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
            *start_pfn, *end_pfn, max_sparsemem_pfn);
        WARN_ON_ONCE(1);
        *start_pfn = max_sparsemem_pfn;
        *end_pfn = max_sparsemem_pfn;
    } else if (*end_pfn > max_sparsemem_pfn) {
        mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
            "End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
            *start_pfn, *end_pfn, max_sparsemem_pfn);
        WARN_ON_ONCE(1);
        *end_pfn = max_sparsemem_pfn;
    }
}

/* Record a memory area against a node. */
void __init memory_present(int nid, unsigned long start, unsigned long end)
{
    unsigned long pfn;

    start &= PAGE_SECTION_MASK;
    mminit_validate_memmodel_limits(&start, &end);
    for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
        unsigned long section = pfn_to_section_nr(pfn);
        struct mem_section *ms;

        sparse_index_init(section, nid);
        set_section_nid(section, nid);

        ms = __nr_to_section(section);
        if (!ms->section_mem_map)
            ms->section_mem_map = sparse_encode_early_nid(nid) |
                            SECTION_MARKED_PRESENT;
    }
}

/*
 * Only used by the i386 NUMA architectures, but relatively
 * generic code.
 */
unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
                             unsigned long end_pfn)
{
    unsigned long pfn;
    unsigned long nr_pages = 0;

    mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
    for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
        if (nid != early_pfn_to_nid(pfn))
            continue;

        if (pfn_present(pfn))
            nr_pages += PAGES_PER_SECTION;
    }

    return nr_pages * sizeof(struct page);
}

/*
 * Subtle, we encode the real pfn into the mem_map such that
 * the identity pfn - section_mem_map will return the actual
 * physical page frame number.
 */
static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
{
    return (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
}

/*
 * Decode mem_map from the coded memmap
 */
struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
{
    /* mask off the extra low bits of information */
    coded_mem_map &= SECTION_MAP_MASK;
    return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
}
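
/*
 * The encoding above biases the stored pointer so that adding an
 * absolute pfn to the (masked) section_mem_map yields that pfn's
 * struct page; sparse_decode_mem_map() undoes the bias for the
 * section's first pfn and so recovers the original mem_map pointer.
 */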

static int __meminit sparse_init_one_section(struct mem_section *ms,
        unsigned long pnum, struct page *mem_map,
        unsigned long *pageblock_bitmap)
{
    if (!present_section(ms))
        return -EINVAL;

    ms->section_mem_map &= ~SECTION_MAP_MASK;
    ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
                            SECTION_HAS_MEM_MAP;
    ms->pageblock_flags = pageblock_bitmap;

    return 1;
}

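/*
 * Size in bytes of one section's pageblock-flags bitmap, rounded up to
 * a whole number of unsigned longs.
 */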
unsigned long usemap_size(void)
{
    unsigned long size_bytes;
    size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8;
    size_bytes = roundup(size_bytes, sizeof(unsigned long));
    return size_bytes;
}

#ifdef CONFIG_MEMORY_HOTPLUG
static unsigned long *__kmalloc_section_usemap(void)
{
    return kmalloc(usemap_size(), GFP_KERNEL);
}
#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_MEMORY_HOTREMOVE
static unsigned long * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
                     unsigned long size)
{
    unsigned long goal, limit;
    unsigned long *p;
    int nid;
    /*
     * A page may contain usemaps for other sections preventing the
     * page being freed and making a section unremovable while
     * other sections referencing the usemap remain active. Similarly,
     * a pgdat can prevent a section being removed. If section A
     * contains a pgdat and section B contains the usemap, both
     * sections become inter-dependent. This allocates usemaps
     * from the same section as the pgdat where possible to avoid
     * this problem.
     */
    goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
    limit = goal + (1UL << PA_SECTION_SHIFT);
    nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
again:
    p = memblock_virt_alloc_try_nid_nopanic(size,
                        SMP_CACHE_BYTES, goal, limit,
                        nid);
    if (!p && limit) {
        limit = 0;
        goto again;
    }
    return p;
}

static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
{
    unsigned long usemap_snr, pgdat_snr;
    static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
    static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
    struct pglist_data *pgdat = NODE_DATA(nid);
    int usemap_nid;

    usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
    pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
    if (usemap_snr == pgdat_snr)
        return;

    if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
        /* skip redundant message */
        return;

    old_usemap_snr = usemap_snr;
    old_pgdat_snr = pgdat_snr;

    usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
    if (usemap_nid != nid) {
        pr_info("node %d must be removed before removing section %ld\n",
            nid, usemap_snr);
        return;
    }
    /*
     * There is a circular dependency.  Some platforms allow an
     * un-removable section because they will just gather other
     * removable sections for dynamic partitioning.  Just report the
     * un-removable section's number here.
     */
    pr_info("Section %ld and %ld (node %d) have a circular dependency on usemap and pgdat allocations\n",
        usemap_snr, pgdat_snr, nid);
}
#else
static unsigned long * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
                     unsigned long size)
{
    return memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
}

static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
{
}
#endif /* CONFIG_MEMORY_HOTREMOVE */

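/*
 * Allocate the usemaps for all present sections in [pnum_begin, pnum_end)
 * on @nodeid in one block and hand out usemap_size()-sized pieces of it.
 */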
static void __init sparse_early_usemaps_alloc_node(void *data,
                 unsigned long pnum_begin,
                 unsigned long pnum_end,
                 unsigned long usemap_count, int nodeid)
{
    void *usemap;
    unsigned long pnum;
    unsigned long **usemap_map = (unsigned long **)data;
    int size = usemap_size();

    usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
                              size * usemap_count);
    if (!usemap) {
        pr_warn("%s: allocation failed\n", __func__);
        return;
    }

    for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
        if (!present_section_nr(pnum))
            continue;
        usemap_map[pnum] = usemap;
        usemap += size;
        check_usemap_section_nr(nodeid, usemap_map[pnum]);
    }
}

#ifndef CONFIG_SPARSEMEM_VMEMMAP
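/*
 * Allocate the struct page array (mem_map) for one section, preferring
 * memory on @nid: try the architecture's remap area first (if any),
 * then memblock above MAX_DMA_ADDRESS.
 */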
struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
{
    struct page *map;
    unsigned long size;

    map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
    if (map)
        return map;

    size = PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
    map = memblock_virt_alloc_try_nid(size,
                      PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
                      BOOTMEM_ALLOC_ACCESSIBLE, nid);
    return map;
}
void __init sparse_mem_maps_populate_node(struct page **map_map,
                      unsigned long pnum_begin,
                      unsigned long pnum_end,
                      unsigned long map_count, int nodeid)
{
    void *map;
    unsigned long pnum;
    unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;

    map = alloc_remap(nodeid, size * map_count);
    if (map) {
        for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
            if (!present_section_nr(pnum))
                continue;
            map_map[pnum] = map;
            map += size;
        }
        return;
    }

    size = PAGE_ALIGN(size);
    map = memblock_virt_alloc_try_nid(size * map_count,
                      PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
                      BOOTMEM_ALLOC_ACCESSIBLE, nodeid);
    if (map) {
        for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
            if (!present_section_nr(pnum))
                continue;
            map_map[pnum] = map;
            map += size;
        }
        return;
    }

    /* fallback */
    for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
        struct mem_section *ms;

        if (!present_section_nr(pnum))
            continue;
        map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
        if (map_map[pnum])
            continue;
        ms = __nr_to_section(pnum);
        pr_err("%s: sparsemem memory map backing failed, some memory will not be available\n",
               __func__);
        ms->section_mem_map = 0;
    }
}
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */

#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
static void __init sparse_early_mem_maps_alloc_node(void *data,
                 unsigned long pnum_begin,
                 unsigned long pnum_end,
                 unsigned long map_count, int nodeid)
{
    struct page **map_map = (struct page **)data;
    sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
                     map_count, nodeid);
}
#else
static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
{
    struct page *map;
    struct mem_section *ms = __nr_to_section(pnum);
    int nid = sparse_early_nid(ms);

    map = sparse_mem_map_populate(pnum, nid);
    if (map)
        return map;

    pr_err("%s: sparsemem memory map backing failed, some memory will not be available\n",
           __func__);
    ms->section_mem_map = 0;
    return NULL;
}
#endif

void __weak __meminit vmemmap_populate_print_last(void)
{
}

/**
 *  alloc_usemap_and_memmap - memory allocation for pageblock flags and vmemmap
 *  @alloc_func: per-node allocation callback
 *  @data: usemap_map for pageblock flags or map_map for vmemmap
 */
static void __init alloc_usemap_and_memmap(void (*alloc_func)
                    (void *, unsigned long, unsigned long,
                    unsigned long, int), void *data)
{
    unsigned long pnum;
    unsigned long map_count;
    int nodeid_begin = 0;
    unsigned long pnum_begin = 0;

    for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
        struct mem_section *ms;

        if (!present_section_nr(pnum))
            continue;
        ms = __nr_to_section(pnum);
        nodeid_begin = sparse_early_nid(ms);
        pnum_begin = pnum;
        break;
    }
    map_count = 1;
    for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
        struct mem_section *ms;
        int nodeid;

        if (!present_section_nr(pnum))
            continue;
        ms = __nr_to_section(pnum);
        nodeid = sparse_early_nid(ms);
        if (nodeid == nodeid_begin) {
            map_count++;
            continue;
        }
        /* ok, we need to take care of from pnum_begin to pnum - 1 */
        alloc_func(data, pnum_begin, pnum,
                        map_count, nodeid_begin);
        /* new start, update count etc. */
        nodeid_begin = nodeid;
        pnum_begin = pnum;
        map_count = 1;
    }
    /* ok, last chunk */
    alloc_func(data, pnum_begin, NR_MEM_SECTIONS,
                        map_count, nodeid_begin);
}

/*
 * Allocate the accumulated non-linear sections, allocate a mem_map
 * for each and record the physical to section mapping.
 */
void __init sparse_init(void)
{
    unsigned long pnum;
    struct page *map;
    unsigned long *usemap;
    unsigned long **usemap_map;
    int size;
#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
    int size2;
    struct page **map_map;
#endif

    /* see include/linux/mmzone.h 'struct mem_section' definition */
    BUILD_BUG_ON(!is_power_of_2(sizeof(struct mem_section)));

    /* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
    set_pageblock_order();

    /*
     * The mem_map for a section is allocated with big pages (2M on
     * 64-bit x86) while a usemap is only about 24 bytes.  Allocating
     * 2M (2M-aligned) and 24 bytes in turn pushes every following 2M
     * allocation onto the next 2M boundary, so a big system ends up
     * with a lot of holes.  Instead, try to keep the 2M allocations
     * contiguous by batching them per node.
     *
     * powerpc needs to call sparse_init_one_section right after each
     * sparse_early_mem_map_alloc, so allocate usemap_map first.
     */
    size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
    usemap_map = memblock_virt_alloc(size, 0);
    if (!usemap_map)
        panic("can not allocate usemap_map\n");
    alloc_usemap_and_memmap(sparse_early_usemaps_alloc_node,
                            (void *)usemap_map);

#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
    size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
    map_map = memblock_virt_alloc(size2, 0);
    if (!map_map)
        panic("can not allocate map_map\n");
    alloc_usemap_and_memmap(sparse_early_mem_maps_alloc_node,
                            (void *)map_map);
#endif

    for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
        if (!present_section_nr(pnum))
            continue;

        usemap = usemap_map[pnum];
        if (!usemap)
            continue;

#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
        map = map_map[pnum];
#else
        map = sparse_early_mem_map_alloc(pnum);
#endif
        if (!map)
            continue;

        sparse_init_one_section(__nr_to_section(pnum), pnum, map,
                                usemap);
    }

    vmemmap_populate_print_last();

#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
    memblock_free_early(__pa(map_map), size2);
#endif
    memblock_free_early(__pa(usemap_map), size);
}

#ifdef CONFIG_MEMORY_HOTPLUG
#ifdef CONFIG_SPARSEMEM_VMEMMAP
static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
{
    /* This will make the necessary allocations eventually. */
    return sparse_mem_map_populate(pnum, nid);
}
static void __kfree_section_memmap(struct page *memmap)
{
    unsigned long start = (unsigned long)memmap;
    unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);

    vmemmap_free(start, end);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
static void free_map_bootmem(struct page *memmap)
{
    unsigned long start = (unsigned long)memmap;
    unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);

    vmemmap_free(start, end);
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
#else
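/*
 * Without vmemmap, a hot-added section's mem_map comes straight from
 * the page allocator, falling back to vmalloc() when a large enough
 * contiguous area is not available.
 */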
static struct page *__kmalloc_section_memmap(void)
{
    struct page *page, *ret;
    unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;

    page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
    if (page)
        goto got_map_page;

    ret = vmalloc(memmap_size);
    if (ret)
        goto got_map_ptr;

    return NULL;
got_map_page:
    ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
got_map_ptr:

    return ret;
}

static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
{
    return __kmalloc_section_memmap();
}

static void __kfree_section_memmap(struct page *memmap)
{
    if (is_vmalloc_addr(memmap))
        vfree(memmap);
    else
        free_pages((unsigned long)memmap,
               get_order(sizeof(struct page) * PAGES_PER_SECTION));
}

#ifdef CONFIG_MEMORY_HOTREMOVE
static void free_map_bootmem(struct page *memmap)
{
    unsigned long maps_section_nr, removing_section_nr, i;
    unsigned long magic, nr_pages;
    struct page *page = virt_to_page(memmap);

    nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
        >> PAGE_SHIFT;

    for (i = 0; i < nr_pages; i++, page++) {
        magic = (unsigned long) page->lru.next;

        BUG_ON(magic == NODE_INFO);

        maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
        removing_section_nr = page->private;

        /*
         * When this function is called, the section being removed has
         * already been logically offlined, so all of its pages are
         * isolated from the page allocator.  If the removed section's
         * memmap lives in that same section, it must not be freed:
         * the page allocator could hand it out again even though the
         * memory is about to be removed physically.
         */
        if (maps_section_nr != removing_section_nr)
            put_page_bootmem(page);
    }
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
#endif /* CONFIG_SPARSEMEM_VMEMMAP */

/*
 * returns the number of sections whose mem_maps were properly
 * set.  If this is <=0, then that means that the passed-in
 * map was not consumed and must be freed.
 */
int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn)
{
    unsigned long section_nr = pfn_to_section_nr(start_pfn);
    struct pglist_data *pgdat = zone->zone_pgdat;
    struct mem_section *ms;
    struct page *memmap;
    unsigned long *usemap;
    unsigned long flags;
    int ret;

    /*
     * No locking for this, because it does its own locking and,
     * besides, it does a kmalloc (which can sleep).
     */
    ret = sparse_index_init(section_nr, pgdat->node_id);
    if (ret < 0 && ret != -EEXIST)
        return ret;
    memmap = kmalloc_section_memmap(section_nr, pgdat->node_id);
    if (!memmap)
        return -ENOMEM;
    usemap = __kmalloc_section_usemap();
    if (!usemap) {
        __kfree_section_memmap(memmap);
        return -ENOMEM;
    }

    pgdat_resize_lock(pgdat, &flags);

    ms = __pfn_to_section(start_pfn);
    if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
        ret = -EEXIST;
        goto out;
    }

    memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION);

    ms->section_mem_map |= SECTION_MARKED_PRESENT;

    ret = sparse_init_one_section(ms, section_nr, memmap, usemap);

out:
    pgdat_resize_unlock(pgdat, &flags);
    if (ret <= 0) {
        kfree(usemap);
        __kfree_section_memmap(memmap);
    }
    return ret;
}

#ifdef CONFIG_MEMORY_HOTREMOVE
#ifdef CONFIG_MEMORY_FAILURE
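/*
 * Clear the HWPoison flag on the pages of a section that is being
 * removed and drop them from the global poisoned-page count, so the
 * accounting stays correct once the memmap is gone.
 */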
static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
{
    int i;

    if (!memmap)
        return;

    for (i = 0; i < nr_pages; i++) {
        if (PageHWPoison(&memmap[i])) {
            atomic_long_sub(1, &num_poisoned_pages);
            ClearPageHWPoison(&memmap[i]);
        }
    }
}
#else
static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
{
}
#endif

static void free_section_usemap(struct page *memmap, unsigned long *usemap)
{
    struct page *usemap_page;

    if (!usemap)
        return;

    usemap_page = virt_to_page(usemap);
    /*
     * Check to see if allocation came from hot-plug-add
     */
    if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
        kfree(usemap);
        if (memmap)
            __kfree_section_memmap(memmap);
        return;
    }

    /*
     * The usemap came from bootmem. This is packed with other usemaps
     * on the section which has pgdat at boot time. Just keep it as is now.
     */

    if (memmap)
        free_map_bootmem(memmap);
}

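/*
 * Detach the section's mem_map and usemap under the pgdat resize lock,
 * clear any hardware-poison accounting for its pages, and then free the
 * backing allocations.
 */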
void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
        unsigned long map_offset)
{
    struct page *memmap = NULL;
    unsigned long *usemap = NULL, flags;
    struct pglist_data *pgdat = zone->zone_pgdat;

    pgdat_resize_lock(pgdat, &flags);
    if (ms->section_mem_map) {
        usemap = ms->pageblock_flags;
        memmap = sparse_decode_mem_map(ms->section_mem_map,
                        __section_nr(ms));
        ms->section_mem_map = 0;
        ms->pageblock_flags = NULL;
    }
    pgdat_resize_unlock(pgdat, &flags);

    clear_hwpoisoned_pages(memmap + map_offset,
            PAGES_PER_SECTION - map_offset);
    free_section_usemap(memmap, usemap);
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
#endif /* CONFIG_MEMORY_HOTPLUG */