/*
 * mm/percpu-vm.c - vmalloc area based chunk allocation
 *
 * Copyright (C) 2010       SUSE Linux Products GmbH
 * Copyright (C) 2010       Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 *
 * Chunks are mapped into vmalloc areas and populated page by page.
 * This is the default chunk allocator.
 */
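
/*
 * Population (pcpu_populate_chunk()) proceeds in three steps:
 * pcpu_alloc_pages() allocates the backing pages for every unit,
 * pcpu_map_pages() maps them into the chunk's vmalloc area and
 * pcpu_post_map_flush() flushes the cache over the mapped region.
 * Depopulation (pcpu_depopulate_chunk()) reverses the sequence:
 * pcpu_pre_unmap_flush(), pcpu_unmap_pages(), then pcpu_free_pages();
 * the TLB flush is left to vmalloc's lazy purging.
 */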

static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
                    unsigned int cpu, int page_idx)
{
    /* must not be used on pre-mapped chunk */
    WARN_ON(chunk->immutable);

    return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx));
}

/**
 * pcpu_get_pages - get temp pages array
 * @chunk: chunk of interest
 *
 * Returns pointer to array of pointers to struct page which can be indexed
 * with pcpu_page_idx().  Note that there is only one array and accesses
 * should be serialized by pcpu_alloc_mutex.
 *
 * RETURNS:
 * Pointer to temp pages array on success.
 */
static struct page **pcpu_get_pages(struct pcpu_chunk *chunk)
{
    static struct page **pages;
    size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);

    lockdep_assert_held(&pcpu_alloc_mutex);

    if (!pages)
        pages = pcpu_mem_zalloc(pages_size);
    return pages;
}

/**
 * pcpu_free_pages - free pages which were allocated for @chunk
 * @chunk: chunk pages were allocated for
 * @pages: array of pages to be freed, indexed by pcpu_page_idx()
 * @page_start: page index of the first page to be freed
 * @page_end: page index of the last page to be freed + 1
 *
 * Free pages [@page_start,@page_end) in @pages for all units.
 * The pages were allocated for @chunk.
 */
static void pcpu_free_pages(struct pcpu_chunk *chunk,
                struct page **pages, int page_start, int page_end)
{
    unsigned int cpu;
    int i;

    for_each_possible_cpu(cpu) {
        for (i = page_start; i < page_end; i++) {
            struct page *page = pages[pcpu_page_idx(cpu, i)];

            if (page)
                __free_page(page);
        }
    }
}

/**
 * pcpu_alloc_pages - allocates pages for @chunk
 * @chunk: target chunk
 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
 * @page_start: page index of the first page to be allocated
 * @page_end: page index of the last page to be allocated + 1
 *
 * Allocate pages [@page_start,@page_end) into @pages for all units.
 * The allocation is for @chunk.  Percpu core doesn't care about the
 * content of @pages and will pass it verbatim to pcpu_map_pages().
 */
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
                struct page **pages, int page_start, int page_end)
{
    const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
    unsigned int cpu, tcpu;
    int i;

    for_each_possible_cpu(cpu) {
        for (i = page_start; i < page_end; i++) {
            struct page **pagep = &pages[pcpu_page_idx(cpu, i)];

            *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
            if (!*pagep)
                goto err;
        }
    }
    return 0;

err:
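    /* undo the partial allocation for the cpu that failed */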
    while (--i >= page_start)
        __free_page(pages[pcpu_page_idx(cpu, i)]);

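    /* then free the fully allocated range of every earlier cpu */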
    for_each_possible_cpu(tcpu) {
        if (tcpu == cpu)
            break;
        for (i = page_start; i < page_end; i++)
            __free_page(pages[pcpu_page_idx(tcpu, i)]);
    }
    return -ENOMEM;
}

/**
 * pcpu_pre_unmap_flush - flush cache prior to unmapping
 * @chunk: chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages in [@page_start,@page_end) of @chunk are about to be
 * unmapped.  Flush cache.  As each flushing trial can be very
 * expensive, issue the flush on the whole region at once rather than
 * doing it for each cpu.  This could be overkill but is more
 * scalable.
 */
static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
                 int page_start, int page_end)
{
    flush_cache_vunmap(
        pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
        pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

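/* unmap nr_pages pages starting at addr; cache/TLB flushing is left to the callers */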
static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
{
    unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT);
}

/**
 * pcpu_unmap_pages - unmap pages out of a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array which can be used to pass information to free
 * @page_start: page index of the first page to unmap
 * @page_end: page index of the last page to unmap + 1
 *
 * For each cpu, unmap pages [@page_start,@page_end) out of @chunk.
 * Corresponding elements in @pages were cleared by the caller and can
 * be used to carry information to pcpu_free_pages() which will be
 * called after all unmaps are finished.  The caller should call
 * proper pre/post flush functions.
 */
static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
                 struct page **pages, int page_start, int page_end)
{
    unsigned int cpu;
    int i;

    for_each_possible_cpu(cpu) {
        for (i = page_start; i < page_end; i++) {
            struct page *page;

            page = pcpu_chunk_page(chunk, cpu, i);
            WARN_ON(!page);
            pages[pcpu_page_idx(cpu, i)] = page;
        }
        __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start),
                   page_end - page_start);
    }
}

/**
 * pcpu_post_unmap_tlb_flush - flush TLB after unmapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been unmapped.  Flush
 * TLB for the regions.  This can be skipped if the area is to be
 * returned to vmalloc as vmalloc will handle TLB flushing lazily.
 *
 * As with pcpu_pre_unmap_flush(), the TLB flush is also issued at once
 * for the whole region.
 */
static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
                      int page_start, int page_end)
{
    flush_tlb_kernel_range(
        pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
        pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

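/* map nr_pages pages at addr with PAGE_KERNEL; flushing is left to the callers */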
static int __pcpu_map_pages(unsigned long addr, struct page **pages,
                int nr_pages)
{
    return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT,
                    PAGE_KERNEL, pages);
}

/**
 * pcpu_map_pages - map pages into a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array containing pages to be mapped
 * @page_start: page index of the first page to map
 * @page_end: page index of the last page to map + 1
 *
 * For each cpu, map pages [@page_start,@page_end) into @chunk.  The
 * caller is responsible for calling pcpu_post_map_flush() after all
 * mappings are complete.
 *
 * This function is responsible for setting up whatever is necessary for
 * reverse lookup (addr -> chunk).
 */
static int pcpu_map_pages(struct pcpu_chunk *chunk,
              struct page **pages, int page_start, int page_end)
{
    unsigned int cpu, tcpu;
    int i, err;

    for_each_possible_cpu(cpu) {
        err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start),
                       &pages[pcpu_page_idx(cpu, page_start)],
                       page_end - page_start);
        if (err < 0)
            goto err;

        for (i = page_start; i < page_end; i++)
            pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)],
                        chunk);
    }
    return 0;
err:
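    /* unmap whatever was mapped for the cpus before the failing one */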
    for_each_possible_cpu(tcpu) {
        if (tcpu == cpu)
            break;
        __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
                   page_end - page_start);
    }
    pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
    return err;
}

/**
 * pcpu_post_map_flush - flush cache after mapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been mapped.  Flush
 * cache.
 *
 * As with pcpu_pre_unmap_flush(), the cache flush is also issued at
 * once for the whole region.
 */
static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
                int page_start, int page_end)
{
    flush_cache_vmap(
        pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
        pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

/**
 * pcpu_populate_chunk - populate and map an area of a pcpu_chunk
 * @chunk: chunk of interest
 * @page_start: the start page
 * @page_end: the end page
 *
 * For each cpu, populate and map pages [@page_start,@page_end) into
 * @chunk.
 *
 * CONTEXT:
 * pcpu_alloc_mutex, does GFP_KERNEL allocation.
 */
static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
                   int page_start, int page_end)
{
    struct page **pages;

    pages = pcpu_get_pages(chunk);
    if (!pages)
        return -ENOMEM;

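    /* allocate backing pages, then map them; free the new pages again if mapping fails */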
    if (pcpu_alloc_pages(chunk, pages, page_start, page_end))
        return -ENOMEM;

    if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
        pcpu_free_pages(chunk, pages, page_start, page_end);
        return -ENOMEM;
    }
    pcpu_post_map_flush(chunk, page_start, page_end);

    return 0;
}

/**
 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk
 * @chunk: chunk to depopulate
 * @page_start: the start page
 * @page_end: the end page
 *
 * For each cpu, depopulate and unmap pages [@page_start,@page_end)
 * from @chunk.
 *
 * CONTEXT:
 * pcpu_alloc_mutex.
 */
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
                  int page_start, int page_end)
{
    struct page **pages;

    /*
     * If control reaches here, there must have been at least one
     * successful population attempt so the temp pages array must
     * be available now.
     */
    pages = pcpu_get_pages(chunk);
    BUG_ON(!pages);

    /* unmap and free */
    pcpu_pre_unmap_flush(chunk, page_start, page_end);

    pcpu_unmap_pages(chunk, pages, page_start, page_end);

    /* no need to flush tlb, vmalloc will handle it lazily */

    pcpu_free_pages(chunk, pages, page_start, page_end);
}

static struct pcpu_chunk *pcpu_create_chunk(void)
{
    struct pcpu_chunk *chunk;
    struct vm_struct **vms;

    chunk = pcpu_alloc_chunk();
    if (!chunk)
        return NULL;

    vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes,
                pcpu_nr_groups, pcpu_atom_size);
    if (!vms) {
        pcpu_free_chunk(chunk);
        return NULL;
    }

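    /*
     * Derive the chunk base address by backing the first group's vm
     * area start off by that group's offset into the chunk.
     */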
    chunk->data = vms;
    chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];
    return chunk;
}

static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
{
    if (chunk && chunk->data)
        pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
    pcpu_free_chunk(chunk);
}

static struct page *pcpu_addr_to_page(void *addr)
{
    return vmalloc_to_page(addr);
}

static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
{
    /* no extra restriction */
    return 0;
}