0001 /*
0002  *  linux/mm/vmalloc.c
0003  *
0004  *  Copyright (C) 1993  Linus Torvalds
0005  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
0006  *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
0007  *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
0008  *  Numa awareness, Christoph Lameter, SGI, June 2005
0009  */
0010 
0011 #include <linux/vmalloc.h>
0012 #include <linux/mm.h>
0013 #include <linux/module.h>
0014 #include <linux/highmem.h>
0015 #include <linux/sched.h>
0016 #include <linux/slab.h>
0017 #include <linux/spinlock.h>
0018 #include <linux/interrupt.h>
0019 #include <linux/proc_fs.h>
0020 #include <linux/seq_file.h>
0021 #include <linux/debugobjects.h>
0022 #include <linux/kallsyms.h>
0023 #include <linux/list.h>
0024 #include <linux/notifier.h>
0025 #include <linux/rbtree.h>
0026 #include <linux/radix-tree.h>
0027 #include <linux/rcupdate.h>
0028 #include <linux/pfn.h>
0029 #include <linux/kmemleak.h>
0030 #include <linux/atomic.h>
0031 #include <linux/compiler.h>
0032 #include <linux/llist.h>
0033 #include <linux/bitops.h>
0034 
0035 #include <linux/uaccess.h>
0036 #include <asm/tlbflush.h>
0037 #include <asm/shmparam.h>
0038 
0039 #include "internal.h"
0040 
0041 struct vfree_deferred {
0042     struct llist_head list;
0043     struct work_struct wq;
0044 };
0045 static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred);
0046 
0047 static void __vunmap(const void *, int);
0048 
0049 static void free_work(struct work_struct *w)
0050 {
0051     struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq);
0052     struct llist_node *llnode = llist_del_all(&p->list);
0053     while (llnode) {
0054         void *p = llnode;
0055         llnode = llist_next(llnode);
0056         __vunmap(p, 1);
0057     }
0058 }
0059 
0060 /*** Page table manipulation functions ***/
0061 
0062 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
0063 {
0064     pte_t *pte;
0065 
0066     pte = pte_offset_kernel(pmd, addr);
0067     do {
0068         pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
0069         WARN_ON(!pte_none(ptent) && !pte_present(ptent));
0070     } while (pte++, addr += PAGE_SIZE, addr != end);
0071 }
0072 
0073 static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
0074 {
0075     pmd_t *pmd;
0076     unsigned long next;
0077 
0078     pmd = pmd_offset(pud, addr);
0079     do {
0080         next = pmd_addr_end(addr, end);
0081         if (pmd_clear_huge(pmd))
0082             continue;
0083         if (pmd_none_or_clear_bad(pmd))
0084             continue;
0085         vunmap_pte_range(pmd, addr, next);
0086     } while (pmd++, addr = next, addr != end);
0087 }
0088 
0089 static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
0090 {
0091     pud_t *pud;
0092     unsigned long next;
0093 
0094     pud = pud_offset(pgd, addr);
0095     do {
0096         next = pud_addr_end(addr, end);
0097         if (pud_clear_huge(pud))
0098             continue;
0099         if (pud_none_or_clear_bad(pud))
0100             continue;
0101         vunmap_pmd_range(pud, addr, next);
0102     } while (pud++, addr = next, addr != end);
0103 }
0104 
0105 static void vunmap_page_range(unsigned long addr, unsigned long end)
0106 {
0107     pgd_t *pgd;
0108     unsigned long next;
0109 
0110     BUG_ON(addr >= end);
0111     pgd = pgd_offset_k(addr);
0112     do {
0113         next = pgd_addr_end(addr, end);
0114         if (pgd_none_or_clear_bad(pgd))
0115             continue;
0116         vunmap_pud_range(pgd, addr, next);
0117     } while (pgd++, addr = next, addr != end);
0118 }
0119 
0120 static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
0121         unsigned long end, pgprot_t prot, struct page **pages, int *nr)
0122 {
0123     pte_t *pte;
0124 
0125     /*
0126      * nr is a running index into the array which helps higher level
0127      * callers keep track of where we're up to.
0128      */
0129 
0130     pte = pte_alloc_kernel(pmd, addr);
0131     if (!pte)
0132         return -ENOMEM;
0133     do {
0134         struct page *page = pages[*nr];
0135 
0136         if (WARN_ON(!pte_none(*pte)))
0137             return -EBUSY;
0138         if (WARN_ON(!page))
0139             return -ENOMEM;
0140         set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
0141         (*nr)++;
0142     } while (pte++, addr += PAGE_SIZE, addr != end);
0143     return 0;
0144 }
0145 
0146 static int vmap_pmd_range(pud_t *pud, unsigned long addr,
0147         unsigned long end, pgprot_t prot, struct page **pages, int *nr)
0148 {
0149     pmd_t *pmd;
0150     unsigned long next;
0151 
0152     pmd = pmd_alloc(&init_mm, pud, addr);
0153     if (!pmd)
0154         return -ENOMEM;
0155     do {
0156         next = pmd_addr_end(addr, end);
0157         if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
0158             return -ENOMEM;
0159     } while (pmd++, addr = next, addr != end);
0160     return 0;
0161 }
0162 
0163 static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
0164         unsigned long end, pgprot_t prot, struct page **pages, int *nr)
0165 {
0166     pud_t *pud;
0167     unsigned long next;
0168 
0169     pud = pud_alloc(&init_mm, pgd, addr);
0170     if (!pud)
0171         return -ENOMEM;
0172     do {
0173         next = pud_addr_end(addr, end);
0174         if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
0175             return -ENOMEM;
0176     } while (pud++, addr = next, addr != end);
0177     return 0;
0178 }
0179 
0180 /*
0181  * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
0182  * will have pfns corresponding to the "pages" array.
0183  *
0184  * I.e. the pte at addr+N*PAGE_SIZE shall point to the pfn corresponding to pages[N]
0185  */
0186 static int vmap_page_range_noflush(unsigned long start, unsigned long end,
0187                    pgprot_t prot, struct page **pages)
0188 {
0189     pgd_t *pgd;
0190     unsigned long next;
0191     unsigned long addr = start;
0192     int err = 0;
0193     int nr = 0;
0194 
0195     BUG_ON(addr >= end);
0196     pgd = pgd_offset_k(addr);
0197     do {
0198         next = pgd_addr_end(addr, end);
0199         err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
0200         if (err)
0201             return err;
0202     } while (pgd++, addr = next, addr != end);
0203 
0204     return nr;
0205 }
0206 
0207 static int vmap_page_range(unsigned long start, unsigned long end,
0208                pgprot_t prot, struct page **pages)
0209 {
0210     int ret;
0211 
0212     ret = vmap_page_range_noflush(start, end, prot, pages);
0213     flush_cache_vmap(start, end);
0214     return ret;
0215 }
0216 
0217 int is_vmalloc_or_module_addr(const void *x)
0218 {
0219     /*
0220      * ARM, x86-64 and sparc64 put modules in a special place,
0221      * and fall back on vmalloc() if that fails. Others
0222      * just put them in the vmalloc space.
0223      */
0224 #if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
0225     unsigned long addr = (unsigned long)x;
0226     if (addr >= MODULES_VADDR && addr < MODULES_END)
0227         return 1;
0228 #endif
0229     return is_vmalloc_addr(x);
0230 }
0231 
0232 /*
0233  * Walk a vmap address to the struct page it maps.
0234  */
0235 struct page *vmalloc_to_page(const void *vmalloc_addr)
0236 {
0237     unsigned long addr = (unsigned long) vmalloc_addr;
0238     struct page *page = NULL;
0239     pgd_t *pgd = pgd_offset_k(addr);
0240 
0241     /*
0242      * XXX we might need to change this if we add VIRTUAL_BUG_ON for
0243      * architectures that do not vmalloc module space
0244      */
0245     VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
0246 
0247     if (!pgd_none(*pgd)) {
0248         pud_t *pud = pud_offset(pgd, addr);
0249         if (!pud_none(*pud)) {
0250             pmd_t *pmd = pmd_offset(pud, addr);
0251             if (!pmd_none(*pmd)) {
0252                 pte_t *ptep, pte;
0253 
0254                 ptep = pte_offset_map(pmd, addr);
0255                 pte = *ptep;
0256                 if (pte_present(pte))
0257                     page = pte_page(pte);
0258                 pte_unmap(ptep);
0259             }
0260         }
0261     }
0262     return page;
0263 }
0264 EXPORT_SYMBOL(vmalloc_to_page);
0265 
0266 /*
0267  * Map a vmalloc()-space virtual address to the physical page frame number.
0268  */
0269 unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
0270 {
0271     return page_to_pfn(vmalloc_to_page(vmalloc_addr));
0272 }
0273 EXPORT_SYMBOL(vmalloc_to_pfn);
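
/*
 * Illustrative sketch (hypothetical caller, not part of this file): code that
 * needs the backing pages of a vmalloc'ed buffer, e.g. to build a scatterlist,
 * might walk it one page at a time with the helpers above:
 *
 *	void *buf = vmalloc(nr * PAGE_SIZE);
 *	unsigned long off;
 *
 *	for (off = 0; off < nr * PAGE_SIZE; off += PAGE_SIZE) {
 *		struct page *page = vmalloc_to_page(buf + off);
 *		unsigned long pfn = vmalloc_to_pfn(buf + off);
 *		// page/pfn are only meaningful while the mapping exists
 *	}
 */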
0274 
0275 
0276 /*** Global kva allocator ***/
0277 
0278 #define VM_VM_AREA  0x04
0279 
0280 static DEFINE_SPINLOCK(vmap_area_lock);
0281 /* Export for kexec only */
0282 LIST_HEAD(vmap_area_list);
0283 static LLIST_HEAD(vmap_purge_list);
0284 static struct rb_root vmap_area_root = RB_ROOT;
0285 
0286 /* The vmap cache globals are protected by vmap_area_lock */
0287 static struct rb_node *free_vmap_cache;
0288 static unsigned long cached_hole_size;
0289 static unsigned long cached_vstart;
0290 static unsigned long cached_align;
0291 
0292 static unsigned long vmap_area_pcpu_hole;
0293 
0294 static struct vmap_area *__find_vmap_area(unsigned long addr)
0295 {
0296     struct rb_node *n = vmap_area_root.rb_node;
0297 
0298     while (n) {
0299         struct vmap_area *va;
0300 
0301         va = rb_entry(n, struct vmap_area, rb_node);
0302         if (addr < va->va_start)
0303             n = n->rb_left;
0304         else if (addr >= va->va_end)
0305             n = n->rb_right;
0306         else
0307             return va;
0308     }
0309 
0310     return NULL;
0311 }
0312 
0313 static void __insert_vmap_area(struct vmap_area *va)
0314 {
0315     struct rb_node **p = &vmap_area_root.rb_node;
0316     struct rb_node *parent = NULL;
0317     struct rb_node *tmp;
0318 
0319     while (*p) {
0320         struct vmap_area *tmp_va;
0321 
0322         parent = *p;
0323         tmp_va = rb_entry(parent, struct vmap_area, rb_node);
0324         if (va->va_start < tmp_va->va_end)
0325             p = &(*p)->rb_left;
0326         else if (va->va_end > tmp_va->va_start)
0327             p = &(*p)->rb_right;
0328         else
0329             BUG();
0330     }
0331 
0332     rb_link_node(&va->rb_node, parent, p);
0333     rb_insert_color(&va->rb_node, &vmap_area_root);
0334 
0335     /* address-sort this list */
0336     tmp = rb_prev(&va->rb_node);
0337     if (tmp) {
0338         struct vmap_area *prev;
0339         prev = rb_entry(tmp, struct vmap_area, rb_node);
0340         list_add_rcu(&va->list, &prev->list);
0341     } else
0342         list_add_rcu(&va->list, &vmap_area_list);
0343 }
0344 
0345 static void purge_vmap_area_lazy(void);
0346 
0347 static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
0348 
0349 /*
0350  * Allocate a region of KVA of the specified size and alignment, within the
0351  * vstart and vend.
0352  */
0353 static struct vmap_area *alloc_vmap_area(unsigned long size,
0354                 unsigned long align,
0355                 unsigned long vstart, unsigned long vend,
0356                 int node, gfp_t gfp_mask)
0357 {
0358     struct vmap_area *va;
0359     struct rb_node *n;
0360     unsigned long addr;
0361     int purged = 0;
0362     struct vmap_area *first;
0363 
0364     BUG_ON(!size);
0365     BUG_ON(offset_in_page(size));
0366     BUG_ON(!is_power_of_2(align));
0367 
0368     might_sleep();
0369 
0370     va = kmalloc_node(sizeof(struct vmap_area),
0371             gfp_mask & GFP_RECLAIM_MASK, node);
0372     if (unlikely(!va))
0373         return ERR_PTR(-ENOMEM);
0374 
0375     /*
0376      * Only scan the relevant parts containing pointers to other objects
0377      * to avoid false negatives.
0378      */
0379     kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK);
0380 
0381 retry:
0382     spin_lock(&vmap_area_lock);
0383     /*
0384      * Invalidate cache if we have more permissive parameters.
0385      * cached_hole_size notes the largest hole noticed _below_
0386      * the vmap_area cached in free_vmap_cache: if size fits
0387      * into that hole, we want to scan from vstart to reuse
0388      * the hole instead of allocating above free_vmap_cache.
0389      * Note that __free_vmap_area may update free_vmap_cache
0390      * without updating cached_hole_size or cached_align.
0391      */
0392     if (!free_vmap_cache ||
0393             size < cached_hole_size ||
0394             vstart < cached_vstart ||
0395             align < cached_align) {
0396 nocache:
0397         cached_hole_size = 0;
0398         free_vmap_cache = NULL;
0399     }
0400     /* record if we encounter less permissive parameters */
0401     cached_vstart = vstart;
0402     cached_align = align;
0403 
0404     /* find starting point for our search */
0405     if (free_vmap_cache) {
0406         first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
0407         addr = ALIGN(first->va_end, align);
0408         if (addr < vstart)
0409             goto nocache;
0410         if (addr + size < addr)
0411             goto overflow;
0412 
0413     } else {
0414         addr = ALIGN(vstart, align);
0415         if (addr + size < addr)
0416             goto overflow;
0417 
0418         n = vmap_area_root.rb_node;
0419         first = NULL;
0420 
0421         while (n) {
0422             struct vmap_area *tmp;
0423             tmp = rb_entry(n, struct vmap_area, rb_node);
0424             if (tmp->va_end >= addr) {
0425                 first = tmp;
0426                 if (tmp->va_start <= addr)
0427                     break;
0428                 n = n->rb_left;
0429             } else
0430                 n = n->rb_right;
0431         }
0432 
0433         if (!first)
0434             goto found;
0435     }
0436 
0437     /* from the starting point, walk areas until a suitable hole is found */
0438     while (addr + size > first->va_start && addr + size <= vend) {
0439         if (addr + cached_hole_size < first->va_start)
0440             cached_hole_size = first->va_start - addr;
0441         addr = ALIGN(first->va_end, align);
0442         if (addr + size < addr)
0443             goto overflow;
0444 
0445         if (list_is_last(&first->list, &vmap_area_list))
0446             goto found;
0447 
0448         first = list_next_entry(first, list);
0449     }
0450 
0451 found:
0452     if (addr + size > vend)
0453         goto overflow;
0454 
0455     va->va_start = addr;
0456     va->va_end = addr + size;
0457     va->flags = 0;
0458     __insert_vmap_area(va);
0459     free_vmap_cache = &va->rb_node;
0460     spin_unlock(&vmap_area_lock);
0461 
0462     BUG_ON(!IS_ALIGNED(va->va_start, align));
0463     BUG_ON(va->va_start < vstart);
0464     BUG_ON(va->va_end > vend);
0465 
0466     return va;
0467 
0468 overflow:
0469     spin_unlock(&vmap_area_lock);
0470     if (!purged) {
0471         purge_vmap_area_lazy();
0472         purged = 1;
0473         goto retry;
0474     }
0475 
0476     if (gfpflags_allow_blocking(gfp_mask)) {
0477         unsigned long freed = 0;
0478         blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
0479         if (freed > 0) {
0480             purged = 0;
0481             goto retry;
0482         }
0483     }
0484 
0485     if (printk_ratelimit())
0486         pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
0487             size);
0488     kfree(va);
0489     return ERR_PTR(-EBUSY);
0490 }
0491 
0492 int register_vmap_purge_notifier(struct notifier_block *nb)
0493 {
0494     return blocking_notifier_chain_register(&vmap_notify_list, nb);
0495 }
0496 EXPORT_SYMBOL_GPL(register_vmap_purge_notifier);
0497 
0498 int unregister_vmap_purge_notifier(struct notifier_block *nb)
0499 {
0500     return blocking_notifier_chain_unregister(&vmap_notify_list, nb);
0501 }
0502 EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
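
/*
 * Illustrative sketch (hypothetical code, not part of this file): a subsystem
 * that caches vmalloc'ed objects can register a purge notifier so that a
 * failing vmap allocation gets a chance to retry after the cache is shrunk.
 * The callback runs from the blocking notifier chain above and should add the
 * amount it freed to *(unsigned long *)ptr:
 *
 *	static int my_vmap_purge(struct notifier_block *nb,
 *				 unsigned long action, void *ptr)
 *	{
 *		unsigned long *freed = ptr;
 *
 *		*freed += my_cache_shrink();	// hypothetical helper
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_vmap_nb = {
 *		.notifier_call = my_vmap_purge,
 *	};
 *
 *	register_vmap_purge_notifier(&my_vmap_nb);
 */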
0503 
0504 static void __free_vmap_area(struct vmap_area *va)
0505 {
0506     BUG_ON(RB_EMPTY_NODE(&va->rb_node));
0507 
0508     if (free_vmap_cache) {
0509         if (va->va_end < cached_vstart) {
0510             free_vmap_cache = NULL;
0511         } else {
0512             struct vmap_area *cache;
0513             cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
0514             if (va->va_start <= cache->va_start) {
0515                 free_vmap_cache = rb_prev(&va->rb_node);
0516                 /*
0517                  * We don't try to update cached_hole_size or
0518                  * cached_align, but it won't go very wrong.
0519                  */
0520             }
0521         }
0522     }
0523     rb_erase(&va->rb_node, &vmap_area_root);
0524     RB_CLEAR_NODE(&va->rb_node);
0525     list_del_rcu(&va->list);
0526 
0527     /*
0528      * Track the highest possible candidate for pcpu area
0529  * allocation.  Areas outside of the vmalloc area can be returned
0530  * here too, so consider only end addresses which fall inside the
0531  * vmalloc area proper.
0532      */
0533     if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
0534         vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
0535 
0536     kfree_rcu(va, rcu_head);
0537 }
0538 
0539 /*
0540  * Free a region of KVA allocated by alloc_vmap_area
0541  */
0542 static void free_vmap_area(struct vmap_area *va)
0543 {
0544     spin_lock(&vmap_area_lock);
0545     __free_vmap_area(va);
0546     spin_unlock(&vmap_area_lock);
0547 }
0548 
0549 /*
0550  * Clear the pagetable entries of a given vmap_area
0551  */
0552 static void unmap_vmap_area(struct vmap_area *va)
0553 {
0554     vunmap_page_range(va->va_start, va->va_end);
0555 }
0556 
0557 static void vmap_debug_free_range(unsigned long start, unsigned long end)
0558 {
0559     /*
0560      * Unmap page tables and force a TLB flush immediately if pagealloc
0561      * debugging is enabled.  This catches use after free bugs similarly to
0562      * those in linear kernel virtual address space after a page has been
0563      * freed.
0564      *
0565      * All the lazy freeing logic is still retained, in order to minimise
0566      * intrusiveness of this debugging feature.
0567      *
0568      * This is going to be *slow* (linear kernel virtual address debugging
0569      * doesn't do a broadcast TLB flush so it is a lot faster).
0570      */
0571     if (debug_pagealloc_enabled()) {
0572         vunmap_page_range(start, end);
0573         flush_tlb_kernel_range(start, end);
0574     }
0575 }
0576 
0577 /*
0578  * lazy_max_pages is the maximum amount of virtual address space we gather up
0579  * before attempting to purge with a TLB flush.
0580  *
0581  * There is a tradeoff here: a larger number will cover more kernel page tables
0582  * and take slightly longer to purge, but it will linearly reduce the number of
0583  * global TLB flushes that must be performed. It would seem natural to scale
0584  * this number up linearly with the number of CPUs (because vmapping activity
0585  * could also scale linearly with the number of CPUs), however it is likely
0586  * that in practice, workloads might be constrained in other ways that mean
0587  * vmap activity will not scale linearly with CPUs. Also, I want to be
0588  * conservative and not introduce a big latency on huge systems, so go with
0589  * a less aggressive log scale. It will still be an improvement over the old
0590  * code, and it will be simple to change the scale factor if we find that it
0591  * becomes a problem on bigger systems.
0592  */
0593 static unsigned long lazy_max_pages(void)
0594 {
0595     unsigned int log;
0596 
0597     log = fls(num_online_cpus());
0598 
0599     return log * (32UL * 1024 * 1024 / PAGE_SIZE);
0600 }
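
/*
 * Worked example (illustrative): with 4K pages, 32UL * 1024 * 1024 / PAGE_SIZE
 * is 8192 pages (32MB) per log step.  On a 16-CPU machine fls(16) == 5, so up
 * to 5 * 8192 == 40960 lazily-freed pages (160MB of vmap space) may accumulate
 * before a purge and global TLB flush is triggered.
 */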
0601 
0602 static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
0603 
0604 /*
0605  * Serialize vmap purging.  There is no actual critical section protected
0606  * by this lock, but we want to avoid concurrent calls for performance
0607  * reasons and to make pcpu_get_vm_areas() more deterministic.
0608  */
0609 static DEFINE_MUTEX(vmap_purge_lock);
0610 
0611 /* for per-CPU blocks */
0612 static void purge_fragmented_blocks_allcpus(void);
0613 
0614 /*
0615  * Called before a call to iounmap() if the caller wants the vm_area_struct
0616  * freed immediately.
0617  */
0618 void set_iounmap_nonlazy(void)
0619 {
0620     atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
0621 }
0622 
0623 /*
0624  * Purges all lazily-freed vmap areas.
0625  */
0626 static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
0627 {
0628     struct llist_node *valist;
0629     struct vmap_area *va;
0630     struct vmap_area *n_va;
0631     bool do_free = false;
0632 
0633     lockdep_assert_held(&vmap_purge_lock);
0634 
0635     valist = llist_del_all(&vmap_purge_list);
0636     llist_for_each_entry(va, valist, purge_list) {
0637         if (va->va_start < start)
0638             start = va->va_start;
0639         if (va->va_end > end)
0640             end = va->va_end;
0641         do_free = true;
0642     }
0643 
0644     if (!do_free)
0645         return false;
0646 
0647     flush_tlb_kernel_range(start, end);
0648 
0649     spin_lock(&vmap_area_lock);
0650     llist_for_each_entry_safe(va, n_va, valist, purge_list) {
0651         int nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
0652 
0653         __free_vmap_area(va);
0654         atomic_sub(nr, &vmap_lazy_nr);
0655         cond_resched_lock(&vmap_area_lock);
0656     }
0657     spin_unlock(&vmap_area_lock);
0658     return true;
0659 }
0660 
0661 /*
0662  * Kick off a purge of the outstanding lazy areas. Don't bother if somebody
0663  * is already purging.
0664  */
0665 static void try_purge_vmap_area_lazy(void)
0666 {
0667     if (mutex_trylock(&vmap_purge_lock)) {
0668         __purge_vmap_area_lazy(ULONG_MAX, 0);
0669         mutex_unlock(&vmap_purge_lock);
0670     }
0671 }
0672 
0673 /*
0674  * Kick off a purge of the outstanding lazy areas.
0675  */
0676 static void purge_vmap_area_lazy(void)
0677 {
0678     mutex_lock(&vmap_purge_lock);
0679     purge_fragmented_blocks_allcpus();
0680     __purge_vmap_area_lazy(ULONG_MAX, 0);
0681     mutex_unlock(&vmap_purge_lock);
0682 }
0683 
0684 /*
0685  * Free a vmap area, caller ensuring that the area has been unmapped
0686  * and flush_cache_vunmap has been called for the correct range
0687  * previously.
0688  */
0689 static void free_vmap_area_noflush(struct vmap_area *va)
0690 {
0691     int nr_lazy;
0692 
0693     nr_lazy = atomic_add_return((va->va_end - va->va_start) >> PAGE_SHIFT,
0694                     &vmap_lazy_nr);
0695 
0696     /* After this point, we may free va at any time */
0697     llist_add(&va->purge_list, &vmap_purge_list);
0698 
0699     if (unlikely(nr_lazy > lazy_max_pages()))
0700         try_purge_vmap_area_lazy();
0701 }
0702 
0703 /*
0704  * Free and unmap a vmap area
0705  */
0706 static void free_unmap_vmap_area(struct vmap_area *va)
0707 {
0708     flush_cache_vunmap(va->va_start, va->va_end);
0709     unmap_vmap_area(va);
0710     free_vmap_area_noflush(va);
0711 }
0712 
0713 static struct vmap_area *find_vmap_area(unsigned long addr)
0714 {
0715     struct vmap_area *va;
0716 
0717     spin_lock(&vmap_area_lock);
0718     va = __find_vmap_area(addr);
0719     spin_unlock(&vmap_area_lock);
0720 
0721     return va;
0722 }
0723 
0724 /*** Per cpu kva allocator ***/
0725 
0726 /*
0727  * vmap space is limited especially on 32 bit architectures. Ensure there is
0728  * room for at least 16 percpu vmap blocks per CPU.
0729  */
0730 /*
0731  * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able
0732  * to #define VMALLOC_SPACE     (VMALLOC_END-VMALLOC_START). Guess
0733  * instead (we just need a rough idea)
0734  */
0735 #if BITS_PER_LONG == 32
0736 #define VMALLOC_SPACE       (128UL*1024*1024)
0737 #else
0738 #define VMALLOC_SPACE       (128UL*1024*1024*1024)
0739 #endif
0740 
0741 #define VMALLOC_PAGES       (VMALLOC_SPACE / PAGE_SIZE)
0742 #define VMAP_MAX_ALLOC      BITS_PER_LONG   /* 256K with 4K pages */
0743 #define VMAP_BBMAP_BITS_MAX 1024    /* 4MB with 4K pages */
0744 #define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2)
0745 #define VMAP_MIN(x, y)      ((x) < (y) ? (x) : (y)) /* can't use min() */
0746 #define VMAP_MAX(x, y)      ((x) > (y) ? (x) : (y)) /* can't use max() */
0747 #define VMAP_BBMAP_BITS     \
0748         VMAP_MIN(VMAP_BBMAP_BITS_MAX,   \
0749         VMAP_MAX(VMAP_BBMAP_BITS_MIN,   \
0750             VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))
0751 
0752 #define VMAP_BLOCK_SIZE     (VMAP_BBMAP_BITS * PAGE_SIZE)
0753 
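/*
 * Worked example (illustrative): on a 64-bit kernel with 4K pages and
 * NR_CPUS = 64, VMALLOC_PAGES is (128UL << 30) / 4096 = 32M pages, so
 * VMALLOC_PAGES / 64 / 16 = 32768; that is clamped to VMAP_BBMAP_BITS_MAX,
 * giving VMAP_BBMAP_BITS = 1024 and VMAP_BLOCK_SIZE = 4MB.  On 32-bit, the
 * much smaller 128MB guess drives the result toward VMAP_BBMAP_BITS_MIN as
 * the CPU count grows.
 */
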
0754 static bool vmap_initialized __read_mostly = false;
0755 
0756 struct vmap_block_queue {
0757     spinlock_t lock;
0758     struct list_head free;
0759 };
0760 
0761 struct vmap_block {
0762     spinlock_t lock;
0763     struct vmap_area *va;
0764     unsigned long free, dirty;
0765     unsigned long dirty_min, dirty_max; /*< dirty range */
0766     struct list_head free_list;
0767     struct rcu_head rcu_head;
0768     struct list_head purge;
0769 };
0770 
0771 /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
0772 static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
0773 
0774 /*
0775  * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block
0776  * in the free path. Could get rid of this if we change the API to return a
0777  * "cookie" from alloc, to be passed to free. But no big deal yet.
0778  */
0779 static DEFINE_SPINLOCK(vmap_block_tree_lock);
0780 static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
0781 
0782 /*
0783  * We should probably have a fallback mechanism to allocate virtual memory
0784  * out of partially filled vmap blocks. However vmap block sizing should be
0785  * fairly reasonable according to the vmalloc size, so it shouldn't be a
0786  * big problem.
0787  */
0788 
0789 static unsigned long addr_to_vb_idx(unsigned long addr)
0790 {
0791     addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
0792     addr /= VMAP_BLOCK_SIZE;
0793     return addr;
0794 }
0795 
0796 static void *vmap_block_vaddr(unsigned long va_start, unsigned long pages_off)
0797 {
0798     unsigned long addr;
0799 
0800     addr = va_start + (pages_off << PAGE_SHIFT);
0801     BUG_ON(addr_to_vb_idx(addr) != addr_to_vb_idx(va_start));
0802     return (void *)addr;
0803 }
0804 
0805 /**
0806  * new_vmap_block - allocate a new vmap_block and occupy 2^order pages in this
0807  *                  block. The number of pages can't exceed VMAP_BBMAP_BITS
0808  * @order:    how many 2^order pages should be occupied in the newly allocated block
0809  * @gfp_mask: flags for the page level allocator
0810  *
0811  * Returns: virtual address in a newly allocated block or ERR_PTR(-errno)
0812  */
0813 static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
0814 {
0815     struct vmap_block_queue *vbq;
0816     struct vmap_block *vb;
0817     struct vmap_area *va;
0818     unsigned long vb_idx;
0819     int node, err;
0820     void *vaddr;
0821 
0822     node = numa_node_id();
0823 
0824     vb = kmalloc_node(sizeof(struct vmap_block),
0825             gfp_mask & GFP_RECLAIM_MASK, node);
0826     if (unlikely(!vb))
0827         return ERR_PTR(-ENOMEM);
0828 
0829     va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
0830                     VMALLOC_START, VMALLOC_END,
0831                     node, gfp_mask);
0832     if (IS_ERR(va)) {
0833         kfree(vb);
0834         return ERR_CAST(va);
0835     }
0836 
0837     err = radix_tree_preload(gfp_mask);
0838     if (unlikely(err)) {
0839         kfree(vb);
0840         free_vmap_area(va);
0841         return ERR_PTR(err);
0842     }
0843 
0844     vaddr = vmap_block_vaddr(va->va_start, 0);
0845     spin_lock_init(&vb->lock);
0846     vb->va = va;
0847     /* At least something should be left free */
0848     BUG_ON(VMAP_BBMAP_BITS <= (1UL << order));
0849     vb->free = VMAP_BBMAP_BITS - (1UL << order);
0850     vb->dirty = 0;
0851     vb->dirty_min = VMAP_BBMAP_BITS;
0852     vb->dirty_max = 0;
0853     INIT_LIST_HEAD(&vb->free_list);
0854 
0855     vb_idx = addr_to_vb_idx(va->va_start);
0856     spin_lock(&vmap_block_tree_lock);
0857     err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
0858     spin_unlock(&vmap_block_tree_lock);
0859     BUG_ON(err);
0860     radix_tree_preload_end();
0861 
0862     vbq = &get_cpu_var(vmap_block_queue);
0863     spin_lock(&vbq->lock);
0864     list_add_tail_rcu(&vb->free_list, &vbq->free);
0865     spin_unlock(&vbq->lock);
0866     put_cpu_var(vmap_block_queue);
0867 
0868     return vaddr;
0869 }
0870 
0871 static void free_vmap_block(struct vmap_block *vb)
0872 {
0873     struct vmap_block *tmp;
0874     unsigned long vb_idx;
0875 
0876     vb_idx = addr_to_vb_idx(vb->va->va_start);
0877     spin_lock(&vmap_block_tree_lock);
0878     tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
0879     spin_unlock(&vmap_block_tree_lock);
0880     BUG_ON(tmp != vb);
0881 
0882     free_vmap_area_noflush(vb->va);
0883     kfree_rcu(vb, rcu_head);
0884 }
0885 
0886 static void purge_fragmented_blocks(int cpu)
0887 {
0888     LIST_HEAD(purge);
0889     struct vmap_block *vb;
0890     struct vmap_block *n_vb;
0891     struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
0892 
0893     rcu_read_lock();
0894     list_for_each_entry_rcu(vb, &vbq->free, free_list) {
0895 
0896         if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
0897             continue;
0898 
0899         spin_lock(&vb->lock);
0900         if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
0901             vb->free = 0; /* prevent further allocs after releasing lock */
0902             vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
0903             vb->dirty_min = 0;
0904             vb->dirty_max = VMAP_BBMAP_BITS;
0905             spin_lock(&vbq->lock);
0906             list_del_rcu(&vb->free_list);
0907             spin_unlock(&vbq->lock);
0908             spin_unlock(&vb->lock);
0909             list_add_tail(&vb->purge, &purge);
0910         } else
0911             spin_unlock(&vb->lock);
0912     }
0913     rcu_read_unlock();
0914 
0915     list_for_each_entry_safe(vb, n_vb, &purge, purge) {
0916         list_del(&vb->purge);
0917         free_vmap_block(vb);
0918     }
0919 }
0920 
0921 static void purge_fragmented_blocks_allcpus(void)
0922 {
0923     int cpu;
0924 
0925     for_each_possible_cpu(cpu)
0926         purge_fragmented_blocks(cpu);
0927 }
0928 
0929 static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
0930 {
0931     struct vmap_block_queue *vbq;
0932     struct vmap_block *vb;
0933     void *vaddr = NULL;
0934     unsigned int order;
0935 
0936     BUG_ON(offset_in_page(size));
0937     BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
0938     if (WARN_ON(size == 0)) {
0939         /*
0940          * Allocating 0 bytes isn't what the caller wants since
0941          * get_order(0) returns a funny result. Just warn and terminate
0942          * early.
0943          */
0944         return NULL;
0945     }
0946     order = get_order(size);
0947 
0948     rcu_read_lock();
0949     vbq = &get_cpu_var(vmap_block_queue);
0950     list_for_each_entry_rcu(vb, &vbq->free, free_list) {
0951         unsigned long pages_off;
0952 
0953         spin_lock(&vb->lock);
0954         if (vb->free < (1UL << order)) {
0955             spin_unlock(&vb->lock);
0956             continue;
0957         }
0958 
0959         pages_off = VMAP_BBMAP_BITS - vb->free;
0960         vaddr = vmap_block_vaddr(vb->va->va_start, pages_off);
0961         vb->free -= 1UL << order;
0962         if (vb->free == 0) {
0963             spin_lock(&vbq->lock);
0964             list_del_rcu(&vb->free_list);
0965             spin_unlock(&vbq->lock);
0966         }
0967 
0968         spin_unlock(&vb->lock);
0969         break;
0970     }
0971 
0972     put_cpu_var(vmap_block_queue);
0973     rcu_read_unlock();
0974 
0975     /* Allocate new block if nothing was found */
0976     if (!vaddr)
0977         vaddr = new_vmap_block(order, gfp_mask);
0978 
0979     return vaddr;
0980 }
0981 
0982 static void vb_free(const void *addr, unsigned long size)
0983 {
0984     unsigned long offset;
0985     unsigned long vb_idx;
0986     unsigned int order;
0987     struct vmap_block *vb;
0988 
0989     BUG_ON(offset_in_page(size));
0990     BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
0991 
0992     flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
0993 
0994     order = get_order(size);
0995 
0996     offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
0997     offset >>= PAGE_SHIFT;
0998 
0999     vb_idx = addr_to_vb_idx((unsigned long)addr);
1000     rcu_read_lock();
1001     vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
1002     rcu_read_unlock();
1003     BUG_ON(!vb);
1004 
1005     vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
1006 
1007     spin_lock(&vb->lock);
1008 
1009     /* Expand dirty range */
1010     vb->dirty_min = min(vb->dirty_min, offset);
1011     vb->dirty_max = max(vb->dirty_max, offset + (1UL << order));
1012 
1013     vb->dirty += 1UL << order;
1014     if (vb->dirty == VMAP_BBMAP_BITS) {
1015         BUG_ON(vb->free);
1016         spin_unlock(&vb->lock);
1017         free_vmap_block(vb);
1018     } else
1019         spin_unlock(&vb->lock);
1020 }
1021 
1022 /**
1023  * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
1024  *
1025  * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
1026  * to amortize TLB flushing overheads. What this means is that any page you
1027  * have now, may, in a former life, have been mapped into a kernel virtual
1028  * address by the vmap layer and so there might be some CPUs with TLB entries
1029  * still referencing that page (additional to the regular 1:1 kernel mapping).
1030  *
1031  * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can
1032  * be sure that none of the pages we have control over will have any aliases
1033  * from the vmap layer.
1034  */
1035 void vm_unmap_aliases(void)
1036 {
1037     unsigned long start = ULONG_MAX, end = 0;
1038     int cpu;
1039     int flush = 0;
1040 
1041     if (unlikely(!vmap_initialized))
1042         return;
1043 
1044     might_sleep();
1045 
1046     for_each_possible_cpu(cpu) {
1047         struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
1048         struct vmap_block *vb;
1049 
1050         rcu_read_lock();
1051         list_for_each_entry_rcu(vb, &vbq->free, free_list) {
1052             spin_lock(&vb->lock);
1053             if (vb->dirty) {
1054                 unsigned long va_start = vb->va->va_start;
1055                 unsigned long s, e;
1056 
1057                 s = va_start + (vb->dirty_min << PAGE_SHIFT);
1058                 e = va_start + (vb->dirty_max << PAGE_SHIFT);
1059 
1060                 start = min(s, start);
1061                 end   = max(e, end);
1062 
1063                 flush = 1;
1064             }
1065             spin_unlock(&vb->lock);
1066         }
1067         rcu_read_unlock();
1068     }
1069 
1070     mutex_lock(&vmap_purge_lock);
1071     purge_fragmented_blocks_allcpus();
1072     if (!__purge_vmap_area_lazy(start, end) && flush)
1073         flush_tlb_kernel_range(start, end);
1074     mutex_unlock(&vmap_purge_lock);
1075 }
1076 EXPORT_SYMBOL_GPL(vm_unmap_aliases);
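
/*
 * Illustrative sketch (hypothetical code, not part of this file): code that is
 * about to change the attributes of pages in the kernel's direct mapping, or
 * that otherwise must be sure no stale vmap aliases remain, can combine the
 * lazy teardown with an explicit alias flush:
 *
 *	vunmap(vaddr);			// teardown is lazy, TLBs may be stale
 *	vm_unmap_aliases();		// flush any remaining lazy aliases
 *	// from here on, no vmap-layer TLB entries reference those pages
 */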
1077 
1078 /**
1079  * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
1080  * @mem: the pointer returned by vm_map_ram
1081  * @count: the count passed to that vm_map_ram call (cannot unmap partial)
1082  */
1083 void vm_unmap_ram(const void *mem, unsigned int count)
1084 {
1085     unsigned long size = (unsigned long)count << PAGE_SHIFT;
1086     unsigned long addr = (unsigned long)mem;
1087     struct vmap_area *va;
1088 
1089     might_sleep();
1090     BUG_ON(!addr);
1091     BUG_ON(addr < VMALLOC_START);
1092     BUG_ON(addr > VMALLOC_END);
1093     BUG_ON(!PAGE_ALIGNED(addr));
1094 
1095     debug_check_no_locks_freed(mem, size);
1096     vmap_debug_free_range(addr, addr+size);
1097 
1098     if (likely(count <= VMAP_MAX_ALLOC)) {
1099         vb_free(mem, size);
1100         return;
1101     }
1102 
1103     va = find_vmap_area(addr);
1104     BUG_ON(!va);
1105     free_unmap_vmap_area(va);
1106 }
1107 EXPORT_SYMBOL(vm_unmap_ram);
1108 
1109 /**
1110  * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
1111  * @pages: an array of pointers to the pages to be mapped
1112  * @count: number of pages
1113  * @node: prefer to allocate data structures on this node
1114  * @prot: memory protection to use. PAGE_KERNEL for regular RAM
1115  *
1116  * If you use this function for less than VMAP_MAX_ALLOC pages, it could be
1117  * faster than vmap so it's good.  But if you mix long-life and short-life
1118  * objects with vm_map_ram(), it could consume lots of address space through
1119  * fragmentation (especially on a 32bit machine).  You could eventually see
1120  * failures.  Please use this function for short-lived objects.
1121  *
1122  * Returns: a pointer to the address that has been mapped, or %NULL on failure
1123  */
1124 void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
1125 {
1126     unsigned long size = (unsigned long)count << PAGE_SHIFT;
1127     unsigned long addr;
1128     void *mem;
1129 
1130     if (likely(count <= VMAP_MAX_ALLOC)) {
1131         mem = vb_alloc(size, GFP_KERNEL);
1132         if (IS_ERR(mem))
1133             return NULL;
1134         addr = (unsigned long)mem;
1135     } else {
1136         struct vmap_area *va;
1137         va = alloc_vmap_area(size, PAGE_SIZE,
1138                 VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
1139         if (IS_ERR(va))
1140             return NULL;
1141 
1142         addr = va->va_start;
1143         mem = (void *)addr;
1144     }
1145     if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
1146         vm_unmap_ram(mem, count);
1147         return NULL;
1148     }
1149     return mem;
1150 }
1151 EXPORT_SYMBOL(vm_map_ram);
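
/*
 * Illustrative sketch (hypothetical code, not part of this file): a typical
 * short-lived mapping of a small page array, which takes the fast per-cpu
 * vmap-block path as long as count <= VMAP_MAX_ALLOC:
 *
 *	struct page *pages[4];
 *	void *vaddr;
 *
 *	// ... fill pages[] from the page allocator or a pool ...
 *	vaddr = vm_map_ram(pages, 4, NUMA_NO_NODE, PAGE_KERNEL);
 *	if (!vaddr)
 *		return -ENOMEM;
 *	// ... use the linear mapping ...
 *	vm_unmap_ram(vaddr, 4);		// count must match the map call
 */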
1152 
1153 static struct vm_struct *vmlist __initdata;
1154 /**
1155  * vm_area_add_early - add vmap area early during boot
1156  * @vm: vm_struct to add
1157  *
1158  * This function is used to add fixed kernel vm area to vmlist before
1159  * vmalloc_init() is called.  @vm->addr, @vm->size, and @vm->flags
1160  * should contain proper values and the other fields should be zero.
1161  *
1162  * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
1163  */
1164 void __init vm_area_add_early(struct vm_struct *vm)
1165 {
1166     struct vm_struct *tmp, **p;
1167 
1168     BUG_ON(vmap_initialized);
1169     for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1170         if (tmp->addr >= vm->addr) {
1171             BUG_ON(tmp->addr < vm->addr + vm->size);
1172             break;
1173         } else
1174             BUG_ON(tmp->addr + tmp->size > vm->addr);
1175     }
1176     vm->next = *p;
1177     *p = vm;
1178 }
1179 
1180 /**
1181  * vm_area_register_early - register vmap area early during boot
1182  * @vm: vm_struct to register
1183  * @align: requested alignment
1184  *
1185  * This function is used to register kernel vm area before
1186  * vmalloc_init() is called.  @vm->size and @vm->flags should contain
1187  * proper values on entry and other fields should be zero.  On return,
1188  * vm->addr contains the allocated address.
1189  *
1190  * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
1191  */
1192 void __init vm_area_register_early(struct vm_struct *vm, size_t align)
1193 {
1194     static size_t vm_init_off __initdata;
1195     unsigned long addr;
1196 
1197     addr = ALIGN(VMALLOC_START + vm_init_off, align);
1198     vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
1199 
1200     vm->addr = (void *)addr;
1201 
1202     vm_area_add_early(vm);
1203 }
1204 
1205 void __init vmalloc_init(void)
1206 {
1207     struct vmap_area *va;
1208     struct vm_struct *tmp;
1209     int i;
1210 
1211     for_each_possible_cpu(i) {
1212         struct vmap_block_queue *vbq;
1213         struct vfree_deferred *p;
1214 
1215         vbq = &per_cpu(vmap_block_queue, i);
1216         spin_lock_init(&vbq->lock);
1217         INIT_LIST_HEAD(&vbq->free);
1218         p = &per_cpu(vfree_deferred, i);
1219         init_llist_head(&p->list);
1220         INIT_WORK(&p->wq, free_work);
1221     }
1222 
1223     /* Import existing vmlist entries. */
1224     for (tmp = vmlist; tmp; tmp = tmp->next) {
1225         va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
1226         va->flags = VM_VM_AREA;
1227         va->va_start = (unsigned long)tmp->addr;
1228         va->va_end = va->va_start + tmp->size;
1229         va->vm = tmp;
1230         __insert_vmap_area(va);
1231     }
1232 
1233     vmap_area_pcpu_hole = VMALLOC_END;
1234 
1235     vmap_initialized = true;
1236 }
1237 
1238 /**
1239  * map_kernel_range_noflush - map kernel VM area with the specified pages
1240  * @addr: start of the VM area to map
1241  * @size: size of the VM area to map
1242  * @prot: page protection flags to use
1243  * @pages: pages to map
1244  *
1245  * Map PFN_UP(@size) pages at @addr.  The VM area @addr and @size
1246  * specify should have been allocated using get_vm_area() and its
1247  * friends.
1248  *
1249  * NOTE:
1250  * This function does NOT do any cache flushing.  The caller is
1251  * responsible for calling flush_cache_vmap() on to-be-mapped areas
1252  * before calling this function.
1253  *
1254  * RETURNS:
1255  * The number of pages mapped on success, -errno on failure.
1256  */
1257 int map_kernel_range_noflush(unsigned long addr, unsigned long size,
1258                  pgprot_t prot, struct page **pages)
1259 {
1260     return vmap_page_range_noflush(addr, addr + size, prot, pages);
1261 }
1262 
1263 /**
1264  * unmap_kernel_range_noflush - unmap kernel VM area
1265  * @addr: start of the VM area to unmap
1266  * @size: size of the VM area to unmap
1267  *
1268  * Unmap PFN_UP(@size) pages at @addr.  The VM area @addr and @size
1269  * specify should have been allocated using get_vm_area() and its
1270  * friends.
1271  *
1272  * NOTE:
1273  * This function does NOT do any cache flushing.  The caller is
1274  * responsible for calling flush_cache_vunmap() on to-be-unmapped areas
1275  * before calling this function and flush_tlb_kernel_range() after.
1276  */
1277 void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
1278 {
1279     vunmap_page_range(addr, addr + size);
1280 }
1281 EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
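
/*
 * Illustrative sketch (hypothetical code, not part of this file): the _noflush
 * variants leave all cache and TLB maintenance to the caller, which is useful
 * when batching several updates inside a region obtained from get_vm_area().
 * Following the requirements documented above:
 *
 *	flush_cache_vmap(addr, addr + size);
 *	map_kernel_range_noflush(addr, size, PAGE_KERNEL, pages);
 *	// ... later, when tearing the range down ...
 *	flush_cache_vunmap(addr, addr + size);
 *	unmap_kernel_range_noflush(addr, size);
 *	flush_tlb_kernel_range(addr, addr + size);
 */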
1282 
1283 /**
1284  * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
1285  * @addr: start of the VM area to unmap
1286  * @size: size of the VM area to unmap
1287  *
1288  * Similar to unmap_kernel_range_noflush() but flushes the cache before
1289  * the unmapping and the TLB after.
1290  */
1291 void unmap_kernel_range(unsigned long addr, unsigned long size)
1292 {
1293     unsigned long end = addr + size;
1294 
1295     flush_cache_vunmap(addr, end);
1296     vunmap_page_range(addr, end);
1297     flush_tlb_kernel_range(addr, end);
1298 }
1299 EXPORT_SYMBOL_GPL(unmap_kernel_range);
1300 
1301 int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
1302 {
1303     unsigned long addr = (unsigned long)area->addr;
1304     unsigned long end = addr + get_vm_area_size(area);
1305     int err;
1306 
1307     err = vmap_page_range(addr, end, prot, pages);
1308 
1309     return err > 0 ? 0 : err;
1310 }
1311 EXPORT_SYMBOL_GPL(map_vm_area);
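
/*
 * Illustrative sketch (hypothetical code, not part of this file): reserving a
 * chunk of vmalloc space with get_vm_area() and populating it later with
 * map_vm_area() mirrors what vmap() does internally:
 *
 *	struct vm_struct *area;
 *
 *	area = get_vm_area(nr_pages << PAGE_SHIFT, VM_MAP);
 *	if (!area)
 *		return -ENOMEM;
 *	if (map_vm_area(area, PAGE_KERNEL, pages)) {
 *		vunmap(area->addr);	// releases the reservation
 *		return -ENOMEM;
 *	}
 *	// area->addr now maps pages[0 .. nr_pages-1]
 */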
1312 
1313 static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
1314                   unsigned long flags, const void *caller)
1315 {
1316     spin_lock(&vmap_area_lock);
1317     vm->flags = flags;
1318     vm->addr = (void *)va->va_start;
1319     vm->size = va->va_end - va->va_start;
1320     vm->caller = caller;
1321     va->vm = vm;
1322     va->flags |= VM_VM_AREA;
1323     spin_unlock(&vmap_area_lock);
1324 }
1325 
1326 static void clear_vm_uninitialized_flag(struct vm_struct *vm)
1327 {
1328     /*
1329      * Before removing VM_UNINITIALIZED,
1330      * we should make sure that vm has proper values.
1331      * Pair with smp_rmb() in show_numa_info().
1332      */
1333     smp_wmb();
1334     vm->flags &= ~VM_UNINITIALIZED;
1335 }
1336 
1337 static struct vm_struct *__get_vm_area_node(unsigned long size,
1338         unsigned long align, unsigned long flags, unsigned long start,
1339         unsigned long end, int node, gfp_t gfp_mask, const void *caller)
1340 {
1341     struct vmap_area *va;
1342     struct vm_struct *area;
1343 
1344     BUG_ON(in_interrupt());
1345     size = PAGE_ALIGN(size);
1346     if (unlikely(!size))
1347         return NULL;
1348 
1349     if (flags & VM_IOREMAP)
1350         align = 1ul << clamp_t(int, get_count_order_long(size),
1351                        PAGE_SHIFT, IOREMAP_MAX_ORDER);
1352 
1353     area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
1354     if (unlikely(!area))
1355         return NULL;
1356 
1357     if (!(flags & VM_NO_GUARD))
1358         size += PAGE_SIZE;
1359 
1360     va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
1361     if (IS_ERR(va)) {
1362         kfree(area);
1363         return NULL;
1364     }
1365 
1366     setup_vmalloc_vm(area, va, flags, caller);
1367 
1368     return area;
1369 }
1370 
1371 struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
1372                 unsigned long start, unsigned long end)
1373 {
1374     return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
1375                   GFP_KERNEL, __builtin_return_address(0));
1376 }
1377 EXPORT_SYMBOL_GPL(__get_vm_area);
1378 
1379 struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
1380                        unsigned long start, unsigned long end,
1381                        const void *caller)
1382 {
1383     return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
1384                   GFP_KERNEL, caller);
1385 }
1386 
1387 /**
1388  *  get_vm_area  -  reserve a contiguous kernel virtual area
1389  *  @size:      size of the area
1390  *  @flags:     %VM_IOREMAP for I/O mappings or VM_ALLOC
1391  *
1392  *  Search an area of @size in the kernel virtual mapping area,
1393  *  and reserve it for our purposes.  Returns the area descriptor
1394  *  on success or %NULL on failure.
1395  */
1396 struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
1397 {
1398     return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1399                   NUMA_NO_NODE, GFP_KERNEL,
1400                   __builtin_return_address(0));
1401 }
1402 
1403 struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
1404                 const void *caller)
1405 {
1406     return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1407                   NUMA_NO_NODE, GFP_KERNEL, caller);
1408 }
1409 
1410 /**
1411  *  find_vm_area  -  find a continuous kernel virtual area
1412  *  @addr:      base address
1413  *
1414  *  Search for the kernel VM area starting at @addr, and return it.
1415  *  It is up to the caller to do all required locking to keep the returned
1416  *  pointer valid.
1417  */
1418 struct vm_struct *find_vm_area(const void *addr)
1419 {
1420     struct vmap_area *va;
1421 
1422     va = find_vmap_area((unsigned long)addr);
1423     if (va && va->flags & VM_VM_AREA)
1424         return va->vm;
1425 
1426     return NULL;
1427 }
1428 
1429 /**
1430  *  remove_vm_area  -  find and remove a continuous kernel virtual area
1431  *  @addr:      base address
1432  *
1433  *  Search for the kernel VM area starting at @addr, and remove it.
1434  *  This function returns the found VM area, but using it is NOT safe
1435  *  on SMP machines, except for its size or flags.
1436  */
1437 struct vm_struct *remove_vm_area(const void *addr)
1438 {
1439     struct vmap_area *va;
1440 
1441     might_sleep();
1442 
1443     va = find_vmap_area((unsigned long)addr);
1444     if (va && va->flags & VM_VM_AREA) {
1445         struct vm_struct *vm = va->vm;
1446 
1447         spin_lock(&vmap_area_lock);
1448         va->vm = NULL;
1449         va->flags &= ~VM_VM_AREA;
1450         spin_unlock(&vmap_area_lock);
1451 
1452         vmap_debug_free_range(va->va_start, va->va_end);
1453         kasan_free_shadow(vm);
1454         free_unmap_vmap_area(va);
1455 
1456         return vm;
1457     }
1458     return NULL;
1459 }
1460 
1461 static void __vunmap(const void *addr, int deallocate_pages)
1462 {
1463     struct vm_struct *area;
1464 
1465     if (!addr)
1466         return;
1467 
1468     if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n",
1469             addr))
1470         return;
1471 
1472     area = remove_vm_area(addr);
1473     if (unlikely(!area)) {
1474         WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
1475                 addr);
1476         return;
1477     }
1478 
1479     debug_check_no_locks_freed(addr, get_vm_area_size(area));
1480     debug_check_no_obj_freed(addr, get_vm_area_size(area));
1481 
1482     if (deallocate_pages) {
1483         int i;
1484 
1485         for (i = 0; i < area->nr_pages; i++) {
1486             struct page *page = area->pages[i];
1487 
1488             BUG_ON(!page);
1489             __free_pages(page, 0);
1490         }
1491 
1492         kvfree(area->pages);
1493     }
1494 
1495     kfree(area);
1496     return;
1497 }
1498 
1499 static inline void __vfree_deferred(const void *addr)
1500 {
1501     /*
1502      * Use raw_cpu_ptr() because this can be called from preemptible
1503      * context. Preemption is absolutely fine here, because the llist_add()
1504      * implementation is lockless, so it works even if we are adding to
1505      * another cpu's list.  schedule_work() should be fine with this too.
1506      */
1507     struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred);
1508 
1509     if (llist_add((struct llist_node *)addr, &p->list))
1510         schedule_work(&p->wq);
1511 }
1512 
1513 /**
1514  *  vfree_atomic  -  release memory allocated by vmalloc()
1515  *  @addr:      memory base address
1516  *
1517  *  This one is just like vfree() but can be called in any atomic context
1518  *  except NMIs.
1519  */
1520 void vfree_atomic(const void *addr)
1521 {
1522     BUG_ON(in_nmi());
1523 
1524     kmemleak_free(addr);
1525 
1526     if (!addr)
1527         return;
1528     __vfree_deferred(addr);
1529 }
1530 
1531 /**
1532  *  vfree  -  release memory allocated by vmalloc()
1533  *  @addr:      memory base address
1534  *
1535  *  Free the virtually contiguous memory area starting at @addr, as
1536  *  obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
1537  *  NULL, no operation is performed.
1538  *
1539  *  Must not be called in NMI context (strictly speaking, only if we don't
1540  *  have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
1541  *  conventions for vfree() arch-dependent would be a really bad idea)
1542  *
1543  *  NOTE: assumes that the object at *addr has a size >= sizeof(llist_node)
1544  */
1545 void vfree(const void *addr)
1546 {
1547     BUG_ON(in_nmi());
1548 
1549     kmemleak_free(addr);
1550 
1551     if (!addr)
1552         return;
1553     if (unlikely(in_interrupt()))
1554         __vfree_deferred(addr);
1555     else
1556         __vunmap(addr, 1);
1557 }
1558 EXPORT_SYMBOL(vfree);
1559 
1560 /**
1561  *  vunmap  -  release virtual mapping obtained by vmap()
1562  *  @addr:      memory base address
1563  *
1564  *  Free the virtually contiguous memory area starting at @addr,
1565  *  which was created from the page array passed to vmap().
1566  *
1567  *  Must not be called in interrupt context.
1568  */
1569 void vunmap(const void *addr)
1570 {
1571     BUG_ON(in_interrupt());
1572     might_sleep();
1573     if (addr)
1574         __vunmap(addr, 0);
1575 }
1576 EXPORT_SYMBOL(vunmap);
1577 
1578 /**
1579  *  vmap  -  map an array of pages into virtually contiguous space
1580  *  @pages:     array of page pointers
1581  *  @count:     number of pages to map
1582  *  @flags:     vm_area->flags
1583  *  @prot:      page protection for the mapping
1584  *
1585  *  Maps @count pages from @pages into contiguous kernel virtual
1586  *  space.
1587  */
1588 void *vmap(struct page **pages, unsigned int count,
1589         unsigned long flags, pgprot_t prot)
1590 {
1591     struct vm_struct *area;
1592     unsigned long size;     /* In bytes */
1593 
1594     might_sleep();
1595 
1596     if (count > totalram_pages)
1597         return NULL;
1598 
1599     size = (unsigned long)count << PAGE_SHIFT;
1600     area = get_vm_area_caller(size, flags, __builtin_return_address(0));
1601     if (!area)
1602         return NULL;
1603 
1604     if (map_vm_area(area, prot, pages)) {
1605         vunmap(area->addr);
1606         return NULL;
1607     }
1608 
1609     return area->addr;
1610 }
1611 EXPORT_SYMBOL(vmap);
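
/*
 * Illustrative sketch (hypothetical code, not part of this file): mapping an
 * array of pages that already exist (e.g. from alloc_page()) into one
 * virtually contiguous range, and tearing it down again:
 *
 *	struct page *pages[16];
 *	void *vaddr;
 *	int i;
 *
 *	for (i = 0; i < 16; i++)
 *		pages[i] = alloc_page(GFP_KERNEL);
 *	vaddr = vmap(pages, 16, VM_MAP, PAGE_KERNEL);
 *	if (!vaddr) {
 *		// free the pages and bail out
 *	}
 *	// ... use vaddr[0 .. 16*PAGE_SIZE - 1] ...
 *	vunmap(vaddr);			// does not free the pages themselves
 */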
1612 
1613 static void *__vmalloc_node(unsigned long size, unsigned long align,
1614                 gfp_t gfp_mask, pgprot_t prot,
1615                 int node, const void *caller);
1616 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1617                  pgprot_t prot, int node)
1618 {
1619     struct page **pages;
1620     unsigned int nr_pages, array_size, i;
1621     const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
1622     const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
1623 
1624     nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
1625     array_size = (nr_pages * sizeof(struct page *));
1626 
1627     area->nr_pages = nr_pages;
1628     /* Please note that the recursion is strictly bounded. */
1629     if (array_size > PAGE_SIZE) {
1630         pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
1631                 PAGE_KERNEL, node, area->caller);
1632     } else {
1633         pages = kmalloc_node(array_size, nested_gfp, node);
1634     }
1635     area->pages = pages;
1636     if (!area->pages) {
1637         remove_vm_area(area->addr);
1638         kfree(area);
1639         return NULL;
1640     }
1641 
1642     for (i = 0; i < area->nr_pages; i++) {
1643         struct page *page;
1644 
1645         if (node == NUMA_NO_NODE)
1646             page = alloc_page(alloc_mask);
1647         else
1648             page = alloc_pages_node(node, alloc_mask, 0);
1649 
1650         if (unlikely(!page)) {
1651             /* Successfully allocated i pages, free them in __vunmap() */
1652             area->nr_pages = i;
1653             goto fail;
1654         }
1655         area->pages[i] = page;
1656         if (gfpflags_allow_blocking(gfp_mask))
1657             cond_resched();
1658     }
1659 
1660     if (map_vm_area(area, prot, pages))
1661         goto fail;
1662     return area->addr;
1663 
1664 fail:
1665     warn_alloc(gfp_mask,
1666               "vmalloc: allocation failure, allocated %ld of %ld bytes",
1667               (area->nr_pages*PAGE_SIZE), area->size);
1668     vfree(area->addr);
1669     return NULL;
1670 }
1671 
1672 /**
1673  *  __vmalloc_node_range  -  allocate virtually contiguous memory
1674  *  @size:      allocation size
1675  *  @align:     desired alignment
1676  *  @start:     vm area range start
1677  *  @end:       vm area range end
1678  *  @gfp_mask:  flags for the page level allocator
1679  *  @prot:      protection mask for the allocated pages
1680  *  @vm_flags:  additional vm area flags (e.g. %VM_NO_GUARD)
1681  *  @node:      node to use for allocation or NUMA_NO_NODE
1682  *  @caller:    caller's return address
1683  *
1684  *  Allocate enough pages to cover @size from the page level
1685  *  allocator with @gfp_mask flags.  Map them into contiguous
1686  *  kernel virtual space, using a pagetable protection of @prot.
1687  */
1688 void *__vmalloc_node_range(unsigned long size, unsigned long align,
1689             unsigned long start, unsigned long end, gfp_t gfp_mask,
1690             pgprot_t prot, unsigned long vm_flags, int node,
1691             const void *caller)
1692 {
1693     struct vm_struct *area;
1694     void *addr;
1695     unsigned long real_size = size;
1696 
1697     size = PAGE_ALIGN(size);
1698     if (!size || (size >> PAGE_SHIFT) > totalram_pages)
1699         goto fail;
1700 
1701     area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
1702                 vm_flags, start, end, node, gfp_mask, caller);
1703     if (!area)
1704         goto fail;
1705 
1706     addr = __vmalloc_area_node(area, gfp_mask, prot, node);
1707     if (!addr)
1708         return NULL;
1709 
1710     /*
1711      * In this function, newly allocated vm_struct has VM_UNINITIALIZED
1712      * flag. It means that vm_struct is not fully initialized.
1713      * Now, it is fully initialized, so remove this flag here.
1714      */
1715     clear_vm_uninitialized_flag(area);
1716 
1717     /*
1718      * A ref_count = 2 is needed because vm_struct allocated in
1719      * __get_vm_area_node() contains a reference to the virtual address of
1720      * the vmalloc'ed block.
1721      */
1722     kmemleak_alloc(addr, real_size, 2, gfp_mask);
1723 
1724     return addr;
1725 
1726 fail:
1727     warn_alloc(gfp_mask,
1728               "vmalloc: allocation failure: %lu bytes", real_size);
1729     return NULL;
1730 }
1731 
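/*
 * Illustrative sketch, not part of the original file: roughly how an
 * architecture's module_alloc() can use __vmalloc_node_range() to place
 * allocations inside its own [MODULES_VADDR, MODULES_END) window.  This is
 * a simplified, hypothetical version; real module_alloc() implementations
 * typically also pick an executable page protection.
 */
#ifdef MODULES_VADDR
static __maybe_unused void *example_module_alloc(unsigned long size)
{
	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
				    GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
				    __builtin_return_address(0));
}
#endif
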
1732 /**
1733  *  __vmalloc_node  -  allocate virtually contiguous memory
1734  *  @size:      allocation size
1735  *  @align:     desired alignment
1736  *  @gfp_mask:  flags for the page level allocator
1737  *  @prot:      protection mask for the allocated pages
1738  *  @node:      node to use for allocation or NUMA_NO_NODE
1739  *  @caller:    caller's return address
1740  *
1741  *  Allocate enough pages to cover @size from the page level
1742  *  allocator with @gfp_mask flags.  Map them into contiguous
1743  *  kernel virtual space, using a pagetable protection of @prot.
1744  */
1745 static void *__vmalloc_node(unsigned long size, unsigned long align,
1746                 gfp_t gfp_mask, pgprot_t prot,
1747                 int node, const void *caller)
1748 {
1749     return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
1750                 gfp_mask, prot, 0, node, caller);
1751 }
1752 
1753 void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
1754 {
1755     return __vmalloc_node(size, 1, gfp_mask, prot, NUMA_NO_NODE,
1756                 __builtin_return_address(0));
1757 }
1758 EXPORT_SYMBOL(__vmalloc);
1759 
1760 static inline void *__vmalloc_node_flags(unsigned long size,
1761                     int node, gfp_t flags)
1762 {
1763     return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
1764                     node, __builtin_return_address(0));
1765 }
1766 
1767 /**
1768  *  vmalloc  -  allocate virtually contiguous memory
1769  *  @size:      allocation size
1770  *  Allocate enough pages to cover @size from the page level
1771  *  allocator and map them into contiguous kernel virtual space.
1772  *
1773  *  For tight control over page level allocator and protection flags
1774  *  use __vmalloc() instead.
1775  */
1776 void *vmalloc(unsigned long size)
1777 {
1778     return __vmalloc_node_flags(size, NUMA_NO_NODE,
1779                     GFP_KERNEL | __GFP_HIGHMEM);
1780 }
1781 EXPORT_SYMBOL(vmalloc);
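
/*
 * Illustrative sketch, not part of the original file: the usual
 * vmalloc()/vfree() pairing for a buffer that is too large for kmalloc().
 * The function name and size are hypothetical.
 */
static __maybe_unused int example_vmalloc_usage(void)
{
	unsigned long *buf = vmalloc(1024 * PAGE_SIZE);

	if (!buf)
		return -ENOMEM;

	buf[0] = 1;	/* virtually contiguous and writable */
	vfree(buf);	/* must be released with vfree(), not kfree() */
	return 0;
}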
1782 
1783 /**
1784  *  vzalloc - allocate virtually contiguous memory with zero fill
1785  *  @size:  allocation size
1786  *  Allocate enough pages to cover @size from the page level
1787  *  allocator and map them into contiguous kernel virtual space.
1788  *  The memory allocated is set to zero.
1789  *
1790  *  For tight control over page level allocator and protection flags
1791  *  use __vmalloc() instead.
1792  */
1793 void *vzalloc(unsigned long size)
1794 {
1795     return __vmalloc_node_flags(size, NUMA_NO_NODE,
1796                 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1797 }
1798 EXPORT_SYMBOL(vzalloc);
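
/*
 * Illustrative sketch, not part of the original file: vzalloc() avoids a
 * separate memset() when zeroed memory is needed.  The structure and the
 * helper name are hypothetical.
 */
struct example_stats {
	u64 rx_packets;
	u64 tx_packets;
};

static __maybe_unused struct example_stats *example_alloc_stats(unsigned int n)
{
	/* All counters start at zero thanks to __GFP_ZERO. */
	return vzalloc(n * sizeof(struct example_stats));
}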
1799 
1800 /**
1801  * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
1802  * @size: allocation size
1803  *
1804  * The resulting memory area is zeroed so it can be mapped to userspace
1805  * without leaking data.
1806  */
1807 void *vmalloc_user(unsigned long size)
1808 {
1809     struct vm_struct *area;
1810     void *ret;
1811 
1812     ret = __vmalloc_node(size, SHMLBA,
1813                  GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1814                  PAGE_KERNEL, NUMA_NO_NODE,
1815                  __builtin_return_address(0));
1816     if (ret) {
1817         area = find_vm_area(ret);
1818         area->flags |= VM_USERMAP;
1819     }
1820     return ret;
1821 }
1822 EXPORT_SYMBOL(vmalloc_user);
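
/*
 * Illustrative sketch, not part of the original file: allocating a buffer
 * that will later be handed to userspace with remap_vmalloc_range() (see
 * the mmap sketch further below).  vmalloc_user() zeroes the pages and
 * marks the area VM_USERMAP so that the remap is allowed.  Names are
 * hypothetical.
 */
static void *example_shared_buf;

static __maybe_unused int example_alloc_shared(unsigned long size)
{
	example_shared_buf = vmalloc_user(size);
	return example_shared_buf ? 0 : -ENOMEM;
}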
1823 
1824 /**
1825  *  vmalloc_node  -  allocate memory on a specific node
1826  *  @size:      allocation size
1827  *  @node:      numa node
1828  *
1829  *  Allocate enough pages to cover @size from the page level
1830  *  allocator and map them into contiguous kernel virtual space.
1831  *
1832  *  For tight control over page level allocator and protection flags
1833  *  use __vmalloc() instead.
1834  */
1835 void *vmalloc_node(unsigned long size, int node)
1836 {
1837     return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
1838                     node, __builtin_return_address(0));
1839 }
1840 EXPORT_SYMBOL(vmalloc_node);
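
/*
 * Illustrative sketch, not part of the original file: keeping a large table
 * on the NUMA node that will mostly access it.  'nid' would typically come
 * from cpu_to_node() or dev_to_node(); the helper name is hypothetical.
 */
static __maybe_unused void *example_alloc_on_node(unsigned long size, int nid)
{
	if (nid == NUMA_NO_NODE)
		return vmalloc(size);	/* no node preference */

	return vmalloc_node(size, nid);	/* prefer pages from @nid */
}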
1841 
1842 /**
1843  * vzalloc_node - allocate memory on a specific node with zero fill
1844  * @size:   allocation size
1845  * @node:   numa node
1846  *
1847  * Allocate enough pages to cover @size from the page level
1848  * allocator and map them into contiguous kernel virtual space.
1849  * The memory allocated is set to zero.
1850  *
1851  * For tight control over page level allocator and protection flags
1852  * use __vmalloc_node() instead.
1853  */
1854 void *vzalloc_node(unsigned long size, int node)
1855 {
1856     return __vmalloc_node_flags(size, node,
1857              GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1858 }
1859 EXPORT_SYMBOL(vzalloc_node);
1860 
1861 #ifndef PAGE_KERNEL_EXEC
1862 # define PAGE_KERNEL_EXEC PAGE_KERNEL
1863 #endif
1864 
1865 /**
1866  *  vmalloc_exec  -  allocate virtually contiguous, executable memory
1867  *  @size:      allocation size
1868  *
1869  *  Kernel-internal function to allocate enough pages to cover @size
1870  *  from the page level allocator and map them into contiguous and
1871  *  executable kernel virtual space.
1872  *
1873  *  For tight control over page level allocator and protection flags
1874  *  use __vmalloc() instead.
1875  */
1876 
1877 void *vmalloc_exec(unsigned long size)
1878 {
1879     return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
1880                   NUMA_NO_NODE, __builtin_return_address(0));
1881 }
1882 
1883 #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
1884 #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
1885 #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
1886 #define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
1887 #else
1888 #define GFP_VMALLOC32 GFP_KERNEL
1889 #endif
1890 
1891 /**
1892  *  vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
1893  *  @size:      allocation size
1894  *
1895  *  Allocate enough 32bit PA addressable pages to cover @size from the
1896  *  page level allocator and map them into contiguous kernel virtual space.
1897  */
1898 void *vmalloc_32(unsigned long size)
1899 {
1900     return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
1901                   NUMA_NO_NODE, __builtin_return_address(0));
1902 }
1903 EXPORT_SYMBOL(vmalloc_32);
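
/*
 * Illustrative sketch, not part of the original file: a large buffer for a
 * device that can only address 32-bit physical pages.  The pages are not
 * physically contiguous, so they would still be handed to the hardware one
 * page at a time (e.g. via vmalloc_to_page()).  Names are hypothetical.
 */
static __maybe_unused void *example_alloc_capture_buffer(unsigned long nr_frames,
							  unsigned long frame_size)
{
	return vmalloc_32(nr_frames * frame_size);
}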
1904 
1905 /**
1906  * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
1907  *  @size:      allocation size
1908  *
1909  * The resulting memory area is 32bit addressable and zeroed so it can be
1910  * mapped to userspace without leaking data.
1911  */
1912 void *vmalloc_32_user(unsigned long size)
1913 {
1914     struct vm_struct *area;
1915     void *ret;
1916 
1917     ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
1918                  NUMA_NO_NODE, __builtin_return_address(0));
1919     if (ret) {
1920         area = find_vm_area(ret);
1921         area->flags |= VM_USERMAP;
1922     }
1923     return ret;
1924 }
1925 EXPORT_SYMBOL(vmalloc_32_user);
1926 
1927 /*
1928  * Small helper routine: copy contents from addr to buf.
1929  * If a page is not present, fill the corresponding bytes with zeroes.
1930  */
1931 
1932 static int aligned_vread(char *buf, char *addr, unsigned long count)
1933 {
1934     struct page *p;
1935     int copied = 0;
1936 
1937     while (count) {
1938         unsigned long offset, length;
1939 
1940         offset = offset_in_page(addr);
1941         length = PAGE_SIZE - offset;
1942         if (length > count)
1943             length = count;
1944         p = vmalloc_to_page(addr);
1945         /*
1946          * To access this _mapped_ area safely we would need a lock,
1947          * but taking one here would add overhead to the vmalloc()
1948          * and vfree() calls for the sake of this rarely used _debug_
1949          * interface. Instead, we use kmap_atomic() and accept a small
1950          * overhead in this access function.
1951          */
1952         if (p) {
1953             /*
1954              * we can expect USER0 is not used (see vread/vwrite's
1955              * function description)
1956              */
1957             void *map = kmap_atomic(p);
1958             memcpy(buf, map + offset, length);
1959             kunmap_atomic(map);
1960         } else
1961             memset(buf, 0, length);
1962 
1963         addr += length;
1964         buf += length;
1965         copied += length;
1966         count -= length;
1967     }
1968     return copied;
1969 }
1970 
1971 static int aligned_vwrite(char *buf, char *addr, unsigned long count)
1972 {
1973     struct page *p;
1974     int copied = 0;
1975 
1976     while (count) {
1977         unsigned long offset, length;
1978 
1979         offset = offset_in_page(addr);
1980         length = PAGE_SIZE - offset;
1981         if (length > count)
1982             length = count;
1983         p = vmalloc_to_page(addr);
1984         /*
1985          * To access this _mapped_ area safely we would need a lock,
1986          * but taking one here would add overhead to the vmalloc()
1987          * and vfree() calls for the sake of this rarely used _debug_
1988          * interface. Instead, we use kmap_atomic() and accept a small
1989          * overhead in this access function.
1990          */
1991         if (p) {
1992             /*
1993              * we can expect USER0 is not used (see vread/vwrite's
1994              * function description)
1995              */
1996             void *map = kmap_atomic(p);
1997             memcpy(map + offset, buf, length);
1998             kunmap_atomic(map);
1999         }
2000         addr += length;
2001         buf += length;
2002         copied += length;
2003         count -= length;
2004     }
2005     return copied;
2006 }
2007 
2008 /**
2009  *  vread() -  read vmalloc area in a safe way.
2010  *  @buf:       buffer for reading data
2011  *  @addr:      vm address.
2012  *  @count:     number of bytes to be read.
2013  *
2014  *  Returns the number of bytes by which addr and buf should be
2015  *  increased (the same number as @count). Returns 0 if
2016  *  [addr...addr+count) does not intersect any live vmalloc area.
2017  *
2018  *  This function checks that addr is a valid vmalloc'ed area and
2019  *  copies data from that area to the given buffer. If the memory range
2020  *  [addr...addr+count) includes some valid address, data is copied to
2021  *  the proper area of @buf. Memory holes are zero-filled, and an
2022  *  IOREMAP area is treated as a memory hole: no copy is done.
2023  *
2024  *  If [addr...addr+count) does not intersect any live vm_struct
2025  *  area, returns 0. @buf should be a kernel buffer.
2026  *
2027  *  Note: In normal operation vread() is never necessary because the
2028  *  caller should know that the vmalloc() area is valid and can use
2029  *  memcpy(). This is for routines which have to access the vmalloc
2030  *  area without any information about it, such as /dev/kmem.
2031  *
2032  */
2033 
2034 long vread(char *buf, char *addr, unsigned long count)
2035 {
2036     struct vmap_area *va;
2037     struct vm_struct *vm;
2038     char *vaddr, *buf_start = buf;
2039     unsigned long buflen = count;
2040     unsigned long n;
2041 
2042     /* Don't allow overflow */
2043     if ((unsigned long) addr + count < count)
2044         count = -(unsigned long) addr;
2045 
2046     spin_lock(&vmap_area_lock);
2047     list_for_each_entry(va, &vmap_area_list, list) {
2048         if (!count)
2049             break;
2050 
2051         if (!(va->flags & VM_VM_AREA))
2052             continue;
2053 
2054         vm = va->vm;
2055         vaddr = (char *) vm->addr;
2056         if (addr >= vaddr + get_vm_area_size(vm))
2057             continue;
2058         while (addr < vaddr) {
2059             if (count == 0)
2060                 goto finished;
2061             *buf = '\0';
2062             buf++;
2063             addr++;
2064             count--;
2065         }
2066         n = vaddr + get_vm_area_size(vm) - addr;
2067         if (n > count)
2068             n = count;
2069         if (!(vm->flags & VM_IOREMAP))
2070             aligned_vread(buf, addr, n);
2071         else /* IOREMAP area is treated as memory hole */
2072             memset(buf, 0, n);
2073         buf += n;
2074         addr += n;
2075         count -= n;
2076     }
2077 finished:
2078     spin_unlock(&vmap_area_lock);
2079 
2080     if (buf == buf_start)
2081         return 0;
2082     /* zero-fill memory holes */
2083     if (buf != buf_start + buflen)
2084         memset(buf, 0, buflen - (buf - buf_start));
2085 
2086     return buflen;
2087 }
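
/*
 * Illustrative sketch, not part of the original file: a debug helper that
 * snapshots a possibly partially mapped vmalloc address into a kernel
 * buffer.  Unlike memcpy(), vread() tolerates holes by zero-filling them.
 * Names are hypothetical.
 */
static __maybe_unused long example_snapshot(void *dst, void *vm_addr,
					    unsigned long len)
{
	long n = vread(dst, vm_addr, len);

	if (!n)		/* no live vmalloc area intersects [vm_addr, +len) */
		memset(dst, 0, len);

	return n;
}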
2088 
2089 /**
2090  *  vwrite() -  write vmalloc area in a safe way.
2091  *  @buf:       buffer for source data
2092  *  @addr:      vm address.
2093  *  @count:     number of bytes to be written.
2094  *
2095  *  Returns the number of bytes by which addr and buf should be
2096  *  increased (the same number as @count).
2097  *  If [addr...addr+count) does not intersect any valid vmalloc
2098  *  area, returns 0.
2099  *
2100  *  This function checks that addr is a valid vmalloc'ed area and
2101  *  copies data from a buffer to the given addr. If the range
2102  *  [addr...addr+count) includes some valid address, data is copied
2103  *  from the proper area of @buf. Memory holes are skipped, and an
2104  *  IOREMAP area is treated as a memory hole: no copy is done.
2105  *
2106  *  If [addr...addr+count) does not intersect any live vm_struct
2107  *  area, returns 0. @buf should be a kernel buffer.
2108  *
2109  *  Note: In normal operation vwrite() is never necessary because the
2110  *  caller should know that the vmalloc() area is valid and can use
2111  *  memcpy(). This is for routines which have to access the vmalloc
2112  *  area without any information about it, such as /dev/kmem.
2113  */
2114 
2115 long vwrite(char *buf, char *addr, unsigned long count)
2116 {
2117     struct vmap_area *va;
2118     struct vm_struct *vm;
2119     char *vaddr;
2120     unsigned long n, buflen;
2121     int copied = 0;
2122 
2123     /* Don't allow overflow */
2124     if ((unsigned long) addr + count < count)
2125         count = -(unsigned long) addr;
2126     buflen = count;
2127 
2128     spin_lock(&vmap_area_lock);
2129     list_for_each_entry(va, &vmap_area_list, list) {
2130         if (!count)
2131             break;
2132 
2133         if (!(va->flags & VM_VM_AREA))
2134             continue;
2135 
2136         vm = va->vm;
2137         vaddr = (char *) vm->addr;
2138         if (addr >= vaddr + get_vm_area_size(vm))
2139             continue;
2140         while (addr < vaddr) {
2141             if (count == 0)
2142                 goto finished;
2143             buf++;
2144             addr++;
2145             count--;
2146         }
2147         n = vaddr + get_vm_area_size(vm) - addr;
2148         if (n > count)
2149             n = count;
2150         if (!(vm->flags & VM_IOREMAP)) {
2151             aligned_vwrite(buf, addr, n);
2152             copied++;
2153         }
2154         buf += n;
2155         addr += n;
2156         count -= n;
2157     }
2158 finished:
2159     spin_unlock(&vmap_area_lock);
2160     if (!copied)
2161         return 0;
2162     return buflen;
2163 }
2164 
2165 /**
2166  *  remap_vmalloc_range_partial  -  map vmalloc pages to userspace
2167  *  @vma:       vma to cover
2168  *  @uaddr:     target user address to start at
2169  *  @kaddr:     virtual address of vmalloc kernel memory
2170  *  @size:      size of map area
2171  *
2172  *  Returns:    0 for success, -Exxx on failure
2173  *
2174  *  This function checks that @kaddr is a valid vmalloc'ed area,
2175  *  and that it is big enough to cover the range starting at
2176  *  @uaddr in @vma. Will return failure if those criteria aren't
2177  *  met.
2178  *
2179  *  Similar to remap_pfn_range() (see mm/memory.c)
2180  */
2181 int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
2182                 void *kaddr, unsigned long size)
2183 {
2184     struct vm_struct *area;
2185 
2186     size = PAGE_ALIGN(size);
2187 
2188     if (!PAGE_ALIGNED(uaddr) || !PAGE_ALIGNED(kaddr))
2189         return -EINVAL;
2190 
2191     area = find_vm_area(kaddr);
2192     if (!area)
2193         return -EINVAL;
2194 
2195     if (!(area->flags & VM_USERMAP))
2196         return -EINVAL;
2197 
2198     if (kaddr + size > area->addr + area->size)
2199         return -EINVAL;
2200 
2201     do {
2202         struct page *page = vmalloc_to_page(kaddr);
2203         int ret;
2204 
2205         ret = vm_insert_page(vma, uaddr, page);
2206         if (ret)
2207             return ret;
2208 
2209         uaddr += PAGE_SIZE;
2210         kaddr += PAGE_SIZE;
2211         size -= PAGE_SIZE;
2212     } while (size > 0);
2213 
2214     vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
2215 
2216     return 0;
2217 }
2218 EXPORT_SYMBOL(remap_vmalloc_range_partial);
2219 
2220 /**
2221  *  remap_vmalloc_range  -  map vmalloc pages to userspace
2222  *  @vma:       vma to cover (map full range of vma)
2223  *  @addr:      vmalloc memory
2224  *  @pgoff:     number of pages into addr before first page to map
2225  *
2226  *  Returns:    0 for success, -Exxx on failure
2227  *
2228  *  This function checks that addr is a valid vmalloc'ed area, and
2229  *  that it is big enough to cover the vma. Will return failure if
2230  *  those criteria aren't met.
2231  *
2232  *  Similar to remap_pfn_range() (see mm/memory.c)
2233  */
2234 int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
2235                         unsigned long pgoff)
2236 {
2237     return remap_vmalloc_range_partial(vma, vma->vm_start,
2238                        addr + (pgoff << PAGE_SHIFT),
2239                        vma->vm_end - vma->vm_start);
2240 }
2241 EXPORT_SYMBOL(remap_vmalloc_range);
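
/*
 * Illustrative sketch, not part of the original file: a minimal .mmap
 * handler exposing a vmalloc_user() buffer (stashed in file->private_data
 * at open time) to userspace.  remap_vmalloc_range() itself validates
 * VM_USERMAP and that the area is large enough.  The fops wiring is
 * hypothetical.
 */
static __maybe_unused int example_mmap(struct file *file,
				       struct vm_area_struct *vma)
{
	void *buf = file->private_data;	/* a vmalloc_user() allocation */

	return remap_vmalloc_range(vma, buf, vma->vm_pgoff);
}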
2242 
2243 /*
2244  * Implement a stub for vmalloc_sync_all() if the architecture chose not to
2245  * have one.
2246  */
2247 void __weak vmalloc_sync_all(void)
2248 {
2249 }
2250 
2251 
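/*
 * apply_to_page_range() callback used by alloc_vm_area() below: if the
 * caller supplied an array, record a pointer to each instantiated PTE in
 * it and advance the cursor.
 */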
2252 static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
2253 {
2254     pte_t ***p = data;
2255 
2256     if (p) {
2257         *(*p) = pte;
2258         (*p)++;
2259     }
2260     return 0;
2261 }
2262 
2263 /**
2264  *  alloc_vm_area - allocate a range of kernel address space
2265  *  @size:      size of the area
2266  *  @ptes:      returns the PTEs for the address space
2267  *
2268  *  Returns:    NULL on failure, vm_struct on success
2269  *
2270  *  This function reserves a range of kernel address space, and
2271  *  allocates pagetables to map that range.  No actual mappings
2272  *  are created.
2273  *
2274  *  If @ptes is non-NULL, pointers to the PTEs (in init_mm)
2275  *  allocated for the VM area are returned.
2276  */
2277 struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
2278 {
2279     struct vm_struct *area;
2280 
2281     area = get_vm_area_caller(size, VM_IOREMAP,
2282                 __builtin_return_address(0));
2283     if (area == NULL)
2284         return NULL;
2285 
2286     /*
2287      * This ensures that page tables are constructed for this region
2288      * of kernel virtual address space and mapped into init_mm.
2289      */
2290     if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
2291                 size, f, ptes ? &ptes : NULL)) {
2292         free_vm_area(area);
2293         return NULL;
2294     }
2295 
2296     return area;
2297 }
2298 EXPORT_SYMBOL_GPL(alloc_vm_area);
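
/*
 * Illustrative sketch, not part of the original file: reserve one page of
 * kernel address space and get back the PTE that maps it, in the style of
 * a paravirtualized backend that later points the PTE at a foreign frame.
 * Names are hypothetical; real users live e.g. in the Xen drivers.
 */
static __maybe_unused int example_reserve_kva(void)
{
	pte_t *pte;
	struct vm_struct *area = alloc_vm_area(PAGE_SIZE, &pte);

	if (!area)
		return -ENOMEM;

	/*
	 * area->addr is a valid but unbacked kernel virtual address and
	 * *pte is the PTE covering it; nothing may touch area->addr until
	 * a mapping has been installed.
	 */
	free_vm_area(area);
	return 0;
}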
2299 
2300 void free_vm_area(struct vm_struct *area)
2301 {
2302     struct vm_struct *ret;
2303     ret = remove_vm_area(area->addr);
2304     BUG_ON(ret != area);
2305     kfree(area);
2306 }
2307 EXPORT_SYMBOL_GPL(free_vm_area);
2308 
2309 #ifdef CONFIG_SMP
2310 static struct vmap_area *node_to_va(struct rb_node *n)
2311 {
2312     return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
2313 }
2314 
2315 /**
2316  * pvm_find_next_prev - find the next and prev vmap_area surrounding @end
2317  * @end: target address
2318  * @pnext: out arg for the next vmap_area
2319  * @pprev: out arg for the previous vmap_area
2320  *
2321  * Returns: %true if either or both of next and prev are found,
2322  *      %false if no vmap_area exists
2323  *
2324  * Find the vmap_areas whose end addresses enclose @end, i.e. if not
2325  * NULL, *pnext->va_end > @end and *pprev->va_end <= @end.
2326  */
2327 static bool pvm_find_next_prev(unsigned long end,
2328                    struct vmap_area **pnext,
2329                    struct vmap_area **pprev)
2330 {
2331     struct rb_node *n = vmap_area_root.rb_node;
2332     struct vmap_area *va = NULL;
2333 
2334     while (n) {
2335         va = rb_entry(n, struct vmap_area, rb_node);
2336         if (end < va->va_end)
2337             n = n->rb_left;
2338         else if (end > va->va_end)
2339             n = n->rb_right;
2340         else
2341             break;
2342     }
2343 
2344     if (!va)
2345         return false;
2346 
2347     if (va->va_end > end) {
2348         *pnext = va;
2349         *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2350     } else {
2351         *pprev = va;
2352         *pnext = node_to_va(rb_next(&(*pprev)->rb_node));
2353     }
2354     return true;
2355 }
2356 
2357 /**
2358  * pvm_determine_end - find the highest aligned address between two vmap_areas
2359  * @pnext: in/out arg for the next vmap_area
2360  * @pprev: in/out arg for the previous vmap_area
2361  * @align: alignment
2362  *
2363  * Returns: determined end address
2364  *
2365  * Find the highest aligned address between *@pnext and *@pprev below
2366  * VMALLOC_END.  *@pnext and *@pprev are adjusted so that the aligned
2367  * down address is between the end addresses of the two vmap_areas.
2368  *
2369  * Please note that the address returned by this function may fall
2370  * inside *@pnext vmap_area.  The caller is responsible for checking
2371  * that.
2372  */
2373 static unsigned long pvm_determine_end(struct vmap_area **pnext,
2374                        struct vmap_area **pprev,
2375                        unsigned long align)
2376 {
2377     const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2378     unsigned long addr;
2379 
2380     if (*pnext)
2381         addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
2382     else
2383         addr = vmalloc_end;
2384 
2385     while (*pprev && (*pprev)->va_end > addr) {
2386         *pnext = *pprev;
2387         *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2388     }
2389 
2390     return addr;
2391 }
2392 
2393 /**
2394  * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
2395  * @offsets: array containing offset of each area
2396  * @sizes: array containing size of each area
2397  * @nr_vms: the number of areas to allocate
2398  * @align: alignment, all entries in @offsets and @sizes must be aligned to this
2399  *
2400  * Returns: kmalloc'd vm_struct pointer array pointing to allocated
2401  *      vm_structs on success, %NULL on failure
2402  *
2403  * Percpu allocator wants to use congruent vm areas so that it can
2404  * maintain the offsets among percpu areas.  This function allocates
2405  * congruent vmalloc areas for it with GFP_KERNEL.  These areas tend to
2406  * be scattered pretty far apart, with the distance between two areas
2407  * easily reaching gigabytes.  To avoid interacting with regular
2408  * vmallocs, these areas are allocated from the top.
2409  *
2410  * Despite its complicated look, this allocator is rather simple.  It
2411  * does everything top-down and scans areas from the end looking for a
2412  * matching slot.  While scanning, if any of the areas overlaps with an
2413  * existing vmap_area, the base address is pulled down to fit the
2414  * area.  Scanning is repeated until all the areas fit, and then all
2415  * necessary data structures are inserted and the result is returned.
2416  */
2417 struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
2418                      const size_t *sizes, int nr_vms,
2419                      size_t align)
2420 {
2421     const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
2422     const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2423     struct vmap_area **vas, *prev, *next;
2424     struct vm_struct **vms;
2425     int area, area2, last_area, term_area;
2426     unsigned long base, start, end, last_end;
2427     bool purged = false;
2428 
2429     /* verify parameters and allocate data structures */
2430     BUG_ON(offset_in_page(align) || !is_power_of_2(align));
2431     for (last_area = 0, area = 0; area < nr_vms; area++) {
2432         start = offsets[area];
2433         end = start + sizes[area];
2434 
2435         /* is everything aligned properly? */
2436         BUG_ON(!IS_ALIGNED(offsets[area], align));
2437         BUG_ON(!IS_ALIGNED(sizes[area], align));
2438 
2439         /* detect the area with the highest address */
2440         if (start > offsets[last_area])
2441             last_area = area;
2442 
2443         for (area2 = 0; area2 < nr_vms; area2++) {
2444             unsigned long start2 = offsets[area2];
2445             unsigned long end2 = start2 + sizes[area2];
2446 
2447             if (area2 == area)
2448                 continue;
2449 
2450             BUG_ON(start2 >= start && start2 < end);
2451             BUG_ON(end2 <= end && end2 > start);
2452         }
2453     }
2454     last_end = offsets[last_area] + sizes[last_area];
2455 
2456     if (vmalloc_end - vmalloc_start < last_end) {
2457         WARN_ON(true);
2458         return NULL;
2459     }
2460 
2461     vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL);
2462     vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL);
2463     if (!vas || !vms)
2464         goto err_free2;
2465 
2466     for (area = 0; area < nr_vms; area++) {
2467         vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL);
2468         vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
2469         if (!vas[area] || !vms[area])
2470             goto err_free;
2471     }
2472 retry:
2473     spin_lock(&vmap_area_lock);
2474 
2475     /* start scanning - we scan from the top, begin with the last area */
2476     area = term_area = last_area;
2477     start = offsets[area];
2478     end = start + sizes[area];
2479 
2480     if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
2481         base = vmalloc_end - last_end;
2482         goto found;
2483     }
2484     base = pvm_determine_end(&next, &prev, align) - end;
2485 
2486     while (true) {
2487         BUG_ON(next && next->va_end <= base + end);
2488         BUG_ON(prev && prev->va_end > base + end);
2489 
2490         /*
2491          * base might have underflowed, add last_end before
2492          * comparing.
2493          */
2494         if (base + last_end < vmalloc_start + last_end) {
2495             spin_unlock(&vmap_area_lock);
2496             if (!purged) {
2497                 purge_vmap_area_lazy();
2498                 purged = true;
2499                 goto retry;
2500             }
2501             goto err_free;
2502         }
2503 
2504         /*
2505          * If next overlaps, move base downwards so that it's
2506          * right below next and then recheck.
2507          */
2508         if (next && next->va_start < base + end) {
2509             base = pvm_determine_end(&next, &prev, align) - end;
2510             term_area = area;
2511             continue;
2512         }
2513 
2514         /*
2515          * If prev overlaps, shift down next and prev and move
2516          * base so that it's right below new next and then
2517          * recheck.
2518          */
2519         if (prev && prev->va_end > base + start)  {
2520             next = prev;
2521             prev = node_to_va(rb_prev(&next->rb_node));
2522             base = pvm_determine_end(&next, &prev, align) - end;
2523             term_area = area;
2524             continue;
2525         }
2526 
2527         /*
2528          * This area fits, move on to the previous one.  If
2529          * the previous one is the terminal one, we're done.
2530          */
2531         area = (area + nr_vms - 1) % nr_vms;
2532         if (area == term_area)
2533             break;
2534         start = offsets[area];
2535         end = start + sizes[area];
2536         pvm_find_next_prev(base + end, &next, &prev);
2537     }
2538 found:
2539     /* we've found a fitting base, insert all va's */
2540     for (area = 0; area < nr_vms; area++) {
2541         struct vmap_area *va = vas[area];
2542 
2543         va->va_start = base + offsets[area];
2544         va->va_end = va->va_start + sizes[area];
2545         __insert_vmap_area(va);
2546     }
2547 
2548     vmap_area_pcpu_hole = base + offsets[last_area];
2549 
2550     spin_unlock(&vmap_area_lock);
2551 
2552     /* insert all vm's */
2553     for (area = 0; area < nr_vms; area++)
2554         setup_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
2555                  pcpu_get_vm_areas);
2556 
2557     kfree(vas);
2558     return vms;
2559 
2560 err_free:
2561     for (area = 0; area < nr_vms; area++) {
2562         kfree(vas[area]);
2563         kfree(vms[area]);
2564     }
2565 err_free2:
2566     kfree(vas);
2567     kfree(vms);
2568     return NULL;
2569 }
2570 
2571 /**
2572  * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
2573  * @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
2574  * @nr_vms: the number of allocated areas
2575  *
2576  * Free vm_structs and the array allocated by pcpu_get_vm_areas().
2577  */
2578 void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
2579 {
2580     int i;
2581 
2582     for (i = 0; i < nr_vms; i++)
2583         free_vm_area(vms[i]);
2584     kfree(vms);
2585 }
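
/*
 * Illustrative sketch, not part of the original file: how the percpu
 * first-chunk code might ask for two congruent areas spaced 'unit_size'
 * apart.  'unit_size' is assumed to be page aligned; the real caller
 * lives in mm/percpu.c.
 */
static __maybe_unused int example_pcpu_layout(size_t unit_size)
{
	unsigned long offsets[2] = { 0, unit_size };
	size_t sizes[2] = { unit_size, unit_size };
	struct vm_struct **vms;

	vms = pcpu_get_vm_areas(offsets, sizes, 2, PAGE_SIZE);
	if (!vms)
		return -ENOMEM;

	/* vms[0]->addr and vms[1]->addr are exactly 'unit_size' apart. */
	pcpu_free_vm_areas(vms, 2);
	return 0;
}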
2586 #endif  /* CONFIG_SMP */
2587 
2588 #ifdef CONFIG_PROC_FS
2589 static void *s_start(struct seq_file *m, loff_t *pos)
2590     __acquires(&vmap_area_lock)
2591 {
2592     spin_lock(&vmap_area_lock);
2593     return seq_list_start(&vmap_area_list, *pos);
2594 }
2595 
2596 static void *s_next(struct seq_file *m, void *p, loff_t *pos)
2597 {
2598     return seq_list_next(p, &vmap_area_list, pos);
2599 }
2600 
2601 static void s_stop(struct seq_file *m, void *p)
2602     __releases(&vmap_area_lock)
2603 {
2604     spin_unlock(&vmap_area_lock);
2605 }
2606 
2607 static void show_numa_info(struct seq_file *m, struct vm_struct *v)
2608 {
2609     if (IS_ENABLED(CONFIG_NUMA)) {
2610         unsigned int nr, *counters = m->private;
2611 
2612         if (!counters)
2613             return;
2614 
2615         if (v->flags & VM_UNINITIALIZED)
2616             return;
2617         /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
2618         smp_rmb();
2619 
2620         memset(counters, 0, nr_node_ids * sizeof(unsigned int));
2621 
2622         for (nr = 0; nr < v->nr_pages; nr++)
2623             counters[page_to_nid(v->pages[nr])]++;
2624 
2625         for_each_node_state(nr, N_HIGH_MEMORY)
2626             if (counters[nr])
2627                 seq_printf(m, " N%u=%u", nr, counters[nr]);
2628     }
2629 }
2630 
2631 static int s_show(struct seq_file *m, void *p)
2632 {
2633     struct vmap_area *va;
2634     struct vm_struct *v;
2635 
2636     va = list_entry(p, struct vmap_area, list);
2637 
2638     /*
2639      * s_show can race with remove_vm_area(): !VM_VM_AREA means that the
2640      * vmap area is being torn down or is a vm_map_ram allocation.
2641      */
2642     if (!(va->flags & VM_VM_AREA))
2643         return 0;
2644 
2645     v = va->vm;
2646 
2647     seq_printf(m, "0x%pK-0x%pK %7ld",
2648         v->addr, v->addr + v->size, v->size);
2649 
2650     if (v->caller)
2651         seq_printf(m, " %pS", v->caller);
2652 
2653     if (v->nr_pages)
2654         seq_printf(m, " pages=%d", v->nr_pages);
2655 
2656     if (v->phys_addr)
2657         seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
2658 
2659     if (v->flags & VM_IOREMAP)
2660         seq_puts(m, " ioremap");
2661 
2662     if (v->flags & VM_ALLOC)
2663         seq_puts(m, " vmalloc");
2664 
2665     if (v->flags & VM_MAP)
2666         seq_puts(m, " vmap");
2667 
2668     if (v->flags & VM_USERMAP)
2669         seq_puts(m, " user");
2670 
2671     if (is_vmalloc_addr(v->pages))
2672         seq_puts(m, " vpages");
2673 
2674     show_numa_info(m, v);
2675     seq_putc(m, '\n');
2676     return 0;
2677 }
2678 
2679 static const struct seq_operations vmalloc_op = {
2680     .start = s_start,
2681     .next = s_next,
2682     .stop = s_stop,
2683     .show = s_show,
2684 };
2685 
2686 static int vmalloc_open(struct inode *inode, struct file *file)
2687 {
2688     if (IS_ENABLED(CONFIG_NUMA))
2689         return seq_open_private(file, &vmalloc_op,
2690                     nr_node_ids * sizeof(unsigned int));
2691     else
2692         return seq_open(file, &vmalloc_op);
2693 }
2694 
2695 static const struct file_operations proc_vmalloc_operations = {
2696     .open       = vmalloc_open,
2697     .read       = seq_read,
2698     .llseek     = seq_lseek,
2699     .release    = seq_release_private,
2700 };
2701 
2702 static int __init proc_vmalloc_init(void)
2703 {
2704     proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
2705     return 0;
2706 }
2707 module_init(proc_vmalloc_init);
2708 
2709 #endif
2710