// SPDX-License-Identifier: GPL-2.0-only
/*
 *  Copyright (C) 1993  Linus Torvalds
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
 *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
 *  Numa awareness, Christoph Lameter, SGI, April 2005
 *  Improving global KVA allocator, Uladzislau Rezki, Sony, May 2019
 */

0011 #include <linux/vmalloc.h>
0012 #include <linux/mm.h>
0013 #include <linux/module.h>
0014 #include <linux/highmem.h>
0015 #include <linux/sched/signal.h>
0016 #include <linux/slab.h>
0017 #include <linux/spinlock.h>
0018 #include <linux/interrupt.h>
0019 #include <linux/proc_fs.h>
0020 #include <linux/seq_file.h>
0021 #include <linux/set_memory.h>
0022 #include <linux/debugobjects.h>
0023 #include <linux/kallsyms.h>
0024 #include <linux/list.h>
0025 #include <linux/notifier.h>
0026 #include <linux/rbtree.h>
0027 #include <linux/xarray.h>
0028 #include <linux/io.h>
0029 #include <linux/rcupdate.h>
0030 #include <linux/pfn.h>
0031 #include <linux/kmemleak.h>
0032 #include <linux/atomic.h>
0033 #include <linux/compiler.h>
0034 #include <linux/memcontrol.h>
0035 #include <linux/llist.h>
0036 #include <linux/bitops.h>
0037 #include <linux/rbtree_augmented.h>
0038 #include <linux/overflow.h>
0039 #include <linux/pgtable.h>
0040 #include <linux/uaccess.h>
0041 #include <linux/hugetlb.h>
0042 #include <linux/sched/mm.h>
0043 #include <asm/tlbflush.h>
0044 #include <asm/shmparam.h>
0045
0046 #include "internal.h"
0047 #include "pgalloc-track.h"
0048
0049 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
0050 static unsigned int __ro_after_init ioremap_max_page_shift = BITS_PER_LONG - 1;
0051
0052 static int __init set_nohugeiomap(char *str)
0053 {
0054 ioremap_max_page_shift = PAGE_SHIFT;
0055 return 0;
0056 }
0057 early_param("nohugeiomap", set_nohugeiomap);
0058 #else
0059 static const unsigned int ioremap_max_page_shift = PAGE_SHIFT;
0060 #endif
0061
0062 #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
0063 static bool __ro_after_init vmap_allow_huge = true;
0064
0065 static int __init set_nohugevmalloc(char *str)
0066 {
0067 vmap_allow_huge = false;
0068 return 0;
0069 }
0070 early_param("nohugevmalloc", set_nohugevmalloc);
0071 #else
0072 static const bool vmap_allow_huge = false;
0073 #endif
0074
0075 bool is_vmalloc_addr(const void *x)
0076 {
0077 unsigned long addr = (unsigned long)kasan_reset_tag(x);
0078
0079 return addr >= VMALLOC_START && addr < VMALLOC_END;
0080 }
0081 EXPORT_SYMBOL(is_vmalloc_addr);
0082
0083 struct vfree_deferred {
0084 struct llist_head list;
0085 struct work_struct wq;
0086 };
0087 static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred);
0088
0089 static void __vunmap(const void *, int);
0090
0091 static void free_work(struct work_struct *w)
0092 {
0093 struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq);
0094 struct llist_node *t, *llnode;
0095
0096 llist_for_each_safe(llnode, t, llist_del_all(&p->list))
0097 __vunmap((void *)llnode, 1);
0098 }
0099
0100
0101 static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
0102 phys_addr_t phys_addr, pgprot_t prot,
0103 unsigned int max_page_shift, pgtbl_mod_mask *mask)
0104 {
0105 pte_t *pte;
0106 u64 pfn;
0107 unsigned long size = PAGE_SIZE;
0108
0109 pfn = phys_addr >> PAGE_SHIFT;
0110 pte = pte_alloc_kernel_track(pmd, addr, mask);
0111 if (!pte)
0112 return -ENOMEM;
0113 do {
0114 BUG_ON(!pte_none(*pte));
0115
0116 #ifdef CONFIG_HUGETLB_PAGE
0117 size = arch_vmap_pte_range_map_size(addr, end, pfn, max_page_shift);
0118 if (size != PAGE_SIZE) {
0119 pte_t entry = pfn_pte(pfn, prot);
0120
0121 entry = arch_make_huge_pte(entry, ilog2(size), 0);
0122 set_huge_pte_at(&init_mm, addr, pte, entry);
0123 pfn += PFN_DOWN(size);
0124 continue;
0125 }
0126 #endif
0127 set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
0128 pfn++;
0129 } while (pte += PFN_DOWN(size), addr += size, addr != end);
0130 *mask |= PGTBL_PTE_MODIFIED;
0131 return 0;
0132 }
0133
0134 static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
0135 phys_addr_t phys_addr, pgprot_t prot,
0136 unsigned int max_page_shift)
0137 {
0138 if (max_page_shift < PMD_SHIFT)
0139 return 0;
0140
0141 if (!arch_vmap_pmd_supported(prot))
0142 return 0;
0143
0144 if ((end - addr) != PMD_SIZE)
0145 return 0;
0146
0147 if (!IS_ALIGNED(addr, PMD_SIZE))
0148 return 0;
0149
0150 if (!IS_ALIGNED(phys_addr, PMD_SIZE))
0151 return 0;
0152
0153 if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr))
0154 return 0;
0155
0156 return pmd_set_huge(pmd, phys_addr, prot);
0157 }
0158
0159 static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
0160 phys_addr_t phys_addr, pgprot_t prot,
0161 unsigned int max_page_shift, pgtbl_mod_mask *mask)
0162 {
0163 pmd_t *pmd;
0164 unsigned long next;
0165
0166 pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
0167 if (!pmd)
0168 return -ENOMEM;
0169 do {
0170 next = pmd_addr_end(addr, end);
0171
0172 if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot,
0173 max_page_shift)) {
0174 *mask |= PGTBL_PMD_MODIFIED;
0175 continue;
0176 }
0177
0178 if (vmap_pte_range(pmd, addr, next, phys_addr, prot, max_page_shift, mask))
0179 return -ENOMEM;
0180 } while (pmd++, phys_addr += (next - addr), addr = next, addr != end);
0181 return 0;
0182 }
0183
0184 static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end,
0185 phys_addr_t phys_addr, pgprot_t prot,
0186 unsigned int max_page_shift)
0187 {
0188 if (max_page_shift < PUD_SHIFT)
0189 return 0;
0190
0191 if (!arch_vmap_pud_supported(prot))
0192 return 0;
0193
0194 if ((end - addr) != PUD_SIZE)
0195 return 0;
0196
0197 if (!IS_ALIGNED(addr, PUD_SIZE))
0198 return 0;
0199
0200 if (!IS_ALIGNED(phys_addr, PUD_SIZE))
0201 return 0;
0202
0203 if (pud_present(*pud) && !pud_free_pmd_page(pud, addr))
0204 return 0;
0205
0206 return pud_set_huge(pud, phys_addr, prot);
0207 }
0208
0209 static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
0210 phys_addr_t phys_addr, pgprot_t prot,
0211 unsigned int max_page_shift, pgtbl_mod_mask *mask)
0212 {
0213 pud_t *pud;
0214 unsigned long next;
0215
0216 pud = pud_alloc_track(&init_mm, p4d, addr, mask);
0217 if (!pud)
0218 return -ENOMEM;
0219 do {
0220 next = pud_addr_end(addr, end);
0221
0222 if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot,
0223 max_page_shift)) {
0224 *mask |= PGTBL_PUD_MODIFIED;
0225 continue;
0226 }
0227
0228 if (vmap_pmd_range(pud, addr, next, phys_addr, prot,
0229 max_page_shift, mask))
0230 return -ENOMEM;
0231 } while (pud++, phys_addr += (next - addr), addr = next, addr != end);
0232 return 0;
0233 }
0234
0235 static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end,
0236 phys_addr_t phys_addr, pgprot_t prot,
0237 unsigned int max_page_shift)
0238 {
0239 if (max_page_shift < P4D_SHIFT)
0240 return 0;
0241
0242 if (!arch_vmap_p4d_supported(prot))
0243 return 0;
0244
0245 if ((end - addr) != P4D_SIZE)
0246 return 0;
0247
0248 if (!IS_ALIGNED(addr, P4D_SIZE))
0249 return 0;
0250
0251 if (!IS_ALIGNED(phys_addr, P4D_SIZE))
0252 return 0;
0253
0254 if (p4d_present(*p4d) && !p4d_free_pud_page(p4d, addr))
0255 return 0;
0256
0257 return p4d_set_huge(p4d, phys_addr, prot);
0258 }
0259
0260 static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
0261 phys_addr_t phys_addr, pgprot_t prot,
0262 unsigned int max_page_shift, pgtbl_mod_mask *mask)
0263 {
0264 p4d_t *p4d;
0265 unsigned long next;
0266
0267 p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
0268 if (!p4d)
0269 return -ENOMEM;
0270 do {
0271 next = p4d_addr_end(addr, end);
0272
0273 if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot,
0274 max_page_shift)) {
0275 *mask |= PGTBL_P4D_MODIFIED;
0276 continue;
0277 }
0278
0279 if (vmap_pud_range(p4d, addr, next, phys_addr, prot,
0280 max_page_shift, mask))
0281 return -ENOMEM;
0282 } while (p4d++, phys_addr += (next - addr), addr = next, addr != end);
0283 return 0;
0284 }
0285
0286 static int vmap_range_noflush(unsigned long addr, unsigned long end,
0287 phys_addr_t phys_addr, pgprot_t prot,
0288 unsigned int max_page_shift)
0289 {
0290 pgd_t *pgd;
0291 unsigned long start;
0292 unsigned long next;
0293 int err;
0294 pgtbl_mod_mask mask = 0;
0295
0296 might_sleep();
0297 BUG_ON(addr >= end);
0298
0299 start = addr;
0300 pgd = pgd_offset_k(addr);
0301 do {
0302 next = pgd_addr_end(addr, end);
0303 err = vmap_p4d_range(pgd, addr, next, phys_addr, prot,
0304 max_page_shift, &mask);
0305 if (err)
0306 break;
0307 } while (pgd++, phys_addr += (next - addr), addr = next, addr != end);
0308
0309 if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
0310 arch_sync_kernel_mappings(start, end);
0311
0312 return err;
0313 }
0314
0315 int ioremap_page_range(unsigned long addr, unsigned long end,
0316 phys_addr_t phys_addr, pgprot_t prot)
0317 {
0318 int err;
0319
0320 err = vmap_range_noflush(addr, end, phys_addr, pgprot_nx(prot),
0321 ioremap_max_page_shift);
0322 flush_cache_vmap(addr, end);
0323 return err;
0324 }
0325
0326 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
0327 pgtbl_mod_mask *mask)
0328 {
0329 pte_t *pte;
0330
0331 pte = pte_offset_kernel(pmd, addr);
0332 do {
0333 pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
0334 WARN_ON(!pte_none(ptent) && !pte_present(ptent));
0335 } while (pte++, addr += PAGE_SIZE, addr != end);
0336 *mask |= PGTBL_PTE_MODIFIED;
0337 }
0338
0339 static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
0340 pgtbl_mod_mask *mask)
0341 {
0342 pmd_t *pmd;
0343 unsigned long next;
0344 int cleared;
0345
0346 pmd = pmd_offset(pud, addr);
0347 do {
0348 next = pmd_addr_end(addr, end);
0349
0350 cleared = pmd_clear_huge(pmd);
0351 if (cleared || pmd_bad(*pmd))
0352 *mask |= PGTBL_PMD_MODIFIED;
0353
0354 if (cleared)
0355 continue;
0356 if (pmd_none_or_clear_bad(pmd))
0357 continue;
0358 vunmap_pte_range(pmd, addr, next, mask);
0359
0360 cond_resched();
0361 } while (pmd++, addr = next, addr != end);
0362 }
0363
0364 static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
0365 pgtbl_mod_mask *mask)
0366 {
0367 pud_t *pud;
0368 unsigned long next;
0369 int cleared;
0370
0371 pud = pud_offset(p4d, addr);
0372 do {
0373 next = pud_addr_end(addr, end);
0374
0375 cleared = pud_clear_huge(pud);
0376 if (cleared || pud_bad(*pud))
0377 *mask |= PGTBL_PUD_MODIFIED;
0378
0379 if (cleared)
0380 continue;
0381 if (pud_none_or_clear_bad(pud))
0382 continue;
0383 vunmap_pmd_range(pud, addr, next, mask);
0384 } while (pud++, addr = next, addr != end);
0385 }
0386
0387 static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
0388 pgtbl_mod_mask *mask)
0389 {
0390 p4d_t *p4d;
0391 unsigned long next;
0392
0393 p4d = p4d_offset(pgd, addr);
0394 do {
0395 next = p4d_addr_end(addr, end);
0396
0397 p4d_clear_huge(p4d);
0398 if (p4d_bad(*p4d))
0399 *mask |= PGTBL_P4D_MODIFIED;
0400
0401 if (p4d_none_or_clear_bad(p4d))
0402 continue;
0403 vunmap_pud_range(p4d, addr, next, mask);
0404 } while (p4d++, addr = next, addr != end);
0405 }
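/*
 * vunmap_range_noflush is similar to vunmap_range, but does not
 * flush caches or TLBs.
 *
 * The caller is responsible for calling flush_cache_vunmap() before calling
 * this function, and flush_tlb_kernel_range() after it has returned
 * successfully, and before the addresses are vunmapped again.
 */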
0419 void vunmap_range_noflush(unsigned long start, unsigned long end)
0420 {
0421 unsigned long next;
0422 pgd_t *pgd;
0423 unsigned long addr = start;
0424 pgtbl_mod_mask mask = 0;
0425
0426 BUG_ON(addr >= end);
0427 pgd = pgd_offset_k(addr);
0428 do {
0429 next = pgd_addr_end(addr, end);
0430 if (pgd_bad(*pgd))
0431 mask |= PGTBL_PGD_MODIFIED;
0432 if (pgd_none_or_clear_bad(pgd))
0433 continue;
0434 vunmap_p4d_range(pgd, addr, next, &mask);
0435 } while (pgd++, addr = next, addr != end);
0436
0437 if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
0438 arch_sync_kernel_mappings(start, end);
0439 }
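/**
 * vunmap_range - unmap kernel virtual addresses
 * @addr: start of the VM area to unmap
 * @end: end of the VM area to unmap (non-inclusive)
 *
 * Clears any present PTEs in the virtual address range, flushes TLBs and
 * caches. Any subsequent access to the address before it has been
 * re-mapped is a kernel bug.
 */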
0450 void vunmap_range(unsigned long addr, unsigned long end)
0451 {
0452 flush_cache_vunmap(addr, end);
0453 vunmap_range_noflush(addr, end);
0454 flush_tlb_kernel_range(addr, end);
0455 }
0456
0457 static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
0458 unsigned long end, pgprot_t prot, struct page **pages, int *nr,
0459 pgtbl_mod_mask *mask)
0460 {
0461 pte_t *pte;
0462
0463
0464
0465
0466
0467
0468 pte = pte_alloc_kernel_track(pmd, addr, mask);
0469 if (!pte)
0470 return -ENOMEM;
0471 do {
0472 struct page *page = pages[*nr];
0473
0474 if (WARN_ON(!pte_none(*pte)))
0475 return -EBUSY;
0476 if (WARN_ON(!page))
0477 return -ENOMEM;
0478 if (WARN_ON(!pfn_valid(page_to_pfn(page))))
0479 return -EINVAL;
0480
0481 set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
0482 (*nr)++;
0483 } while (pte++, addr += PAGE_SIZE, addr != end);
0484 *mask |= PGTBL_PTE_MODIFIED;
0485 return 0;
0486 }
0487
0488 static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr,
0489 unsigned long end, pgprot_t prot, struct page **pages, int *nr,
0490 pgtbl_mod_mask *mask)
0491 {
0492 pmd_t *pmd;
0493 unsigned long next;
0494
0495 pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
0496 if (!pmd)
0497 return -ENOMEM;
0498 do {
0499 next = pmd_addr_end(addr, end);
0500 if (vmap_pages_pte_range(pmd, addr, next, prot, pages, nr, mask))
0501 return -ENOMEM;
0502 } while (pmd++, addr = next, addr != end);
0503 return 0;
0504 }
0505
0506 static int vmap_pages_pud_range(p4d_t *p4d, unsigned long addr,
0507 unsigned long end, pgprot_t prot, struct page **pages, int *nr,
0508 pgtbl_mod_mask *mask)
0509 {
0510 pud_t *pud;
0511 unsigned long next;
0512
0513 pud = pud_alloc_track(&init_mm, p4d, addr, mask);
0514 if (!pud)
0515 return -ENOMEM;
0516 do {
0517 next = pud_addr_end(addr, end);
0518 if (vmap_pages_pmd_range(pud, addr, next, prot, pages, nr, mask))
0519 return -ENOMEM;
0520 } while (pud++, addr = next, addr != end);
0521 return 0;
0522 }
0523
0524 static int vmap_pages_p4d_range(pgd_t *pgd, unsigned long addr,
0525 unsigned long end, pgprot_t prot, struct page **pages, int *nr,
0526 pgtbl_mod_mask *mask)
0527 {
0528 p4d_t *p4d;
0529 unsigned long next;
0530
0531 p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
0532 if (!p4d)
0533 return -ENOMEM;
0534 do {
0535 next = p4d_addr_end(addr, end);
0536 if (vmap_pages_pud_range(p4d, addr, next, prot, pages, nr, mask))
0537 return -ENOMEM;
0538 } while (p4d++, addr = next, addr != end);
0539 return 0;
0540 }
0541
0542 static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end,
0543 pgprot_t prot, struct page **pages)
0544 {
0545 unsigned long start = addr;
0546 pgd_t *pgd;
0547 unsigned long next;
0548 int err = 0;
0549 int nr = 0;
0550 pgtbl_mod_mask mask = 0;
0551
0552 BUG_ON(addr >= end);
0553 pgd = pgd_offset_k(addr);
0554 do {
0555 next = pgd_addr_end(addr, end);
0556 if (pgd_bad(*pgd))
0557 mask |= PGTBL_PGD_MODIFIED;
0558 err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
0559 if (err)
0560 return err;
0561 } while (pgd++, addr = next, addr != end);
0562
0563 if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
0564 arch_sync_kernel_mappings(start, end);
0565
0566 return 0;
0567 }
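/*
 * vmap_pages_range_noflush is similar to vmap_pages_range, but does not
 * flush caches.
 *
 * The caller is responsible for calling flush_cache_vmap() after this
 * function returns successfully and before the addresses are accessed.
 */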
0578 int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
0579 pgprot_t prot, struct page **pages, unsigned int page_shift)
0580 {
0581 unsigned int i, nr = (end - addr) >> PAGE_SHIFT;
0582
0583 WARN_ON(page_shift < PAGE_SHIFT);
0584
0585 if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) ||
0586 page_shift == PAGE_SHIFT)
0587 return vmap_small_pages_range_noflush(addr, end, prot, pages);
0588
0589 for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
0590 int err;
0591
0592 err = vmap_range_noflush(addr, addr + (1UL << page_shift),
0593 __pa(page_address(pages[i])), prot,
0594 page_shift);
0595 if (err)
0596 return err;
0597
0598 addr += 1UL << page_shift;
0599 }
0600
0601 return 0;
0602 }
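/**
 * vmap_pages_range - map pages to a kernel virtual address
 * @addr: start of the VM area to map
 * @end: end of the VM area to map (non-inclusive)
 * @prot: page protection flags to use
 * @pages: pages to map (always PAGE_SIZE pages)
 * @page_shift: maximum shift that the pages may be mapped with; the pages
 * must be aligned and physically contiguous up to at least this shift.
 *
 * Return: 0 on success, -errno on failure.
 */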
0616 static int vmap_pages_range(unsigned long addr, unsigned long end,
0617 pgprot_t prot, struct page **pages, unsigned int page_shift)
0618 {
0619 int err;
0620
0621 err = vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
0622 flush_cache_vmap(addr, end);
0623 return err;
0624 }
0625
0626 int is_vmalloc_or_module_addr(const void *x)
0627 {
0628
0629
0630
0631
0632
0633 #if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
0634 unsigned long addr = (unsigned long)kasan_reset_tag(x);
0635 if (addr >= MODULES_VADDR && addr < MODULES_END)
0636 return 1;
0637 #endif
0638 return is_vmalloc_addr(x);
0639 }
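/*
 * Walk a vmalloc address to the struct page it maps. Huge vmap mappings
 * return the tail page that corresponds to the base page address, which
 * matches small vmap mappings.
 */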
0646 struct page *vmalloc_to_page(const void *vmalloc_addr)
0647 {
0648 unsigned long addr = (unsigned long) vmalloc_addr;
0649 struct page *page = NULL;
0650 pgd_t *pgd = pgd_offset_k(addr);
0651 p4d_t *p4d;
0652 pud_t *pud;
0653 pmd_t *pmd;
0654 pte_t *ptep, pte;
0655
0656
0657
0658
0659
0660 VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
0661
0662 if (pgd_none(*pgd))
0663 return NULL;
0664 if (WARN_ON_ONCE(pgd_leaf(*pgd)))
0665 return NULL;
0666 if (WARN_ON_ONCE(pgd_bad(*pgd)))
0667 return NULL;
0668
0669 p4d = p4d_offset(pgd, addr);
0670 if (p4d_none(*p4d))
0671 return NULL;
0672 if (p4d_leaf(*p4d))
0673 return p4d_page(*p4d) + ((addr & ~P4D_MASK) >> PAGE_SHIFT);
0674 if (WARN_ON_ONCE(p4d_bad(*p4d)))
0675 return NULL;
0676
0677 pud = pud_offset(p4d, addr);
0678 if (pud_none(*pud))
0679 return NULL;
0680 if (pud_leaf(*pud))
0681 return pud_page(*pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
0682 if (WARN_ON_ONCE(pud_bad(*pud)))
0683 return NULL;
0684
0685 pmd = pmd_offset(pud, addr);
0686 if (pmd_none(*pmd))
0687 return NULL;
0688 if (pmd_leaf(*pmd))
0689 return pmd_page(*pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
0690 if (WARN_ON_ONCE(pmd_bad(*pmd)))
0691 return NULL;
0692
0693 ptep = pte_offset_map(pmd, addr);
0694 pte = *ptep;
0695 if (pte_present(pte))
0696 page = pte_page(pte);
0697 pte_unmap(ptep);
0698
0699 return page;
0700 }
0701 EXPORT_SYMBOL(vmalloc_to_page);
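/*
 * Map a vmalloc()-space virtual address to the physical page frame number.
 */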
0706 unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
0707 {
0708 return page_to_pfn(vmalloc_to_page(vmalloc_addr));
0709 }
0710 EXPORT_SYMBOL(vmalloc_to_pfn);
0711
0712
0713
0714
0715 #define DEBUG_AUGMENT_PROPAGATE_CHECK 0
0716 #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0
0717
0718
0719 static DEFINE_SPINLOCK(vmap_area_lock);
0720 static DEFINE_SPINLOCK(free_vmap_area_lock);
0721
0722 LIST_HEAD(vmap_area_list);
0723 static struct rb_root vmap_area_root = RB_ROOT;
0724 static bool vmap_initialized __read_mostly;
0725
0726 static struct rb_root purge_vmap_area_root = RB_ROOT;
0727 static LIST_HEAD(purge_vmap_area_list);
0728 static DEFINE_SPINLOCK(purge_vmap_area_lock);
0729
0730
0731
0732
0733
0734
0735
0736 static struct kmem_cache *vmap_area_cachep;
0737
0738
0739
0740
0741
0742 static LIST_HEAD(free_vmap_area_list);
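/*
 * This augmented red-black tree represents the free vmap space.
 * All vmap_area objects in this tree are sorted by va->va_start
 * address. It is used for allocation and for merging when a vmap
 * area is released.
 *
 * Each node caches the maximum size of a free block found in its
 * sub-tree (subtree_max_size), making a lowest-match lookup possible
 * without scanning the whole tree.
 */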
0754 static struct rb_root free_vmap_area_root = RB_ROOT;
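/*
 * Preload a CPU with one vmap_area object for the "no edge" split case.
 * The aim is to get rid of allocations from the atomic context, thus
 * to use more permissive allocation masks.
 */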
0761 static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);
0762
0763 static __always_inline unsigned long
0764 va_size(struct vmap_area *va)
0765 {
0766 return (va->va_end - va->va_start);
0767 }
0768
0769 static __always_inline unsigned long
0770 get_subtree_max_size(struct rb_node *node)
0771 {
0772 struct vmap_area *va;
0773
0774 va = rb_entry_safe(node, struct vmap_area, rb_node);
0775 return va ? va->subtree_max_size : 0;
0776 }
0777
0778 RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
0779 struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
0780
0781 static void purge_vmap_area_lazy(void);
0782 static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
0783 static void drain_vmap_area_work(struct work_struct *work);
0784 static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
0785
0786 static atomic_long_t nr_vmalloc_pages;
0787
0788 unsigned long vmalloc_nr_pages(void)
0789 {
0790 return atomic_long_read(&nr_vmalloc_pages);
0791 }
0792
0793
0794 static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr)
0795 {
0796 struct vmap_area *va = NULL;
0797 struct rb_node *n = vmap_area_root.rb_node;
0798
0799 addr = (unsigned long)kasan_reset_tag((void *)addr);
0800
0801 while (n) {
0802 struct vmap_area *tmp;
0803
0804 tmp = rb_entry(n, struct vmap_area, rb_node);
0805 if (tmp->va_end > addr) {
0806 va = tmp;
0807 if (tmp->va_start <= addr)
0808 break;
0809
0810 n = n->rb_left;
0811 } else
0812 n = n->rb_right;
0813 }
0814
0815 return va;
0816 }
0817
0818 static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
0819 {
0820 struct rb_node *n = root->rb_node;
0821
0822 addr = (unsigned long)kasan_reset_tag((void *)addr);
0823
0824 while (n) {
0825 struct vmap_area *va;
0826
0827 va = rb_entry(n, struct vmap_area, rb_node);
0828 if (addr < va->va_start)
0829 n = n->rb_left;
0830 else if (addr >= va->va_end)
0831 n = n->rb_right;
0832 else
0833 return va;
0834 }
0835
0836 return NULL;
0837 }
0838
0839
0840
0841
0842
0843
0844
0845
0846
0847 static __always_inline struct rb_node **
0848 find_va_links(struct vmap_area *va,
0849 struct rb_root *root, struct rb_node *from,
0850 struct rb_node **parent)
0851 {
0852 struct vmap_area *tmp_va;
0853 struct rb_node **link;
0854
0855 if (root) {
0856 link = &root->rb_node;
0857 if (unlikely(!*link)) {
0858 *parent = NULL;
0859 return link;
0860 }
0861 } else {
0862 link = &from;
0863 }
0864
0865
0866
0867
0868
0869
0870 do {
0871 tmp_va = rb_entry(*link, struct vmap_area, rb_node);
0872
0873
0874
0875
0876
0877
0878 if (va->va_end <= tmp_va->va_start)
0879 link = &(*link)->rb_left;
0880 else if (va->va_start >= tmp_va->va_end)
0881 link = &(*link)->rb_right;
0882 else {
0883 WARN(1, "vmalloc bug: 0x%lx-0x%lx overlaps with 0x%lx-0x%lx\n",
0884 va->va_start, va->va_end, tmp_va->va_start, tmp_va->va_end);
0885
0886 return NULL;
0887 }
0888 } while (*link);
0889
0890 *parent = &tmp_va->rb_node;
0891 return link;
0892 }
0893
0894 static __always_inline struct list_head *
0895 get_va_next_sibling(struct rb_node *parent, struct rb_node **link)
0896 {
0897 struct list_head *list;
0898
0899 if (unlikely(!parent))
0900
0901
0902
0903
0904
0905
0906 return NULL;
0907
0908 list = &rb_entry(parent, struct vmap_area, rb_node)->list;
0909 return (&parent->rb_right == link ? list->next : list);
0910 }
0911
0912 static __always_inline void
0913 __link_va(struct vmap_area *va, struct rb_root *root,
0914 struct rb_node *parent, struct rb_node **link,
0915 struct list_head *head, bool augment)
0916 {
0917
0918
0919
0920
0921 if (likely(parent)) {
0922 head = &rb_entry(parent, struct vmap_area, rb_node)->list;
0923 if (&parent->rb_right != link)
0924 head = head->prev;
0925 }
0926
0927
0928 rb_link_node(&va->rb_node, parent, link);
0929 if (augment) {
0930
0931
0932
0933
0934
0935
0936
0937
0938
0939
0940
0941 rb_insert_augmented(&va->rb_node,
0942 root, &free_vmap_area_rb_augment_cb);
0943 va->subtree_max_size = 0;
0944 } else {
0945 rb_insert_color(&va->rb_node, root);
0946 }
0947
0948
0949 list_add(&va->list, head);
0950 }
0951
0952 static __always_inline void
0953 link_va(struct vmap_area *va, struct rb_root *root,
0954 struct rb_node *parent, struct rb_node **link,
0955 struct list_head *head)
0956 {
0957 __link_va(va, root, parent, link, head, false);
0958 }
0959
0960 static __always_inline void
0961 link_va_augment(struct vmap_area *va, struct rb_root *root,
0962 struct rb_node *parent, struct rb_node **link,
0963 struct list_head *head)
0964 {
0965 __link_va(va, root, parent, link, head, true);
0966 }
0967
0968 static __always_inline void
0969 __unlink_va(struct vmap_area *va, struct rb_root *root, bool augment)
0970 {
0971 if (WARN_ON(RB_EMPTY_NODE(&va->rb_node)))
0972 return;
0973
0974 if (augment)
0975 rb_erase_augmented(&va->rb_node,
0976 root, &free_vmap_area_rb_augment_cb);
0977 else
0978 rb_erase(&va->rb_node, root);
0979
0980 list_del_init(&va->list);
0981 RB_CLEAR_NODE(&va->rb_node);
0982 }
0983
0984 static __always_inline void
0985 unlink_va(struct vmap_area *va, struct rb_root *root)
0986 {
0987 __unlink_va(va, root, false);
0988 }
0989
0990 static __always_inline void
0991 unlink_va_augment(struct vmap_area *va, struct rb_root *root)
0992 {
0993 __unlink_va(va, root, true);
0994 }
0995
0996 #if DEBUG_AUGMENT_PROPAGATE_CHECK
0997
0998
0999
1000 static __always_inline unsigned long
1001 compute_subtree_max_size(struct vmap_area *va)
1002 {
1003 return max3(va_size(va),
1004 get_subtree_max_size(va->rb_node.rb_left),
1005 get_subtree_max_size(va->rb_node.rb_right));
1006 }
1007
1008 static void
1009 augment_tree_propagate_check(void)
1010 {
1011 struct vmap_area *va;
1012 unsigned long computed_size;
1013
1014 list_for_each_entry(va, &free_vmap_area_list, list) {
1015 computed_size = compute_subtree_max_size(va);
1016 if (computed_size != va->subtree_max_size)
1017 pr_emerg("tree is corrupted: %lu, %lu\n",
1018 va_size(va), va->subtree_max_size);
1019 }
1020 }
1021 #endif
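/*
 * This function populates subtree_max_size from the bottom up,
 * starting from the given VA. The propagation must be done whenever
 * a VA's size is modified by changing its va_start/va_end, or when a
 * VA is newly inserted into the tree.
 *
 * It means that augment_tree_propagate_from() must be called:
 * - after a VA has been inserted into the tree (free path);
 * - after a VA has been shrunk (allocation path);
 * - after a VA has been increased (merging path).
 *
 * Note that this does not mean all upper parent nodes and their
 * subtree_max_size are recalculated every time up to the root node;
 * propagation stops as soon as a node's value is already correct.
 */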
1050 static __always_inline void
1051 augment_tree_propagate_from(struct vmap_area *va)
1052 {
1053
1054
1055
1056
1057
1058 free_vmap_area_rb_augment_cb_propagate(&va->rb_node, NULL);
1059
1060 #if DEBUG_AUGMENT_PROPAGATE_CHECK
1061 augment_tree_propagate_check();
1062 #endif
1063 }
1064
1065 static void
1066 insert_vmap_area(struct vmap_area *va,
1067 struct rb_root *root, struct list_head *head)
1068 {
1069 struct rb_node **link;
1070 struct rb_node *parent;
1071
1072 link = find_va_links(va, root, NULL, &parent);
1073 if (link)
1074 link_va(va, root, parent, link, head);
1075 }
1076
1077 static void
1078 insert_vmap_area_augment(struct vmap_area *va,
1079 struct rb_node *from, struct rb_root *root,
1080 struct list_head *head)
1081 {
1082 struct rb_node **link;
1083 struct rb_node *parent;
1084
1085 if (from)
1086 link = find_va_links(va, NULL, from, &parent);
1087 else
1088 link = find_va_links(va, root, NULL, &parent);
1089
1090 if (link) {
1091 link_va_augment(va, root, parent, link, head);
1092 augment_tree_propagate_from(va);
1093 }
1094 }
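/*
 * Merge a de-allocated chunk of VA memory with the previous and next
 * free blocks. If no coalescing is done, a new free area is inserted.
 * If the VA has been merged, it is freed back to the cache.
 *
 * Note that it can return NULL for overlapping ranges, preceded by a
 * WARN() report from find_va_links().
 */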
1107 static __always_inline struct vmap_area *
1108 __merge_or_add_vmap_area(struct vmap_area *va,
1109 struct rb_root *root, struct list_head *head, bool augment)
1110 {
1111 struct vmap_area *sibling;
1112 struct list_head *next;
1113 struct rb_node **link;
1114 struct rb_node *parent;
1115 bool merged = false;
1116
1117
1118
1119
1120
1121 link = find_va_links(va, root, NULL, &parent);
1122 if (!link)
1123 return NULL;
1124
1125
1126
1127
1128 next = get_va_next_sibling(parent, link);
1129 if (unlikely(next == NULL))
1130 goto insert;
1131
1132
1133
1134
1135
1136
1137
1138
1139 if (next != head) {
1140 sibling = list_entry(next, struct vmap_area, list);
1141 if (sibling->va_start == va->va_end) {
1142 sibling->va_start = va->va_start;
1143
1144
1145 kmem_cache_free(vmap_area_cachep, va);
1146
1147
1148 va = sibling;
1149 merged = true;
1150 }
1151 }
1152
1153
1154
1155
1156
1157
1158
1159
1160 if (next->prev != head) {
1161 sibling = list_entry(next->prev, struct vmap_area, list);
1162 if (sibling->va_end == va->va_start) {
1163
1164
1165
1166
1167
1168
1169
1170 if (merged)
1171 __unlink_va(va, root, augment);
1172
1173 sibling->va_end = va->va_end;
1174
1175
1176 kmem_cache_free(vmap_area_cachep, va);
1177
1178
1179 va = sibling;
1180 merged = true;
1181 }
1182 }
1183
1184 insert:
1185 if (!merged)
1186 __link_va(va, root, parent, link, head, augment);
1187
1188 return va;
1189 }
1190
1191 static __always_inline struct vmap_area *
1192 merge_or_add_vmap_area(struct vmap_area *va,
1193 struct rb_root *root, struct list_head *head)
1194 {
1195 return __merge_or_add_vmap_area(va, root, head, false);
1196 }
1197
1198 static __always_inline struct vmap_area *
1199 merge_or_add_vmap_area_augment(struct vmap_area *va,
1200 struct rb_root *root, struct list_head *head)
1201 {
1202 va = __merge_or_add_vmap_area(va, root, head, true);
1203 if (va)
1204 augment_tree_propagate_from(va);
1205
1206 return va;
1207 }
1208
1209 static __always_inline bool
1210 is_within_this_va(struct vmap_area *va, unsigned long size,
1211 unsigned long align, unsigned long vstart)
1212 {
1213 unsigned long nva_start_addr;
1214
1215 if (va->va_start > vstart)
1216 nva_start_addr = ALIGN(va->va_start, align);
1217 else
1218 nva_start_addr = ALIGN(vstart, align);
1219
1220
1221 if (nva_start_addr + size < nva_start_addr ||
1222 nva_start_addr < vstart)
1223 return false;
1224
1225 return (nva_start_addr + size <= va->va_end);
1226 }
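/*
 * Find the first free block (lowest start address) in the tree that
 * can accommodate the request described by the passed parameters.
 * Note, with an alignment bigger than PAGE_SIZE, the search length is
 * adjusted to account for worst-case alignment overhead.
 */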
1235 static __always_inline struct vmap_area *
1236 find_vmap_lowest_match(struct rb_root *root, unsigned long size,
1237 unsigned long align, unsigned long vstart, bool adjust_search_size)
1238 {
1239 struct vmap_area *va;
1240 struct rb_node *node;
1241 unsigned long length;
1242
1243
1244 node = root->rb_node;
1245
1246
1247 length = adjust_search_size ? size + align - 1 : size;
1248
1249 while (node) {
1250 va = rb_entry(node, struct vmap_area, rb_node);
1251
1252 if (get_subtree_max_size(node->rb_left) >= length &&
1253 vstart < va->va_start) {
1254 node = node->rb_left;
1255 } else {
1256 if (is_within_this_va(va, size, align, vstart))
1257 return va;
1258
1259
1260
1261
1262
1263
1264 if (get_subtree_max_size(node->rb_right) >= length) {
1265 node = node->rb_right;
1266 continue;
1267 }
1268
1269
1270
1271
1272
1273
1274
1275 while ((node = rb_parent(node))) {
1276 va = rb_entry(node, struct vmap_area, rb_node);
1277 if (is_within_this_va(va, size, align, vstart))
1278 return va;
1279
1280 if (get_subtree_max_size(node->rb_right) >= length &&
1281 vstart <= va->va_start) {
1282
1283
1284
1285
1286
1287
1288 vstart = va->va_start + 1;
1289 node = node->rb_right;
1290 break;
1291 }
1292 }
1293 }
1294 }
1295
1296 return NULL;
1297 }
1298
1299 #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
1300 #include <linux/random.h>
1301
1302 static struct vmap_area *
1303 find_vmap_lowest_linear_match(unsigned long size,
1304 unsigned long align, unsigned long vstart)
1305 {
1306 struct vmap_area *va;
1307
1308 list_for_each_entry(va, &free_vmap_area_list, list) {
1309 if (!is_within_this_va(va, size, align, vstart))
1310 continue;
1311
1312 return va;
1313 }
1314
1315 return NULL;
1316 }
1317
1318 static void
1319 find_vmap_lowest_match_check(unsigned long size, unsigned long align)
1320 {
1321 struct vmap_area *va_1, *va_2;
1322 unsigned long vstart;
1323 unsigned int rnd;
1324
1325 get_random_bytes(&rnd, sizeof(rnd));
1326 vstart = VMALLOC_START + rnd;
1327
va_1 = find_vmap_lowest_match(&free_vmap_area_root, size, align, vstart, false);
1329 va_2 = find_vmap_lowest_linear_match(size, align, vstart);
1330
1331 if (va_1 != va_2)
1332 pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
1333 va_1, va_2, vstart);
1334 }
1335 #endif
1336
1337 enum fit_type {
1338 NOTHING_FIT = 0,
1339 FL_FIT_TYPE = 1,
1340 LE_FIT_TYPE = 2,
1341 RE_FIT_TYPE = 3,
1342 NE_FIT_TYPE = 4
1343 };
1344
1345 static __always_inline enum fit_type
1346 classify_va_fit_type(struct vmap_area *va,
1347 unsigned long nva_start_addr, unsigned long size)
1348 {
1349 enum fit_type type;
1350
1351
1352 if (nva_start_addr < va->va_start ||
1353 nva_start_addr + size > va->va_end)
1354 return NOTHING_FIT;
1355
1356
1357 if (va->va_start == nva_start_addr) {
1358 if (va->va_end == nva_start_addr + size)
1359 type = FL_FIT_TYPE;
1360 else
1361 type = LE_FIT_TYPE;
1362 } else if (va->va_end == nva_start_addr + size) {
1363 type = RE_FIT_TYPE;
1364 } else {
1365 type = NE_FIT_TYPE;
1366 }
1367
1368 return type;
1369 }
1370
1371 static __always_inline int
1372 adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
1373 struct vmap_area *va, unsigned long nva_start_addr,
1374 unsigned long size)
1375 {
1376 struct vmap_area *lva = NULL;
1377 enum fit_type type = classify_va_fit_type(va, nva_start_addr, size);
1378
1379 if (type == FL_FIT_TYPE) {
1380
1381
1382
1383
1384
1385
1386
1387 unlink_va_augment(va, root);
1388 kmem_cache_free(vmap_area_cachep, va);
1389 } else if (type == LE_FIT_TYPE) {
1390
1391
1392
1393
1394
1395
1396
1397 va->va_start += size;
1398 } else if (type == RE_FIT_TYPE) {
1399
1400
1401
1402
1403
1404
1405
1406 va->va_end = nva_start_addr;
1407 } else if (type == NE_FIT_TYPE) {
1408
1409
1410
1411
1412
1413
1414
1415 lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
1416 if (unlikely(!lva)) {
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442 lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
1443 if (!lva)
1444 return -1;
1445 }
1446
1447
1448
1449
1450 lva->va_start = va->va_start;
1451 lva->va_end = nva_start_addr;
1452
1453
1454
1455
1456 va->va_start = nva_start_addr + size;
1457 } else {
1458 return -1;
1459 }
1460
1461 if (type != FL_FIT_TYPE) {
1462 augment_tree_propagate_from(va);
1463
1464 if (lva)
1465 insert_vmap_area_augment(lva, &va->rb_node, root, head);
1466 }
1467
1468 return 0;
1469 }
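/*
 * Returns the start address of the newly allocated area on success.
 * Otherwise vend is returned to indicate failure.
 */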
1475 static __always_inline unsigned long
1476 __alloc_vmap_area(struct rb_root *root, struct list_head *head,
1477 unsigned long size, unsigned long align,
1478 unsigned long vstart, unsigned long vend)
1479 {
1480 bool adjust_search_size = true;
1481 unsigned long nva_start_addr;
1482 struct vmap_area *va;
1483 int ret;
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494 if (align <= PAGE_SIZE || (align > PAGE_SIZE && (vend - vstart) == size))
1495 adjust_search_size = false;
1496
1497 va = find_vmap_lowest_match(root, size, align, vstart, adjust_search_size);
1498 if (unlikely(!va))
1499 return vend;
1500
1501 if (va->va_start > vstart)
1502 nva_start_addr = ALIGN(va->va_start, align);
1503 else
1504 nva_start_addr = ALIGN(vstart, align);
1505
1506
1507 if (nva_start_addr + size > vend)
1508 return vend;
1509
1510
1511 ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
1512 if (WARN_ON_ONCE(ret))
1513 return vend;
1514
1515 #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
1516 find_vmap_lowest_match_check(size, align);
1517 #endif
1518
1519 return nva_start_addr;
1520 }
1521
1522
1523
1524
1525 static void free_vmap_area(struct vmap_area *va)
1526 {
1527
1528
1529
1530 spin_lock(&vmap_area_lock);
1531 unlink_va(va, &vmap_area_root);
1532 spin_unlock(&vmap_area_lock);
1533
1534
1535
1536
1537 spin_lock(&free_vmap_area_lock);
1538 merge_or_add_vmap_area_augment(va, &free_vmap_area_root, &free_vmap_area_list);
1539 spin_unlock(&free_vmap_area_lock);
1540 }
1541
1542 static inline void
1543 preload_this_cpu_lock(spinlock_t *lock, gfp_t gfp_mask, int node)
1544 {
1545 struct vmap_area *va = NULL;
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556 if (!this_cpu_read(ne_fit_preload_node))
1557 va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
1558
1559 spin_lock(lock);
1560
1561 if (va && __this_cpu_cmpxchg(ne_fit_preload_node, NULL, va))
1562 kmem_cache_free(vmap_area_cachep, va);
1563 }
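/*
 * Allocate a region of KVA of the specified size and alignment,
 * within the vstart and vend range.
 */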
1569 static struct vmap_area *alloc_vmap_area(unsigned long size,
1570 unsigned long align,
1571 unsigned long vstart, unsigned long vend,
1572 int node, gfp_t gfp_mask)
1573 {
1574 struct vmap_area *va;
1575 unsigned long freed;
1576 unsigned long addr;
1577 int purged = 0;
1578 int ret;
1579
1580 BUG_ON(!size);
1581 BUG_ON(offset_in_page(size));
1582 BUG_ON(!is_power_of_2(align));
1583
1584 if (unlikely(!vmap_initialized))
1585 return ERR_PTR(-EBUSY);
1586
1587 might_sleep();
1588 gfp_mask = gfp_mask & GFP_RECLAIM_MASK;
1589
1590 va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
1591 if (unlikely(!va))
1592 return ERR_PTR(-ENOMEM);
1593
1594
1595
1596
1597
1598 kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask);
1599
1600 retry:
1601 preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
1602 addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
1603 size, align, vstart, vend);
1604 spin_unlock(&free_vmap_area_lock);
1605
1606
1607
1608
1609
1610 if (unlikely(addr == vend))
1611 goto overflow;
1612
1613 va->va_start = addr;
1614 va->va_end = addr + size;
1615 va->vm = NULL;
1616
1617 spin_lock(&vmap_area_lock);
1618 insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
1619 spin_unlock(&vmap_area_lock);
1620
1621 BUG_ON(!IS_ALIGNED(va->va_start, align));
1622 BUG_ON(va->va_start < vstart);
1623 BUG_ON(va->va_end > vend);
1624
1625 ret = kasan_populate_vmalloc(addr, size);
1626 if (ret) {
1627 free_vmap_area(va);
1628 return ERR_PTR(ret);
1629 }
1630
1631 return va;
1632
1633 overflow:
1634 if (!purged) {
1635 purge_vmap_area_lazy();
1636 purged = 1;
1637 goto retry;
1638 }
1639
1640 freed = 0;
1641 blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
1642
1643 if (freed > 0) {
1644 purged = 0;
1645 goto retry;
1646 }
1647
1648 if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
1649 pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
1650 size);
1651
1652 kmem_cache_free(vmap_area_cachep, va);
1653 return ERR_PTR(-EBUSY);
1654 }
1655
1656 int register_vmap_purge_notifier(struct notifier_block *nb)
1657 {
1658 return blocking_notifier_chain_register(&vmap_notify_list, nb);
1659 }
1660 EXPORT_SYMBOL_GPL(register_vmap_purge_notifier);
1661
1662 int unregister_vmap_purge_notifier(struct notifier_block *nb)
1663 {
1664 return blocking_notifier_chain_unregister(&vmap_notify_list, nb);
1665 }
1666 EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
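/*
 * lazy_max_pages is the maximum amount of virtual address space we gain
 * by not purging TLB mappings as soon as they are freed. Unmapping
 * lazily amortizes the cost of global TLB flushes, at the price of
 * temporarily holding on to dead address space.
 *
 * There is a tradeoff here: a larger threshold saves more TLB flushes
 * but delays the reuse of virtual address space; a smaller one frees
 * address space sooner at the cost of more flushes.
 */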
1684 static unsigned long lazy_max_pages(void)
1685 {
1686 unsigned int log;
1687
1688 log = fls(num_online_cpus());
1689
1690 return log * (32UL * 1024 * 1024 / PAGE_SIZE);
1691 }
1692
1693 static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);
1694
1695
1696
1697
1698
1699
1700 static DEFINE_MUTEX(vmap_purge_lock);
1701
1702
1703 static void purge_fragmented_blocks_allcpus(void);
1704
1705
1706
1707
1708 static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
1709 {
1710 unsigned long resched_threshold;
1711 struct list_head local_purge_list;
1712 struct vmap_area *va, *n_va;
1713
1714 lockdep_assert_held(&vmap_purge_lock);
1715
1716 spin_lock(&purge_vmap_area_lock);
1717 purge_vmap_area_root = RB_ROOT;
1718 list_replace_init(&purge_vmap_area_list, &local_purge_list);
1719 spin_unlock(&purge_vmap_area_lock);
1720
1721 if (unlikely(list_empty(&local_purge_list)))
1722 return false;
1723
1724 start = min(start,
1725 list_first_entry(&local_purge_list,
1726 struct vmap_area, list)->va_start);
1727
1728 end = max(end,
1729 list_last_entry(&local_purge_list,
1730 struct vmap_area, list)->va_end);
1731
1732 flush_tlb_kernel_range(start, end);
1733 resched_threshold = lazy_max_pages() << 1;
1734
1735 spin_lock(&free_vmap_area_lock);
1736 list_for_each_entry_safe(va, n_va, &local_purge_list, list) {
1737 unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
1738 unsigned long orig_start = va->va_start;
1739 unsigned long orig_end = va->va_end;
1740
1741
1742
1743
1744
1745
1746 va = merge_or_add_vmap_area_augment(va, &free_vmap_area_root,
1747 &free_vmap_area_list);
1748
1749 if (!va)
1750 continue;
1751
1752 if (is_vmalloc_or_module_addr((void *)orig_start))
1753 kasan_release_vmalloc(orig_start, orig_end,
1754 va->va_start, va->va_end);
1755
1756 atomic_long_sub(nr, &vmap_lazy_nr);
1757
1758 if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
1759 cond_resched_lock(&free_vmap_area_lock);
1760 }
1761 spin_unlock(&free_vmap_area_lock);
1762 return true;
1763 }
1764
1765
1766
1767
1768 static void purge_vmap_area_lazy(void)
1769 {
1770 mutex_lock(&vmap_purge_lock);
1771 purge_fragmented_blocks_allcpus();
1772 __purge_vmap_area_lazy(ULONG_MAX, 0);
1773 mutex_unlock(&vmap_purge_lock);
1774 }
1775
1776 static void drain_vmap_area_work(struct work_struct *work)
1777 {
1778 unsigned long nr_lazy;
1779
1780 do {
1781 mutex_lock(&vmap_purge_lock);
1782 __purge_vmap_area_lazy(ULONG_MAX, 0);
1783 mutex_unlock(&vmap_purge_lock);
1784
1785
1786 nr_lazy = atomic_long_read(&vmap_lazy_nr);
1787 } while (nr_lazy > lazy_max_pages());
1788 }
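/*
 * Free a vmap area, caller ensuring that the area has been unmapped
 * and that flush_cache_vunmap() has been called for the correct range
 * previously. The area is unlinked from the busy tree and queued on
 * the purge list for a lazy TLB flush and reclaim.
 */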
1795 static void free_vmap_area_noflush(struct vmap_area *va)
1796 {
1797 unsigned long nr_lazy;
1798
1799 spin_lock(&vmap_area_lock);
1800 unlink_va(va, &vmap_area_root);
1801 spin_unlock(&vmap_area_lock);
1802
1803 nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >>
1804 PAGE_SHIFT, &vmap_lazy_nr);
1805
1806
1807
1808
1809 spin_lock(&purge_vmap_area_lock);
1810 merge_or_add_vmap_area(va,
1811 &purge_vmap_area_root, &purge_vmap_area_list);
1812 spin_unlock(&purge_vmap_area_lock);
1813
1814
1815 if (unlikely(nr_lazy > lazy_max_pages()))
1816 schedule_work(&drain_vmap_work);
1817 }
1818
1819
1820
1821
1822 static void free_unmap_vmap_area(struct vmap_area *va)
1823 {
1824 flush_cache_vunmap(va->va_start, va->va_end);
1825 vunmap_range_noflush(va->va_start, va->va_end);
1826 if (debug_pagealloc_enabled_static())
1827 flush_tlb_kernel_range(va->va_start, va->va_end);
1828
1829 free_vmap_area_noflush(va);
1830 }
1831
1832 struct vmap_area *find_vmap_area(unsigned long addr)
1833 {
1834 struct vmap_area *va;
1835
1836 spin_lock(&vmap_area_lock);
1837 va = __find_vmap_area(addr, &vmap_area_root);
1838 spin_unlock(&vmap_area_lock);
1839
1840 return va;
1841 }
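/*** Per cpu kva allocator ***/

/*
 * vmap space is limited especially on 32 bit architectures. Ensure
 * there is room for at least 16 percpu vmap blocks per CPU; the
 * sizing macros below derive VMAP_BBMAP_BITS from that constraint.
 */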
1854 #if BITS_PER_LONG == 32
1855 #define VMALLOC_SPACE (128UL*1024*1024)
1856 #else
1857 #define VMALLOC_SPACE (128UL*1024*1024*1024)
1858 #endif
1859
1860 #define VMALLOC_PAGES (VMALLOC_SPACE / PAGE_SIZE)
1861 #define VMAP_MAX_ALLOC BITS_PER_LONG
1862 #define VMAP_BBMAP_BITS_MAX 1024
1863 #define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2)
1864 #define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y))
1865 #define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y))
1866 #define VMAP_BBMAP_BITS \
1867 VMAP_MIN(VMAP_BBMAP_BITS_MAX, \
1868 VMAP_MAX(VMAP_BBMAP_BITS_MIN, \
1869 VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))
1870
1871 #define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)
1872
1873 struct vmap_block_queue {
1874 spinlock_t lock;
1875 struct list_head free;
1876 };
1877
1878 struct vmap_block {
1879 spinlock_t lock;
1880 struct vmap_area *va;
1881 unsigned long free, dirty;
1882 unsigned long dirty_min, dirty_max;
1883 struct list_head free_list;
1884 struct rcu_head rcu_head;
1885 struct list_head purge;
1886 };
1887
1888
1889 static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
1890
1891
1892
1893
1894
1895
1896 static DEFINE_XARRAY(vmap_blocks);
1897
1898
1899
1900
1901
1902
1903
1904
1905 static unsigned long addr_to_vb_idx(unsigned long addr)
1906 {
1907 addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
1908 addr /= VMAP_BLOCK_SIZE;
1909 return addr;
1910 }
1911
1912 static void *vmap_block_vaddr(unsigned long va_start, unsigned long pages_off)
1913 {
1914 unsigned long addr;
1915
1916 addr = va_start + (pages_off << PAGE_SHIFT);
1917 BUG_ON(addr_to_vb_idx(addr) != addr_to_vb_idx(va_start));
1918 return (void *)addr;
1919 }
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929 static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
1930 {
1931 struct vmap_block_queue *vbq;
1932 struct vmap_block *vb;
1933 struct vmap_area *va;
1934 unsigned long vb_idx;
1935 int node, err;
1936 void *vaddr;
1937
1938 node = numa_node_id();
1939
1940 vb = kmalloc_node(sizeof(struct vmap_block),
1941 gfp_mask & GFP_RECLAIM_MASK, node);
1942 if (unlikely(!vb))
1943 return ERR_PTR(-ENOMEM);
1944
1945 va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
1946 VMALLOC_START, VMALLOC_END,
1947 node, gfp_mask);
1948 if (IS_ERR(va)) {
1949 kfree(vb);
1950 return ERR_CAST(va);
1951 }
1952
1953 vaddr = vmap_block_vaddr(va->va_start, 0);
1954 spin_lock_init(&vb->lock);
1955 vb->va = va;
1956
1957 BUG_ON(VMAP_BBMAP_BITS <= (1UL << order));
1958 vb->free = VMAP_BBMAP_BITS - (1UL << order);
1959 vb->dirty = 0;
1960 vb->dirty_min = VMAP_BBMAP_BITS;
1961 vb->dirty_max = 0;
1962 INIT_LIST_HEAD(&vb->free_list);
1963
1964 vb_idx = addr_to_vb_idx(va->va_start);
1965 err = xa_insert(&vmap_blocks, vb_idx, vb, gfp_mask);
1966 if (err) {
1967 kfree(vb);
1968 free_vmap_area(va);
1969 return ERR_PTR(err);
1970 }
1971
1972 vbq = raw_cpu_ptr(&vmap_block_queue);
1973 spin_lock(&vbq->lock);
1974 list_add_tail_rcu(&vb->free_list, &vbq->free);
1975 spin_unlock(&vbq->lock);
1976
1977 return vaddr;
1978 }
1979
1980 static void free_vmap_block(struct vmap_block *vb)
1981 {
1982 struct vmap_block *tmp;
1983
1984 tmp = xa_erase(&vmap_blocks, addr_to_vb_idx(vb->va->va_start));
1985 BUG_ON(tmp != vb);
1986
1987 free_vmap_area_noflush(vb->va);
1988 kfree_rcu(vb, rcu_head);
1989 }
1990
1991 static void purge_fragmented_blocks(int cpu)
1992 {
1993 LIST_HEAD(purge);
1994 struct vmap_block *vb;
1995 struct vmap_block *n_vb;
1996 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
1997
1998 rcu_read_lock();
1999 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
2000
2001 if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
2002 continue;
2003
2004 spin_lock(&vb->lock);
2005 if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
2006 vb->free = 0;
2007 vb->dirty = VMAP_BBMAP_BITS;
2008 vb->dirty_min = 0;
2009 vb->dirty_max = VMAP_BBMAP_BITS;
2010 spin_lock(&vbq->lock);
2011 list_del_rcu(&vb->free_list);
2012 spin_unlock(&vbq->lock);
2013 spin_unlock(&vb->lock);
2014 list_add_tail(&vb->purge, &purge);
2015 } else
2016 spin_unlock(&vb->lock);
2017 }
2018 rcu_read_unlock();
2019
2020 list_for_each_entry_safe(vb, n_vb, &purge, purge) {
2021 list_del(&vb->purge);
2022 free_vmap_block(vb);
2023 }
2024 }
2025
2026 static void purge_fragmented_blocks_allcpus(void)
2027 {
2028 int cpu;
2029
2030 for_each_possible_cpu(cpu)
2031 purge_fragmented_blocks(cpu);
2032 }
2033
2034 static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
2035 {
2036 struct vmap_block_queue *vbq;
2037 struct vmap_block *vb;
2038 void *vaddr = NULL;
2039 unsigned int order;
2040
2041 BUG_ON(offset_in_page(size));
2042 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
2043 if (WARN_ON(size == 0)) {
2044
2045
2046
2047
2048
2049 return NULL;
2050 }
2051 order = get_order(size);
2052
2053 rcu_read_lock();
2054 vbq = raw_cpu_ptr(&vmap_block_queue);
2055 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
2056 unsigned long pages_off;
2057
2058 spin_lock(&vb->lock);
2059 if (vb->free < (1UL << order)) {
2060 spin_unlock(&vb->lock);
2061 continue;
2062 }
2063
2064 pages_off = VMAP_BBMAP_BITS - vb->free;
2065 vaddr = vmap_block_vaddr(vb->va->va_start, pages_off);
2066 vb->free -= 1UL << order;
2067 if (vb->free == 0) {
2068 spin_lock(&vbq->lock);
2069 list_del_rcu(&vb->free_list);
2070 spin_unlock(&vbq->lock);
2071 }
2072
2073 spin_unlock(&vb->lock);
2074 break;
2075 }
2076
2077 rcu_read_unlock();
2078
2079
2080 if (!vaddr)
2081 vaddr = new_vmap_block(order, gfp_mask);
2082
2083 return vaddr;
2084 }
2085
2086 static void vb_free(unsigned long addr, unsigned long size)
2087 {
2088 unsigned long offset;
2089 unsigned int order;
2090 struct vmap_block *vb;
2091
2092 BUG_ON(offset_in_page(size));
2093 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
2094
2095 flush_cache_vunmap(addr, addr + size);
2096
2097 order = get_order(size);
2098 offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT;
2099 vb = xa_load(&vmap_blocks, addr_to_vb_idx(addr));
2100
2101 vunmap_range_noflush(addr, addr + size);
2102
2103 if (debug_pagealloc_enabled_static())
2104 flush_tlb_kernel_range(addr, addr + size);
2105
2106 spin_lock(&vb->lock);
2107
2108
2109 vb->dirty_min = min(vb->dirty_min, offset);
2110 vb->dirty_max = max(vb->dirty_max, offset + (1UL << order));
2111
2112 vb->dirty += 1UL << order;
2113 if (vb->dirty == VMAP_BBMAP_BITS) {
2114 BUG_ON(vb->free);
2115 spin_unlock(&vb->lock);
2116 free_vmap_block(vb);
2117 } else
2118 spin_unlock(&vb->lock);
2119 }
2120
2121 static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush)
2122 {
2123 int cpu;
2124
2125 if (unlikely(!vmap_initialized))
2126 return;
2127
2128 might_sleep();
2129
2130 for_each_possible_cpu(cpu) {
2131 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
2132 struct vmap_block *vb;
2133
2134 rcu_read_lock();
2135 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
2136 spin_lock(&vb->lock);
2137 if (vb->dirty && vb->dirty != VMAP_BBMAP_BITS) {
2138 unsigned long va_start = vb->va->va_start;
2139 unsigned long s, e;
2140
2141 s = va_start + (vb->dirty_min << PAGE_SHIFT);
2142 e = va_start + (vb->dirty_max << PAGE_SHIFT);
2143
2144 start = min(s, start);
2145 end = max(e, end);
2146
2147 flush = 1;
2148 }
2149 spin_unlock(&vb->lock);
2150 }
2151 rcu_read_unlock();
2152 }
2153
2154 mutex_lock(&vmap_purge_lock);
2155 purge_fragmented_blocks_allcpus();
2156 if (!__purge_vmap_area_lazy(start, end) && flush)
2157 flush_tlb_kernel_range(start, end);
2158 mutex_unlock(&vmap_purge_lock);
2159 }
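/**
 * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
 *
 * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
 * to amortize TLB flushing overheads. What this means is that any page you
 * have now, may, in a former life, have been mapped into kernel virtual
 * address by the vmap layer and so there might be some CPUs with TLB entries
 * still referencing that page (in addition to the regular 1:1 kernel mapping).
 *
 * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can
 * be sure that none of the pages we have control over will have any aliases
 * from the vmap layer.
 */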
2174 void vm_unmap_aliases(void)
2175 {
2176 unsigned long start = ULONG_MAX, end = 0;
2177 int flush = 0;
2178
2179 _vm_unmap_aliases(start, end, flush);
2180 }
2181 EXPORT_SYMBOL_GPL(vm_unmap_aliases);
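/**
 * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
 * @mem: the pointer returned by vm_map_ram
 * @count: the count passed to that vm_map_ram call (cannot unmap partial)
 */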
2188 void vm_unmap_ram(const void *mem, unsigned int count)
2189 {
2190 unsigned long size = (unsigned long)count << PAGE_SHIFT;
2191 unsigned long addr = (unsigned long)kasan_reset_tag(mem);
2192 struct vmap_area *va;
2193
2194 might_sleep();
2195 BUG_ON(!addr);
2196 BUG_ON(addr < VMALLOC_START);
2197 BUG_ON(addr > VMALLOC_END);
2198 BUG_ON(!PAGE_ALIGNED(addr));
2199
2200 kasan_poison_vmalloc(mem, size);
2201
2202 if (likely(count <= VMAP_MAX_ALLOC)) {
2203 debug_check_no_locks_freed(mem, size);
2204 vb_free(addr, size);
2205 return;
2206 }
2207
2208 va = find_vmap_area(addr);
2209 BUG_ON(!va);
2210 debug_check_no_locks_freed((void *)va->va_start,
2211 (va->va_end - va->va_start));
2212 free_unmap_vmap_area(va);
2213 }
2214 EXPORT_SYMBOL(vm_unmap_ram);
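/**
 * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
 * @pages: an array of pointers to the pages to be mapped
 * @count: number of pages
 * @node: prefer to allocate data structures on this node
 *
 * For requests of up to VMAP_MAX_ALLOC pages this uses the per-cpu
 * vmap block fast path and can be faster than vmap(). Mixing long-lived
 * and short-lived objects with vm_map_ram() can fragment the address
 * space badly (especially on 32-bit machines), so prefer it for
 * short-lived mappings.
 *
 * Return: a pointer to the address that has been mapped, or %NULL on failure.
 */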
2230 void *vm_map_ram(struct page **pages, unsigned int count, int node)
2231 {
2232 unsigned long size = (unsigned long)count << PAGE_SHIFT;
2233 unsigned long addr;
2234 void *mem;
2235
2236 if (likely(count <= VMAP_MAX_ALLOC)) {
2237 mem = vb_alloc(size, GFP_KERNEL);
2238 if (IS_ERR(mem))
2239 return NULL;
2240 addr = (unsigned long)mem;
2241 } else {
2242 struct vmap_area *va;
2243 va = alloc_vmap_area(size, PAGE_SIZE,
2244 VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
2245 if (IS_ERR(va))
2246 return NULL;
2247
2248 addr = va->va_start;
2249 mem = (void *)addr;
2250 }
2251
2252 if (vmap_pages_range(addr, addr + size, PAGE_KERNEL,
2253 pages, PAGE_SHIFT) < 0) {
2254 vm_unmap_ram(mem, count);
2255 return NULL;
2256 }
2257
2258
2259
2260
2261
2262
2263 mem = kasan_unpoison_vmalloc(mem, size, KASAN_VMALLOC_PROT_NORMAL);
2264
2265 return mem;
2266 }
2267 EXPORT_SYMBOL(vm_map_ram);
2268
2269 static struct vm_struct *vmlist __initdata;
2270
2271 static inline unsigned int vm_area_page_order(struct vm_struct *vm)
2272 {
2273 #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
2274 return vm->page_order;
2275 #else
2276 return 0;
2277 #endif
2278 }
2279
2280 static inline void set_vm_area_page_order(struct vm_struct *vm, unsigned int order)
2281 {
2282 #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
2283 vm->page_order = order;
2284 #else
2285 BUG_ON(order != 0);
2286 #endif
2287 }
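/**
 * vm_area_add_early - add vmap area early during boot
 * @vm: vm_struct to add
 *
 * This function is used to add a fixed kernel vm area to vmlist before
 * vmalloc_init() is called. @vm->addr, @vm->size, and @vm->flags
 * should contain proper values and the other fields should be zero.
 *
 * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
 */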
2299 void __init vm_area_add_early(struct vm_struct *vm)
2300 {
2301 struct vm_struct *tmp, **p;
2302
2303 BUG_ON(vmap_initialized);
2304 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
2305 if (tmp->addr >= vm->addr) {
2306 BUG_ON(tmp->addr < vm->addr + vm->size);
2307 break;
2308 } else
2309 BUG_ON(tmp->addr + tmp->size > vm->addr);
2310 }
2311 vm->next = *p;
2312 *p = vm;
2313 }
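/**
 * vm_area_register_early - register vmap area early during boot
 * @vm: vm_struct to register
 * @align: requested alignment
 *
 * This function is used to register a kernel vm area before
 * vmalloc_init() is called. @vm->size and @vm->flags should contain
 * proper values on entry and the other fields should be zero. On
 * return, vm->addr contains the allocated address.
 */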
2327 void __init vm_area_register_early(struct vm_struct *vm, size_t align)
2328 {
2329 unsigned long addr = ALIGN(VMALLOC_START, align);
2330 struct vm_struct *cur, **p;
2331
2332 BUG_ON(vmap_initialized);
2333
2334 for (p = &vmlist; (cur = *p) != NULL; p = &cur->next) {
2335 if ((unsigned long)cur->addr - addr >= vm->size)
2336 break;
2337 addr = ALIGN((unsigned long)cur->addr + cur->size, align);
2338 }
2339
2340 BUG_ON(addr > VMALLOC_END - vm->size);
2341 vm->addr = (void *)addr;
2342 vm->next = *p;
2343 *p = vm;
2344 kasan_populate_early_vm_area_shadow(vm->addr, vm->size);
2345 }
2346
2347 static void vmap_init_free_space(void)
2348 {
2349 unsigned long vmap_start = 1;
2350 const unsigned long vmap_end = ULONG_MAX;
2351 struct vmap_area *busy, *free;
2352
2353
2354
2355
2356
2357
2358
2359 list_for_each_entry(busy, &vmap_area_list, list) {
2360 if (busy->va_start - vmap_start > 0) {
2361 free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
2362 if (!WARN_ON_ONCE(!free)) {
2363 free->va_start = vmap_start;
2364 free->va_end = busy->va_start;
2365
2366 insert_vmap_area_augment(free, NULL,
2367 &free_vmap_area_root,
2368 &free_vmap_area_list);
2369 }
2370 }
2371
2372 vmap_start = busy->va_end;
2373 }
2374
2375 if (vmap_end - vmap_start > 0) {
2376 free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
2377 if (!WARN_ON_ONCE(!free)) {
2378 free->va_start = vmap_start;
2379 free->va_end = vmap_end;
2380
2381 insert_vmap_area_augment(free, NULL,
2382 &free_vmap_area_root,
2383 &free_vmap_area_list);
2384 }
2385 }
2386 }
2387
2388 void __init vmalloc_init(void)
2389 {
2390 struct vmap_area *va;
2391 struct vm_struct *tmp;
2392 int i;
2393
2394
2395
2396
2397 vmap_area_cachep = KMEM_CACHE(vmap_area, SLAB_PANIC);
2398
2399 for_each_possible_cpu(i) {
2400 struct vmap_block_queue *vbq;
2401 struct vfree_deferred *p;
2402
2403 vbq = &per_cpu(vmap_block_queue, i);
2404 spin_lock_init(&vbq->lock);
2405 INIT_LIST_HEAD(&vbq->free);
2406 p = &per_cpu(vfree_deferred, i);
2407 init_llist_head(&p->list);
2408 INIT_WORK(&p->wq, free_work);
2409 }
2410
2411
2412 for (tmp = vmlist; tmp; tmp = tmp->next) {
2413 va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
2414 if (WARN_ON_ONCE(!va))
2415 continue;
2416
2417 va->va_start = (unsigned long)tmp->addr;
2418 va->va_end = va->va_start + tmp->size;
2419 va->vm = tmp;
2420 insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
2421 }
2422
2423
2424
2425
2426 vmap_init_free_space();
2427 vmap_initialized = true;
2428 }
2429
2430 static inline void setup_vmalloc_vm_locked(struct vm_struct *vm,
2431 struct vmap_area *va, unsigned long flags, const void *caller)
2432 {
2433 vm->flags = flags;
2434 vm->addr = (void *)va->va_start;
2435 vm->size = va->va_end - va->va_start;
2436 vm->caller = caller;
2437 va->vm = vm;
2438 }
2439
2440 static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
2441 unsigned long flags, const void *caller)
2442 {
2443 spin_lock(&vmap_area_lock);
2444 setup_vmalloc_vm_locked(vm, va, flags, caller);
2445 spin_unlock(&vmap_area_lock);
2446 }
2447
2448 static void clear_vm_uninitialized_flag(struct vm_struct *vm)
2449 {
2450
2451
2452
2453
2454
2455 smp_wmb();
2456 vm->flags &= ~VM_UNINITIALIZED;
2457 }
2458
2459 static struct vm_struct *__get_vm_area_node(unsigned long size,
2460 unsigned long align, unsigned long shift, unsigned long flags,
2461 unsigned long start, unsigned long end, int node,
2462 gfp_t gfp_mask, const void *caller)
2463 {
2464 struct vmap_area *va;
2465 struct vm_struct *area;
2466 unsigned long requested_size = size;
2467
2468 BUG_ON(in_interrupt());
2469 size = ALIGN(size, 1ul << shift);
2470 if (unlikely(!size))
2471 return NULL;
2472
2473 if (flags & VM_IOREMAP)
2474 align = 1ul << clamp_t(int, get_count_order_long(size),
2475 PAGE_SHIFT, IOREMAP_MAX_ORDER);
2476
2477 area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
2478 if (unlikely(!area))
2479 return NULL;
2480
2481 if (!(flags & VM_NO_GUARD))
2482 size += PAGE_SIZE;
2483
2484 va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
2485 if (IS_ERR(va)) {
2486 kfree(area);
2487 return NULL;
2488 }
2489
2490 setup_vmalloc_vm(area, va, flags, caller);
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500 if (!(flags & VM_ALLOC))
2501 area->addr = kasan_unpoison_vmalloc(area->addr, requested_size,
2502 KASAN_VMALLOC_PROT_NORMAL);
2503
2504 return area;
2505 }
2506
2507 struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
2508 unsigned long start, unsigned long end,
2509 const void *caller)
2510 {
2511 return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, start, end,
2512 NUMA_NO_NODE, GFP_KERNEL, caller);
2513 }
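/**
 * get_vm_area - reserve a contiguous kernel virtual area
 * @size: size of the area
 * @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC
 *
 * Search an area of @size in the kernel virtual mapping area
 * and reserve it for our purposes.
 *
 * Return: the area descriptor on success or %NULL on failure.
 */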
2526 struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
2527 {
2528 return __get_vm_area_node(size, 1, PAGE_SHIFT, flags,
2529 VMALLOC_START, VMALLOC_END,
2530 NUMA_NO_NODE, GFP_KERNEL,
2531 __builtin_return_address(0));
2532 }
2533
2534 struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
2535 const void *caller)
2536 {
2537 return __get_vm_area_node(size, 1, PAGE_SHIFT, flags,
2538 VMALLOC_START, VMALLOC_END,
2539 NUMA_NO_NODE, GFP_KERNEL, caller);
2540 }
2541
2542 /**
2543  * find_vm_area - find a continuous kernel virtual area
2544  * @addr:	  base address
2545  *
2546  * Search for the kernel VM area starting at @addr, and return it.
2547  * It is up to the caller to do all required locking to keep the returned
2548  * pointer valid.
2549  *
2550  * Return: the area descriptor on success or %NULL on failure.
2551  */
2552 struct vm_struct *find_vm_area(const void *addr)
2553 {
2554 struct vmap_area *va;
2555
2556 va = find_vmap_area((unsigned long)addr);
2557 if (!va)
2558 return NULL;
2559
2560 return va->vm;
2561 }
2562
2563 /**
2564  * remove_vm_area - find and remove a continuous kernel virtual area
2565  * @addr:	    base address
2566  *
2567  * Search for the kernel VM area starting at @addr, and remove it.
2568  * This function returns the found VM area, but using it is NOT safe
2569  * on SMP machines, except for its size or flags.
2570  *
2571  * Return: the area descriptor on success or %NULL on failure.
2572  */
2573 struct vm_struct *remove_vm_area(const void *addr)
2574 {
2575 struct vmap_area *va;
2576
2577 might_sleep();
2578
2579 spin_lock(&vmap_area_lock);
2580 va = __find_vmap_area((unsigned long)addr, &vmap_area_root);
2581 if (va && va->vm) {
2582 struct vm_struct *vm = va->vm;
2583
2584 va->vm = NULL;
2585 spin_unlock(&vmap_area_lock);
2586
2587 kasan_free_module_shadow(vm);
2588 free_unmap_vmap_area(va);
2589
2590 return vm;
2591 }
2592
2593 spin_unlock(&vmap_area_lock);
2594 return NULL;
2595 }
2596
2597 static inline void set_area_direct_map(const struct vm_struct *area,
2598 int (*set_direct_map)(struct page *page))
2599 {
2600 int i;
2601
2602 /* HUGE_VMALLOC passes small pages to set_direct_map */
2603 for (i = 0; i < area->nr_pages; i++)
2604 if (page_address(area->pages[i]))
2605 set_direct_map(area->pages[i]);
2606 }
2607
2608 /* Handle removing and resetting vm mappings related to the vm_struct. */
2609 static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
2610 {
2611 unsigned long start = ULONG_MAX, end = 0;
2612 unsigned int page_order = vm_area_page_order(area);
2613 int flush_reset = area->flags & VM_FLUSH_RESET_PERMS;
2614 int flush_dmap = 0;
2615 int i;
2616
2617 remove_vm_area(area->addr);
2618
2619 /* If this is not VM_FLUSH_RESET_PERMS memory, no need for the below. */
2620 if (!flush_reset)
2621 return;
2622
2623 /*
2624  * If not deallocating pages, just do the flush of the VM area and
2625  * return.
2626  */
2627 if (!deallocate_pages) {
2628 vm_unmap_aliases();
2629 return;
2630 }
2631
2632 /*
2633  * If execution gets here, flush the vm mapping and reset the direct
2634  * map. Find the start and end range of the direct mappings to make
2635  * sure the vm_unmap_aliases() flush includes the direct map.
2636  */
2637 for (i = 0; i < area->nr_pages; i += 1U << page_order) {
2638 unsigned long addr = (unsigned long)page_address(area->pages[i]);
2639 if (addr) {
2640 unsigned long page_size;
2641
2642 page_size = PAGE_SIZE << page_order;
2643 start = min(addr, start);
2644 end = max(addr + page_size, end);
2645 flush_dmap = 1;
2646 }
2647 }
2648
2649 /*
2650  * Set the direct map to something invalid so that it won't be cached if
2651  * there are any accesses after the TLB flush, then flush the TLB and
2652  * reset the direct map permissions to the default.
2653  */
2654 set_area_direct_map(area, set_direct_map_invalid_noflush);
2655 _vm_unmap_aliases(start, end, flush_dmap);
2656 set_area_direct_map(area, set_direct_map_default_noflush);
2657 }
2658
2659 static void __vunmap(const void *addr, int deallocate_pages)
2660 {
2661 struct vm_struct *area;
2662
2663 if (!addr)
2664 return;
2665
2666 if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n",
2667 addr))
2668 return;
2669
2670 area = find_vm_area(addr);
2671 if (unlikely(!area)) {
2672 WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
2673 addr);
2674 return;
2675 }
2676
2677 debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
2678 debug_check_no_obj_freed(area->addr, get_vm_area_size(area));
2679
2680 kasan_poison_vmalloc(area->addr, get_vm_area_size(area));
2681
2682 vm_remove_mappings(area, deallocate_pages);
2683
2684 if (deallocate_pages) {
2685 int i;
2686
2687 for (i = 0; i < area->nr_pages; i++) {
2688 struct page *page = area->pages[i];
2689
2690 BUG_ON(!page);
2691 mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
2692 /*
2693  * High-order allocations for huge vmallocs are split, so
2694  * they can be freed as an array of order-0 allocations.
2695  */
2696 __free_pages(page, 0);
2697 cond_resched();
2698 }
2699 atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
2700
2701 kvfree(area->pages);
2702 }
2703
2704 kfree(area);
2705 }
2706
2707 static inline void __vfree_deferred(const void *addr)
2708 {
2709 /*
2710  * Use raw_cpu_ptr() because this can be called from preemptible
2711  * context. Preemption is absolutely fine here, because the llist_add()
2712  * implementation is lockless, so it works even if we are adding to
2713  * another cpu's list. schedule_work() should be fine with this too.
2714  */
2715 struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred);
2716
2717 if (llist_add((struct llist_node *)addr, &p->list))
2718 schedule_work(&p->wq);
2719 }
2720
2721 /**
2722  * vfree_atomic - release memory allocated by vmalloc()
2723  * @addr:	  memory base address
2724  *
2725  * This one is just like vfree() but can be called in any atomic context
2726  * except NMIs.
2727  */
2728 void vfree_atomic(const void *addr)
2729 {
2730 BUG_ON(in_nmi());
2731
2732 kmemleak_free(addr);
2733
2734 if (!addr)
2735 return;
2736 __vfree_deferred(addr);
2737 }
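/*
 * Illustrative sketch (not from this file): a hypothetical caller that must
 * release a vmalloc()'ed buffer while holding a spinlock, where vfree()
 * could not be used because it may sleep. The "obj" structure and its
 * fields are assumptions for the example only.
 *
 *	spin_lock(&obj->lock);
 *	vfree_atomic(obj->buf);
 *	obj->buf = NULL;
 *	spin_unlock(&obj->lock);
 */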
2738
2739 static void __vfree(const void *addr)
2740 {
2741 if (unlikely(in_interrupt()))
2742 __vfree_deferred(addr);
2743 else
2744 __vunmap(addr, 1);
2745 }
2746
2747 /**
2748  * vfree - Release memory allocated by vmalloc()
2749  * @addr:  Memory base address
2750  *
2751  * Free the virtually continuous memory area starting at @addr, as obtained
2752  * from one of the vmalloc() family of APIs. This will usually also free the
2753  * physical memory underlying the virtual allocation, but that memory is
2754  * reference counted, so it will not be freed until the last user goes away.
2755  *
2756  * If @addr is NULL, no operation is performed.
2757  *
2758  * Context:
2759  * May sleep if called *not* from interrupt context.
2760  * Must not be called in NMI context (strictly speaking, it could be
2761  * if we have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
2762  * conventions for vfree() arch-dependent would be really bad).
2763  */
2764 void vfree(const void *addr)
2765 {
2766 BUG_ON(in_nmi());
2767
2768 kmemleak_free(addr);
2769
2770 might_sleep_if(!in_interrupt());
2771
2772 if (!addr)
2773 return;
2774
2775 __vfree(addr);
2776 }
2777 EXPORT_SYMBOL(vfree);
2778
2779 /**
2780  * vunmap - release virtual mapping obtained by vmap()
2781  * @addr:   memory base address
2782  *
2783  * Free the virtually contiguous memory area starting at @addr,
2784  * which was created from the page array passed to vmap().
2785  *
2786  * Must not be called in interrupt context.
2787  */
2788 void vunmap(const void *addr)
2789 {
2790 BUG_ON(in_interrupt());
2791 might_sleep();
2792 if (addr)
2793 __vunmap(addr, 0);
2794 }
2795 EXPORT_SYMBOL(vunmap);
2796
2797 /**
2798  * vmap - map an array of pages into virtually contiguous space
2799  * @pages: array of page pointers
2800  * @count: number of pages to map
2801  * @flags: vm_area->flags
2802  * @prot: page protection for the mapping
2803  *
2804  * Maps @count pages from @pages into contiguous kernel virtual space.
2805  * If @flags contains %VM_MAP_PUT_PAGES the ownership of the pages array
2806  * itself (which must be kmalloc or vmalloc memory) and one reference per
2807  * page in it are transferred from the caller to vmap(), and will be freed
2808  * / dropped when vfree() is called on the return value.
2809  *
2810  * Return: the address of the area or %NULL on failure
2811  */
2812 void *vmap(struct page **pages, unsigned int count,
2813 unsigned long flags, pgprot_t prot)
2814 {
2815 struct vm_struct *area;
2816 unsigned long addr;
2817 unsigned long size;
2818
2819 might_sleep();
2820
2821 /*
2822  * Your top guard is someone else's bottom guard. Make sure that
2823  * nobody's mapping information can ever overlap across a guard page.
2824  */
2825 if (WARN_ON_ONCE(flags & VM_NO_GUARD))
2826 flags &= ~VM_NO_GUARD;
2827
2828 if (count > totalram_pages())
2829 return NULL;
2830
2831 size = (unsigned long)count << PAGE_SHIFT;
2832 area = get_vm_area_caller(size, flags, __builtin_return_address(0));
2833 if (!area)
2834 return NULL;
2835
2836 addr = (unsigned long)area->addr;
2837 if (vmap_pages_range(addr, addr + size, pgprot_nx(prot),
2838 pages, PAGE_SHIFT) < 0) {
2839 vunmap(area->addr);
2840 return NULL;
2841 }
2842
2843 if (flags & VM_MAP_PUT_PAGES) {
2844 area->pages = pages;
2845 area->nr_pages = count;
2846 }
2847 return area->addr;
2848 }
2849 EXPORT_SYMBOL(vmap);
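/*
 * Illustrative sketch (not from this file): mapping an already-allocated,
 * possibly discontiguous set of order-0 pages into one contiguous kernel
 * mapping. The helper name below is an assumption for the example only.
 *
 *	static void *example_alloc_and_map(struct page **pages, unsigned int nr)
 *	{
 *		unsigned int i;
 *		void *vaddr;
 *
 *		for (i = 0; i < nr; i++) {
 *			pages[i] = alloc_page(GFP_KERNEL);
 *			if (!pages[i])
 *				goto err;
 *		}
 *
 *		vaddr = vmap(pages, nr, VM_MAP, PAGE_KERNEL);
 *		if (vaddr)
 *			return vaddr;
 *	err:
 *		while (i--)
 *			__free_page(pages[i]);
 *		return NULL;
 *	}
 *
 * The mapping is later undone with vunmap(vaddr) and the pages freed by the
 * caller, since VM_MAP_PUT_PAGES is not passed in this sketch.
 */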
2850
2851 #ifdef CONFIG_VMAP_PFN
2852 struct vmap_pfn_data {
2853 unsigned long *pfns;
2854 pgprot_t prot;
2855 unsigned int idx;
2856 };
2857
2858 static int vmap_pfn_apply(pte_t *pte, unsigned long addr, void *private)
2859 {
2860 struct vmap_pfn_data *data = private;
2861
2862 if (WARN_ON_ONCE(pfn_valid(data->pfns[data->idx])))
2863 return -EINVAL;
2864 *pte = pte_mkspecial(pfn_pte(data->pfns[data->idx++], data->prot));
2865 return 0;
2866 }
2867
2868 /**
2869  * vmap_pfn - map an array of PFNs into virtually contiguous space
2870  * @pfns: array of PFNs
2871  * @count: number of pages to map
2872  * @prot: page protection for the mapping
2873  *
2874  * Maps @count PFNs from @pfns into contiguous kernel virtual space and
2875  * returns the start address of the mapping.
2876  */
2877 void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
2878 {
2879 struct vmap_pfn_data data = { .pfns = pfns, .prot = pgprot_nx(prot) };
2880 struct vm_struct *area;
2881
2882 area = get_vm_area_caller(count * PAGE_SIZE, VM_IOREMAP,
2883 __builtin_return_address(0));
2884 if (!area)
2885 return NULL;
2886 if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
2887 count * PAGE_SIZE, vmap_pfn_apply, &data)) {
2888 free_vm_area(area);
2889 return NULL;
2890 }
2891 return area->addr;
2892 }
2893 EXPORT_SYMBOL_GPL(vmap_pfn);
2894 #endif
2895
2896 static inline unsigned int
2897 vm_area_alloc_pages(gfp_t gfp, int nid,
2898 unsigned int order, unsigned int nr_pages, struct page **pages)
2899 {
2900 unsigned int nr_allocated = 0;
2901 struct page *page;
2902 int i;
2903
2904 /*
2905  * For order-0 pages we make use of the bulk allocator. If
2906  * the page array is partly or not at all populated due
2907  * to failures, fall back to a single page allocator that is
2908  * more permissive.
2909  */
2910 if (!order) {
2911 gfp_t bulk_gfp = gfp & ~__GFP_NOFAIL;
2912
2913 while (nr_allocated < nr_pages) {
2914 unsigned int nr, nr_pages_request;
2915
2916 /*
2917  * A maximum allowed request is hard-coded and is 100
2918  * pages per call. That is done in order to prevent a
2919  * long preemption off scenario in the bulk-allocator
2920  * so the range is [1:100].
2921  */
2922 nr_pages_request = min(100U, nr_pages - nr_allocated);
2923
2924 /*
2925  * With no node specified on a NUMA build, honour the
2926  * current task's memory policy via the mempolicy-aware
2927  * bulk allocator; otherwise allocate from the given node.
2928  */
2929 if (IS_ENABLED(CONFIG_NUMA) && nid == NUMA_NO_NODE)
2930 nr = alloc_pages_bulk_array_mempolicy(bulk_gfp,
2931 nr_pages_request,
2932 pages + nr_allocated);
2933
2934 else
2935 nr = alloc_pages_bulk_array_node(bulk_gfp, nid,
2936 nr_pages_request,
2937 pages + nr_allocated);
2938
2939 nr_allocated += nr;
2940 cond_resched();
2941
2942 /*
2943  * If zero or only part of the request was obtained,
2944  * fall back to the single page allocator below.
2945  */
2946 if (nr != nr_pages_request)
2947 break;
2948 }
2949 }
2950
2951
2952 /* High-order pages, or the fallback path if the bulk allocator fails. */
2953 while (nr_allocated < nr_pages) {
2954 if (fatal_signal_pending(current))
2955 break;
2956
2957 if (nid == NUMA_NO_NODE)
2958 page = alloc_pages(gfp, order);
2959 else
2960 page = alloc_pages_node(nid, gfp, order);
2961 if (unlikely(!page))
2962 break;
2963
2964 /*
2965  * Higher order allocations must be able to be treated as
2966  * independent small pages by callers (as they can with
2967  * small-page vmallocs). Some drivers do their own refcounting
2968  * on vmalloc_to_page() pages, some use page->mapping, page->lru, etc.
2969  */
2970 if (order)
2971 split_page(page, order);
2972
2973 /*
2974  * Careful, we allocate and map page-order pages, but
2975  * tracking is done per PAGE_SIZE page so as to keep the
2976  * vm_struct APIs independent of the physical/mapped size.
2977  */
2978 for (i = 0; i < (1U << order); i++)
2979 pages[nr_allocated + i] = page + i;
2980
2981 cond_resched();
2982 nr_allocated += 1U << order;
2983 }
2984
2985 return nr_allocated;
2986 }
2987
2988 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
2989 pgprot_t prot, unsigned int page_shift,
2990 int node)
2991 {
2992 const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
2993 bool nofail = gfp_mask & __GFP_NOFAIL;
2994 unsigned long addr = (unsigned long)area->addr;
2995 unsigned long size = get_vm_area_size(area);
2996 unsigned long array_size;
2997 unsigned int nr_small_pages = size >> PAGE_SHIFT;
2998 unsigned int page_order;
2999 unsigned int flags;
3000 int ret;
3001
3002 array_size = (unsigned long)nr_small_pages * sizeof(struct page *);
3003 gfp_mask |= __GFP_NOWARN;
3004 if (!(gfp_mask & (GFP_DMA | GFP_DMA32)))
3005 gfp_mask |= __GFP_HIGHMEM;
3006
3007 /* Please note that the recursion is strictly bounded. */
3008 if (array_size > PAGE_SIZE) {
3009 area->pages = __vmalloc_node(array_size, 1, nested_gfp, node,
3010 area->caller);
3011 } else {
3012 area->pages = kmalloc_node(array_size, nested_gfp, node);
3013 }
3014
3015 if (!area->pages) {
3016 warn_alloc(gfp_mask, NULL,
3017 "vmalloc error: size %lu, failed to allocated page array size %lu",
3018 nr_small_pages * PAGE_SIZE, array_size);
3019 free_vm_area(area);
3020 return NULL;
3021 }
3022
3023 set_vm_area_page_order(area, page_shift - PAGE_SHIFT);
3024 page_order = vm_area_page_order(area);
3025
3026 area->nr_pages = vm_area_alloc_pages(gfp_mask | __GFP_NOWARN,
3027 node, page_order, nr_small_pages, area->pages);
3028
3029 atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
3030 if (gfp_mask & __GFP_ACCOUNT) {
3031 int i;
3032
3033 for (i = 0; i < area->nr_pages; i++)
3034 mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
3035 }
3036
3037 /*
3038  * If not enough pages were obtained to accomplish an
3039  * allocation request, free them via __vfree() if any.
3040  */
3041 if (area->nr_pages != nr_small_pages) {
3042 warn_alloc(gfp_mask, NULL,
3043 "vmalloc error: size %lu, page order %u, failed to allocate pages",
3044 area->nr_pages * PAGE_SIZE, page_order);
3045 goto fail;
3046 }
3047
3048 /*
3049  * Page table allocations ignore the external gfp mask;
3050  * enforce it via the scope API.
3051  */
3052 if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO)
3053 flags = memalloc_nofs_save();
3054 else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0)
3055 flags = memalloc_noio_save();
3056
3057 do {
3058 ret = vmap_pages_range(addr, addr + size, prot, area->pages,
3059 page_shift);
3060 if (nofail && (ret < 0))
3061 schedule_timeout_uninterruptible(1);
3062 } while (nofail && (ret < 0));
3063
3064 if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO)
3065 memalloc_nofs_restore(flags);
3066 else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0)
3067 memalloc_noio_restore(flags);
3068
3069 if (ret < 0) {
3070 warn_alloc(gfp_mask, NULL,
3071 "vmalloc error: size %lu, failed to map pages",
3072 area->nr_pages * PAGE_SIZE);
3073 goto fail;
3074 }
3075
3076 return area->addr;
3077
3078 fail:
3079 __vfree(area->addr);
3080 return NULL;
3081 }
3082
3083 /**
3084  * __vmalloc_node_range - allocate virtually contiguous memory
3085  * @size:	  allocation size
3086  * @align:	  desired alignment
3087  * @start:	  vm area range start
3088  * @end:	  vm area range end
3089  * @gfp_mask:	  flags for the page level allocator
3090  * @prot:	  protection mask for the allocated pages
3091  * @vm_flags:	  additional vm area flags (e.g. %VM_NO_GUARD)
3092  * @node:	  node to use for allocation or NUMA_NO_NODE
3093  * @caller:	  caller's return address
3094  *
3095  * Allocate enough pages to cover @size from the page level allocator
3096  * with @gfp_mask flags. Please note that the full set of gfp flags is
3097  * not supported. GFP_KERNEL, GFP_NOFS and GFP_NOIO are all supported.
3098  * Zone modifiers are not supported. Among the reclaim modifiers,
3099  * __GFP_DIRECT_RECLAIM is required (i.e. GFP_NOWAIT is not supported),
3100  * and all higher level gfp flags are supported except __GFP_NORETRY
3101  * and __GFP_RETRY_MAYFAIL.
3102  *
3103  * __GFP_NOWARN can be used to suppress failure messages.
3104  * __GFP_NOFAIL is supported and will retry, sleeping between attempts.
3105  *
3106  * Map the pages into contiguous kernel virtual space, using a pagetable
3107  * protection of @prot.
3108  *
3109  * Return: the address of the area or %NULL on failure
3110  */
3111 void *__vmalloc_node_range(unsigned long size, unsigned long align,
3112 unsigned long start, unsigned long end, gfp_t gfp_mask,
3113 pgprot_t prot, unsigned long vm_flags, int node,
3114 const void *caller)
3115 {
3116 struct vm_struct *area;
3117 void *ret;
3118 kasan_vmalloc_flags_t kasan_flags = KASAN_VMALLOC_NONE;
3119 unsigned long real_size = size;
3120 unsigned long real_align = align;
3121 unsigned int shift = PAGE_SHIFT;
3122
3123 if (WARN_ON_ONCE(!size))
3124 return NULL;
3125
3126 if ((size >> PAGE_SHIFT) > totalram_pages()) {
3127 warn_alloc(gfp_mask, NULL,
3128 "vmalloc error: size %lu, exceeds total pages",
3129 real_size);
3130 return NULL;
3131 }
3132
3133 if (vmap_allow_huge && (vm_flags & VM_ALLOW_HUGE_VMAP)) {
3134 unsigned long size_per_node;
3135
3136 /*
3137  * Try huge pages. Only try for PAGE_KERNEL allocations;
3138  * others like modules don't yet expect huge pages in
3139  * their allocations due to apply_to_page_range not
3140  * supporting them.
3141  */
3142
3143 size_per_node = size;
3144 if (node == NUMA_NO_NODE)
3145 size_per_node /= num_online_nodes();
3146 if (arch_vmap_pmd_supported(prot) && size_per_node >= PMD_SIZE)
3147 shift = PMD_SHIFT;
3148 else
3149 shift = arch_vmap_pte_supported_shift(size_per_node);
3150
3151 align = max(real_align, 1UL << shift);
3152 size = ALIGN(real_size, 1UL << shift);
3153 }
3154
3155 again:
3156 area = __get_vm_area_node(real_size, align, shift, VM_ALLOC |
3157 VM_UNINITIALIZED | vm_flags, start, end, node,
3158 gfp_mask, caller);
3159 if (!area) {
3160 bool nofail = gfp_mask & __GFP_NOFAIL;
3161 warn_alloc(gfp_mask, NULL,
3162 "vmalloc error: size %lu, vm_struct allocation failed%s",
3163 real_size, (nofail) ? ". Retrying." : "");
3164 if (nofail) {
3165 schedule_timeout_uninterruptible(1);
3166 goto again;
3167 }
3168 goto fail;
3169 }
3170
3171 /*
3172  * Prepare arguments for __vmalloc_area_node() and
3173  * kasan_unpoison_vmalloc().
3174  */
3175 if (pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) {
3176 if (kasan_hw_tags_enabled()) {
3177 /*
3178  * Modify protection bits to allow tagging.
3179  * This must be done before mapping.
3180  */
3181 prot = arch_vmap_pgprot_tagged(prot);
3182
3183 /*
3184  * Skip page_alloc poisoning and zeroing for physical
3185  * pages backing the VM_ALLOC mapping. Memory is instead
3186  * poisoned and zeroed by kasan_unpoison_vmalloc().
3187  */
3188 gfp_mask |= __GFP_SKIP_KASAN_UNPOISON | __GFP_SKIP_ZERO;
3189 }
3190
3191 /* Take note that the mapping is PAGE_KERNEL. */
3192 kasan_flags |= KASAN_VMALLOC_PROT_NORMAL;
3193 }
3194
3195 /* Allocate physical pages and map them into vmalloc space. */
3196 ret = __vmalloc_area_node(area, gfp_mask, prot, shift, node);
3197 if (!ret)
3198 goto fail;
3199
3200 /*
3201  * Mark the pages as accessible, now that they are mapped.
3202  * The condition for setting KASAN_VMALLOC_INIT should complement the
3203  * one in post_alloc_hook() with regards to the __GFP_SKIP_ZERO check
3204  * to make sure that memory is initialized under the same conditions.
3205  * Tag-based KASAN modes only assign tags to normal non-executable
3206  * allocations, see __kasan_unpoison_vmalloc().
3207  */
3208 kasan_flags |= KASAN_VMALLOC_VM_ALLOC;
3209 if (!want_init_on_free() && want_init_on_alloc(gfp_mask) &&
3210 (gfp_mask & __GFP_SKIP_ZERO))
3211 kasan_flags |= KASAN_VMALLOC_INIT;
3212
3213 area->addr = kasan_unpoison_vmalloc(area->addr, real_size, kasan_flags);
3214
3215 /*
3216  * In this function, a newly allocated vm_struct has the
3217  * VM_UNINITIALIZED flag set, meaning it is not fully initialized.
3218  * Now that it is, remove the flag here.
3219  */
3220 clear_vm_uninitialized_flag(area);
3221
3222 size = PAGE_ALIGN(size);
3223 if (!(vm_flags & VM_DEFER_KMEMLEAK))
3224 kmemleak_vmalloc(area, size, gfp_mask);
3225
3226 return area->addr;
3227
3228 fail:
3229 if (shift > PAGE_SHIFT) {
3230 shift = PAGE_SHIFT;
3231 align = real_align;
3232 size = real_size;
3233 goto again;
3234 }
3235
3236 return NULL;
3237 }
3238
3239 /**
3240  * __vmalloc_node - allocate virtually contiguous memory
3241  * @size:	    allocation size
3242  * @align:	    desired alignment
3243  * @gfp_mask:	    flags for the page level allocator
3244  * @node:	    node to use for allocation or NUMA_NO_NODE
3245  * @caller:	    caller's return address
3246  *
3247  * Allocate enough pages to cover @size from the page level allocator with
3248  * @gfp_mask flags. Map them into contiguous kernel virtual space.
3249  *
3250  * Reclaim modifiers in @gfp_mask - __GFP_NORETRY and
3251  * __GFP_RETRY_MAYFAIL are not supported.
3252  *
3253  * Any use of gfp flags outside of GFP_KERNEL should be consulted
3254  * with mm people.
3255  *
3256  * Return: pointer to the allocated memory or %NULL on error
3257  */
3258 void *__vmalloc_node(unsigned long size, unsigned long align,
3259 gfp_t gfp_mask, int node, const void *caller)
3260 {
3261 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
3262 gfp_mask, PAGE_KERNEL, 0, node, caller);
3263 }
3264
3265 /*
3266  * Exported only for performance analysis and stress testing via the
3267  * vmalloc test module; do not use it for anything else.
3268  */
3269 #ifdef CONFIG_TEST_VMALLOC_MODULE
3270 EXPORT_SYMBOL_GPL(__vmalloc_node);
3271 #endif
3272
3273 void *__vmalloc(unsigned long size, gfp_t gfp_mask)
3274 {
3275 return __vmalloc_node(size, 1, gfp_mask, NUMA_NO_NODE,
3276 __builtin_return_address(0));
3277 }
3278 EXPORT_SYMBOL(__vmalloc);
3279
3280 /**
3281  * vmalloc - allocate virtually contiguous memory
3282  * @size:    allocation size
3283  *
3284  * Allocate enough pages to cover @size from the page level
3285  * allocator and map them into contiguous kernel virtual space.
3286  *
3287  * For tight control over page level allocator and protection flags
3288  * use __vmalloc() instead.
3289  *
3290  * Return: pointer to the allocated memory or %NULL on error
3291  */
3292 void *vmalloc(unsigned long size)
3293 {
3294 return __vmalloc_node(size, 1, GFP_KERNEL, NUMA_NO_NODE,
3295 __builtin_return_address(0));
3296 }
3297 EXPORT_SYMBOL(vmalloc);
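/*
 * Illustrative sketch (not from this file): a typical vmalloc()/vfree()
 * pattern for a large table that only needs to be virtually contiguous.
 * The structure and helper names are assumptions for the example only.
 *
 *	struct example_table {
 *		unsigned long nr_entries;
 *		u64 entries[];
 *	};
 *
 *	static struct example_table *example_table_alloc(unsigned long nr)
 *	{
 *		struct example_table *t;
 *
 *		t = vmalloc(struct_size(t, entries, nr));
 *		if (!t)
 *			return NULL;
 *		t->nr_entries = nr;
 *		return t;
 *	}
 *
 * The table is later released with vfree(t). The backing pages need not be
 * physically contiguous, which is what distinguishes this from kmalloc().
 */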
3298
3299 /**
3300  * vmalloc_huge - allocate virtually contiguous memory, allow huge pages
3301  * @size:      allocation size
3302  * @gfp_mask:  flags for the page level allocator
3303  *
3304  * Allocate enough pages to cover @size from the page level
3305  * allocator and map them into contiguous kernel virtual space.
3306  * If @size is greater than or equal to PMD_SIZE, allow using
3307  * huge pages for the memory.
3308  *
3309  * Return: pointer to the allocated memory or %NULL on error
3310  */
3311 void *vmalloc_huge(unsigned long size, gfp_t gfp_mask)
3312 {
3313 return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
3314 gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
3315 NUMA_NO_NODE, __builtin_return_address(0));
3316 }
3317 EXPORT_SYMBOL_GPL(vmalloc_huge);
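/*
 * Illustrative sketch (not from this file): an opt-in caller sizing a large
 * hash table, where huge mappings are worthwhile but small pages remain an
 * acceptable fallback. The variable names are assumptions for the example.
 *
 *	table = vmalloc_huge(array_size(nr_buckets, sizeof(*table)),
 *			     GFP_KERNEL);
 */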
3318
3319 /**
3320  * vzalloc - allocate virtually contiguous memory with zero fill
3321  * @size:    allocation size
3322  *
3323  * Allocate enough pages to cover @size from the page level
3324  * allocator and map them into contiguous kernel virtual space.
3325  * The memory allocated is set to zero.
3326  *
3327  * For tight control over page level allocator and protection flags
3328  * use __vmalloc() instead.
3329  *
3330  * Return: pointer to the allocated memory or %NULL on error
3331  */
3332 void *vzalloc(unsigned long size)
3333 {
3334 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE,
3335 __builtin_return_address(0));
3336 }
3337 EXPORT_SYMBOL(vzalloc);
3338
3339 /**
3340  * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
3341  * @size: allocation size
3342  *
3343  * The resulting memory area is zeroed so it can be mapped to userspace
3344  * without leaking data.
3345  *
3346  * Return: pointer to the allocated memory or %NULL on error
3347  */
3348 void *vmalloc_user(unsigned long size)
3349 {
3350 return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
3351 GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
3352 VM_USERMAP, NUMA_NO_NODE,
3353 __builtin_return_address(0));
3354 }
3355 EXPORT_SYMBOL(vmalloc_user);
3356
3357 /**
3358  * vmalloc_node - allocate memory on a specific node
3359  * @size:	  allocation size
3360  * @node:	  numa node
3361  *
3362  * Allocate enough pages to cover @size from the page level
3363  * allocator and map them into contiguous kernel virtual space.
3364  *
3365  * For tight control over page level allocator and protection flags
3366  * use __vmalloc() instead.
3367  *
3368  * Return: pointer to the allocated memory or %NULL on error
3369  */
3370 void *vmalloc_node(unsigned long size, int node)
3371 {
3372 return __vmalloc_node(size, 1, GFP_KERNEL, node,
3373 __builtin_return_address(0));
3374 }
3375 EXPORT_SYMBOL(vmalloc_node);
3376
3377 /**
3378  * vzalloc_node - allocate memory on a specific node with zero fill
3379  * @size:	allocation size
3380  * @node:	numa node
3381  *
3382  * Allocate enough pages to cover @size from the page level
3383  * allocator and map them into contiguous kernel virtual space.
3384  * The memory allocated is set to zero.
3385  *
3386  * Return: pointer to the allocated memory or %NULL on error
3387  */
3388 void *vzalloc_node(unsigned long size, int node)
3389 {
3390 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, node,
3391 __builtin_return_address(0));
3392 }
3393 EXPORT_SYMBOL(vzalloc_node);
3394
3395 #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
3396 #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
3397 #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
3398 #define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
3399 #else
3400 /*
3401  * 64b systems should always have either DMA or DMA32 zones. For others
3402  * GFP_DMA32 should do the right thing and use the normal zone.
3403  */
3404 #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
3405 #endif
3406
3407 /**
3408  * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
3409  * @size:	allocation size
3410  *
3411  * Allocate enough 32bit PA addressable pages to cover @size from the
3412  * page level allocator and map them into contiguous kernel virtual space.
3413  *
3414  * Return: pointer to the allocated memory or %NULL on error
3415  */
3416 void *vmalloc_32(unsigned long size)
3417 {
3418 return __vmalloc_node(size, 1, GFP_VMALLOC32, NUMA_NO_NODE,
3419 __builtin_return_address(0));
3420 }
3421 EXPORT_SYMBOL(vmalloc_32);
3422
3423 /**
3424  * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
3425  * @size:	     allocation size
3426  *
3427  * The resulting memory area is 32bit addressable and zeroed so it can be
3428  * mapped to userspace without leaking data.
3429  *
3430  * Return: pointer to the allocated memory or %NULL on error
3431  */
3432 void *vmalloc_32_user(unsigned long size)
3433 {
3434 return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
3435 GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
3436 VM_USERMAP, NUMA_NO_NODE,
3437 __builtin_return_address(0));
3438 }
3439 EXPORT_SYMBOL(vmalloc_32_user);
3440
3441
3442 /*
3443  * Small helper routine: copy contents from addr to buf.
3444  * If the page is not present, fill the buffer with zeroes.
3445  */
3446 static int aligned_vread(char *buf, char *addr, unsigned long count)
3447 {
3448 struct page *p;
3449 int copied = 0;
3450
3451 while (count) {
3452 unsigned long offset, length;
3453
3454 offset = offset_in_page(addr);
3455 length = PAGE_SIZE - offset;
3456 if (length > count)
3457 length = count;
3458 p = vmalloc_to_page(addr);
3459 /*
3460  * To do safe access to this _mapped_ area, we need a lock. But
3461  * adding a lock here means adding the overhead of vmalloc()/vfree()
3462  * calls for this _debug_ interface, which is rarely used. Instead,
3463  * use an atomic kmap to get/put the page: this is only a copy,
3464  * not an update.
3465  */
3466 if (p) {
3467 /* Copy from the mapped page via a short-lived atomic mapping. */
3468 void *map = kmap_atomic(p);
3469 memcpy(buf, map + offset, length);
3470 kunmap_atomic(map);
3471 } else
3472 memset(buf, 0, length);
3473
3474 addr += length;
3475 buf += length;
3476 copied += length;
3477 count -= length;
3478 }
3479 return copied;
3480 }
3481
3482 /**
3483  * vread() - read vmalloc area in a safe way.
3484  * @buf:     buffer for reading data
3485  * @addr:    vm address.
3486  * @count:   number of bytes to be read.
3487  *
3488  * This function checks that addr is a valid vmalloc'ed area, and
3489  * copies data from that area to a given buffer. If the given memory range
3490  * of [addr...addr+count) includes some valid address, data is copied to
3491  * the proper area of @buf. If there are memory holes, they'll be zero-filled.
3492  * IOREMAP areas are treated as memory holes and no copy is done.
3493  *
3494  * If [addr...addr+count) doesn't include any intersection with a live
3495  * vm_struct area, 0 is returned. @buf should be a kernel buffer.
3496  *
3497  * Note: In usual ops, vread() is never necessary because the caller
3498  * should know the vmalloc() area is valid and can use memcpy().
3499  * This is for routines which have to access the vmalloc area without
3500  * any information, such as /proc/kcore.
3501  *
3502  * Return: number of bytes for which addr and buf should be increased
3503  * (same number as @count) or %0 if [addr...addr+count) doesn't
3504  * include any intersection with a valid vmalloc area
3505  */
3506 long vread(char *buf, char *addr, unsigned long count)
3507 {
3508 struct vmap_area *va;
3509 struct vm_struct *vm;
3510 char *vaddr, *buf_start = buf;
3511 unsigned long buflen = count;
3512 unsigned long n;
3513
3514 addr = kasan_reset_tag(addr);
3515
3516 /* Don't allow overflow */
3517 if ((unsigned long) addr + count < count)
3518 count = -(unsigned long) addr;
3519
3520 spin_lock(&vmap_area_lock);
3521 va = find_vmap_area_exceed_addr((unsigned long)addr);
3522 if (!va)
3523 goto finished;
3524
3525 /* No intersection with any live vmap_area. */
3526 if ((unsigned long)addr + count <= va->va_start)
3527 goto finished;
3528
3529 list_for_each_entry_from(va, &vmap_area_list, list) {
3530 if (!count)
3531 break;
3532
3533 if (!va->vm)
3534 continue;
3535
3536 vm = va->vm;
3537 vaddr = (char *) vm->addr;
3538 if (addr >= vaddr + get_vm_area_size(vm))
3539 continue;
3540 while (addr < vaddr) {
3541 if (count == 0)
3542 goto finished;
3543 *buf = '\0';
3544 buf++;
3545 addr++;
3546 count--;
3547 }
3548 n = vaddr + get_vm_area_size(vm) - addr;
3549 if (n > count)
3550 n = count;
3551 if (!(vm->flags & VM_IOREMAP))
3552 aligned_vread(buf, addr, n);
3553 else
3554 memset(buf, 0, n);
3555 buf += n;
3556 addr += n;
3557 count -= n;
3558 }
3559 finished:
3560 spin_unlock(&vmap_area_lock);
3561
3562 if (buf == buf_start)
3563 return 0;
3564
3565 if (buf != buf_start + buflen)
3566 memset(buf, 0, buflen - (buf - buf_start));
3567
3568 return buflen;
3569 }
3570
3571 /**
3572  * remap_vmalloc_range_partial - map vmalloc pages to userspace
3573  * @vma:		vma to cover
3574  * @uaddr:		target user address to start at
3575  * @kaddr:		virtual address of vmalloc kernel memory
3576  * @pgoff:		offset from @kaddr to start at
3577  * @size:		size of map area
3578  *
3579  * Returns:	0 for success, -Exxx on failure
3580  *
3581  * This function checks that @kaddr is a valid vmalloc'ed area,
3582  * and that it is big enough to cover the range starting at
3583  * @uaddr in @vma. Will return failure if that criterion isn't
3584  * met.
3585  *
3586  * Similar to remap_pfn_range() (see mm/memory.c)
3587  */
3588 int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
3589 void *kaddr, unsigned long pgoff,
3590 unsigned long size)
3591 {
3592 struct vm_struct *area;
3593 unsigned long off;
3594 unsigned long end_index;
3595
3596 if (check_shl_overflow(pgoff, PAGE_SHIFT, &off))
3597 return -EINVAL;
3598
3599 size = PAGE_ALIGN(size);
3600
3601 if (!PAGE_ALIGNED(uaddr) || !PAGE_ALIGNED(kaddr))
3602 return -EINVAL;
3603
3604 area = find_vm_area(kaddr);
3605 if (!area)
3606 return -EINVAL;
3607
3608 if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT)))
3609 return -EINVAL;
3610
3611 if (check_add_overflow(size, off, &end_index) ||
3612 end_index > get_vm_area_size(area))
3613 return -EINVAL;
3614 kaddr += off;
3615
3616 do {
3617 struct page *page = vmalloc_to_page(kaddr);
3618 int ret;
3619
3620 ret = vm_insert_page(vma, uaddr, page);
3621 if (ret)
3622 return ret;
3623
3624 uaddr += PAGE_SIZE;
3625 kaddr += PAGE_SIZE;
3626 size -= PAGE_SIZE;
3627 } while (size > 0);
3628
3629 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
3630
3631 return 0;
3632 }
3633
3634 /**
3635  * remap_vmalloc_range - map vmalloc pages to userspace
3636  * @vma:		vma to cover (map full range of vma)
3637  * @addr:		vmalloc memory
3638  * @pgoff:		number of pages into addr before first page to map
3639  *
3640  * Returns:	0 for success, -Exxx on failure
3641  *
3642  * This function checks that addr is a valid vmalloc'ed area, and
3643  * that it is big enough to cover the vma. Will return failure if
3644  * that criterion isn't met.
3645  *
3646  * Similar to remap_pfn_range() (see mm/memory.c)
3647  */
3648 int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
3649 unsigned long pgoff)
3650 {
3651 return remap_vmalloc_range_partial(vma, vma->vm_start,
3652 addr, pgoff,
3653 vma->vm_end - vma->vm_start);
3654 }
3655 EXPORT_SYMBOL(remap_vmalloc_range);
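/*
 * Illustrative sketch (not from this file): pairing vmalloc_user() with
 * remap_vmalloc_range() in a driver's ->mmap() handler. The device
 * structure, its fields and the handler name are assumptions for the
 * example; dev->buf is assumed to have been allocated with vmalloc_user(),
 * which zeroes the memory and sets VM_USERMAP as required here.
 *
 *	static int example_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		struct example_dev *dev = file->private_data;
 *
 *		return remap_vmalloc_range(vma, dev->buf, 0);
 *	}
 */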
3656
3657 void free_vm_area(struct vm_struct *area)
3658 {
3659 struct vm_struct *ret;
3660 ret = remove_vm_area(area->addr);
3661 BUG_ON(ret != area);
3662 kfree(area);
3663 }
3664 EXPORT_SYMBOL_GPL(free_vm_area);
3665
3666 #ifdef CONFIG_SMP
3667 static struct vmap_area *node_to_va(struct rb_node *n)
3668 {
3669 return rb_entry_safe(n, struct vmap_area, rb_node);
3670 }
3671
3672 /**
3673  * pvm_find_va_enclose_addr - find the vmap_area @addr belongs to
3674  * @addr: target address
3675  *
3676  * Returns: the vmap_area if it is found. If there is no such area,
3677  *   the first highest (reverse order) vmap_area is returned,
3678  *   i.e. va->va_start < addr && va->va_end < addr, or NULL
3679  *   if there are no areas before @addr.
3680  */
3681 static struct vmap_area *
3682 pvm_find_va_enclose_addr(unsigned long addr)
3683 {
3684 struct vmap_area *va, *tmp;
3685 struct rb_node *n;
3686
3687 n = free_vmap_area_root.rb_node;
3688 va = NULL;
3689
3690 while (n) {
3691 tmp = rb_entry(n, struct vmap_area, rb_node);
3692 if (tmp->va_start <= addr) {
3693 va = tmp;
3694 if (tmp->va_end >= addr)
3695 break;
3696
3697 n = n->rb_right;
3698 } else {
3699 n = n->rb_left;
3700 }
3701 }
3702
3703 return va;
3704 }
3705
3706 /**
3707  * pvm_determine_end_from_reverse - find the highest aligned address
3708  * of a free block below VMALLOC_END
3709  * @va:
3710  *   in - the VA we start the search from (reverse order);
3711  *   out - the VA with the highest aligned end address.
3712  * @align: alignment for the required highest address
3713  *
3714  * Returns: determined end address within vmap_area
3715  */
3716 static unsigned long
3717 pvm_determine_end_from_reverse(struct vmap_area **va, unsigned long align)
3718 {
3719 unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
3720 unsigned long addr;
3721
3722 if (likely(*va)) {
3723 list_for_each_entry_from_reverse((*va),
3724 &free_vmap_area_list, list) {
3725 addr = min((*va)->va_end & ~(align - 1), vmalloc_end);
3726 if ((*va)->va_start < addr)
3727 return addr;
3728 }
3729 }
3730
3731 return 0;
3732 }
3733
3734 /**
3735  * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
3736  * @offsets: array containing offset of each area
3737  * @sizes: array containing size of each area
3738  * @nr_vms: the number of areas to allocate
3739  * @align: alignment, all entries in @offsets and @sizes must be aligned to this
3740  *
3741  * Returns: kmalloc'd vm_struct pointer array pointing to allocated
3742  *	    vm_structs on success, %NULL on failure
3743  *
3744  * The percpu allocator wants to use congruent vm areas so that it can
3745  * maintain the offsets among percpu areas. This function allocates
3746  * percpu vmalloc areas for it with GFP_KERNEL. These areas tend to
3747  * be scattered pretty far, distance between two areas easily going up
3748  * to gigabytes. To avoid interacting with regular vmallocs, these
3749  * areas are allocated from higher regions of the vmalloc area.
3750  *
3751  * Despite its complicated look, this allocator is rather simple. It
3752  * does everything top-down and scans free blocks from the end looking
3753  * for a matching base. While scanning, if any of the areas do not fit,
3754  * the base address is pulled down to fit the area. Scanning is repeated
3755  * till all the areas fit and then all necessary data structures are
3756  * inserted and the result is returned.
3757  */
3758 struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
3759 const size_t *sizes, int nr_vms,
3760 size_t align)
3761 {
3762 const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
3763 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
3764 struct vmap_area **vas, *va;
3765 struct vm_struct **vms;
3766 int area, area2, last_area, term_area;
3767 unsigned long base, start, size, end, last_end, orig_start, orig_end;
3768 bool purged = false;
3769
3770 /* verify parameters and allocate data structures */
3771 BUG_ON(offset_in_page(align) || !is_power_of_2(align));
3772 for (last_area = 0, area = 0; area < nr_vms; area++) {
3773 start = offsets[area];
3774 end = start + sizes[area];
3775
3776 /* is everything aligned properly? */
3777 BUG_ON(!IS_ALIGNED(offsets[area], align));
3778 BUG_ON(!IS_ALIGNED(sizes[area], align));
3779
3780 /* detect the area with the highest address */
3781 if (start > offsets[last_area])
3782 last_area = area;
3783
3784 for (area2 = area + 1; area2 < nr_vms; area2++) {
3785 unsigned long start2 = offsets[area2];
3786 unsigned long end2 = start2 + sizes[area2];
3787
3788 BUG_ON(start2 < end && start < end2);
3789 }
3790 }
3791 last_end = offsets[last_area] + sizes[last_area];
3792
3793 if (vmalloc_end - vmalloc_start < last_end) {
3794 WARN_ON(true);
3795 return NULL;
3796 }
3797
3798 vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL);
3799 vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL);
3800 if (!vas || !vms)
3801 goto err_free2;
3802
3803 for (area = 0; area < nr_vms; area++) {
3804 vas[area] = kmem_cache_zalloc(vmap_area_cachep, GFP_KERNEL);
3805 vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
3806 if (!vas[area] || !vms[area])
3807 goto err_free;
3808 }
3809 retry:
3810 spin_lock(&free_vmap_area_lock);
3811
3812 /* start scanning - we scan from the top, begin with the last area */
3813 area = term_area = last_area;
3814 start = offsets[area];
3815 end = start + sizes[area];
3816
3817 va = pvm_find_va_enclose_addr(vmalloc_end);
3818 base = pvm_determine_end_from_reverse(&va, align) - end;
3819
3820 while (true) {
3821 /*
3822  * base might have underflowed, add last_end before
3823  * comparing.
3824  */
3825 if (base + last_end < vmalloc_start + last_end)
3826 goto overflow;
3827
3828 /*
3829  * A fitting base has not been found.
3830  */
3831 if (va == NULL)
3832 goto overflow;
3833
3834 /*
3835  * If the required width exceeds the current VA block, move
3836  * base downwards and then recheck.
3837  */
3838 if (base + end > va->va_end) {
3839 base = pvm_determine_end_from_reverse(&va, align) - end;
3840 term_area = area;
3841 continue;
3842 }
3843
3844 /*
3845  * If this VA does not fit, move base downwards and recheck.
3846  */
3847 if (base + start < va->va_start) {
3848 va = node_to_va(rb_prev(&va->rb_node));
3849 base = pvm_determine_end_from_reverse(&va, align) - end;
3850 term_area = area;
3851 continue;
3852 }
3853
3854 /*
3855  * This area fits, move on to the previous one. If
3856  * the previous one is the terminal one, we're done.
3857  */
3858 area = (area + nr_vms - 1) % nr_vms;
3859 if (area == term_area)
3860 break;
3861
3862 start = offsets[area];
3863 end = start + sizes[area];
3864 va = pvm_find_va_enclose_addr(base + end);
3865 }
3866
3867 /* we've found a fitting base, insert all va's */
3868 for (area = 0; area < nr_vms; area++) {
3869 int ret;
3870
3871 start = base + offsets[area];
3872 size = sizes[area];
3873
3874 va = pvm_find_va_enclose_addr(start);
3875 if (WARN_ON_ONCE(va == NULL))
3876 /* It is a BUG(), but trigger recovery instead. */
3877 goto recovery;
3878
3879 ret = adjust_va_to_fit_type(&free_vmap_area_root,
3880 &free_vmap_area_list,
3881 va, start, size);
3882 if (WARN_ON_ONCE(unlikely(ret)))
3883 /* It is a BUG(), but trigger recovery instead. */
3884 goto recovery;
3885
3886 /* Allocated area. */
3887 va = vas[area];
3888 va->va_start = start;
3889 va->va_end = start + size;
3890 }
3891
3892 spin_unlock(&free_vmap_area_lock);
3893
3894 /* populate the KASAN shadow space */
3895 for (area = 0; area < nr_vms; area++) {
3896 if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area]))
3897 goto err_free_shadow;
3898 }
3899
3900 /* insert all vm's */
3901 spin_lock(&vmap_area_lock);
3902 for (area = 0; area < nr_vms; area++) {
3903 insert_vmap_area(vas[area], &vmap_area_root, &vmap_area_list);
3904
3905 setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
3906 pcpu_get_vm_areas);
3907 }
3908 spin_unlock(&vmap_area_lock);
3909
3910 /*
3911  * Mark allocated areas as accessible. Do it now as a best-effort
3912  * approach, as they can be mapped outside of the vmalloc code.
3913  * With hardware tag-based KASAN, marking is skipped for
3914  * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
3915  */
3916 for (area = 0; area < nr_vms; area++)
3917 vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr,
3918 vms[area]->size, KASAN_VMALLOC_PROT_NORMAL);
3919
3920 kfree(vas);
3921 return vms;
3922
3923 recovery:
3924 /*
3925  * Remove previously allocated areas. There is no
3926  * need to remove these areas from the busy tree,
3927  * because they are inserted only on the final step
3928  * and only when pcpu_get_vm_areas() succeeds.
3929  */
3930 while (area--) {
3931 orig_start = vas[area]->va_start;
3932 orig_end = vas[area]->va_end;
3933 va = merge_or_add_vmap_area_augment(vas[area], &free_vmap_area_root,
3934 &free_vmap_area_list);
3935 if (va)
3936 kasan_release_vmalloc(orig_start, orig_end,
3937 va->va_start, va->va_end);
3938 vas[area] = NULL;
3939 }
3940
3941 overflow:
3942 spin_unlock(&free_vmap_area_lock);
3943 if (!purged) {
3944 purge_vmap_area_lazy();
3945 purged = true;
3946
3947 /* Before "retry", check if we recovered. */
3948 for (area = 0; area < nr_vms; area++) {
3949 if (vas[area])
3950 continue;
3951
3952 vas[area] = kmem_cache_zalloc(
3953 vmap_area_cachep, GFP_KERNEL);
3954 if (!vas[area])
3955 goto err_free;
3956 }
3957
3958 goto retry;
3959 }
3960
3961 err_free:
3962 for (area = 0; area < nr_vms; area++) {
3963 if (vas[area])
3964 kmem_cache_free(vmap_area_cachep, vas[area]);
3965
3966 kfree(vms[area]);
3967 }
3968 err_free2:
3969 kfree(vas);
3970 kfree(vms);
3971 return NULL;
3972
3973 err_free_shadow:
3974 spin_lock(&free_vmap_area_lock);
3975 /*
3976  * We release all the vmalloc shadows, even the ones for regions that
3977  * hadn't been successfully added. This relies on kasan_release_vmalloc
3978  * poisoning only existing shadow regions.
3979  */
3980 for (area = 0; area < nr_vms; area++) {
3981 orig_start = vas[area]->va_start;
3982 orig_end = vas[area]->va_end;
3983 va = merge_or_add_vmap_area_augment(vas[area], &free_vmap_area_root,
3984 &free_vmap_area_list);
3985 if (va)
3986 kasan_release_vmalloc(orig_start, orig_end,
3987 va->va_start, va->va_end);
3988 vas[area] = NULL;
3989 kfree(vms[area]);
3990 }
3991 spin_unlock(&free_vmap_area_lock);
3992 kfree(vas);
3993 kfree(vms);
3994 return NULL;
3995 }
3996
3997 /**
3998  * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
3999  * @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
4000  * @nr_vms: the number of allocated areas
4001  *
4002  * Free vm_structs and the array allocated by pcpu_get_vm_areas().
4003  */
4004 void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
4005 {
4006 int i;
4007
4008 for (i = 0; i < nr_vms; i++)
4009 free_vm_area(vms[i]);
4010 kfree(vms);
4011 }
4012 #endif
4013
4014 #ifdef CONFIG_PRINTK
4015 bool vmalloc_dump_obj(void *object)
4016 {
4017 struct vm_struct *vm;
4018 void *objp = (void *)PAGE_ALIGN((unsigned long)object);
4019
4020 vm = find_vm_area(objp);
4021 if (!vm)
4022 return false;
4023 pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
4024 vm->nr_pages, (unsigned long)vm->addr, vm->caller);
4025 return true;
4026 }
4027 #endif
4028
4029 #ifdef CONFIG_PROC_FS
4030 static void *s_start(struct seq_file *m, loff_t *pos)
4031 __acquires(&vmap_purge_lock)
4032 __acquires(&vmap_area_lock)
4033 {
4034 mutex_lock(&vmap_purge_lock);
4035 spin_lock(&vmap_area_lock);
4036
4037 return seq_list_start(&vmap_area_list, *pos);
4038 }
4039
4040 static void *s_next(struct seq_file *m, void *p, loff_t *pos)
4041 {
4042 return seq_list_next(p, &vmap_area_list, pos);
4043 }
4044
4045 static void s_stop(struct seq_file *m, void *p)
4046 __releases(&vmap_area_lock)
4047 __releases(&vmap_purge_lock)
4048 {
4049 spin_unlock(&vmap_area_lock);
4050 mutex_unlock(&vmap_purge_lock);
4051 }
4052
4053 static void show_numa_info(struct seq_file *m, struct vm_struct *v)
4054 {
4055 if (IS_ENABLED(CONFIG_NUMA)) {
4056 unsigned int nr, *counters = m->private;
4057 unsigned int step = 1U << vm_area_page_order(v);
4058
4059 if (!counters)
4060 return;
4061
4062 if (v->flags & VM_UNINITIALIZED)
4063 return;
4064 /* Pairs with smp_wmb() in clear_vm_uninitialized_flag() */
4065 smp_rmb();
4066
4067 memset(counters, 0, nr_node_ids * sizeof(unsigned int));
4068
4069 for (nr = 0; nr < v->nr_pages; nr += step)
4070 counters[page_to_nid(v->pages[nr])] += step;
4071 for_each_node_state(nr, N_HIGH_MEMORY)
4072 if (counters[nr])
4073 seq_printf(m, " N%u=%u", nr, counters[nr]);
4074 }
4075 }
4076
4077 static void show_purge_info(struct seq_file *m)
4078 {
4079 struct vmap_area *va;
4080
4081 spin_lock(&purge_vmap_area_lock);
4082 list_for_each_entry(va, &purge_vmap_area_list, list) {
4083 seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
4084 (void *)va->va_start, (void *)va->va_end,
4085 va->va_end - va->va_start);
4086 }
4087 spin_unlock(&purge_vmap_area_lock);
4088 }
4089
4090 static int s_show(struct seq_file *m, void *p)
4091 {
4092 struct vmap_area *va;
4093 struct vm_struct *v;
4094
4095 va = list_entry(p, struct vmap_area, list);
4096
4097 /*
4098  * s_show() can race with remove_vm_area(): !va->vm means the vmap
4099  * area is being torn down or belongs to a vm_map_ram allocation.
4100  */
4101 if (!va->vm) {
4102 seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
4103 (void *)va->va_start, (void *)va->va_end,
4104 va->va_end - va->va_start);
4105
4106 goto final;
4107 }
4108
4109 v = va->vm;
4110
4111 seq_printf(m, "0x%pK-0x%pK %7ld",
4112 v->addr, v->addr + v->size, v->size);
4113
4114 if (v->caller)
4115 seq_printf(m, " %pS", v->caller);
4116
4117 if (v->nr_pages)
4118 seq_printf(m, " pages=%d", v->nr_pages);
4119
4120 if (v->phys_addr)
4121 seq_printf(m, " phys=%pa", &v->phys_addr);
4122
4123 if (v->flags & VM_IOREMAP)
4124 seq_puts(m, " ioremap");
4125
4126 if (v->flags & VM_ALLOC)
4127 seq_puts(m, " vmalloc");
4128
4129 if (v->flags & VM_MAP)
4130 seq_puts(m, " vmap");
4131
4132 if (v->flags & VM_USERMAP)
4133 seq_puts(m, " user");
4134
4135 if (v->flags & VM_DMA_COHERENT)
4136 seq_puts(m, " dma-coherent");
4137
4138 if (is_vmalloc_addr(v->pages))
4139 seq_puts(m, " vpages");
4140
4141 show_numa_info(m, v);
4142 seq_putc(m, '\n');
4143
4144 /*
4145  * As a final step, dump "unpurged" areas.
4146  */
4147 final:
4148 if (list_is_last(&va->list, &vmap_area_list))
4149 show_purge_info(m);
4150
4151 return 0;
4152 }
4153
4154 static const struct seq_operations vmalloc_op = {
4155 .start = s_start,
4156 .next = s_next,
4157 .stop = s_stop,
4158 .show = s_show,
4159 };
4160
4161 static int __init proc_vmalloc_init(void)
4162 {
4163 if (IS_ENABLED(CONFIG_NUMA))
4164 proc_create_seq_private("vmallocinfo", 0400, NULL,
4165 &vmalloc_op,
4166 nr_node_ids * sizeof(unsigned int), NULL);
4167 else
4168 proc_create_seq("vmallocinfo", 0400, NULL, &vmalloc_op);
4169 return 0;
4170 }
4171 module_init(proc_vmalloc_init);
4172
4173 #endif