0001
0002 #include <linux/kernel.h>
0003 #include <linux/errno.h>
0004 #include <linux/err.h>
0005 #include <linux/spinlock.h>
0006
0007 #include <linux/mm.h>
0008 #include <linux/memremap.h>
0009 #include <linux/pagemap.h>
0010 #include <linux/rmap.h>
0011 #include <linux/swap.h>
0012 #include <linux/swapops.h>
0013 #include <linux/secretmem.h>
0014
0015 #include <linux/sched/signal.h>
0016 #include <linux/rwsem.h>
0017 #include <linux/hugetlb.h>
0018 #include <linux/migrate.h>
0019 #include <linux/mm_inline.h>
0020 #include <linux/sched/mm.h>
0021
0022 #include <asm/mmu_context.h>
0023 #include <asm/tlbflush.h>
0024
0025 #include "internal.h"
0026
0027 struct follow_page_context {
0028 struct dev_pagemap *pgmap;
0029 unsigned int page_mask;
0030 };
0031
0032 static inline void sanity_check_pinned_pages(struct page **pages,
0033 unsigned long npages)
0034 {
0035 if (!IS_ENABLED(CONFIG_DEBUG_VM))
0036 return;
0037
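	/*
	 * Only anonymous pages need checking here: a pinned anonymous page
	 * is expected to be mapped exclusively. For a large non-hugetlb
	 * (THP) folio, either the given subpage or the head page should
	 * have PageAnonExclusive() set; anything else indicates a bug in
	 * the pin tracking.
	 */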
0050 for (; npages; npages--, pages++) {
0051 struct page *page = *pages;
0052 struct folio *folio = page_folio(page);
0053
0054 if (!folio_test_anon(folio))
0055 continue;
0056 if (!folio_test_large(folio) || folio_test_hugetlb(folio))
0057 VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page);
0058 else
0059
0060 VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page) &&
0061 !PageAnonExclusive(page), page);
0062 }
0063 }
0064
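/*
 * try_get_folio() - return the folio containing @page with its refcount
 * raised by @refs, or NULL if the reference could not be taken (for
 * example because the folio was concurrently being split or freed).
 */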
0069 static inline struct folio *try_get_folio(struct page *page, int refs)
0070 {
0071 struct folio *folio;
0072
0073 retry:
0074 folio = page_folio(page);
0075 if (WARN_ON_ONCE(folio_ref_count(folio) < 0))
0076 return NULL;
0077 if (unlikely(!folio_ref_try_add_rcu(folio, refs)))
0078 return NULL;
0079
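	/*
	 * The folio could have been split or freed and the page reallocated
	 * while the reference was being acquired, so re-check that @page
	 * still belongs to this folio; if not, drop the references that were
	 * just taken and retry.
	 */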
0089 if (unlikely(page_folio(page) != folio)) {
0090 if (!put_devmap_managed_page_refs(&folio->page, refs))
0091 folio_put_refs(folio, refs);
0092 goto retry;
0093 }
0094
0095 return folio;
0096 }
0097
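/*
 * try_grab_folio() - add @refs references to the folio containing @page,
 * in a manner that matches the gup flags. Exactly one of FOLL_GET or
 * FOLL_PIN must be set.
 *
 * FOLL_GET takes plain references. FOLL_PIN additionally records the pin,
 * either in the folio's pincount (large folios) or by biasing the refcount
 * by GUP_PIN_COUNTING_BIAS, so that unpin_user_page() and
 * page_maybe_dma_pinned() behave as expected.
 *
 * Returns the folio on success, or NULL on failure (or if neither flag was
 * given).
 */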
0124 struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
0125 {
0126 if (flags & FOLL_GET)
0127 return try_get_folio(page, refs);
0128 else if (flags & FOLL_PIN) {
0129 struct folio *folio;
0130
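		/*
		 * Refuse FOLL_LONGTERM pins of pages that cannot be pinned
		 * long term (e.g. CMA or ZONE_MOVABLE pages); the fast-GUP
		 * callers then fall back to the slow path, where such pages
		 * can be migrated first.
		 */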
0136 if (unlikely((flags & FOLL_LONGTERM) &&
0137 !is_longterm_pinnable_page(page)))
0138 return NULL;
0139
0140
0141
0142
0143
0144 folio = try_get_folio(page, refs);
0145 if (!folio)
0146 return NULL;
0147
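		/*
		 * Record the pin: large folios track pins in a dedicated
		 * pincount field, while small folios encode them by biasing
		 * the refcount by GUP_PIN_COUNTING_BIAS. The plain references
		 * were already taken by try_get_folio() above.
		 */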
0156 if (folio_test_large(folio))
0157 atomic_add(refs, folio_pincount_ptr(folio));
0158 else
0159 folio_ref_add(folio,
0160 refs * (GUP_PIN_COUNTING_BIAS - 1));
0161 node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
0162
0163 return folio;
0164 }
0165
0166 WARN_ON_ONCE(1);
0167 return NULL;
0168 }
0169
0170 static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
0171 {
0172 if (flags & FOLL_PIN) {
0173 node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs);
0174 if (folio_test_large(folio))
0175 atomic_sub(refs, folio_pincount_ptr(folio));
0176 else
0177 refs *= GUP_PIN_COUNTING_BIAS;
0178 }
0179
0180 if (!put_devmap_managed_page_refs(&folio->page, refs))
0181 folio_put_refs(folio, refs);
0182 }
0183
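/*
 * try_grab_page() - raise the page's refcount according to @flags: one
 * plain reference for FOLL_GET, or a tracked pin for FOLL_PIN (pincount
 * for large folios, GUP_PIN_COUNTING_BIAS otherwise). Callers are expected
 * to already hold at least one reference (typically via the page table
 * lock); the function warns and returns false if the refcount is not
 * positive, and true otherwise.
 */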
0202 bool __must_check try_grab_page(struct page *page, unsigned int flags)
0203 {
0204 struct folio *folio = page_folio(page);
0205
0206 WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));
0207 if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))
0208 return false;
0209
0210 if (flags & FOLL_GET)
0211 folio_ref_inc(folio);
0212 else if (flags & FOLL_PIN) {
0213
0214
0215
0216
0217
0218 if (folio_test_large(folio)) {
0219 folio_ref_add(folio, 1);
0220 atomic_add(1, folio_pincount_ptr(folio));
0221 } else {
0222 folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
0223 }
0224
0225 node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1);
0226 }
0227
0228 return true;
0229 }
0230
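/*
 * unpin_user_page() - release a page that was pinned via pin_user_pages*()
 * (i.e. with FOLL_PIN). This is the counterpart of that pin and undoes the
 * pin accounting; pages obtained with get_user_pages*() must instead be
 * released with put_page().
 */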
0240 void unpin_user_page(struct page *page)
0241 {
0242 sanity_check_pinned_pages(&page, 1);
0243 gup_put_folio(page_folio(page), 1, FOLL_PIN);
0244 }
0245 EXPORT_SYMBOL(unpin_user_page);
0246
0247 static inline struct folio *gup_folio_range_next(struct page *start,
0248 unsigned long npages, unsigned long i, unsigned int *ntails)
0249 {
0250 struct page *next = nth_page(start, i);
0251 struct folio *folio = page_folio(next);
0252 unsigned int nr = 1;
0253
0254 if (folio_test_large(folio))
0255 nr = min_t(unsigned int, npages - i,
0256 folio_nr_pages(folio) - folio_page_idx(folio, next));
0257
0258 *ntails = nr;
0259 return folio;
0260 }
0261
0262 static inline struct folio *gup_folio_next(struct page **list,
0263 unsigned long npages, unsigned long i, unsigned int *ntails)
0264 {
0265 struct folio *folio = page_folio(list[i]);
0266 unsigned int nr;
0267
0268 for (nr = i + 1; nr < npages; nr++) {
0269 if (page_folio(list[nr]) != folio)
0270 break;
0271 }
0272
0273 *ntails = nr - i;
0274 return folio;
0275 }
0276
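/*
 * unpin_user_pages_dirty_lock() - release, and optionally dirty, an array
 * of gup-pinned pages. With @make_dirty == false this is equivalent to
 * unpin_user_pages(). Otherwise each folio that is not already dirty is
 * marked dirty under the folio lock before its pins are dropped.
 */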
0299 void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
0300 bool make_dirty)
0301 {
0302 unsigned long i;
0303 struct folio *folio;
0304 unsigned int nr;
0305
0306 if (!make_dirty) {
0307 unpin_user_pages(pages, npages);
0308 return;
0309 }
0310
0311 sanity_check_pinned_pages(pages, npages);
0312 for (i = 0; i < npages; i += nr) {
0313 folio = gup_folio_next(pages, npages, i, &nr);
0314
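		/*
		 * Checking folio_test_dirty() without the folio lock first is
		 * only an optimisation: if the folio is already dirty there is
		 * nothing to do, and the unlocked check races with cleaning in
		 * the same way that set_page_dirty_lock() callers always have.
		 */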
0334 if (!folio_test_dirty(folio)) {
0335 folio_lock(folio);
0336 folio_mark_dirty(folio);
0337 folio_unlock(folio);
0338 }
0339 gup_put_folio(folio, nr, FOLL_PIN);
0340 }
0341 }
0342 EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
0343
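/*
 * unpin_user_page_range_dirty_lock() - like unpin_user_pages_dirty_lock(),
 * but operates on a physically contiguous range of @npages pages starting
 * at @page instead of on an array of page pointers.
 */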
0365 void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
0366 bool make_dirty)
0367 {
0368 unsigned long i;
0369 struct folio *folio;
0370 unsigned int nr;
0371
0372 for (i = 0; i < npages; i += nr) {
0373 folio = gup_folio_range_next(page, npages, i, &nr);
0374 if (make_dirty && !folio_test_dirty(folio)) {
0375 folio_lock(folio);
0376 folio_mark_dirty(folio);
0377 folio_unlock(folio);
0378 }
0379 gup_put_folio(folio, nr, FOLL_PIN);
0380 }
0381 }
0382 EXPORT_SYMBOL(unpin_user_page_range_dirty_lock);
0383
0384 static void unpin_user_pages_lockless(struct page **pages, unsigned long npages)
0385 {
0386 unsigned long i;
0387 struct folio *folio;
0388 unsigned int nr;
0389
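	/*
	 * Skip the usual sanity checks: this path is used after the lockless
	 * walk has raced (e.g. with fork()), so some anonymous pages may no
	 * longer be exclusively mapped -- which is exactly why they are being
	 * unpinned again.
	 */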
0395 for (i = 0; i < npages; i += nr) {
0396 folio = gup_folio_next(pages, npages, i, &nr);
0397 gup_put_folio(folio, nr, FOLL_PIN);
0398 }
0399 }
0400
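/*
 * unpin_user_pages() - release an array of pages that were pinned with
 * FOLL_PIN (e.g. via pin_user_pages()), dropping the pins and the
 * associated accounting.
 */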
0410 void unpin_user_pages(struct page **pages, unsigned long npages)
0411 {
0412 unsigned long i;
0413 struct folio *folio;
0414 unsigned int nr;
0415
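	/*
	 * If this WARN_ON() fires, the most likely cause is that a negative
	 * error value from get/pin_user_pages*() was passed here as a page
	 * count, rather than pages genuinely being leaked.
	 */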
0421 if (WARN_ON(IS_ERR_VALUE(npages)))
0422 return;
0423
0424 sanity_check_pinned_pages(pages, npages);
0425 for (i = 0; i < npages; i += nr) {
0426 folio = gup_folio_next(pages, npages, i, &nr);
0427 gup_put_folio(folio, nr, FOLL_PIN);
0428 }
0429 }
0430 EXPORT_SYMBOL(unpin_user_pages);
0431
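/*
 * Set MMF_HAS_PINNED the first time an mm gets pages pinned; the unlocked
 * test_bit() merely avoids repeating the atomic set_bit() once the flag is
 * already set.
 */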
0437 static inline void mm_set_has_pinned_flag(unsigned long *mm_flags)
0438 {
0439 if (!test_bit(MMF_HAS_PINNED, mm_flags))
0440 set_bit(MMF_HAS_PINNED, mm_flags);
0441 }
0442
0443 #ifdef CONFIG_MMU
0444 static struct page *no_page_table(struct vm_area_struct *vma,
0445 unsigned int flags)
0446 {
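	/*
	 * When dumping core, don't fault in huge amounts of never-touched
	 * anonymous memory just to fill the dump with zero pages: return
	 * -EFAULT so that get_dump_page() leaves a hole instead. This is only
	 * done where a fault would surely have produced a zero-filled page.
	 */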
0455 if ((flags & FOLL_DUMP) &&
0456 (vma_is_anonymous(vma) || !vma->vm_ops->fault))
0457 return ERR_PTR(-EFAULT);
0458 return NULL;
0459 }
0460
0461 static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
0462 pte_t *pte, unsigned int flags)
0463 {
0464 if (flags & FOLL_TOUCH) {
0465 pte_t entry = *pte;
0466
0467 if (flags & FOLL_WRITE)
0468 entry = pte_mkdirty(entry);
0469 entry = pte_mkyoung(entry);
0470
0471 if (!pte_same(*pte, entry)) {
0472 set_pte_at(vma->vm_mm, address, pte, entry);
0473 update_mmu_cache(vma, address, pte);
0474 }
0475 }
0476
0477
0478 return -EEXIST;
0479 }
0480
0481
0482 static inline bool can_follow_write_pte(pte_t pte, struct page *page,
0483 struct vm_area_struct *vma,
0484 unsigned int flags)
0485 {
0486
0487 if (pte_write(pte))
0488 return true;
0489
0490
0491 if (!(flags & FOLL_FORCE))
0492 return false;
0493
0494
0495 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
0496 return false;
0497
0498
0499 if (!(vma->vm_flags & VM_MAYWRITE))
0500 return false;
0501
0502
0503 if (vma->vm_flags & VM_WRITE)
0504 return false;
0505
0506
0507
0508
0509
0510 if (!page || !PageAnon(page) || !PageAnonExclusive(page))
0511 return false;
0512
0513
0514 if (vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte))
0515 return false;
0516 return !userfaultfd_pte_wp(vma, pte);
0517 }
0518
0519 static struct page *follow_page_pte(struct vm_area_struct *vma,
0520 unsigned long address, pmd_t *pmd, unsigned int flags,
0521 struct dev_pagemap **pgmap)
0522 {
0523 struct mm_struct *mm = vma->vm_mm;
0524 struct page *page;
0525 spinlock_t *ptl;
0526 pte_t *ptep, pte;
0527 int ret;
0528
0529
0530 if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
0531 (FOLL_PIN | FOLL_GET)))
0532 return ERR_PTR(-EINVAL);
0533 retry:
0534 if (unlikely(pmd_bad(*pmd)))
0535 return no_page_table(vma, flags);
0536
0537 ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
0538 pte = *ptep;
0539 if (!pte_present(pte)) {
0540 swp_entry_t entry;
0541
0542
0543
0544
0545
0546 if (likely(!(flags & FOLL_MIGRATION)))
0547 goto no_page;
0548 if (pte_none(pte))
0549 goto no_page;
0550 entry = pte_to_swp_entry(pte);
0551 if (!is_migration_entry(entry))
0552 goto no_page;
0553 pte_unmap_unlock(ptep, ptl);
0554 migration_entry_wait(mm, pmd, address);
0555 goto retry;
0556 }
0557 if ((flags & FOLL_NUMA) && pte_protnone(pte))
0558 goto no_page;
0559
0560 page = vm_normal_page(vma, address, pte);
0561
0562
0563
0564
0565
0566 if ((flags & FOLL_WRITE) &&
0567 !can_follow_write_pte(pte, page, vma, flags)) {
0568 page = NULL;
0569 goto out;
0570 }
0571
0572 if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) {
0573
0574
0575
0576
0577
0578 *pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
0579 if (*pgmap)
0580 page = pte_page(pte);
0581 else
0582 goto no_page;
0583 } else if (unlikely(!page)) {
0584 if (flags & FOLL_DUMP) {
0585
0586 page = ERR_PTR(-EFAULT);
0587 goto out;
0588 }
0589
0590 if (is_zero_pfn(pte_pfn(pte))) {
0591 page = pte_page(pte);
0592 } else {
0593 ret = follow_pfn_pte(vma, address, ptep, flags);
0594 page = ERR_PTR(ret);
0595 goto out;
0596 }
0597 }
0598
0599 if (!pte_write(pte) && gup_must_unshare(flags, page)) {
0600 page = ERR_PTR(-EMLINK);
0601 goto out;
0602 }
0603
0604 VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
0605 !PageAnonExclusive(page), page);
0606
0607
0608 if (unlikely(!try_grab_page(page, flags))) {
0609 page = ERR_PTR(-ENOMEM);
0610 goto out;
0611 }
0612
0613
0614
0615
0616
0617 if (flags & FOLL_PIN) {
0618 ret = arch_make_page_accessible(page);
0619 if (ret) {
0620 unpin_user_page(page);
0621 page = ERR_PTR(ret);
0622 goto out;
0623 }
0624 }
0625 if (flags & FOLL_TOUCH) {
0626 if ((flags & FOLL_WRITE) &&
0627 !pte_dirty(pte) && !PageDirty(page))
0628 set_page_dirty(page);
0629
0630
0631
0632
0633
0634 mark_page_accessed(page);
0635 }
0636 out:
0637 pte_unmap_unlock(ptep, ptl);
0638 return page;
0639 no_page:
0640 pte_unmap_unlock(ptep, ptl);
0641 if (!pte_none(pte))
0642 return NULL;
0643 return no_page_table(vma, flags);
0644 }
0645
0646 static struct page *follow_pmd_mask(struct vm_area_struct *vma,
0647 unsigned long address, pud_t *pudp,
0648 unsigned int flags,
0649 struct follow_page_context *ctx)
0650 {
0651 pmd_t *pmd, pmdval;
0652 spinlock_t *ptl;
0653 struct page *page;
0654 struct mm_struct *mm = vma->vm_mm;
0655
0656 pmd = pmd_offset(pudp, address);
0657
0658
0659
0660
0661 pmdval = READ_ONCE(*pmd);
0662 if (pmd_none(pmdval))
0663 return no_page_table(vma, flags);
0664 if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) {
0665 page = follow_huge_pmd(mm, address, pmd, flags);
0666 if (page)
0667 return page;
0668 return no_page_table(vma, flags);
0669 }
0670 if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
0671 page = follow_huge_pd(vma, address,
0672 __hugepd(pmd_val(pmdval)), flags,
0673 PMD_SHIFT);
0674 if (page)
0675 return page;
0676 return no_page_table(vma, flags);
0677 }
0678 retry:
0679 if (!pmd_present(pmdval)) {
0680
0681
0682
0683
0684 VM_BUG_ON(!thp_migration_supported() ||
0685 !is_pmd_migration_entry(pmdval));
0686
0687 if (likely(!(flags & FOLL_MIGRATION)))
0688 return no_page_table(vma, flags);
0689
0690 pmd_migration_entry_wait(mm, pmd);
0691 pmdval = READ_ONCE(*pmd);
0692
0693
0694
0695
0696 if (pmd_none(pmdval))
0697 return no_page_table(vma, flags);
0698 goto retry;
0699 }
0700 if (pmd_devmap(pmdval)) {
0701 ptl = pmd_lock(mm, pmd);
0702 page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
0703 spin_unlock(ptl);
0704 if (page)
0705 return page;
0706 }
0707 if (likely(!pmd_trans_huge(pmdval)))
0708 return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
0709
0710 if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
0711 return no_page_table(vma, flags);
0712
0713 retry_locked:
0714 ptl = pmd_lock(mm, pmd);
0715 if (unlikely(pmd_none(*pmd))) {
0716 spin_unlock(ptl);
0717 return no_page_table(vma, flags);
0718 }
0719 if (unlikely(!pmd_present(*pmd))) {
0720 spin_unlock(ptl);
0721 if (likely(!(flags & FOLL_MIGRATION)))
0722 return no_page_table(vma, flags);
0723 pmd_migration_entry_wait(mm, pmd);
0724 goto retry_locked;
0725 }
0726 if (unlikely(!pmd_trans_huge(*pmd))) {
0727 spin_unlock(ptl);
0728 return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
0729 }
0730 if (flags & FOLL_SPLIT_PMD) {
0731 int ret;
0732 page = pmd_page(*pmd);
0733 if (is_huge_zero_page(page)) {
0734 spin_unlock(ptl);
0735 ret = 0;
0736 split_huge_pmd(vma, pmd, address);
0737 if (pmd_trans_unstable(pmd))
0738 ret = -EBUSY;
0739 } else {
0740 spin_unlock(ptl);
0741 split_huge_pmd(vma, pmd, address);
0742 ret = pte_alloc(mm, pmd) ? -ENOMEM : 0;
0743 }
0744
0745 return ret ? ERR_PTR(ret) :
0746 follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
0747 }
0748 page = follow_trans_huge_pmd(vma, address, pmd, flags);
0749 spin_unlock(ptl);
0750 ctx->page_mask = HPAGE_PMD_NR - 1;
0751 return page;
0752 }
0753
0754 static struct page *follow_pud_mask(struct vm_area_struct *vma,
0755 unsigned long address, p4d_t *p4dp,
0756 unsigned int flags,
0757 struct follow_page_context *ctx)
0758 {
0759 pud_t *pud;
0760 spinlock_t *ptl;
0761 struct page *page;
0762 struct mm_struct *mm = vma->vm_mm;
0763
0764 pud = pud_offset(p4dp, address);
0765 if (pud_none(*pud))
0766 return no_page_table(vma, flags);
0767 if (pud_huge(*pud) && is_vm_hugetlb_page(vma)) {
0768 page = follow_huge_pud(mm, address, pud, flags);
0769 if (page)
0770 return page;
0771 return no_page_table(vma, flags);
0772 }
0773 if (is_hugepd(__hugepd(pud_val(*pud)))) {
0774 page = follow_huge_pd(vma, address,
0775 __hugepd(pud_val(*pud)), flags,
0776 PUD_SHIFT);
0777 if (page)
0778 return page;
0779 return no_page_table(vma, flags);
0780 }
0781 if (pud_devmap(*pud)) {
0782 ptl = pud_lock(mm, pud);
0783 page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
0784 spin_unlock(ptl);
0785 if (page)
0786 return page;
0787 }
0788 if (unlikely(pud_bad(*pud)))
0789 return no_page_table(vma, flags);
0790
0791 return follow_pmd_mask(vma, address, pud, flags, ctx);
0792 }
0793
0794 static struct page *follow_p4d_mask(struct vm_area_struct *vma,
0795 unsigned long address, pgd_t *pgdp,
0796 unsigned int flags,
0797 struct follow_page_context *ctx)
0798 {
0799 p4d_t *p4d;
0800 struct page *page;
0801
0802 p4d = p4d_offset(pgdp, address);
0803 if (p4d_none(*p4d))
0804 return no_page_table(vma, flags);
0805 BUILD_BUG_ON(p4d_huge(*p4d));
0806 if (unlikely(p4d_bad(*p4d)))
0807 return no_page_table(vma, flags);
0808
0809 if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
0810 page = follow_huge_pd(vma, address,
0811 __hugepd(p4d_val(*p4d)), flags,
0812 P4D_SHIFT);
0813 if (page)
0814 return page;
0815 return no_page_table(vma, flags);
0816 }
0817 return follow_pud_mask(vma, address, p4d, flags, ctx);
0818 }
0819
0820
0821
0822
0823
0824
0825
0826
0827
0828
0829
0830
0831
0832
0833
0834
0835
0836
0837
0838
0839
0840
0841
0842
0843
0844 static struct page *follow_page_mask(struct vm_area_struct *vma,
0845 unsigned long address, unsigned int flags,
0846 struct follow_page_context *ctx)
0847 {
0848 pgd_t *pgd;
0849 struct page *page;
0850 struct mm_struct *mm = vma->vm_mm;
0851
0852 ctx->page_mask = 0;
0853
0854
0855 page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
0856 if (!IS_ERR(page)) {
0857 WARN_ON_ONCE(flags & (FOLL_GET | FOLL_PIN));
0858 return page;
0859 }
0860
0861 pgd = pgd_offset(mm, address);
0862
0863 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
0864 return no_page_table(vma, flags);
0865
0866 if (pgd_huge(*pgd)) {
0867 page = follow_huge_pgd(mm, address, pgd, flags);
0868 if (page)
0869 return page;
0870 return no_page_table(vma, flags);
0871 }
0872 if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
0873 page = follow_huge_pd(vma, address,
0874 __hugepd(pgd_val(*pgd)), flags,
0875 PGDIR_SHIFT);
0876 if (page)
0877 return page;
0878 return no_page_table(vma, flags);
0879 }
0880
0881 return follow_p4d_mask(vma, address, pgd, flags, ctx);
0882 }
0883
0884 struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
0885 unsigned int foll_flags)
0886 {
0887 struct follow_page_context ctx = { NULL };
0888 struct page *page;
0889
0890 if (vma_is_secretmem(vma))
0891 return NULL;
0892
0893 if (foll_flags & FOLL_PIN)
0894 return NULL;
0895
0896 page = follow_page_mask(vma, address, foll_flags, &ctx);
0897 if (ctx.pgmap)
0898 put_dev_pagemap(ctx.pgmap);
0899 return page;
0900 }
0901
0902 static int get_gate_page(struct mm_struct *mm, unsigned long address,
0903 unsigned int gup_flags, struct vm_area_struct **vma,
0904 struct page **page)
0905 {
0906 pgd_t *pgd;
0907 p4d_t *p4d;
0908 pud_t *pud;
0909 pmd_t *pmd;
0910 pte_t *pte;
0911 int ret = -EFAULT;
0912
0913
0914 if (gup_flags & FOLL_WRITE)
0915 return -EFAULT;
0916 if (address > TASK_SIZE)
0917 pgd = pgd_offset_k(address);
0918 else
0919 pgd = pgd_offset_gate(mm, address);
0920 if (pgd_none(*pgd))
0921 return -EFAULT;
0922 p4d = p4d_offset(pgd, address);
0923 if (p4d_none(*p4d))
0924 return -EFAULT;
0925 pud = pud_offset(p4d, address);
0926 if (pud_none(*pud))
0927 return -EFAULT;
0928 pmd = pmd_offset(pud, address);
0929 if (!pmd_present(*pmd))
0930 return -EFAULT;
0931 VM_BUG_ON(pmd_trans_huge(*pmd));
0932 pte = pte_offset_map(pmd, address);
0933 if (pte_none(*pte))
0934 goto unmap;
0935 *vma = get_gate_vma(mm);
0936 if (!page)
0937 goto out;
0938 *page = vm_normal_page(*vma, address, *pte);
0939 if (!*page) {
0940 if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
0941 goto unmap;
0942 *page = pte_page(*pte);
0943 }
0944 if (unlikely(!try_grab_page(*page, gup_flags))) {
0945 ret = -ENOMEM;
0946 goto unmap;
0947 }
0948 out:
0949 ret = 0;
0950 unmap:
0951 pte_unmap(pte);
0952 return ret;
0953 }
0954
0955
0956
0957
0958
0959
0960 static int faultin_page(struct vm_area_struct *vma,
0961 unsigned long address, unsigned int *flags, bool unshare,
0962 int *locked)
0963 {
0964 unsigned int fault_flags = 0;
0965 vm_fault_t ret;
0966
0967 if (*flags & FOLL_NOFAULT)
0968 return -EFAULT;
0969 if (*flags & FOLL_WRITE)
0970 fault_flags |= FAULT_FLAG_WRITE;
0971 if (*flags & FOLL_REMOTE)
0972 fault_flags |= FAULT_FLAG_REMOTE;
0973 if (locked)
0974 fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
0975 if (*flags & FOLL_NOWAIT)
0976 fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
0977 if (*flags & FOLL_TRIED) {
0978
0979
0980
0981
0982 fault_flags |= FAULT_FLAG_TRIED;
0983 }
0984 if (unshare) {
0985 fault_flags |= FAULT_FLAG_UNSHARE;
0986
0987 VM_BUG_ON(fault_flags & FAULT_FLAG_WRITE);
0988 }
0989
0990 ret = handle_mm_fault(vma, address, fault_flags, NULL);
0991
0992 if (ret & VM_FAULT_COMPLETED) {
0993
0994
0995
0996
0997 WARN_ON_ONCE(fault_flags & FAULT_FLAG_RETRY_NOWAIT);
0998 if (locked)
0999 *locked = 0;
1000
1001
1002
1003
1004
1005
1006
1007 return -EAGAIN;
1008 }
1009
1010 if (ret & VM_FAULT_ERROR) {
1011 int err = vm_fault_to_errno(ret, *flags);
1012
1013 if (err)
1014 return err;
1015 BUG();
1016 }
1017
1018 if (ret & VM_FAULT_RETRY) {
1019 if (locked && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
1020 *locked = 0;
1021 return -EBUSY;
1022 }
1023
1024 return 0;
1025 }
1026
1027 static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
1028 {
1029 vm_flags_t vm_flags = vma->vm_flags;
1030 int write = (gup_flags & FOLL_WRITE);
1031 int foreign = (gup_flags & FOLL_REMOTE);
1032
1033 if (vm_flags & (VM_IO | VM_PFNMAP))
1034 return -EFAULT;
1035
1036 if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma))
1037 return -EFAULT;
1038
1039 if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma))
1040 return -EOPNOTSUPP;
1041
1042 if (vma_is_secretmem(vma))
1043 return -EFAULT;
1044
1045 if (write) {
1046 if (!(vm_flags & VM_WRITE)) {
1047 if (!(gup_flags & FOLL_FORCE))
1048 return -EFAULT;
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058 if (!is_cow_mapping(vm_flags))
1059 return -EFAULT;
1060 }
1061 } else if (!(vm_flags & VM_READ)) {
1062 if (!(gup_flags & FOLL_FORCE))
1063 return -EFAULT;
1064
1065
1066
1067
1068 if (!(vm_flags & VM_MAYREAD))
1069 return -EFAULT;
1070 }
1071
1072
1073
1074
1075 if (!arch_vma_access_permitted(vma, write, false, foreign))
1076 return -EFAULT;
1077 return 0;
1078 }
1079
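/*
 * __get_user_pages() - pin user pages in memory
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying pin behaviour
 * @pages:	array that receives pointers to the pages pinned, or NULL
 * @vmas:	array of pointers to the vmas corresponding to each page, or NULL
 * @locked:	whether we're still with the mmap_lock held
 *
 * Returns the number of pages pinned (which may be fewer than requested),
 * or a negative errno if no pages were pinned. The caller must hold the
 * mmap_lock; it may be dropped while faulting, in which case *@locked is
 * set to 0.
 */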
1140 static long __get_user_pages(struct mm_struct *mm,
1141 unsigned long start, unsigned long nr_pages,
1142 unsigned int gup_flags, struct page **pages,
1143 struct vm_area_struct **vmas, int *locked)
1144 {
1145 long ret = 0, i = 0;
1146 struct vm_area_struct *vma = NULL;
1147 struct follow_page_context ctx = { NULL };
1148
1149 if (!nr_pages)
1150 return 0;
1151
1152 start = untagged_addr(start);
1153
1154 VM_BUG_ON(!!pages != !!(gup_flags & (FOLL_GET | FOLL_PIN)));
1155
1156
1157
1158
1159
1160
1161 if (!(gup_flags & FOLL_FORCE))
1162 gup_flags |= FOLL_NUMA;
1163
1164 do {
1165 struct page *page;
1166 unsigned int foll_flags = gup_flags;
1167 unsigned int page_increm;
1168
1169
1170 if (!vma || start >= vma->vm_end) {
1171 vma = find_extend_vma(mm, start);
1172 if (!vma && in_gate_area(mm, start)) {
1173 ret = get_gate_page(mm, start & PAGE_MASK,
1174 gup_flags, &vma,
1175 pages ? &pages[i] : NULL);
1176 if (ret)
1177 goto out;
1178 ctx.page_mask = 0;
1179 goto next_page;
1180 }
1181
1182 if (!vma) {
1183 ret = -EFAULT;
1184 goto out;
1185 }
1186 ret = check_vma_flags(vma, gup_flags);
1187 if (ret)
1188 goto out;
1189
1190 if (is_vm_hugetlb_page(vma)) {
1191 i = follow_hugetlb_page(mm, vma, pages, vmas,
1192 &start, &nr_pages, i,
1193 gup_flags, locked);
1194 if (locked && *locked == 0) {
1195
1196
1197
1198
1199
1200 BUG_ON(gup_flags & FOLL_NOWAIT);
1201 goto out;
1202 }
1203 continue;
1204 }
1205 }
1206 retry:
1207
1208
1209
1210
1211 if (fatal_signal_pending(current)) {
1212 ret = -EINTR;
1213 goto out;
1214 }
1215 cond_resched();
1216
1217 page = follow_page_mask(vma, start, foll_flags, &ctx);
1218 if (!page || PTR_ERR(page) == -EMLINK) {
1219 ret = faultin_page(vma, start, &foll_flags,
1220 PTR_ERR(page) == -EMLINK, locked);
1221 switch (ret) {
1222 case 0:
1223 goto retry;
1224 case -EBUSY:
1225 case -EAGAIN:
1226 ret = 0;
1227 fallthrough;
1228 case -EFAULT:
1229 case -ENOMEM:
1230 case -EHWPOISON:
1231 goto out;
1232 }
1233 BUG();
1234 } else if (PTR_ERR(page) == -EEXIST) {
1235
1236
1237
1238
1239
1240
1241 if (pages) {
1242 ret = PTR_ERR(page);
1243 goto out;
1244 }
1245
1246 goto next_page;
1247 } else if (IS_ERR(page)) {
1248 ret = PTR_ERR(page);
1249 goto out;
1250 }
1251 if (pages) {
1252 pages[i] = page;
1253 flush_anon_page(vma, page, start);
1254 flush_dcache_page(page);
1255 ctx.page_mask = 0;
1256 }
1257 next_page:
1258 if (vmas) {
1259 vmas[i] = vma;
1260 ctx.page_mask = 0;
1261 }
1262 page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask);
1263 if (page_increm > nr_pages)
1264 page_increm = nr_pages;
1265 i += page_increm;
1266 start += page_increm * PAGE_SIZE;
1267 nr_pages -= page_increm;
1268 } while (nr_pages);
1269 out:
1270 if (ctx.pgmap)
1271 put_dev_pagemap(ctx.pgmap);
1272 return i ? i : ret;
1273 }
1274
1275 static bool vma_permits_fault(struct vm_area_struct *vma,
1276 unsigned int fault_flags)
1277 {
1278 bool write = !!(fault_flags & FAULT_FLAG_WRITE);
1279 bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
1280 vm_flags_t vm_flags = write ? VM_WRITE : VM_READ;
1281
1282 if (!(vm_flags & vma->vm_flags))
1283 return false;
1284
1285
1286
1287
1288
1289
1290
1291
1292 if (!arch_vma_access_permitted(vma, write, false, foreign))
1293 return false;
1294
1295 return true;
1296 }
1297
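/*
 * fixup_user_fault() - manually resolve a user page fault
 * @mm:		mm_struct of target mm
 * @address:	user address
 * @fault_flags: flags to pass down to handle_mm_fault()
 * @unlocked:	set to true if the mmap_lock was dropped and re-taken while
 *		retrying; pass NULL if retrying is not allowed
 *
 * Intended for callers (e.g. the futex code) that have already dealt with a
 * failed copy_from_user()/copy_to_user() and just need the fault serviced.
 * Returns 0 on success or a negative errno. The mmap_lock must be held on
 * entry and is still held on return.
 */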
1327 int fixup_user_fault(struct mm_struct *mm,
1328 unsigned long address, unsigned int fault_flags,
1329 bool *unlocked)
1330 {
1331 struct vm_area_struct *vma;
1332 vm_fault_t ret;
1333
1334 address = untagged_addr(address);
1335
1336 if (unlocked)
1337 fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
1338
1339 retry:
1340 vma = find_extend_vma(mm, address);
1341 if (!vma || address < vma->vm_start)
1342 return -EFAULT;
1343
1344 if (!vma_permits_fault(vma, fault_flags))
1345 return -EFAULT;
1346
1347 if ((fault_flags & FAULT_FLAG_KILLABLE) &&
1348 fatal_signal_pending(current))
1349 return -EINTR;
1350
1351 ret = handle_mm_fault(vma, address, fault_flags, NULL);
1352
1353 if (ret & VM_FAULT_COMPLETED) {
1354
1355
1356
1357
1358
1359 mmap_read_lock(mm);
1360 *unlocked = true;
1361 return 0;
1362 }
1363
1364 if (ret & VM_FAULT_ERROR) {
1365 int err = vm_fault_to_errno(ret, 0);
1366
1367 if (err)
1368 return err;
1369 BUG();
1370 }
1371
1372 if (ret & VM_FAULT_RETRY) {
1373 mmap_read_lock(mm);
1374 *unlocked = true;
1375 fault_flags |= FAULT_FLAG_TRIED;
1376 goto retry;
1377 }
1378
1379 return 0;
1380 }
1381 EXPORT_SYMBOL_GPL(fixup_user_fault);
1382
1383
1384
1385
1386
1387 static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
1388 unsigned long start,
1389 unsigned long nr_pages,
1390 struct page **pages,
1391 struct vm_area_struct **vmas,
1392 int *locked,
1393 unsigned int flags)
1394 {
1395 long ret, pages_done;
1396 bool lock_dropped;
1397
1398 if (locked) {
1399
1400 BUG_ON(vmas);
1401
1402 BUG_ON(*locked != 1);
1403 }
1404
1405 if (flags & FOLL_PIN)
1406 mm_set_has_pinned_flag(&mm->flags);
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417 if (pages && !(flags & FOLL_PIN))
1418 flags |= FOLL_GET;
1419
1420 pages_done = 0;
1421 lock_dropped = false;
1422 for (;;) {
1423 ret = __get_user_pages(mm, start, nr_pages, flags, pages,
1424 vmas, locked);
1425 if (!locked)
1426
1427 return ret;
1428
1429
1430 if (!*locked) {
1431 BUG_ON(ret < 0);
1432 BUG_ON(ret >= nr_pages);
1433 }
1434
1435 if (ret > 0) {
1436 nr_pages -= ret;
1437 pages_done += ret;
1438 if (!nr_pages)
1439 break;
1440 }
1441 if (*locked) {
1442
1443
1444
1445
1446 if (!pages_done)
1447 pages_done = ret;
1448 break;
1449 }
1450
1451
1452
1453
1454 if (likely(pages))
1455 pages += ret;
1456 start += ret << PAGE_SHIFT;
1457 lock_dropped = true;
1458
1459 retry:
1460
1461
1462
1463
1464
1465
1466
1467
1468 if (fatal_signal_pending(current)) {
1469 if (!pages_done)
1470 pages_done = -EINTR;
1471 break;
1472 }
1473
1474 ret = mmap_read_lock_killable(mm);
1475 if (ret) {
1476 BUG_ON(ret > 0);
1477 if (!pages_done)
1478 pages_done = ret;
1479 break;
1480 }
1481
1482 *locked = 1;
1483 ret = __get_user_pages(mm, start, 1, flags | FOLL_TRIED,
1484 pages, NULL, locked);
1485 if (!*locked) {
1486
1487 BUG_ON(ret != 0);
1488 goto retry;
1489 }
1490 if (ret != 1) {
1491 BUG_ON(ret > 1);
1492 if (!pages_done)
1493 pages_done = ret;
1494 break;
1495 }
1496 nr_pages--;
1497 pages_done++;
1498 if (!nr_pages)
1499 break;
1500 if (likely(pages))
1501 pages++;
1502 start += PAGE_SIZE;
1503 }
1504 if (lock_dropped && *locked) {
1505
1506
1507
1508
1509 mmap_read_unlock(mm);
1510 *locked = 0;
1511 }
1512 return pages_done;
1513 }
1514
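/*
 * populate_vma_page_range() - fault in a page-aligned range of the given
 * vma via __get_user_pages() with FOLL_TOUCH (plus FOLL_WRITE for writable
 * private mappings), as used by mlock() and mm_populate(). VM_LOCKONFAULT
 * vmas are not populated and nr_pages is returned immediately. Returns the
 * number of pages processed or a negative error; the caller must hold the
 * mmap_lock, which may be dropped (reported via *@locked).
 */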
1535 long populate_vma_page_range(struct vm_area_struct *vma,
1536 unsigned long start, unsigned long end, int *locked)
1537 {
1538 struct mm_struct *mm = vma->vm_mm;
1539 unsigned long nr_pages = (end - start) / PAGE_SIZE;
1540 int gup_flags;
1541 long ret;
1542
1543 VM_BUG_ON(!PAGE_ALIGNED(start));
1544 VM_BUG_ON(!PAGE_ALIGNED(end));
1545 VM_BUG_ON_VMA(start < vma->vm_start, vma);
1546 VM_BUG_ON_VMA(end > vma->vm_end, vma);
1547 mmap_assert_locked(mm);
1548
1549
1550
1551
1552
1553 if (vma->vm_flags & VM_LOCKONFAULT)
1554 return nr_pages;
1555
1556 gup_flags = FOLL_TOUCH;
1557
1558
1559
1560
1561
1562 if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
1563 gup_flags |= FOLL_WRITE;
1564
1565
1566
1567
1568
1569 if (vma_is_accessible(vma))
1570 gup_flags |= FOLL_FORCE;
1571
1572
1573
1574
1575
1576 ret = __get_user_pages(mm, start, nr_pages, gup_flags,
1577 NULL, NULL, locked);
1578 lru_add_drain();
1579 return ret;
1580 }
1581
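/*
 * faultin_vma_page_range() - populate (prefault) page tables inside the
 * given, page-aligned vma range, readably or writably depending on @write,
 * as used by MADV_POPULATE_READ/MADV_POPULATE_WRITE. Unlike
 * populate_vma_page_range(), hardware-poisoned pages are reported
 * (FOLL_HWPOISON) and vma permissions are verified with check_vma_flags(),
 * returning -EINVAL on a mismatch. The caller must hold the mmap_lock.
 */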
1605 long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
1606 unsigned long end, bool write, int *locked)
1607 {
1608 struct mm_struct *mm = vma->vm_mm;
1609 unsigned long nr_pages = (end - start) / PAGE_SIZE;
1610 int gup_flags;
1611 long ret;
1612
1613 VM_BUG_ON(!PAGE_ALIGNED(start));
1614 VM_BUG_ON(!PAGE_ALIGNED(end));
1615 VM_BUG_ON_VMA(start < vma->vm_start, vma);
1616 VM_BUG_ON_VMA(end > vma->vm_end, vma);
1617 mmap_assert_locked(mm);
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628 gup_flags = FOLL_TOUCH | FOLL_HWPOISON;
1629 if (write)
1630 gup_flags |= FOLL_WRITE;
1631
1632
1633
1634
1635
1636 if (check_vma_flags(vma, gup_flags))
1637 return -EINVAL;
1638
1639 ret = __get_user_pages(mm, start, nr_pages, gup_flags,
1640 NULL, NULL, locked);
1641 lru_add_drain();
1642 return ret;
1643 }
1644
1645
1646
1647
1648
1649
1650
1651
1652 int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
1653 {
1654 struct mm_struct *mm = current->mm;
1655 unsigned long end, nstart, nend;
1656 struct vm_area_struct *vma = NULL;
1657 int locked = 0;
1658 long ret = 0;
1659
1660 end = start + len;
1661
1662 for (nstart = start; nstart < end; nstart = nend) {
1663
1664
1665
1666
1667 if (!locked) {
1668 locked = 1;
1669 mmap_read_lock(mm);
1670 vma = find_vma(mm, nstart);
1671 } else if (nstart >= vma->vm_end)
1672 vma = vma->vm_next;
1673 if (!vma || vma->vm_start >= end)
1674 break;
1675
1676
1677
1678
1679 nend = min(end, vma->vm_end);
1680 if (vma->vm_flags & (VM_IO | VM_PFNMAP))
1681 continue;
1682 if (nstart < vma->vm_start)
1683 nstart = vma->vm_start;
1684
1685
1686
1687
1688
1689 ret = populate_vma_page_range(vma, nstart, nend, &locked);
1690 if (ret < 0) {
1691 if (ignore_errors) {
1692 ret = 0;
1693 continue;
1694 }
1695 break;
1696 }
1697 nend = nstart + ret * PAGE_SIZE;
1698 ret = 0;
1699 }
1700 if (locked)
1701 mmap_read_unlock(mm);
1702 return ret;
1703 }
1704 #else
1705 static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
1706 unsigned long nr_pages, struct page **pages,
1707 struct vm_area_struct **vmas, int *locked,
1708 unsigned int foll_flags)
1709 {
1710 struct vm_area_struct *vma;
1711 unsigned long vm_flags;
1712 long i;
1713
1714
1715
1716
1717 vm_flags = (foll_flags & FOLL_WRITE) ?
1718 (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
1719 vm_flags &= (foll_flags & FOLL_FORCE) ?
1720 (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
1721
1722 for (i = 0; i < nr_pages; i++) {
1723 vma = find_vma(mm, start);
1724 if (!vma)
1725 goto finish_or_fault;
1726
1727
1728 if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
1729 !(vm_flags & vma->vm_flags))
1730 goto finish_or_fault;
1731
1732 if (pages) {
1733 pages[i] = virt_to_page((void *)start);
1734 if (pages[i])
1735 get_page(pages[i]);
1736 }
1737 if (vmas)
1738 vmas[i] = vma;
1739 start = (start + PAGE_SIZE) & PAGE_MASK;
1740 }
1741
1742 return i;
1743
1744 finish_or_fault:
1745 return i ? : -EFAULT;
1746 }
1747 #endif
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757 size_t fault_in_writeable(char __user *uaddr, size_t size)
1758 {
1759 char __user *start = uaddr, *end;
1760
1761 if (unlikely(size == 0))
1762 return 0;
1763 if (!user_write_access_begin(uaddr, size))
1764 return size;
1765 if (!PAGE_ALIGNED(uaddr)) {
1766 unsafe_put_user(0, uaddr, out);
1767 uaddr = (char __user *)PAGE_ALIGN((unsigned long)uaddr);
1768 }
1769 end = (char __user *)PAGE_ALIGN((unsigned long)start + size);
1770 if (unlikely(end < start))
1771 end = NULL;
1772 while (uaddr != end) {
1773 unsafe_put_user(0, uaddr, out);
1774 uaddr += PAGE_SIZE;
1775 }
1776
1777 out:
1778 user_write_access_end();
1779 if (size > uaddr - start)
1780 return size - (uaddr - start);
1781 return 0;
1782 }
1783 EXPORT_SYMBOL(fault_in_writeable);
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797 size_t fault_in_subpage_writeable(char __user *uaddr, size_t size)
1798 {
1799 size_t faulted_in;
1800
1801
1802
1803
1804
1805
1806 faulted_in = size - fault_in_writeable(uaddr, size);
1807 if (faulted_in)
1808 faulted_in -= probe_subpage_writeable(uaddr, faulted_in);
1809
1810 return size - faulted_in;
1811 }
1812 EXPORT_SYMBOL(fault_in_subpage_writeable);
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832 size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
1833 {
1834 unsigned long start = (unsigned long)uaddr, end;
1835 struct mm_struct *mm = current->mm;
1836 bool unlocked = false;
1837
1838 if (unlikely(size == 0))
1839 return 0;
1840 end = PAGE_ALIGN(start + size);
1841 if (end < start)
1842 end = 0;
1843
1844 mmap_read_lock(mm);
1845 do {
1846 if (fixup_user_fault(mm, start, FAULT_FLAG_WRITE, &unlocked))
1847 break;
1848 start = (start + PAGE_SIZE) & PAGE_MASK;
1849 } while (start != end);
1850 mmap_read_unlock(mm);
1851
1852 if (size > (unsigned long)uaddr - start)
1853 return size - ((unsigned long)uaddr - start);
1854 return 0;
1855 }
1856 EXPORT_SYMBOL(fault_in_safe_writeable);
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866 size_t fault_in_readable(const char __user *uaddr, size_t size)
1867 {
1868 const char __user *start = uaddr, *end;
1869 volatile char c;
1870
1871 if (unlikely(size == 0))
1872 return 0;
1873 if (!user_read_access_begin(uaddr, size))
1874 return size;
1875 if (!PAGE_ALIGNED(uaddr)) {
1876 unsafe_get_user(c, uaddr, out);
1877 uaddr = (const char __user *)PAGE_ALIGN((unsigned long)uaddr);
1878 }
1879 end = (const char __user *)PAGE_ALIGN((unsigned long)start + size);
1880 if (unlikely(end < start))
1881 end = NULL;
1882 while (uaddr != end) {
1883 unsafe_get_user(c, uaddr, out);
1884 uaddr += PAGE_SIZE;
1885 }
1886
1887 out:
1888 user_read_access_end();
1889 (void)c;
1890 if (size > uaddr - start)
1891 return size - (uaddr - start);
1892 return 0;
1893 }
1894 EXPORT_SYMBOL(fault_in_readable);
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910 #ifdef CONFIG_ELF_CORE
1911 struct page *get_dump_page(unsigned long addr)
1912 {
1913 struct mm_struct *mm = current->mm;
1914 struct page *page;
1915 int locked = 1;
1916 int ret;
1917
1918 if (mmap_read_lock_killable(mm))
1919 return NULL;
1920 ret = __get_user_pages_locked(mm, addr, 1, &page, NULL, &locked,
1921 FOLL_FORCE | FOLL_DUMP | FOLL_GET);
1922 if (locked)
1923 mmap_read_unlock(mm);
1924 return (ret == 1) ? page : NULL;
1925 }
1926 #endif
1927
1928 #ifdef CONFIG_MIGRATION
1929
1930
1931
1932
1933
1934
1935 static long check_and_migrate_movable_pages(unsigned long nr_pages,
1936 struct page **pages,
1937 unsigned int gup_flags)
1938 {
1939 unsigned long isolation_error_count = 0, i;
1940 struct folio *prev_folio = NULL;
1941 LIST_HEAD(movable_page_list);
1942 bool drain_allow = true, coherent_pages = false;
1943 int ret = 0;
1944
1945 for (i = 0; i < nr_pages; i++) {
1946 struct folio *folio = page_folio(pages[i]);
1947
1948 if (folio == prev_folio)
1949 continue;
1950 prev_folio = folio;
1951
1952
1953
1954
1955
1956
1957
1958 if (folio_is_device_coherent(folio)) {
1959
1960
1961
1962
1963 pages[i] = NULL;
1964 coherent_pages = true;
1965
1966
1967
1968
1969
1970 if (gup_flags & FOLL_PIN) {
1971 get_page(&folio->page);
1972 unpin_user_page(&folio->page);
1973 }
1974
1975 ret = migrate_device_coherent_page(&folio->page);
1976 if (ret)
1977 goto unpin_pages;
1978
1979 continue;
1980 }
1981
1982 if (folio_is_longterm_pinnable(folio))
1983 continue;
1984
1985
1986
1987 if (folio_test_hugetlb(folio)) {
1988 if (isolate_hugetlb(&folio->page,
1989 &movable_page_list))
1990 isolation_error_count++;
1991 continue;
1992 }
1993
1994 if (!folio_test_lru(folio) && drain_allow) {
1995 lru_add_drain_all();
1996 drain_allow = false;
1997 }
1998
1999 if (folio_isolate_lru(folio)) {
2000 isolation_error_count++;
2001 continue;
2002 }
2003 list_add_tail(&folio->lru, &movable_page_list);
2004 node_stat_mod_folio(folio,
2005 NR_ISOLATED_ANON + folio_is_file_lru(folio),
2006 folio_nr_pages(folio));
2007 }
2008
2009 if (!list_empty(&movable_page_list) || isolation_error_count ||
2010 coherent_pages)
2011 goto unpin_pages;
2012
2013
2014
2015
2016
2017 return nr_pages;
2018
2019 unpin_pages:
2020
2021
2022
2023 for (i = 0; i < nr_pages; i++) {
2024 if (!pages[i])
2025 continue;
2026
2027 if (gup_flags & FOLL_PIN)
2028 unpin_user_page(pages[i]);
2029 else
2030 put_page(pages[i]);
2031 }
2032
2033 if (!list_empty(&movable_page_list)) {
2034 struct migration_target_control mtc = {
2035 .nid = NUMA_NO_NODE,
2036 .gfp_mask = GFP_USER | __GFP_NOWARN,
2037 };
2038
2039 ret = migrate_pages(&movable_page_list, alloc_migration_target,
2040 NULL, (unsigned long)&mtc, MIGRATE_SYNC,
2041 MR_LONGTERM_PIN, NULL);
2042 if (ret > 0)
2043 ret = -ENOMEM;
2044 }
2045
2046 if (ret && !list_empty(&movable_page_list))
2047 putback_movable_pages(&movable_page_list);
2048 return ret;
2049 }
2050 #else
2051 static long check_and_migrate_movable_pages(unsigned long nr_pages,
2052 struct page **pages,
2053 unsigned int gup_flags)
2054 {
2055 return nr_pages;
2056 }
2057 #endif
2058
2059
2060
2061
2062
2063 static long __gup_longterm_locked(struct mm_struct *mm,
2064 unsigned long start,
2065 unsigned long nr_pages,
2066 struct page **pages,
2067 struct vm_area_struct **vmas,
2068 unsigned int gup_flags)
2069 {
2070 unsigned int flags;
2071 long rc;
2072
2073 if (!(gup_flags & FOLL_LONGTERM))
2074 return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
2075 NULL, gup_flags);
2076 flags = memalloc_pin_save();
2077 do {
2078 rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
2079 NULL, gup_flags);
2080 if (rc <= 0)
2081 break;
2082 rc = check_and_migrate_movable_pages(rc, pages, gup_flags);
2083 } while (!rc);
2084 memalloc_pin_restore(flags);
2085
2086 return rc;
2087 }
2088
2089 static bool is_valid_gup_flags(unsigned int gup_flags)
2090 {
2091
2092
2093
2094
2095 if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
2096 return false;
2097
2098
2099
2100
2101
2102 if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
2103 return false;
2104
2105 return true;
2106 }
2107
2108 #ifdef CONFIG_MMU
2109 static long __get_user_pages_remote(struct mm_struct *mm,
2110 unsigned long start, unsigned long nr_pages,
2111 unsigned int gup_flags, struct page **pages,
2112 struct vm_area_struct **vmas, int *locked)
2113 {
2114
2115
2116
2117
2118
2119
2120
2121 if (gup_flags & FOLL_LONGTERM) {
2122 if (WARN_ON_ONCE(locked))
2123 return -EINVAL;
2124
2125
2126
2127
2128 return __gup_longterm_locked(mm, start, nr_pages, pages,
2129 vmas, gup_flags | FOLL_TOUCH |
2130 FOLL_REMOTE);
2131 }
2132
2133 return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
2134 locked,
2135 gup_flags | FOLL_TOUCH | FOLL_REMOTE);
2136 }
2137
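/*
 * get_user_pages_remote() - pin user pages in memory
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying lookup behaviour
 * @pages:	array that receives pointers to the pages pinned, or NULL
 * @vmas:	array of pointers to the vmas corresponding to each page, or NULL
 * @locked:	pointer to lock flag indicating whether the lock is held and
 *		whether VM_FAULT_RETRY functionality can be used
 *
 * Like get_user_pages(), but operates on an arbitrary @mm rather than
 * current->mm, which makes it suitable for accessing another process's
 * address space (e.g. ptrace, access_process_vm()). Returns the number of
 * pages pinned or a negative errno; the caller must hold a reference on
 * @mm and its mmap_lock.
 */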
2198 long get_user_pages_remote(struct mm_struct *mm,
2199 unsigned long start, unsigned long nr_pages,
2200 unsigned int gup_flags, struct page **pages,
2201 struct vm_area_struct **vmas, int *locked)
2202 {
2203 if (!is_valid_gup_flags(gup_flags))
2204 return -EINVAL;
2205
2206 return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
2207 pages, vmas, locked);
2208 }
2209 EXPORT_SYMBOL(get_user_pages_remote);
2210
2211 #else
2212 long get_user_pages_remote(struct mm_struct *mm,
2213 unsigned long start, unsigned long nr_pages,
2214 unsigned int gup_flags, struct page **pages,
2215 struct vm_area_struct **vmas, int *locked)
2216 {
2217 return 0;
2218 }
2219
2220 static long __get_user_pages_remote(struct mm_struct *mm,
2221 unsigned long start, unsigned long nr_pages,
2222 unsigned int gup_flags, struct page **pages,
2223 struct vm_area_struct **vmas, int *locked)
2224 {
2225 return 0;
2226 }
2227 #endif
2228
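/*
 * get_user_pages() - pin user pages in memory of the calling process.
 *
 * The same as get_user_pages_remote(), but with a less flexible calling
 * convention: the target is always current->mm. FOLL_TOUCH is added so the
 * pages are marked accessed (and dirtied when written). The caller must
 * hold the mmap_lock for read.
 */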
2245 long get_user_pages(unsigned long start, unsigned long nr_pages,
2246 unsigned int gup_flags, struct page **pages,
2247 struct vm_area_struct **vmas)
2248 {
2249 if (!is_valid_gup_flags(gup_flags))
2250 return -EINVAL;
2251
2252 return __gup_longterm_locked(current->mm, start, nr_pages,
2253 pages, vmas, gup_flags | FOLL_TOUCH);
2254 }
2255 EXPORT_SYMBOL(get_user_pages);
2256
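/*
 * get_user_pages_unlocked() - convenience wrapper for callers that do not
 * already hold the mmap_lock: it takes the lock for read around
 * __get_user_pages_locked() and drops it again, unless the fault path has
 * already released it. FOLL_LONGTERM is not supported here.
 */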
2272 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
2273 struct page **pages, unsigned int gup_flags)
2274 {
2275 struct mm_struct *mm = current->mm;
2276 int locked = 1;
2277 long ret;
2278
2279
2280
2281
2282
2283
2284
2285 if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
2286 return -EINVAL;
2287
2288 mmap_read_lock(mm);
2289 ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL,
2290 &locked, gup_flags | FOLL_TOUCH);
2291 if (locked)
2292 mmap_read_unlock(mm);
2293 return ret;
2294 }
2295 EXPORT_SYMBOL(get_user_pages_unlocked);
2296
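/*
 * Fast GUP
 *
 * get_user_pages_fast() attempts to pin user pages by walking the page
 * tables directly, without taking the mmap_lock. The lockless walk has to
 * be protected against page-table pages being freed underneath it and
 * against concurrent THP splits; this is done by disabling interrupts
 * around the walk (see lockless_pages_from_mm()), relying on the
 * architecture to free page-table pages only after an IPI or RCU grace
 * period. Anything that cannot be handled locklessly falls back to the
 * slow, mmap_lock-taking path.
 */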
2330 #ifdef CONFIG_HAVE_FAST_GUP
2331
2332 static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
2333 unsigned int flags,
2334 struct page **pages)
2335 {
2336 while ((*nr) - nr_start) {
2337 struct page *page = pages[--(*nr)];
2338
2339 ClearPageReferenced(page);
2340 if (flags & FOLL_PIN)
2341 unpin_user_page(page);
2342 else
2343 put_page(page);
2344 }
2345 }
2346
2347 #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367 static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
2368 unsigned long end, unsigned int flags,
2369 struct page **pages, int *nr)
2370 {
2371 struct dev_pagemap *pgmap = NULL;
2372 int nr_start = *nr, ret = 0;
2373 pte_t *ptep, *ptem;
2374
2375 ptem = ptep = pte_offset_map(&pmd, addr);
2376 do {
2377 pte_t pte = ptep_get_lockless(ptep);
2378 struct page *page;
2379 struct folio *folio;
2380
2381
2382
2383
2384
2385 if (pte_protnone(pte))
2386 goto pte_unmap;
2387
2388 if (!pte_access_permitted(pte, flags & FOLL_WRITE))
2389 goto pte_unmap;
2390
2391 if (pte_devmap(pte)) {
2392 if (unlikely(flags & FOLL_LONGTERM))
2393 goto pte_unmap;
2394
2395 pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
2396 if (unlikely(!pgmap)) {
2397 undo_dev_pagemap(nr, nr_start, flags, pages);
2398 goto pte_unmap;
2399 }
2400 } else if (pte_special(pte))
2401 goto pte_unmap;
2402
2403 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
2404 page = pte_page(pte);
2405
2406 folio = try_grab_folio(page, 1, flags);
2407 if (!folio)
2408 goto pte_unmap;
2409
2410 if (unlikely(page_is_secretmem(page))) {
2411 gup_put_folio(folio, 1, flags);
2412 goto pte_unmap;
2413 }
2414
2415 if (unlikely(pmd_val(pmd) != pmd_val(*pmdp)) ||
2416 unlikely(pte_val(pte) != pte_val(*ptep))) {
2417 gup_put_folio(folio, 1, flags);
2418 goto pte_unmap;
2419 }
2420
2421 if (!pte_write(pte) && gup_must_unshare(flags, page)) {
2422 gup_put_folio(folio, 1, flags);
2423 goto pte_unmap;
2424 }
2425
2426
2427
2428
2429
2430
2431
2432 if (flags & FOLL_PIN) {
2433 ret = arch_make_page_accessible(page);
2434 if (ret) {
2435 gup_put_folio(folio, 1, flags);
2436 goto pte_unmap;
2437 }
2438 }
2439 folio_set_referenced(folio);
2440 pages[*nr] = page;
2441 (*nr)++;
2442 } while (ptep++, addr += PAGE_SIZE, addr != end);
2443
2444 ret = 1;
2445
2446 pte_unmap:
2447 if (pgmap)
2448 put_dev_pagemap(pgmap);
2449 pte_unmap(ptem);
2450 return ret;
2451 }
2452 #else
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463 static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
2464 unsigned long end, unsigned int flags,
2465 struct page **pages, int *nr)
2466 {
2467 return 0;
2468 }
2469 #endif
2470
2471 #if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
2472 static int __gup_device_huge(unsigned long pfn, unsigned long addr,
2473 unsigned long end, unsigned int flags,
2474 struct page **pages, int *nr)
2475 {
2476 int nr_start = *nr;
2477 struct dev_pagemap *pgmap = NULL;
2478
2479 do {
2480 struct page *page = pfn_to_page(pfn);
2481
2482 pgmap = get_dev_pagemap(pfn, pgmap);
2483 if (unlikely(!pgmap)) {
2484 undo_dev_pagemap(nr, nr_start, flags, pages);
2485 break;
2486 }
2487 SetPageReferenced(page);
2488 pages[*nr] = page;
2489 if (unlikely(!try_grab_page(page, flags))) {
2490 undo_dev_pagemap(nr, nr_start, flags, pages);
2491 break;
2492 }
2493 (*nr)++;
2494 pfn++;
2495 } while (addr += PAGE_SIZE, addr != end);
2496
2497 put_dev_pagemap(pgmap);
2498 return addr == end;
2499 }
2500
2501 static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
2502 unsigned long end, unsigned int flags,
2503 struct page **pages, int *nr)
2504 {
2505 unsigned long fault_pfn;
2506 int nr_start = *nr;
2507
2508 fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
2509 if (!__gup_device_huge(fault_pfn, addr, end, flags, pages, nr))
2510 return 0;
2511
2512 if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
2513 undo_dev_pagemap(nr, nr_start, flags, pages);
2514 return 0;
2515 }
2516 return 1;
2517 }
2518
2519 static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
2520 unsigned long end, unsigned int flags,
2521 struct page **pages, int *nr)
2522 {
2523 unsigned long fault_pfn;
2524 int nr_start = *nr;
2525
2526 fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
2527 if (!__gup_device_huge(fault_pfn, addr, end, flags, pages, nr))
2528 return 0;
2529
2530 if (unlikely(pud_val(orig) != pud_val(*pudp))) {
2531 undo_dev_pagemap(nr, nr_start, flags, pages);
2532 return 0;
2533 }
2534 return 1;
2535 }
2536 #else
2537 static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
2538 unsigned long end, unsigned int flags,
2539 struct page **pages, int *nr)
2540 {
2541 BUILD_BUG();
2542 return 0;
2543 }
2544
2545 static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
2546 unsigned long end, unsigned int flags,
2547 struct page **pages, int *nr)
2548 {
2549 BUILD_BUG();
2550 return 0;
2551 }
2552 #endif
2553
2554 static int record_subpages(struct page *page, unsigned long addr,
2555 unsigned long end, struct page **pages)
2556 {
2557 int nr;
2558
2559 for (nr = 0; addr != end; nr++, addr += PAGE_SIZE)
2560 pages[nr] = nth_page(page, nr);
2561
2562 return nr;
2563 }
2564
2565 #ifdef CONFIG_ARCH_HAS_HUGEPD
2566 static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
2567 unsigned long sz)
2568 {
2569 unsigned long __boundary = (addr + sz) & ~(sz-1);
2570 return (__boundary - 1 < end - 1) ? __boundary : end;
2571 }
2572
2573 static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
2574 unsigned long end, unsigned int flags,
2575 struct page **pages, int *nr)
2576 {
2577 unsigned long pte_end;
2578 struct page *page;
2579 struct folio *folio;
2580 pte_t pte;
2581 int refs;
2582
2583 pte_end = (addr + sz) & ~(sz-1);
2584 if (pte_end < end)
2585 end = pte_end;
2586
2587 pte = huge_ptep_get(ptep);
2588
2589 if (!pte_access_permitted(pte, flags & FOLL_WRITE))
2590 return 0;
2591
2592
2593 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
2594
2595 page = nth_page(pte_page(pte), (addr & (sz - 1)) >> PAGE_SHIFT);
2596 refs = record_subpages(page, addr, end, pages + *nr);
2597
2598 folio = try_grab_folio(page, refs, flags);
2599 if (!folio)
2600 return 0;
2601
2602 if (unlikely(pte_val(pte) != pte_val(*ptep))) {
2603 gup_put_folio(folio, refs, flags);
2604 return 0;
2605 }
2606
2607 if (!pte_write(pte) && gup_must_unshare(flags, &folio->page)) {
2608 gup_put_folio(folio, refs, flags);
2609 return 0;
2610 }
2611
2612 *nr += refs;
2613 folio_set_referenced(folio);
2614 return 1;
2615 }
2616
2617 static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
2618 unsigned int pdshift, unsigned long end, unsigned int flags,
2619 struct page **pages, int *nr)
2620 {
2621 pte_t *ptep;
2622 unsigned long sz = 1UL << hugepd_shift(hugepd);
2623 unsigned long next;
2624
2625 ptep = hugepte_offset(hugepd, addr, pdshift);
2626 do {
2627 next = hugepte_addr_end(addr, end, sz);
2628 if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr))
2629 return 0;
2630 } while (ptep++, addr = next, addr != end);
2631
2632 return 1;
2633 }
2634 #else
2635 static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
2636 unsigned int pdshift, unsigned long end, unsigned int flags,
2637 struct page **pages, int *nr)
2638 {
2639 return 0;
2640 }
2641 #endif
2642
2643 static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
2644 unsigned long end, unsigned int flags,
2645 struct page **pages, int *nr)
2646 {
2647 struct page *page;
2648 struct folio *folio;
2649 int refs;
2650
2651 if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
2652 return 0;
2653
2654 if (pmd_devmap(orig)) {
2655 if (unlikely(flags & FOLL_LONGTERM))
2656 return 0;
2657 return __gup_device_huge_pmd(orig, pmdp, addr, end, flags,
2658 pages, nr);
2659 }
2660
2661 page = nth_page(pmd_page(orig), (addr & ~PMD_MASK) >> PAGE_SHIFT);
2662 refs = record_subpages(page, addr, end, pages + *nr);
2663
2664 folio = try_grab_folio(page, refs, flags);
2665 if (!folio)
2666 return 0;
2667
2668 if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
2669 gup_put_folio(folio, refs, flags);
2670 return 0;
2671 }
2672
2673 if (!pmd_write(orig) && gup_must_unshare(flags, &folio->page)) {
2674 gup_put_folio(folio, refs, flags);
2675 return 0;
2676 }
2677
2678 *nr += refs;
2679 folio_set_referenced(folio);
2680 return 1;
2681 }
2682
2683 static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
2684 unsigned long end, unsigned int flags,
2685 struct page **pages, int *nr)
2686 {
2687 struct page *page;
2688 struct folio *folio;
2689 int refs;
2690
2691 if (!pud_access_permitted(orig, flags & FOLL_WRITE))
2692 return 0;
2693
2694 if (pud_devmap(orig)) {
2695 if (unlikely(flags & FOLL_LONGTERM))
2696 return 0;
2697 return __gup_device_huge_pud(orig, pudp, addr, end, flags,
2698 pages, nr);
2699 }
2700
2701 page = nth_page(pud_page(orig), (addr & ~PUD_MASK) >> PAGE_SHIFT);
2702 refs = record_subpages(page, addr, end, pages + *nr);
2703
2704 folio = try_grab_folio(page, refs, flags);
2705 if (!folio)
2706 return 0;
2707
2708 if (unlikely(pud_val(orig) != pud_val(*pudp))) {
2709 gup_put_folio(folio, refs, flags);
2710 return 0;
2711 }
2712
2713 if (!pud_write(orig) && gup_must_unshare(flags, &folio->page)) {
2714 gup_put_folio(folio, refs, flags);
2715 return 0;
2716 }
2717
2718 *nr += refs;
2719 folio_set_referenced(folio);
2720 return 1;
2721 }
2722
2723 static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
2724 unsigned long end, unsigned int flags,
2725 struct page **pages, int *nr)
2726 {
2727 int refs;
2728 struct page *page;
2729 struct folio *folio;
2730
2731 if (!pgd_access_permitted(orig, flags & FOLL_WRITE))
2732 return 0;
2733
2734 BUILD_BUG_ON(pgd_devmap(orig));
2735
2736 page = nth_page(pgd_page(orig), (addr & ~PGDIR_MASK) >> PAGE_SHIFT);
2737 refs = record_subpages(page, addr, end, pages + *nr);
2738
2739 folio = try_grab_folio(page, refs, flags);
2740 if (!folio)
2741 return 0;
2742
2743 if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
2744 gup_put_folio(folio, refs, flags);
2745 return 0;
2746 }
2747
2748 *nr += refs;
2749 folio_set_referenced(folio);
2750 return 1;
2751 }
2752
2753 static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end,
2754 unsigned int flags, struct page **pages, int *nr)
2755 {
2756 unsigned long next;
2757 pmd_t *pmdp;
2758
2759 pmdp = pmd_offset_lockless(pudp, pud, addr);
2760 do {
2761 pmd_t pmd = READ_ONCE(*pmdp);
2762
2763 next = pmd_addr_end(addr, end);
2764 if (!pmd_present(pmd))
2765 return 0;
2766
2767 if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
2768 pmd_devmap(pmd))) {
2769
2770
2771
2772
2773
2774 if (pmd_protnone(pmd))
2775 return 0;
2776
2777 if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
2778 pages, nr))
2779 return 0;
2780
2781 } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) {
2782
2783
2784
2785
2786 if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
2787 PMD_SHIFT, next, flags, pages, nr))
2788 return 0;
2789 } else if (!gup_pte_range(pmd, pmdp, addr, next, flags, pages, nr))
2790 return 0;
2791 } while (pmdp++, addr = next, addr != end);
2792
2793 return 1;
2794 }
2795
2796 static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end,
2797 unsigned int flags, struct page **pages, int *nr)
2798 {
2799 unsigned long next;
2800 pud_t *pudp;
2801
2802 pudp = pud_offset_lockless(p4dp, p4d, addr);
2803 do {
2804 pud_t pud = READ_ONCE(*pudp);
2805
2806 next = pud_addr_end(addr, end);
2807 if (unlikely(!pud_present(pud)))
2808 return 0;
2809 if (unlikely(pud_huge(pud))) {
2810 if (!gup_huge_pud(pud, pudp, addr, next, flags,
2811 pages, nr))
2812 return 0;
2813 } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
2814 if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
2815 PUD_SHIFT, next, flags, pages, nr))
2816 return 0;
2817 } else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr))
2818 return 0;
2819 } while (pudp++, addr = next, addr != end);
2820
2821 return 1;
2822 }
2823
2824 static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end,
2825 unsigned int flags, struct page **pages, int *nr)
2826 {
2827 unsigned long next;
2828 p4d_t *p4dp;
2829
2830 p4dp = p4d_offset_lockless(pgdp, pgd, addr);
2831 do {
2832 p4d_t p4d = READ_ONCE(*p4dp);
2833
2834 next = p4d_addr_end(addr, end);
2835 if (p4d_none(p4d))
2836 return 0;
2837 BUILD_BUG_ON(p4d_huge(p4d));
2838 if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
2839 if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
2840 P4D_SHIFT, next, flags, pages, nr))
2841 return 0;
2842 } else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr))
2843 return 0;
2844 } while (p4dp++, addr = next, addr != end);
2845
2846 return 1;
2847 }
2848
2849 static void gup_pgd_range(unsigned long addr, unsigned long end,
2850 unsigned int flags, struct page **pages, int *nr)
2851 {
2852 unsigned long next;
2853 pgd_t *pgdp;
2854
2855 pgdp = pgd_offset(current->mm, addr);
2856 do {
2857 pgd_t pgd = READ_ONCE(*pgdp);
2858
2859 next = pgd_addr_end(addr, end);
2860 if (pgd_none(pgd))
2861 return;
2862 if (unlikely(pgd_huge(pgd))) {
2863 if (!gup_huge_pgd(pgd, pgdp, addr, next, flags,
2864 pages, nr))
2865 return;
2866 } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
2867 if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
2868 PGDIR_SHIFT, next, flags, pages, nr))
2869 return;
2870 } else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr))
2871 return;
2872 } while (pgdp++, addr = next, addr != end);
2873 }
2874 #else
2875 static inline void gup_pgd_range(unsigned long addr, unsigned long end,
2876 unsigned int flags, struct page **pages, int *nr)
2877 {
2878 }
2879 #endif
2880
2881 #ifndef gup_fast_permitted
2882 /*
2883  * Check if it's allowed to use get_user_pages_fast_only() for the range, or
2884  * we need to fall back to the slow version:
2885  */
2886 static bool gup_fast_permitted(unsigned long start, unsigned long end)
2887 {
2888 return true;
2889 }
2890 #endif
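/*
 * Illustrative sketch only (not built, and not any particular architecture's
 * code): an architecture that cannot walk its whole address space locklessly
 * can provide its own gup_fast_permitted() in its pgtable headers along these
 * lines. The TASK_SIZE_MAX cutoff below is a hypothetical example policy.
 */
#if 0
#define gup_fast_permitted gup_fast_permitted
static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
{
	/* Only allow the lockless walk for ranges below the user limit. */
	return end <= TASK_SIZE_MAX;
}
#endif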
2891
2892 static int __gup_longterm_unlocked(unsigned long start, int nr_pages,
2893 unsigned int gup_flags, struct page **pages)
2894 {
2895 int ret;
2896
2897 /*
2898  * FOLL_LONGTERM is not supported by get_user_pages_unlocked(), so take
2899  * mmap_lock here and use __gup_longterm_locked() for that case:
2900  */
2901 if (gup_flags & FOLL_LONGTERM) {
2902 mmap_read_lock(current->mm);
2903 ret = __gup_longterm_locked(current->mm,
2904 start, nr_pages,
2905 pages, NULL, gup_flags);
2906 mmap_read_unlock(current->mm);
2907 } else {
2908 ret = get_user_pages_unlocked(start, nr_pages,
2909 pages, gup_flags);
2910 }
2911
2912 return ret;
2913 }
2914
2915 static unsigned long lockless_pages_from_mm(unsigned long start,
2916 unsigned long end,
2917 unsigned int gup_flags,
2918 struct page **pages)
2919 {
2920 unsigned long flags;
2921 int nr_pinned = 0;
2922 unsigned seq;
2923
2924 if (!IS_ENABLED(CONFIG_HAVE_FAST_GUP) ||
2925 !gup_fast_permitted(start, end))
2926 return 0;
2927
2928 if (gup_flags & FOLL_PIN) {
2929 seq = raw_read_seqcount(&current->mm->write_protect_seq);
2930 if (seq & 1)
2931 return 0;
2932 }
2933
2934 /*
2935  * Disable interrupts. The nested form is used, in order to allow
2936  * full, general purpose use of this routine.
2937  *
2938  * With interrupts disabled, we block page table pages from being
2939  * freed from under us. See struct mmu_table_batch comments in
2940  * include/asm-generic/tlb.h for more details.
2941  *
2942  * We do not adopt an rcu_read_lock() here as we also want to block
2943  * IPIs that come from THPs splitting.
2944  */
2945 local_irq_save(flags);
2946 gup_pgd_range(start, end, gup_flags, pages, &nr_pinned);
2947 local_irq_restore(flags);
2948
2949 /*
2950  * When pinning pages for DMA there could be a concurrent write protect
2951  * from fork() via copy_page_range(); in this case always fail fast GUP.
2952  */
2953 if (gup_flags & FOLL_PIN) {
2954 if (read_seqcount_retry(&current->mm->write_protect_seq, seq)) {
2955 unpin_user_pages_lockless(pages, nr_pinned);
2956 return 0;
2957 } else {
2958 sanity_check_pinned_pages(pages, nr_pinned);
2959 }
2960 }
2961 return nr_pinned;
2962 }
2963
2964 static int internal_get_user_pages_fast(unsigned long start,
2965 unsigned long nr_pages,
2966 unsigned int gup_flags,
2967 struct page **pages)
2968 {
2969 unsigned long len, end;
2970 unsigned long nr_pinned;
2971 int ret;
2972
2973 if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
2974 FOLL_FORCE | FOLL_PIN | FOLL_GET |
2975 FOLL_FAST_ONLY | FOLL_NOFAULT)))
2976 return -EINVAL;
2977
2978 if (gup_flags & FOLL_PIN)
2979 mm_set_has_pinned_flag(&current->mm->flags);
2980
2981 if (!(gup_flags & FOLL_FAST_ONLY))
2982 might_lock_read(&current->mm->mmap_lock);
2983
2984 start = untagged_addr(start) & PAGE_MASK;
2985 len = nr_pages << PAGE_SHIFT;
2986 if (check_add_overflow(start, len, &end))
2987 return 0;
2988 if (unlikely(!access_ok((void __user *)start, len)))
2989 return -EFAULT;
2990
2991 nr_pinned = lockless_pages_from_mm(start, end, gup_flags, pages);
2992 if (nr_pinned == nr_pages || gup_flags & FOLL_FAST_ONLY)
2993 return nr_pinned;
2994
2995 /* Slow path: try to get the remaining pages with get_user_pages */
2996 start += nr_pinned << PAGE_SHIFT;
2997 pages += nr_pinned;
2998 ret = __gup_longterm_unlocked(start, nr_pages - nr_pinned, gup_flags,
2999 pages);
3000 if (ret < 0) {
3001 /*
3002  * The caller has to unpin the pages we already pinned, so
3003  * returning -errno is not an option:
3004  */
3005 if (nr_pinned)
3006 return nr_pinned;
3007 return ret;
3008 }
3009 return ret + nr_pinned;
3010 }
3011
3012 /**
3013  * get_user_pages_fast_only() - pin user pages in memory
3014  * @start:      starting user address
3015  * @nr_pages:   number of pages from start to pin
3016  * @gup_flags:  flags modifying pin behaviour
3017  * @pages:      array that receives pointers to the pages pinned.
3018  *              Should be at least nr_pages long.
3019  *
3020  * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
3021  * back to the regular ("slow") GUP path.
3022  * Note a difference from get_user_pages_fast(): this always returns the
3023  * number of pages pinned, 0 if no pages were pinned.
3024  *
3025  * If the architecture does not support this function, simply return with no
3026  * pages pinned.
3027  *
3028  * Careful, careful! COW breaking can go either way, so a non-write
3029  * access can get ambiguous page results. If you call this function without
3030  * 'write' set, you'd better be sure that you're ok with that ambiguity.
3031  */
3032 int get_user_pages_fast_only(unsigned long start, int nr_pages,
3033 unsigned int gup_flags, struct page **pages)
3034 {
3035 int nr_pinned;
3036
3037 /*
3038  * Internally (within mm/gup.c), gup fast variants must set FOLL_GET,
3039  * because gup fast is always a "pin with a +1 page refcount" request.
3040  * FOLL_FAST_ONLY is required in order to match the API description of
3041  * this routine: no fall back to regular ("slow") GUP.
3042  */
3043 gup_flags |= FOLL_GET | FOLL_FAST_ONLY;
3044
3045 nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
3046 pages);
3047
3048 /*
3049  * As specified in the API description above, this routine is not
3050  * allowed to return negative values. However, the common core
3051  * routine internal_get_user_pages_fast() *can* return -errno.
3052  * Therefore, correct for that here:
3053  */
3054 if (nr_pinned < 0)
3055 nr_pinned = 0;
3056
3057 return nr_pinned;
3058 }
3059 EXPORT_SYMBOL_GPL(get_user_pages_fast_only);
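/*
 * Illustrative sketch only (guarded out of the build): a caller in a context
 * that must not sleep or take mmap_lock could use get_user_pages_fast_only()
 * opportunistically and fall back to a sleepable path when it fails. The
 * function and variable names below are hypothetical.
 */
#if 0
static struct page *example_get_page_atomic(unsigned long uaddr)
{
	struct page *page;

	/* Returns the number of pages pinned (0 or 1 here), never -errno. */
	if (get_user_pages_fast_only(uaddr, 1, FOLL_WRITE, &page) != 1)
		return NULL;	/* caller falls back to a slow path later */

	/* The reference was taken with FOLL_GET, so drop it with put_page(). */
	return page;
}
#endif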
3060
3061 /**
3062  * get_user_pages_fast() - pin user pages in memory
3063  * @start:      starting user address
3064  * @nr_pages:   number of pages from start to pin
3065  * @gup_flags:  flags modifying pin behaviour
3066  * @pages:      array that receives pointers to the pages pinned.
3067  *              Should be at least nr_pages long.
3068  *
3069  * Attempt to pin user pages in memory without taking mm->mmap_lock.
3070  * If not successful, it will fall back to taking the lock and
3071  * calling get_user_pages().
3072  *
3073  * Returns number of pages pinned. This may be fewer than the number requested.
3074  * If nr_pages is 0 or negative, returns 0. If no pages were pinned, returns
3075  * -errno.
3076  */
3077 int get_user_pages_fast(unsigned long start, int nr_pages,
3078 unsigned int gup_flags, struct page **pages)
3079 {
3080 if (!is_valid_gup_flags(gup_flags))
3081 return -EINVAL;
3082
3083 /*
3084  * The caller may or may not have explicitly set FOLL_GET; either way is
3085  * OK. However, internally (within mm/gup.c), gup fast variants must set
3086  * FOLL_GET, because gup fast is always a "pin with a +1 page refcount"
3087  * request.
3088  */
3089 gup_flags |= FOLL_GET;
3090 return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
3091 }
3092 EXPORT_SYMBOL_GPL(get_user_pages_fast);
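/*
 * Illustrative sketch only (guarded out of the build): accessing a user
 * buffer by reference via get_user_pages_fast(). All names below are
 * hypothetical; the pages are released with put_page() because this API
 * takes FOLL_GET references.
 */
#if 0
static int example_gup_fast(unsigned long uaddr, int nr_pages,
			    struct page **pages)
{
	int i, got;

	got = get_user_pages_fast(uaddr, nr_pages, FOLL_WRITE, pages);
	if (got < 0)
		return got;	/* -errno: no pages were pinned */

	/* ... access pages[0..got-1], e.g. via kmap_local_page() ... */

	for (i = 0; i < got; i++)
		put_page(pages[i]);
	return got == nr_pages ? 0 : -EFAULT;
}
#endif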
3093
3094 /**
3095  * pin_user_pages_fast() - pin user pages in memory without taking locks
3096  *
3097  * @start:      starting user address
3098  * @nr_pages:   number of pages from start to pin
3099  * @gup_flags:  flags modifying pin behaviour
3100  * @pages:      array that receives pointers to the pages pinned.
3101  *              Should be at least nr_pages long.
3102  *
3103  * Nearly the same as get_user_pages_fast(), except that FOLL_PIN is set. See
3104  * get_user_pages_fast() for documentation on the function arguments, because
3105  * the arguments here are identical.
3106  *
3107  * FOLL_PIN means that the pages must be released via unpin_user_page(). Please
3108  * see Documentation/core-api/pin_user_pages.rst for further details.
3109  */
3110 int pin_user_pages_fast(unsigned long start, int nr_pages,
3111 unsigned int gup_flags, struct page **pages)
3112 {
3113 /* FOLL_GET and FOLL_PIN are mutually exclusive. */
3114 if (WARN_ON_ONCE(gup_flags & FOLL_GET))
3115 return -EINVAL;
3116
3117 if (WARN_ON_ONCE(!pages))
3118 return -EINVAL;
3119
3120 gup_flags |= FOLL_PIN;
3121 return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
3122 }
3123 EXPORT_SYMBOL_GPL(pin_user_pages_fast);
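/*
 * Illustrative sketch only (guarded out of the build): pinning a user buffer
 * for DMA with pin_user_pages_fast() and releasing it with
 * unpin_user_pages(), as FOLL_PIN requires. The names below are hypothetical.
 */
#if 0
static int example_pin_for_dma(unsigned long uaddr, int nr_pages,
			       struct page **pages)
{
	int pinned;

	pinned = pin_user_pages_fast(uaddr, nr_pages,
				     FOLL_WRITE | FOLL_LONGTERM, pages);
	if (pinned < 0)
		return pinned;	/* -errno: nothing to unpin */

	/* ... map pages[0..pinned-1] for DMA and run the transfer ... */

	unpin_user_pages(pages, pinned);
	return pinned == nr_pages ? 0 : -EFAULT;
}
#endif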
3124
3125 /*
3126  * This is the FOLL_PIN equivalent of get_user_pages_fast_only(). Behavior
3127  * is the same, except that this one sets FOLL_PIN instead of FOLL_GET.
3128  *
3129  * The API rules are the same, too: no negative values may be returned.
3130  */
3131 int pin_user_pages_fast_only(unsigned long start, int nr_pages,
3132 unsigned int gup_flags, struct page **pages)
3133 {
3134 int nr_pinned;
3135
3136
3137 /*
3138  * FOLL_GET and FOLL_PIN are mutually exclusive; this API returns 0, not -errno.
3139  */
3140 if (WARN_ON_ONCE(gup_flags & FOLL_GET))
3141 return 0;
3142
3143 if (WARN_ON_ONCE(!pages))
3144 return 0;
3145
3146 /*
3147  * FOLL_FAST_ONLY matches the API description: no fall back to slow GUP.
3148  */
3149 gup_flags |= (FOLL_PIN | FOLL_FAST_ONLY);
3150 nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
3151 pages);
3152
3153 /*
3154  * This routine is not allowed to return negative values. However,
3155  * internal_get_user_pages_fast() *can* return -errno, so correct for that:
3156  */
3157 if (nr_pinned < 0)
3158 nr_pinned = 0;
3159
3160 return nr_pinned;
3161 }
3162 EXPORT_SYMBOL_GPL(pin_user_pages_fast_only);
3163
3164 /**
3165  * pin_user_pages_remote() - pin pages of a remote process
3166  *
3167  * @mm:		mm_struct of target mm
3168  * @start:	starting user address
3169  * @nr_pages:	number of pages from start to pin
3170  * @gup_flags:	flags modifying lookup behaviour
3171  * @pages:	array that receives pointers to the pages pinned.
3172  *		Should be at least nr_pages long.
3173  * @vmas:	array of pointers to vmas corresponding to each page.
3174  *		Or NULL if the caller does not require them.
3175  * @locked:	pointer to lock flag indicating whether lock is held and
3176  *		subsequently whether VM_FAULT_RETRY functionality can be
3177  *		utilised. Lock must initially be held.
3178  *
3179  * Nearly the same as get_user_pages_remote(), except that FOLL_PIN is set. See
3180  * get_user_pages_remote() for documentation on the function arguments, because
3181  * the arguments here are identical.
3182  *
3183  * FOLL_PIN means that the pages must be released via unpin_user_page(). Please
3184  * see Documentation/core-api/pin_user_pages.rst for details.
3185  */
3186 long pin_user_pages_remote(struct mm_struct *mm,
3187 unsigned long start, unsigned long nr_pages,
3188 unsigned int gup_flags, struct page **pages,
3189 struct vm_area_struct **vmas, int *locked)
3190 {
3191 /* FOLL_GET and FOLL_PIN are mutually exclusive. */
3192 if (WARN_ON_ONCE(gup_flags & FOLL_GET))
3193 return -EINVAL;
3194
3195 if (WARN_ON_ONCE(!pages))
3196 return -EINVAL;
3197
3198 gup_flags |= FOLL_PIN;
3199 return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
3200 pages, vmas, locked);
3201 }
3202 EXPORT_SYMBOL(pin_user_pages_remote);
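/*
 * Illustrative sketch only (guarded out of the build): pinning pages of
 * another process' mm with pin_user_pages_remote(). The caller holds the
 * target mm's mmap_lock; with FOLL_LONGTERM, @locked must be NULL, and @vmas
 * may be NULL as well. The names below are hypothetical.
 */
#if 0
static long example_pin_remote(struct mm_struct *mm, unsigned long uaddr,
			       unsigned long nr_pages, struct page **pages)
{
	long pinned;

	mmap_read_lock(mm);
	pinned = pin_user_pages_remote(mm, uaddr, nr_pages,
				       FOLL_WRITE | FOLL_LONGTERM,
				       pages, NULL, NULL);
	mmap_read_unlock(mm);

	if (pinned > 0)
		unpin_user_pages(pages, pinned);	/* once done with them */
	return pinned;
}
#endif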
3203
3204 /**
3205  * pin_user_pages() - pin user pages in memory for use by other devices
3206  *
3207  * @start:	starting user address
3208  * @nr_pages:	number of pages from start to pin
3209  * @gup_flags:	flags modifying lookup behaviour
3210  * @pages:	array that receives pointers to the pages pinned.
3211  *		Should be at least nr_pages long.
3212  * @vmas:	array of pointers to vmas corresponding to each page.
3213  *		Or NULL if the caller does not require them.
3214  *
3215  * Nearly the same as get_user_pages(), except that FOLL_TOUCH is not set, and
3216  * FOLL_PIN is set.
3217  *
3218  * FOLL_PIN means that the pages must be released via unpin_user_page(). Please
3219  * see Documentation/core-api/pin_user_pages.rst for details.
3220  */
3221 long pin_user_pages(unsigned long start, unsigned long nr_pages,
3222 unsigned int gup_flags, struct page **pages,
3223 struct vm_area_struct **vmas)
3224 {
3225 /* FOLL_GET and FOLL_PIN are mutually exclusive. */
3226 if (WARN_ON_ONCE(gup_flags & FOLL_GET))
3227 return -EINVAL;
3228
3229 if (WARN_ON_ONCE(!pages))
3230 return -EINVAL;
3231
3232 gup_flags |= FOLL_PIN;
3233 return __gup_longterm_locked(current->mm, start, nr_pages,
3234 pages, vmas, gup_flags);
3235 }
3236 EXPORT_SYMBOL(pin_user_pages);
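/*
 * Illustrative sketch only (guarded out of the build): pinning pages of the
 * current task with pin_user_pages() while holding our own mmap_lock, which
 * this API expects the caller to take. Names below are hypothetical.
 */
#if 0
static long example_pin_current(unsigned long uaddr, unsigned long nr_pages,
				struct page **pages)
{
	long pinned;

	mmap_read_lock(current->mm);
	pinned = pin_user_pages(uaddr, nr_pages, FOLL_WRITE, pages, NULL);
	mmap_read_unlock(current->mm);

	/* On success the caller eventually calls unpin_user_pages(). */
	return pinned;
}
#endif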
3237
3238 /*
3239  * pin_user_pages_unlocked() is the FOLL_PIN variant of
3240  * get_user_pages_unlocked(). Behavior is the same, except that this one
3241  * sets FOLL_PIN and rejects FOLL_GET.
3242  */
3243 long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
3244 struct page **pages, unsigned int gup_flags)
3245 {
3246 /* FOLL_GET and FOLL_PIN are mutually exclusive. */
3247 if (WARN_ON_ONCE(gup_flags & FOLL_GET))
3248 return -EINVAL;
3249
3250 if (WARN_ON_ONCE(!pages))
3251 return -EINVAL;
3252
3253 gup_flags |= FOLL_PIN;
3254 return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
3255 }
3256 EXPORT_SYMBOL(pin_user_pages_unlocked);
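/*
 * Illustrative sketch only (guarded out of the build): unlike pin_user_pages(),
 * pin_user_pages_unlocked() takes and drops mmap_lock internally, so the
 * caller just supplies the range. Names below are hypothetical.
 */
#if 0
static long example_pin_unlocked(unsigned long uaddr, unsigned long nr_pages,
				 struct page **pages)
{
	long pinned = pin_user_pages_unlocked(uaddr, nr_pages, pages,
					      FOLL_WRITE);

	if (pinned > 0)
		unpin_user_pages(pages, pinned);	/* when finished */
	return pinned;
}
#endif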