// SPDX-License-Identifier: GPL-2.0-only
/*
 *  mm/userfaultfd.c
 *
 *  Copyright (C) 2015  Red Hat, Inc.
 */

#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/userfaultfd_k.h>
#include <linux/mmu_notifier.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include "internal.h"

static __always_inline
struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
                    unsigned long dst_start,
                    unsigned long len)
{
    /*
     * Make sure that the dst range is both valid and fully within a
     * single existing vma.
     */
    struct vm_area_struct *dst_vma;

    dst_vma = find_vma(dst_mm, dst_start);
    if (!dst_vma)
        return NULL;

    if (dst_start < dst_vma->vm_start ||
        dst_start + len > dst_vma->vm_end)
        return NULL;

    /*
     * Check that the vma is registered in uffd; this is required to
     * enforce the VM_MAYWRITE check done at uffd registration time.
     */
    if (!dst_vma->vm_userfaultfd_ctx.ctx)
        return NULL;

    return dst_vma;
}

/*
 * Install PTEs to map dst_addr (within dst_vma) to page.
 *
 * This function handles both MCOPY_ATOMIC_NORMAL and _CONTINUE for both shmem
 * and anon, and for both shared and private VMAs.
 */
int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
                 struct vm_area_struct *dst_vma,
                 unsigned long dst_addr, struct page *page,
                 bool newly_allocated, bool wp_copy)
{
    int ret;
    pte_t _dst_pte, *dst_pte;
    bool writable = dst_vma->vm_flags & VM_WRITE;
    bool vm_shared = dst_vma->vm_flags & VM_SHARED;
    bool page_in_cache = page->mapping;
    spinlock_t *ptl;
    struct inode *inode;
    pgoff_t offset, max_off;

    _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
    _dst_pte = pte_mkdirty(_dst_pte);
    if (page_in_cache && !vm_shared)
        writable = false;

    /*
     * Always mark a PTE as write-protected when needed, regardless of
     * VM_WRITE, which the user might change.
     */
    if (wp_copy) {
        _dst_pte = pte_mkuffd_wp(_dst_pte);
        writable = false;
    }

    if (writable)
        _dst_pte = pte_mkwrite(_dst_pte);
    else
        /*
         * We need this to make sure the write bit is removed, as
         * mk_pte() could return a pte with the write bit set.
         */
        _dst_pte = pte_wrprotect(_dst_pte);

    dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);

    if (vma_is_shmem(dst_vma)) {
        /* serialize against truncate with the page table lock */
        inode = dst_vma->vm_file->f_inode;
        offset = linear_page_index(dst_vma, dst_addr);
        max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
        ret = -EFAULT;
        if (unlikely(offset >= max_off))
            goto out_unlock;
    }

    ret = -EEXIST;
    /*
     * We allow overwriting a pte marker: consider the case where both
     * MISSING|WP are registered, we first wr-protect a none pte which
     * has no page cache page backing it, then access the page.
     */
    if (!pte_none_mostly(*dst_pte))
        goto out_unlock;

    if (page_in_cache) {
        /* Usually, cache pages are already added to LRU */
        if (newly_allocated)
            lru_cache_add(page);
        page_add_file_rmap(page, dst_vma, false);
    } else {
        page_add_new_anon_rmap(page, dst_vma, dst_addr);
        lru_cache_add_inactive_or_unevictable(page, dst_vma);
    }

    /*
     * Must happen after rmap, as mm_counter() checks mapping (via
     * PageAnon()), which is set by __page_set_anon_rmap().
     */
    inc_mm_counter(dst_mm, mm_counter(page));

    set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);

    /* No need to invalidate - it was non-present before */
    update_mmu_cache(dst_vma, dst_addr, dst_pte);
    ret = 0;
out_unlock:
    pte_unmap_unlock(dst_pte, ptl);
    return ret;
}

static int mcopy_atomic_pte(struct mm_struct *dst_mm,
                pmd_t *dst_pmd,
                struct vm_area_struct *dst_vma,
                unsigned long dst_addr,
                unsigned long src_addr,
                struct page **pagep,
                bool wp_copy)
{
    void *page_kaddr;
    int ret;
    struct page *page;

    if (!*pagep) {
        ret = -ENOMEM;
        page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
        if (!page)
            goto out;

        page_kaddr = kmap_atomic(page);
        ret = copy_from_user(page_kaddr,
                     (const void __user *) src_addr,
                     PAGE_SIZE);
        kunmap_atomic(page_kaddr);

        /* fall back to copy_from_user outside mmap_lock */
        if (unlikely(ret)) {
            ret = -ENOENT;
            *pagep = page;
            /* don't free the page */
            goto out;
        }

        flush_dcache_page(page);
    } else {
        page = *pagep;
        *pagep = NULL;
    }

    /*
     * The memory barrier inside __SetPageUptodate makes sure that
     * preceding stores to the page contents become visible before
     * the set_pte_at() write.
     */
    __SetPageUptodate(page);

    ret = -ENOMEM;
    if (mem_cgroup_charge(page_folio(page), dst_mm, GFP_KERNEL))
        goto out_release;

    ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                       page, true, wp_copy);
    if (ret)
        goto out_release;
out:
    return ret;
out_release:
    put_page(page);
    goto out;
}

static int mfill_zeropage_pte(struct mm_struct *dst_mm,
                  pmd_t *dst_pmd,
                  struct vm_area_struct *dst_vma,
                  unsigned long dst_addr)
{
    pte_t _dst_pte, *dst_pte;
    spinlock_t *ptl;
    int ret;
    pgoff_t offset, max_off;
    struct inode *inode;

    _dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
                     dst_vma->vm_page_prot));
    dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
    if (dst_vma->vm_file) {
        /* the shmem MAP_PRIVATE case requires checking the i_size */
        inode = dst_vma->vm_file->f_inode;
        offset = linear_page_index(dst_vma, dst_addr);
        max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
        ret = -EFAULT;
        if (unlikely(offset >= max_off))
            goto out_unlock;
    }
    ret = -EEXIST;
    if (!pte_none(*dst_pte))
        goto out_unlock;
    set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
    /* No need to invalidate - it was non-present before */
    update_mmu_cache(dst_vma, dst_addr, dst_pte);
    ret = 0;
out_unlock:
    pte_unmap_unlock(dst_pte, ptl);
    return ret;
}

/* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */
static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
                pmd_t *dst_pmd,
                struct vm_area_struct *dst_vma,
                unsigned long dst_addr,
                bool wp_copy)
{
    struct inode *inode = file_inode(dst_vma->vm_file);
    pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
    struct page *page;
    int ret;

    ret = shmem_getpage(inode, pgoff, &page, SGP_NOALLOC);
    /* Our caller expects us to return -EFAULT if we failed to find the page. */
    if (ret == -ENOENT)
        ret = -EFAULT;
    if (ret)
        goto out;
    if (!page) {
        ret = -EFAULT;
        goto out;
    }

    if (PageHWPoison(page)) {
        ret = -EIO;
        goto out_release;
    }

    ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                       page, false, wp_copy);
    if (ret)
        goto out_release;

    unlock_page(page);
    ret = 0;
out:
    return ret;
out_release:
    unlock_page(page);
    put_page(page);
    goto out;
}

static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
{
    pgd_t *pgd;
    p4d_t *p4d;
    pud_t *pud;

    pgd = pgd_offset(mm, address);
    p4d = p4d_alloc(mm, pgd, address);
    if (!p4d)
        return NULL;
    pud = pud_alloc(mm, p4d, address);
    if (!pud)
        return NULL;
    /*
     * Note that we don't necessarily run this because the pmd was
     * missing; *pmd may already be established, and it may even be
     * a trans_huge_pmd.
     */
    return pmd_alloc(mm, pud, address);
}

#ifdef CONFIG_HUGETLB_PAGE
/*
 * __mcopy_atomic processing for HUGETLB vmas.  Note that this routine is
 * called with mmap_lock held; it will release mmap_lock before returning.
 */
static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
                          struct vm_area_struct *dst_vma,
                          unsigned long dst_start,
                          unsigned long src_start,
                          unsigned long len,
                          enum mcopy_atomic_mode mode,
                          bool wp_copy)
{
    int vm_shared = dst_vma->vm_flags & VM_SHARED;
    ssize_t err;
    pte_t *dst_pte;
    unsigned long src_addr, dst_addr;
    long copied;
    struct page *page;
    unsigned long vma_hpagesize;
    pgoff_t idx;
    u32 hash;
    struct address_space *mapping;

    /*
     * There is no default zero huge page for all huge page sizes
     * supported by hugetlb.  A PMD_SIZE zero huge page may exist, as
     * used by THP, but since we cannot reliably insert a zero page,
     * this feature is not supported.
     */
    if (mode == MCOPY_ATOMIC_ZEROPAGE) {
        mmap_read_unlock(dst_mm);
        return -EINVAL;
    }

    src_addr = src_start;
    dst_addr = dst_start;
    copied = 0;
    page = NULL;
    vma_hpagesize = vma_kernel_pagesize(dst_vma);

    /*
     * Validate alignment based on the huge page size.
     */
    err = -EINVAL;
    if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
        goto out_unlock;

retry:
    /*
     * On routine entry dst_vma is set.  If we had to drop mmap_lock and
     * retry, dst_vma will be set to NULL and we must look it up again.
     */
    if (!dst_vma) {
        err = -ENOENT;
        dst_vma = find_dst_vma(dst_mm, dst_start, len);
        if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
            goto out_unlock;

        err = -EINVAL;
        if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
            goto out_unlock;

        vm_shared = dst_vma->vm_flags & VM_SHARED;
    }

    /*
     * If not shared, ensure the dst_vma has an anon_vma.
     */
    err = -ENOMEM;
    if (!vm_shared) {
        if (unlikely(anon_vma_prepare(dst_vma)))
            goto out_unlock;
    }

    while (src_addr < src_start + len) {
        BUG_ON(dst_addr >= dst_start + len);

        /*
         * Serialize via i_mmap_rwsem and hugetlb_fault_mutex.
         * i_mmap_rwsem ensures the dst_pte remains valid even
         * in the case of shared pmds.  The fault mutex prevents
         * races with other faulting threads.
         */
        mapping = dst_vma->vm_file->f_mapping;
        i_mmap_lock_read(mapping);
        idx = linear_page_index(dst_vma, dst_addr);
        hash = hugetlb_fault_mutex_hash(mapping, idx);
        mutex_lock(&hugetlb_fault_mutex_table[hash]);

        err = -ENOMEM;
        dst_pte = huge_pte_alloc(dst_mm, dst_vma, dst_addr, vma_hpagesize);
        if (!dst_pte) {
            mutex_unlock(&hugetlb_fault_mutex_table[hash]);
            i_mmap_unlock_read(mapping);
            goto out_unlock;
        }

        if (mode != MCOPY_ATOMIC_CONTINUE &&
            !huge_pte_none_mostly(huge_ptep_get(dst_pte))) {
            err = -EEXIST;
            mutex_unlock(&hugetlb_fault_mutex_table[hash]);
            i_mmap_unlock_read(mapping);
            goto out_unlock;
        }

        err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
                           dst_addr, src_addr, mode, &page,
                           wp_copy);

        mutex_unlock(&hugetlb_fault_mutex_table[hash]);
        i_mmap_unlock_read(mapping);

        cond_resched();

        if (unlikely(err == -ENOENT)) {
            mmap_read_unlock(dst_mm);
            BUG_ON(!page);

            err = copy_huge_page_from_user(page,
                        (const void __user *)src_addr,
                        vma_hpagesize / PAGE_SIZE,
                        true);
            if (unlikely(err)) {
                err = -EFAULT;
                goto out;
            }
            mmap_read_lock(dst_mm);

            dst_vma = NULL;
            goto retry;
        } else
            BUG_ON(page);

        if (!err) {
            dst_addr += vma_hpagesize;
            src_addr += vma_hpagesize;
            copied += vma_hpagesize;

            if (fatal_signal_pending(current))
                err = -EINTR;
        }
        if (err)
            break;
    }

out_unlock:
    mmap_read_unlock(dst_mm);
out:
    if (page)
        put_page(page);
    BUG_ON(copied < 0);
    BUG_ON(err > 0);
    BUG_ON(!copied && !err);
    return copied ? copied : err;
}
#else /* !CONFIG_HUGETLB_PAGE */
/* fail at build time if gcc attempts to use this */
extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
                      struct vm_area_struct *dst_vma,
                      unsigned long dst_start,
                      unsigned long src_start,
                      unsigned long len,
                      enum mcopy_atomic_mode mode,
                      bool wp_copy);
#endif /* CONFIG_HUGETLB_PAGE */

static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
                        pmd_t *dst_pmd,
                        struct vm_area_struct *dst_vma,
                        unsigned long dst_addr,
                        unsigned long src_addr,
                        struct page **page,
                        enum mcopy_atomic_mode mode,
                        bool wp_copy)
{
    ssize_t err;

    if (mode == MCOPY_ATOMIC_CONTINUE) {
        return mcontinue_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                        wp_copy);
    }

    /*
     * The normal page fault path for a shmem mapping will invoke the
     * fault, fill the hole in the file and COW it right away. The
     * result generates plain anonymous memory. So when we are
     * asked to fill a hole in a MAP_PRIVATE shmem mapping, we'll
     * generate anonymous memory directly without actually filling
     * the hole. For the MAP_PRIVATE case the robustness check
     * only happens in the pagetable (to verify it's still none)
     * and not in the radix tree.
     */
    if (!(dst_vma->vm_flags & VM_SHARED)) {
        if (mode == MCOPY_ATOMIC_NORMAL)
            err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
                           dst_addr, src_addr, page,
                           wp_copy);
        else
            err = mfill_zeropage_pte(dst_mm, dst_pmd,
                         dst_vma, dst_addr);
    } else {
        err = shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
                         dst_addr, src_addr,
                         mode != MCOPY_ATOMIC_NORMAL,
                         wp_copy, page);
    }

    return err;
}

static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
                          unsigned long dst_start,
                          unsigned long src_start,
                          unsigned long len,
                          enum mcopy_atomic_mode mcopy_mode,
                          atomic_t *mmap_changing,
                          __u64 mode)
{
    struct vm_area_struct *dst_vma;
    ssize_t err;
    pmd_t *dst_pmd;
    unsigned long src_addr, dst_addr;
    long copied;
    struct page *page;
    bool wp_copy;

    /*
     * Sanitize the command parameters:
     */
    BUG_ON(dst_start & ~PAGE_MASK);
    BUG_ON(len & ~PAGE_MASK);

    /* Does the address range wrap, or is the span zero-sized? */
    BUG_ON(src_start + len <= src_start);
    BUG_ON(dst_start + len <= dst_start);

    src_addr = src_start;
    dst_addr = dst_start;
    copied = 0;
    page = NULL;
retry:
    mmap_read_lock(dst_mm);

    /*
     * If memory mappings are changing because of a non-cooperative
     * operation (e.g. mremap) running in parallel, bail out and
     * request the user to retry later.
     */
    err = -EAGAIN;
    if (mmap_changing && atomic_read(mmap_changing))
        goto out_unlock;

    /*
     * Make sure that the dst range is both valid and fully within a
     * single existing vma.
     */
    err = -ENOENT;
    dst_vma = find_dst_vma(dst_mm, dst_start, len);
    if (!dst_vma)
        goto out_unlock;

    err = -EINVAL;
    /*
     * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
     * it will overwrite vm_ops, so vma_is_anonymous must return false.
     */
    if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
        dst_vma->vm_flags & VM_SHARED))
        goto out_unlock;

    /*
     * Validate 'mode' now that we know the dst_vma: don't allow
     * a wrprotect copy if the userfaultfd didn't register as WP.
     */
    wp_copy = mode & UFFDIO_COPY_MODE_WP;
    if (wp_copy && !(dst_vma->vm_flags & VM_UFFD_WP))
        goto out_unlock;

    /*
     * If this is a HUGETLB vma, pass off to the appropriate routine.
     */
    if (is_vm_hugetlb_page(dst_vma))
        return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
                          src_start, len, mcopy_mode,
                          wp_copy);

    if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
        goto out_unlock;
    if (!vma_is_shmem(dst_vma) && mcopy_mode == MCOPY_ATOMIC_CONTINUE)
        goto out_unlock;

    /*
     * Ensure the dst_vma has an anon_vma, or this page
     * would get a NULL anon_vma when moved into the
     * dst_vma.
     */
    err = -ENOMEM;
    if (!(dst_vma->vm_flags & VM_SHARED) &&
        unlikely(anon_vma_prepare(dst_vma)))
        goto out_unlock;

    while (src_addr < src_start + len) {
        pmd_t dst_pmdval;

        BUG_ON(dst_addr >= dst_start + len);

        dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
        if (unlikely(!dst_pmd)) {
            err = -ENOMEM;
            break;
        }

        dst_pmdval = pmd_read_atomic(dst_pmd);
        /*
         * If the dst_pmd is mapped as THP, don't
         * override it and just be strict.
         */
        if (unlikely(pmd_trans_huge(dst_pmdval))) {
            err = -EEXIST;
            break;
        }
        if (unlikely(pmd_none(dst_pmdval)) &&
            unlikely(__pte_alloc(dst_mm, dst_pmd))) {
            err = -ENOMEM;
            break;
        }
        /* If a huge pmd materialized from under us, fail */
        if (unlikely(pmd_trans_huge(*dst_pmd))) {
            err = -EFAULT;
            break;
        }

        BUG_ON(pmd_none(*dst_pmd));
        BUG_ON(pmd_trans_huge(*dst_pmd));

        err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                       src_addr, &page, mcopy_mode, wp_copy);
        cond_resched();

        if (unlikely(err == -ENOENT)) {
            void *page_kaddr;

            mmap_read_unlock(dst_mm);
            BUG_ON(!page);

            page_kaddr = kmap(page);
            err = copy_from_user(page_kaddr,
                         (const void __user *) src_addr,
                         PAGE_SIZE);
            kunmap(page);
            if (unlikely(err)) {
                err = -EFAULT;
                goto out;
            }
            flush_dcache_page(page);
            goto retry;
        } else
            BUG_ON(page);

        if (!err) {
            dst_addr += PAGE_SIZE;
            src_addr += PAGE_SIZE;
            copied += PAGE_SIZE;

            if (fatal_signal_pending(current))
                err = -EINTR;
        }
        if (err)
            break;
    }

out_unlock:
    mmap_read_unlock(dst_mm);
out:
    if (page)
        put_page(page);
    BUG_ON(copied < 0);
    BUG_ON(err > 0);
    BUG_ON(!copied && !err);
    return copied ? copied : err;
}

ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
             unsigned long src_start, unsigned long len,
             atomic_t *mmap_changing, __u64 mode)
{
    return __mcopy_atomic(dst_mm, dst_start, src_start, len,
                  MCOPY_ATOMIC_NORMAL, mmap_changing, mode);
}

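/*
 * Illustrative userspace sketch (not part of this file, not built here):
 * roughly how the mcopy_atomic() path above is reached from userspace via
 * the UFFDIO_COPY ioctl.  Error handling is minimal, and depending on the
 * kernel configuration userfaultfd() may require privilege, the
 * vm.unprivileged_userfaultfd sysctl, or the UFFD_USER_MODE_ONLY flag.
 */
#if 0   /* userspace example, excluded from the kernel build */
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
    long page = sysconf(_SC_PAGESIZE);
    int uffd = syscall(__NR_userfaultfd, O_CLOEXEC);
    struct uffdio_api api = { .api = UFFD_API };
    struct uffdio_register reg;
    struct uffdio_copy copy;
    char *dst, *src;

    if (uffd < 0 || ioctl(uffd, UFFDIO_API, &api))
        exit(1);

    /* Destination: anonymous private mapping whose pages start out missing. */
    dst = mmap(NULL, page, PROT_READ | PROT_WRITE,
           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    /* Source buffer holding the contents to install. */
    src = mmap(NULL, page, PROT_READ | PROT_WRITE,
           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (dst == MAP_FAILED || src == MAP_FAILED)
        exit(1);
    memset(src, 0x5a, page);

    /* Register dst for missing faults; this sets vm_userfaultfd_ctx. */
    reg.range.start = (unsigned long)dst;
    reg.range.len = page;
    reg.mode = UFFDIO_REGISTER_MODE_MISSING;
    if (ioctl(uffd, UFFDIO_REGISTER, &reg))
        exit(1);

    /* Atomically install a copy of src at dst; resolves the missing page. */
    copy.dst = (unsigned long)dst;
    copy.src = (unsigned long)src;
    copy.len = page;
    copy.mode = 0;      /* UFFDIO_COPY_MODE_WP also needs WP registration */
    copy.copy = 0;
    if (ioctl(uffd, UFFDIO_COPY, &copy))    /* reaches mcopy_atomic() */
        exit(1);

    printf("copied %lld bytes, dst[0] = 0x%x\n",
           (long long)copy.copy, dst[0]);
    return 0;
}
#endif
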
ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
               unsigned long len, atomic_t *mmap_changing)
{
    return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
                  mmap_changing, 0);
}

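/*
 * Illustrative userspace sketch: mfill_zeropage() backs the UFFDIO_ZEROPAGE
 * ioctl, which resolves a missing page by mapping the zero page instead of
 * copying data.  It assumes a userfaultfd 'uffd', the page size in 'page',
 * and a MISSING-registered, still-unpopulated destination 'dst' set up much
 * like the UFFDIO_COPY sketch above.
 */
#if 0   /* userspace example, excluded from the kernel build */
    struct uffdio_zeropage zp;

    zp.range.start = (unsigned long)dst;    /* page-aligned, registered */
    zp.range.len = page;
    zp.mode = 0;                /* or UFFDIO_ZEROPAGE_MODE_DONTWAKE */
    if (ioctl(uffd, UFFDIO_ZEROPAGE, &zp))  /* reaches mfill_zeropage() */
        exit(1);
    /* On success zp.zeropage holds the number of bytes zero-mapped. */
#endif
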
ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
               unsigned long len, atomic_t *mmap_changing)
{
    return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_CONTINUE,
                  mmap_changing, 0);
}

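/*
 * Illustrative userspace sketch for the minor-fault path: mcopy_continue()
 * backs UFFDIO_CONTINUE, which only installs PTEs for pages that already
 * exist in the page cache (see mcontinue_atomic_pte() above).  It assumes a
 * userfaultfd 'uffd' that negotiated UFFD_FEATURE_MINOR_SHMEM, the page size
 * in 'page', and a faulting address 'addr' inside a shmem (e.g. memfd)
 * mapping registered with UFFDIO_REGISTER_MODE_MINOR, whose backing page was
 * populated through a second mapping of the same file.
 */
#if 0   /* userspace example, excluded from the kernel build */
    struct uffdio_continue cont;

    cont.range.start = addr & ~(page - 1);
    cont.range.len = page;
    cont.mode = 0;              /* or UFFDIO_CONTINUE_MODE_DONTWAKE */
    if (ioctl(uffd, UFFDIO_CONTINUE, &cont))    /* reaches mcopy_continue() */
        exit(1);
    /* On success cont.mapped holds the number of bytes mapped. */
#endif
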
void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma,
           unsigned long start, unsigned long len, bool enable_wp)
{
    struct mmu_gather tlb;
    pgprot_t newprot;

    if (enable_wp)
        newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
    else
        newprot = vm_get_page_prot(dst_vma->vm_flags);

    tlb_gather_mmu(&tlb, dst_mm);
    change_protection(&tlb, dst_vma, start, start + len, newprot,
              enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
    tlb_finish_mmu(&tlb);
}

int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
            unsigned long len, bool enable_wp,
            atomic_t *mmap_changing)
{
    struct vm_area_struct *dst_vma;
    unsigned long page_mask;
    int err;

    /*
     * Sanitize the command parameters:
     */
    BUG_ON(start & ~PAGE_MASK);
    BUG_ON(len & ~PAGE_MASK);

    /* Does the address range wrap, or is the span zero-sized? */
    BUG_ON(start + len <= start);

    mmap_read_lock(dst_mm);

    /*
     * If memory mappings are changing because of a non-cooperative
     * operation (e.g. mremap) running in parallel, bail out and
     * request the user to retry later.
     */
    err = -EAGAIN;
    if (mmap_changing && atomic_read(mmap_changing))
        goto out_unlock;

    err = -ENOENT;
    dst_vma = find_dst_vma(dst_mm, start, len);

    if (!dst_vma)
        goto out_unlock;
    if (!userfaultfd_wp(dst_vma))
        goto out_unlock;
    if (!vma_can_userfault(dst_vma, dst_vma->vm_flags))
        goto out_unlock;

    if (is_vm_hugetlb_page(dst_vma)) {
        err = -EINVAL;
        page_mask = vma_kernel_pagesize(dst_vma) - 1;
        if ((start & page_mask) || (len & page_mask))
            goto out_unlock;
    }

    uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp);

    err = 0;
out_unlock:
    mmap_read_unlock(dst_mm);
    return err;
}
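
/*
 * Illustrative userspace sketch: mwriteprotect_range() backs the
 * UFFDIO_WRITEPROTECT ioctl.  It assumes a userfaultfd 'uffd' and a range
 * ('dst', 'page' bytes) registered with UFFDIO_REGISTER_MODE_WP; writes to
 * a write-protected page are then reported as uffd-wp faults until the
 * protection is dropped again.
 */
#if 0   /* userspace example, excluded from the kernel build */
    struct uffdio_writeprotect wp;

    /* Enable write protection on the range (the enable_wp == true path). */
    wp.range.start = (unsigned long)dst;
    wp.range.len = page;
    wp.mode = UFFDIO_WRITEPROTECT_MODE_WP;
    if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp))
        exit(1);

    /* Later, typically after handling a write fault: drop the protection. */
    wp.mode = 0;        /* optionally | UFFDIO_WRITEPROTECT_MODE_DONTWAKE */
    if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp))
        exit(1);
#endif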