Back to home page

LXR

 
 

    


0001 /*
0002  *  linux/mm/mincore.c
0003  *
0004  * Copyright (C) 1994-2006  Linus Torvalds
0005  */
0006 
0007 /*
0008  * The mincore() system call.
0009  */
0010 #include <linux/pagemap.h>
0011 #include <linux/gfp.h>
0012 #include <linux/mm.h>
0013 #include <linux/mman.h>
0014 #include <linux/syscalls.h>
0015 #include <linux/swap.h>
0016 #include <linux/swapops.h>
0017 #include <linux/hugetlb.h>
0018 
0019 #include <linux/uaccess.h>
0020 #include <asm/pgtable.h>
0021 
0022 static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
0023             unsigned long end, struct mm_walk *walk)
0024 {
0025 #ifdef CONFIG_HUGETLB_PAGE
0026     unsigned char present;
0027     unsigned char *vec = walk->private;
0028 
0029     /*
0030      * Hugepages under user process are always in RAM and never
0031      * swapped out, but theoretically it needs to be checked.
0032      */
0033     present = pte && !huge_pte_none(huge_ptep_get(pte));
0034     for (; addr != end; vec++, addr += PAGE_SIZE)
0035         *vec = present;
0036     walk->private = vec;
0037 #else
0038     BUG();
0039 #endif
0040     return 0;
0041 }
0042 
0043 /*
0044  * Later we can get more picky about what "in core" means precisely.
0045  * For now, simply check to see if the page is in the page cache,
0046  * and is up to date; i.e. that no page-in operation would be required
0047  * at this time if an application were to map and access this page.
0048  */
static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
{
    unsigned char present = 0;
    struct page *page;

    /*
     * When tmpfs swaps out a page from a file, any process mapping that
     * file will not get a swp_entry_t in its pte, but rather it is like
     * any other file mapping (ie. marked !present and faulted in with
     * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
     */
#ifdef CONFIG_SWAP
    if (shmem_mapping(mapping)) {
        page = find_get_entry(mapping, pgoff);
        /*
         * shmem/tmpfs may return swap: account for swapcache
         * page too.
         */
        if (radix_tree_exceptional_entry(page)) {
            swp_entry_t swp = radix_to_swp_entry(page);
            page = find_get_page(swap_address_space(swp),
                         swp_offset(swp));
        }
    } else
        page = find_get_page(mapping, pgoff);
#else
    /* Without CONFIG_SWAP a shmem page cannot be swapped out anyway. */
    page = find_get_page(mapping, pgoff);
#endif
    if (page) {
        /* Count the page only if it is present AND fully uptodate. */
        present = PageUptodate(page);
        /* Drop the reference taken by find_get_page()/find_get_entry(). */
        put_page(page);
    }

    return present;
}
0084 
0085 static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
0086                 struct vm_area_struct *vma, unsigned char *vec)
0087 {
0088     unsigned long nr = (end - addr) >> PAGE_SHIFT;
0089     int i;
0090 
0091     if (vma->vm_file) {
0092         pgoff_t pgoff;
0093 
0094         pgoff = linear_page_index(vma, addr);
0095         for (i = 0; i < nr; i++, pgoff++)
0096             vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
0097     } else {
0098         for (i = 0; i < nr; i++)
0099             vec[i] = 0;
0100     }
0101     return nr;
0102 }
0103 
0104 static int mincore_unmapped_range(unsigned long addr, unsigned long end,
0105                    struct mm_walk *walk)
0106 {
0107     walk->private += __mincore_unmapped_range(addr, end,
0108                           walk->vma, walk->private);
0109     return 0;
0110 }
0111 
/*
 * Page-walk callback for one pmd's worth of ptes: record per-page
 * residency into the vector in walk->private, then advance it by the
 * number of pages covered.
 */
static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
            struct mm_walk *walk)
{
    spinlock_t *ptl;
    struct vm_area_struct *vma = walk->vma;
    pte_t *ptep;
    unsigned char *vec = walk->private;
    int nr = (end - addr) >> PAGE_SHIFT;

    /* A mapped transparent huge pmd means the whole range is resident. */
    ptl = pmd_trans_huge_lock(pmd, vma);
    if (ptl) {
        memset(vec, 1, nr);
        spin_unlock(ptl);
        goto out;
    }

    /* pmd may be racing with THP split/collapse: treat as unmapped. */
    if (pmd_trans_unstable(pmd)) {
        __mincore_unmapped_range(addr, end, vma, vec);
        goto out;
    }

    ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
    for (; addr != end; ptep++, addr += PAGE_SIZE) {
        pte_t pte = *ptep;

        if (pte_none(pte))
            /* No pte: the page may still be in the page cache. */
            __mincore_unmapped_range(addr, addr + PAGE_SIZE,
                         vma, vec);
        else if (pte_present(pte))
            *vec = 1;
        else { /* pte is a swap entry */
            swp_entry_t entry = pte_to_swp_entry(pte);

            if (non_swap_entry(entry)) {
                /*
                 * migration or hwpoison entries are always
                 * uptodate
                 */
                *vec = 1;
            } else {
#ifdef CONFIG_SWAP
                /* Resident iff still uptodate in the swap cache. */
                *vec = mincore_page(swap_address_space(entry),
                            swp_offset(entry));
#else
                WARN_ON(1);
                *vec = 1;
#endif
            }
        }
        vec++;
    }
    /* The loop left ptep one past the last entry it mapped. */
    pte_unmap_unlock(ptep - 1, ptl);
out:
    walk->private += nr;
    cond_resched();
    return 0;
}
0169 
0170 /*
0171  * Do a chunk of "sys_mincore()". We've already checked
0172  * all the arguments, we hold the mmap semaphore: we should
0173  * just return the amount of info we're asked for.
0174  */
0175 static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
0176 {
0177     struct vm_area_struct *vma;
0178     unsigned long end;
0179     int err;
0180     struct mm_walk mincore_walk = {
0181         .pmd_entry = mincore_pte_range,
0182         .pte_hole = mincore_unmapped_range,
0183         .hugetlb_entry = mincore_hugetlb,
0184         .private = vec,
0185     };
0186 
0187     vma = find_vma(current->mm, addr);
0188     if (!vma || addr < vma->vm_start)
0189         return -ENOMEM;
0190     mincore_walk.mm = vma->vm_mm;
0191     end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
0192     err = walk_page_range(addr, end, &mincore_walk);
0193     if (err < 0)
0194         return err;
0195     return (end - addr) >> PAGE_SHIFT;
0196 }
0197 
0198 /*
0199  * The mincore(2) system call.
0200  *
0201  * mincore() returns the memory residency status of the pages in the
0202  * current process's address space specified by [addr, addr + len).
0203  * The status is returned in a vector of bytes.  The least significant
0204  * bit of each byte is 1 if the referenced page is in memory, otherwise
0205  * it is zero.
0206  *
0207  * Because the status of a page can change after mincore() checks it
0208  * but before it returns to the application, the returned vector may
0209  * contain stale information.  Only locked pages are guaranteed to
0210  * remain in memory.
0211  *
0212  * return values:
0213  *  zero    - success
0214  *  -EFAULT - vec points to an illegal address
0215  *  -EINVAL - addr is not a multiple of PAGE_SIZE
0216  *  -ENOMEM - Addresses in the range [addr, addr + len] are
0217  *      invalid for the address space of this process, or
0218  *      specify one or more pages which are not currently
0219  *      mapped
0220  *  -EAGAIN - A kernel resource was temporarily unavailable.
0221  */
SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
        unsigned char __user *, vec)
{
    long retval;
    unsigned long pages;
    unsigned char *tmp;     /* one-page kernel bounce buffer for results */

    /* Check the start address: needs to be page-aligned.. */
    if (start & ~PAGE_MASK)
        return -EINVAL;

    /* ..and we need to be passed a valid user-space range */
    if (!access_ok(VERIFY_READ, (void __user *) start, len))
        return -ENOMEM;

    /* This also avoids any overflows on PAGE_ALIGN */
    pages = len >> PAGE_SHIFT;
    /* Round up: a partial trailing page still needs one vector byte. */
    pages += (offset_in_page(len)) != 0;

    /* One output byte per page, so the vector must hold 'pages' bytes. */
    if (!access_ok(VERIFY_WRITE, vec, pages))
        return -EFAULT;

    tmp = (void *) __get_free_page(GFP_USER);
    if (!tmp)
        return -EAGAIN;

    retval = 0;
    while (pages) {
        /*
         * Do at most PAGE_SIZE entries per iteration, due to
         * the temporary buffer size.
         */
        down_read(&current->mm->mmap_sem);
        /* retval: pages reported this chunk, or a negative error. */
        retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
        up_read(&current->mm->mmap_sem);

        if (retval <= 0)
            break;
        if (copy_to_user(vec, tmp, retval)) {
            retval = -EFAULT;
            break;
        }
        /* Advance past the chunk just reported. */
        pages -= retval;
        vec += retval;
        start += retval << PAGE_SHIFT;
        /* All pages consumed: loop exits with success (0). */
        retval = 0;
    }
    free_page((unsigned long) tmp);
    return retval;
}