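/*
 * The mincore() system call.
 */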
#include <linux/pagemap.h>
#include <linux/gfp.h>
#include <linux/pagewalk.h>
#include <linux/mman.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/shmem_fs.h>
#include <linux/hugetlb.h>
#include <linux/pgtable.h>

#include <linux/uaccess.h>
#include "swap.h"

static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
			unsigned long end, struct mm_walk *walk)
{
#ifdef CONFIG_HUGETLB_PAGE
	unsigned char present;
	unsigned char *vec = walk->private;
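
	/*
	 * Hugetlb pages mapped by a user process are resident in RAM and
	 * are never swapped out, so a populated huge pte means every page
	 * it covers is in core.
	 */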
	present = pte && !huge_pte_none(huge_ptep_get(pte));
	for (; addr != end; vec++, addr += PAGE_SIZE)
		*vec = present;
	walk->private = vec;
#else
	BUG();
#endif
	return 0;
}
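
/*
 * A page is considered "in core" if it is present in the page cache
 * (or swap cache) and is up to date, i.e. mapping and touching it now
 * would not require a page-in operation.
 */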
static unsigned char mincore_page(struct address_space *mapping, pgoff_t index)
{
	unsigned char present = 0;
	struct page *page;
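
	/*
	 * Shmem/tmpfs pages that have been swapped out do not leave a swap
	 * entry in the page tables of processes mapping the file; the
	 * mapping simply becomes !present.  find_get_incore_page() also
	 * checks the swap cache, so such pages are still reported here.
	 */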
	page = find_get_incore_page(mapping, index);
	if (page) {
		present = PageUptodate(page);
		put_page(page);
	}

	return present;
}

static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
				struct vm_area_struct *vma, unsigned char *vec)
{
	unsigned long nr = (end - addr) >> PAGE_SHIFT;
	int i;

	if (vma->vm_file) {
		pgoff_t pgoff;

		pgoff = linear_page_index(vma, addr);
		for (i = 0; i < nr; i++, pgoff++)
			vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
	} else {
		for (i = 0; i < nr; i++)
			vec[i] = 0;
	}
	return nr;
}

static int mincore_unmapped_range(unsigned long addr, unsigned long end,
				__always_unused int depth,
				struct mm_walk *walk)
{
	walk->private += __mincore_unmapped_range(addr, end,
						walk->vma, walk->private);
	return 0;
}

static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			struct mm_walk *walk)
{
	spinlock_t *ptl;
	struct vm_area_struct *vma = walk->vma;
	pte_t *ptep;
	unsigned char *vec = walk->private;
	int nr = (end - addr) >> PAGE_SHIFT;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		memset(vec, 1, nr);
		spin_unlock(ptl);
		goto out;
	}

	if (pmd_trans_unstable(pmd)) {
		__mincore_unmapped_range(addr, end, vma, vec);
		goto out;
	}

	ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	for (; addr != end; ptep++, addr += PAGE_SIZE) {
		pte_t pte = *ptep;
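
		/*
		 * pte_none_mostly() also matches pte markers; treat those
		 * like an unmapped range and fall back to the page cache
		 * lookup done for unmapped ranges.
		 */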
		if (pte_none_mostly(pte))
			__mincore_unmapped_range(addr, addr + PAGE_SIZE,
						vma, vec);
		else if (pte_present(pte))
			*vec = 1;
		else {
			swp_entry_t entry = pte_to_swp_entry(pte);

			if (non_swap_entry(entry)) {
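				/*
				 * Migration and hwpoison entries always refer
				 * to up-to-date pages, so report them as
				 * resident.
				 */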
				*vec = 1;
			} else {
#ifdef CONFIG_SWAP
				*vec = mincore_page(swap_address_space(entry),
						    swp_offset(entry));
#else
				WARN_ON(1);
				*vec = 1;
#endif
			}
		}
		vec++;
	}
	pte_unmap_unlock(ptep - 1, ptl);
out:
	walk->private += nr;
	cond_resched();
	return 0;
}

static inline bool can_do_mincore(struct vm_area_struct *vma)
{
	if (vma_is_anonymous(vma))
		return true;
	if (!vma->vm_file)
		return false;
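	/*
	 * Reveal page cache residency only for mappings of files that the
	 * caller owns or could open for writing; otherwise mincore() on a
	 * shared, read-only mapping would leak information about other
	 * users' page cache (a side channel).
	 */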
	return inode_owner_or_capable(&init_user_ns,
				      file_inode(vma->vm_file)) ||
	       file_permission(vma->vm_file, MAY_WRITE) == 0;
}

static const struct mm_walk_ops mincore_walk_ops = {
	.pmd_entry = mincore_pte_range,
	.pte_hole = mincore_unmapped_range,
	.hugetlb_entry = mincore_hugetlb,
};
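
/*
 * Do one chunk of the mincore() work: the arguments have already been
 * validated and mmap_lock is held for reading, so just fill in as much
 * of the vector as fits in the current vma and report how many pages
 * were handled.
 */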
static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
{
	struct vm_area_struct *vma;
	unsigned long end;
	int err;

	vma = find_vma(current->mm, addr);
	if (!vma || addr < vma->vm_start)
		return -ENOMEM;
	end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
	if (!can_do_mincore(vma)) {
		unsigned long pages = DIV_ROUND_UP(end - addr, PAGE_SIZE);
		memset(vec, 1, pages);
		return pages;
	}
	err = walk_page_range(vma->vm_mm, addr, end, &mincore_walk_ops, vec);
	if (err < 0)
		return err;
	return (end - addr) >> PAGE_SHIFT;
}
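
/*
 * The mincore(2) system call.
 *
 * mincore() returns the memory residency status of the pages in the
 * current process's address space specified by [start, start + len).
 * The status is returned in a vector of bytes.  Each byte is nonzero
 * if the corresponding page is resident, and zero otherwise.
 *
 * Because the status of a page can change after mincore() checks it
 * but before it returns to the application, the returned vector may
 * contain stale information.
 *
 * Return values:
 *  zero    - success
 *  -EFAULT - vec points to an illegal address
 *  -EINVAL - start is not a multiple of PAGE_SIZE
 *  -ENOMEM - part of the range is invalid for the address space of
 *            this process
 *  -EAGAIN - the temporary buffer could not be allocated
 */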
SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
		unsigned char __user *, vec)
{
	long retval;
	unsigned long pages;
	unsigned char *tmp;

	start = untagged_addr(start);
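
	/* The start address must be page aligned. */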
	if (start & ~PAGE_MASK)
		return -EINVAL;
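
	/* ..and we must have been passed a valid user-space range. */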
	if (!access_ok((void __user *) start, len))
		return -ENOMEM;
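
	/* Computing the page count this way avoids overflow in PAGE_ALIGN(len). */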
	pages = len >> PAGE_SHIFT;
	pages += (offset_in_page(len)) != 0;

	if (!access_ok(vec, pages))
		return -EFAULT;

	tmp = (void *) __get_free_page(GFP_USER);
	if (!tmp)
		return -EAGAIN;

	retval = 0;
	while (pages) {
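		/*
		 * Handle at most PAGE_SIZE entries per iteration, due to
		 * the size of the temporary buffer.
		 */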
		mmap_read_lock(current->mm);
		retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
		mmap_read_unlock(current->mm);

		if (retval <= 0)
			break;
		if (copy_to_user(vec, tmp, retval)) {
			retval = -EFAULT;
			break;
		}
		pages -= retval;
		vec += retval;
		start += retval << PAGE_SHIFT;
		retval = 0;
	}
	free_page((unsigned long) tmp);
	return retval;
}