#include <linux/pagewalk.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>
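
/*
 * We want to know the real page table level of an entry, ignoring any
 * folded levels. For example, if the p4d level is folded, a hole found
 * at depth 1 (p4d) is really a hole at depth 0 (pgd).
 */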
static int real_depth(int depth)
{
	if (depth == 3 && PTRS_PER_PMD == 1)
		depth = 2;
	if (depth == 2 && PTRS_PER_PUD == 1)
		depth = 1;
	if (depth == 1 && PTRS_PER_P4D == 1)
		depth = 0;
	return depth;
}

static int walk_pte_range_inner(pte_t *pte, unsigned long addr,
				unsigned long end, struct mm_walk *walk)
{
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	for (;;) {
		err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
		if (err)
			break;
		if (addr >= end - PAGE_SIZE)
			break;
		addr += PAGE_SIZE;
		pte++;
	}
	return err;
}

static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pte_t *pte;
	int err = 0;
	spinlock_t *ptl;

	if (walk->no_vma) {
		pte = pte_offset_map(pmd, addr);
		err = walk_pte_range_inner(pte, addr, end, walk);
		pte_unmap(pte);
	} else {
		pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
		err = walk_pte_range_inner(pte, addr, end, walk);
		pte_unmap_unlock(pte, ptl);
	}

	return err;
}

#ifdef CONFIG_ARCH_HAS_HUGEPD
static int walk_hugepd_range(hugepd_t *phpd, unsigned long addr,
			     unsigned long end, struct mm_walk *walk, int pdshift)
{
	int err = 0;
	const struct mm_walk_ops *ops = walk->ops;
	int shift = hugepd_shift(*phpd);
	int page_size = 1 << shift;

	if (!ops->pte_entry)
		return 0;

	if (addr & (page_size - 1))
		return 0;

	for (;;) {
		pte_t *pte;

		spin_lock(&walk->mm->page_table_lock);
		pte = hugepte_offset(*phpd, addr, pdshift);
		err = ops->pte_entry(pte, addr, addr + page_size, walk);
		spin_unlock(&walk->mm->page_table_lock);

		if (err)
			break;
		if (addr >= end - page_size)
			break;
		addr += page_size;
	}
	return err;
}
#else
static int walk_hugepd_range(hugepd_t *phpd, unsigned long addr,
			     unsigned long end, struct mm_walk *walk, int pdshift)
{
	return 0;
}
#endif

static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pmd_t *pmd;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;
	int depth = real_depth(3);

	pmd = pmd_offset(pud, addr);
	do {
again:
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, depth, walk);
			if (err)
				break;
			continue;
		}

		walk->action = ACTION_SUBTREE;
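
		/*
		 * ->pmd_entry() is called before any splitting below, so
		 * each handler needs to cope with pmd_trans_huge() pmds.
		 */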
		if (ops->pmd_entry)
			err = ops->pmd_entry(pmd, addr, next, walk);
		if (err)
			break;

		if (walk->action == ACTION_AGAIN)
			goto again;
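
		/*
		 * Check this here so we only split trans_huge pmds when a
		 * pte-level walk is actually needed.
		 */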
		if ((!walk->vma && (pmd_leaf(*pmd) || !pmd_present(*pmd))) ||
		    walk->action == ACTION_CONTINUE ||
		    !(ops->pte_entry))
			continue;

		if (walk->vma) {
			split_huge_pmd(walk->vma, pmd, addr);
			if (pmd_trans_unstable(pmd))
				goto again;
		}

		if (is_hugepd(__hugepd(pmd_val(*pmd))))
			err = walk_hugepd_range((hugepd_t *)pmd, addr, next, walk, PMD_SHIFT);
		else
			err = walk_pte_range(pmd, addr, next, walk);
		if (err)
			break;
	} while (pmd++, addr = next, addr != end);

	return err;
}

static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pud_t *pud;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;
	int depth = real_depth(2);

	pud = pud_offset(p4d, addr);
	do {
again:
		next = pud_addr_end(addr, end);
		if (pud_none(*pud)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, depth, walk);
			if (err)
				break;
			continue;
		}

		walk->action = ACTION_SUBTREE;

		if (ops->pud_entry)
			err = ops->pud_entry(pud, addr, next, walk);
		if (err)
			break;

		if (walk->action == ACTION_AGAIN)
			goto again;

		if ((!walk->vma && (pud_leaf(*pud) || !pud_present(*pud))) ||
		    walk->action == ACTION_CONTINUE ||
		    !(ops->pmd_entry || ops->pte_entry))
			continue;

		if (walk->vma)
			split_huge_pud(walk->vma, pud, addr);
		if (pud_none(*pud))
			goto again;

		if (is_hugepd(__hugepd(pud_val(*pud))))
			err = walk_hugepd_range((hugepd_t *)pud, addr, next, walk, PUD_SHIFT);
		else
			err = walk_pmd_range(pud, addr, next, walk);
		if (err)
			break;
	} while (pud++, addr = next, addr != end);

	return err;
}

static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	p4d_t *p4d;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;
	int depth = real_depth(1);

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none_or_clear_bad(p4d)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, depth, walk);
			if (err)
				break;
			continue;
		}
		if (ops->p4d_entry) {
			err = ops->p4d_entry(p4d, addr, next, walk);
			if (err)
				break;
		}
		if (is_hugepd(__hugepd(p4d_val(*p4d))))
			err = walk_hugepd_range((hugepd_t *)p4d, addr, next, walk, P4D_SHIFT);
		else if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
			err = walk_pud_range(p4d, addr, next, walk);
		if (err)
			break;
	} while (p4d++, addr = next, addr != end);

	return err;
}

static int walk_pgd_range(unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pgd_t *pgd;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	if (walk->pgd)
		pgd = walk->pgd + pgd_index(addr);
	else
		pgd = pgd_offset(walk->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, 0, walk);
			if (err)
				break;
			continue;
		}
		if (ops->pgd_entry) {
			err = ops->pgd_entry(pgd, addr, next, walk);
			if (err)
				break;
		}
		if (is_hugepd(__hugepd(pgd_val(*pgd))))
			err = walk_hugepd_range((hugepd_t *)pgd, addr, next, walk, PGDIR_SHIFT);
		else if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry || ops->pte_entry)
			err = walk_p4d_range(pgd, addr, next, walk);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);

	return err;
}

#ifdef CONFIG_HUGETLB_PAGE
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
				       unsigned long end)
{
	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
	return boundary < end ? boundary : end;
}

static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct hstate *h = hstate_vma(vma);
	unsigned long next;
	unsigned long hmask = huge_page_mask(h);
	unsigned long sz = huge_page_size(h);
	pte_t *pte;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	do {
		next = hugetlb_entry_end(h, addr, end);
		pte = huge_pte_offset(walk->mm, addr & hmask, sz);

		if (pte)
			err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
		else if (ops->pte_hole)
			err = ops->pte_hole(addr, next, -1, walk);

		if (err)
			break;
	} while (addr = next, addr != end);

	return err;
}

#else
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	return 0;
}

#endif
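
/*
 * Decide whether we really walk over the current vma on [@start, @end)
 * or skip it. Returning 0 means "walk this vma", 1 means "skip it", and
 * a negative value aborts the walk with that error.
 */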
static int walk_page_test(unsigned long start, unsigned long end,
			  struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	const struct mm_walk_ops *ops = walk->ops;

	if (ops->test_walk)
		return ops->test_walk(start, end, walk);
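
	/*
	 * A VM_PFNMAP vma maps raw PFNs rather than normal memory, so it
	 * is handled as if it were a hole: report it via ->pte_hole() (if
	 * set) and skip it by returning 1, unless the callback fails.
	 */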
	if (vma->vm_flags & VM_PFNMAP) {
		int err = 1;
		if (ops->pte_hole)
			err = ops->pte_hole(start, end, -1, walk);
		return err ? err : 1;
	}
	return 0;
}

static int __walk_page_range(unsigned long start, unsigned long end,
			     struct mm_walk *walk)
{
	int err = 0;
	struct vm_area_struct *vma = walk->vma;
	const struct mm_walk_ops *ops = walk->ops;

	if (ops->pre_vma) {
		err = ops->pre_vma(start, end, walk);
		if (err)
			return err;
	}

	if (is_vm_hugetlb_page(vma)) {
		if (ops->hugetlb_entry)
			err = walk_hugetlb_range(start, end, walk);
	} else
		err = walk_pgd_range(start, end, walk);

	if (ops->post_vma)
		ops->post_vma(walk);

	return err;
}
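
/**
 * walk_page_range - walk the page tables of a virtual address range with
 *		     caller-supplied callbacks
 * @mm:		mm_struct of the target address space
 * @start:	start address of the virtual address range
 * @end:	end address of the virtual address range
 * @ops:	operations to call for each entry type that is found
 * @private:	private data passed to the callbacks through walk->private
 *
 * For each page table entry within each vma overlapping [@start, @end), the
 * callback for the matching level (pgd/p4d/pud/pmd/pte) is invoked; holes are
 * reported through ->pte_hole() and hugetlb vmas through ->hugetlb_entry().
 * ->test_walk() may be used to skip individual vmas, and ranges not covered
 * by any vma are reported as holes with depth -1. A negative return value
 * from any callback aborts the walk and is passed back to the caller.
 *
 * The caller must hold the mmap lock of @mm (asserted below).
 *
 * A minimal sketch of a caller (the ops struct and callback names here are
 * hypothetical, not defined in this file):
 *
 *	static int my_pte_entry(pte_t *pte, unsigned long addr,
 *				unsigned long next, struct mm_walk *walk)
 *	{
 *		// examine the single pte at addr; walk->private is available
 *		return 0;
 *	}
 *
 *	static const struct mm_walk_ops my_ops = {
 *		.pte_entry = my_pte_entry,
 *	};
 *
 *	mmap_read_lock(mm);
 *	err = walk_page_range(mm, start, end, &my_ops, NULL);
 *	mmap_read_unlock(mm);
 */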
int walk_page_range(struct mm_struct *mm, unsigned long start,
		    unsigned long end, const struct mm_walk_ops *ops,
		    void *private)
{
	int err = 0;
	unsigned long next;
	struct vm_area_struct *vma;
	struct mm_walk walk = {
		.ops = ops,
		.mm = mm,
		.private = private,
	};

	if (start >= end)
		return -EINVAL;

	if (!walk.mm)
		return -EINVAL;

	mmap_assert_locked(walk.mm);

	vma = find_vma(walk.mm, start);
	do {
		if (!vma) {
			walk.vma = NULL;
			next = end;
			if (ops->pte_hole)
				err = ops->pte_hole(start, next, -1, &walk);
		} else if (start < vma->vm_start) {
			walk.vma = NULL;
			next = min(end, vma->vm_start);
			if (ops->pte_hole)
				err = ops->pte_hole(start, next, -1, &walk);
		} else {
			walk.vma = vma;
			next = min(end, vma->vm_end);
			vma = vma->vm_next;

			err = walk_page_test(start, next, &walk);
			if (err > 0) {
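				/*
				 * Positive return values are purely for
				 * controlling the pagewalk; they are never
				 * passed back to the caller.
				 */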
				err = 0;
				continue;
			}
			if (err < 0)
				break;
			err = __walk_page_range(start, next, &walk);
		}
		if (err)
			break;
	} while (start = next, start < end);
	return err;
}
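
/*
 * Similar to walk_page_range() but walks the page tables directly without
 * consulting vmas: no ->test_walk(), ->pre_vma()/->post_vma() or hugetlb
 * handling, and walk_pte_range() maps ptes without taking the pte lock
 * because walk->no_vma is set. A non-NULL @pgd overrides the mm's pgd,
 * which allows walking page tables that do not belong to @mm (e.g. the
 * kernel's). The caller must hold the mmap lock of @mm for writing.
 */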
int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
			  unsigned long end, const struct mm_walk_ops *ops,
			  pgd_t *pgd,
			  void *private)
{
	struct mm_walk walk = {
		.ops = ops,
		.mm = mm,
		.pgd = pgd,
		.private = private,
		.no_vma = true
	};

	if (start >= end || !walk.mm)
		return -EINVAL;

	mmap_assert_write_locked(walk.mm);

	return walk_pgd_range(start, end, &walk);
}

int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
		  void *private)
{
	struct mm_walk walk = {
		.ops = ops,
		.mm = vma->vm_mm,
		.vma = vma,
		.private = private,
	};
	int err;

	if (!walk.mm)
		return -EINVAL;

	mmap_assert_locked(walk.mm);

	err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
	if (err > 0)
		return 0;
	if (err < 0)
		return err;
	return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
}
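
/**
 * walk_page_mapping - walk all memory areas mapped into a struct address_space
 * @mapping:	pointer to the struct address_space
 * @first_index: first page offset in the address_space
 * @nr:		number of pages to cover, starting at @first_index
 * @ops:	operations to call for each entry type that is found
 * @private:	private data passed to the callbacks through walk->private
 *
 * Walks every vma mapping the given index window of @mapping, clipping the
 * walk of each vma to the part that overlaps [@first_index, @first_index + @nr).
 * If walk_page_test() skips a vma (positive return), the remaining vmas are
 * not walked and 0 is returned; a negative callback return aborts the walk
 * and is passed back to the caller.
 *
 * Locking: the caller must hold @mapping->i_mmap_rwsem; the mmap lock of the
 * owning processes is not taken here, which the callbacks need to be aware of.
 */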
int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
		      pgoff_t nr, const struct mm_walk_ops *ops,
		      void *private)
{
	struct mm_walk walk = {
		.ops = ops,
		.private = private,
	};
	struct vm_area_struct *vma;
	pgoff_t vba, vea, cba, cea;
	unsigned long start_addr, end_addr;
	int err = 0;

	lockdep_assert_held(&mapping->i_mmap_rwsem);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
				  first_index + nr - 1) {
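		/* Clip the walk to the overlap of this vma and the index window. */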
		vba = vma->vm_pgoff;
		vea = vba + vma_pages(vma);
		cba = first_index;
		cba = max(cba, vba);
		cea = first_index + nr;
		cea = min(cea, vea);

		start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start;
		end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start;
		if (start_addr >= end_addr)
			continue;

		walk.vma = vma;
		walk.mm = vma->vm_mm;

		err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
		if (err > 0) {
			err = 0;
			break;
		} else if (err < 0)
			break;

		err = __walk_page_range(start_addr, end_addr, &walk);
		if (err)
			break;
	}

	return err;
}