// SPDX-License-Identifier: GPL-2.0
/*
 * DAMON Operations for Virtual Address Spaces
 */

#define pr_fmt(fmt) "damon-va: " fmt

#include <asm-generic/mman-common.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>

#include "ops-common.h"

#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
#undef DAMON_MIN_REGION
#define DAMON_MIN_REGION 1
#endif

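/*
 * 't->pid' should be the pointer to the relevant 'struct pid' holding a
 * reference count.  The caller must put the returned task, unless it is NULL.
 */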
static inline struct task_struct *damon_get_task_struct(struct damon_target *t)
{
	return get_pid_task(t->pid, PIDTYPE_PID);
}

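/*
 * Get the mm_struct of the given target
 *
 * The caller must put the mm_struct after use, unless it is NULL.
 *
 * Returns the mm_struct of the target on success, NULL on failure.
 */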
static struct mm_struct *damon_get_mm(struct damon_target *t)
{
	struct task_struct *task;
	struct mm_struct *mm;

	task = damon_get_task_struct(t);
	if (!task)
		return NULL;

	mm = get_task_mm(task);
	put_task_struct(task);
	return mm;
}

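/*
 * Size-evenly split a region into 'nr_pieces' small regions
 *
 * Returns 0 on success, or negative error code otherwise.
 */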
static int damon_va_evenly_split_region(struct damon_target *t,
		struct damon_region *r, unsigned int nr_pieces)
{
	unsigned long sz_orig, sz_piece, orig_end;
	struct damon_region *n = NULL, *next;
	unsigned long start;

	if (!r || !nr_pieces)
		return -EINVAL;

	orig_end = r->ar.end;
	sz_orig = r->ar.end - r->ar.start;
	sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION);

	if (!sz_piece)
		return -EINVAL;

	r->ar.end = r->ar.start + sz_piece;
	next = damon_next_region(r);
	for (start = r->ar.end; start + sz_piece <= orig_end;
			start += sz_piece) {
		n = damon_new_region(start, start + sz_piece);
		if (!n)
			return -ENOMEM;
		damon_insert_region(n, r, next, t);
		r = n;
	}

	/* complement the last region for possible rounding error */
	if (n)
		n->ar.end = orig_end;

	return 0;
}

static unsigned long sz_range(struct damon_addr_range *r)
{
	return r->end - r->start;
}

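/*
 * Find three regions separated by the two biggest unmapped regions
 *
 * vma		the head vma of the target address space
 * regions	an array of three address ranges in which the results are saved
 *
 * This function receives an address space and finds three regions in it which
 * are separated by the two biggest unmapped regions in the space.  Please
 * refer to the comment of '__damon_va_init_regions()' to know why this is
 * necessary.
 *
 * Returns 0 on success, or negative error code otherwise.
 */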
static int __damon_va_three_regions(struct vm_area_struct *vma,
				       struct damon_addr_range regions[3])
{
	struct damon_addr_range gap = {0}, first_gap = {0}, second_gap = {0};
	struct vm_area_struct *last_vma = NULL;
	unsigned long start = 0;
	struct rb_root rbroot;

	/* Find the two biggest gaps so that first_gap > second_gap > others */
	for (; vma; vma = vma->vm_next) {
		if (!last_vma) {
			start = vma->vm_start;
			goto next;
		}

		if (vma->rb_subtree_gap <= sz_range(&second_gap)) {
			rbroot.rb_node = &vma->vm_rb;
			vma = rb_entry(rb_last(&rbroot),
					struct vm_area_struct, vm_rb);
			goto next;
		}

		gap.start = last_vma->vm_end;
		gap.end = vma->vm_start;
		if (sz_range(&gap) > sz_range(&second_gap)) {
			swap(gap, second_gap);
			if (sz_range(&second_gap) > sz_range(&first_gap))
				swap(second_gap, first_gap);
		}
next:
		last_vma = vma;
	}

	if (!sz_range(&second_gap) || !sz_range(&first_gap))
		return -EINVAL;

	/* Sort the two biggest gaps by address */
	if (first_gap.start > second_gap.start)
		swap(first_gap, second_gap);

	/* Store the result */
	regions[0].start = ALIGN(start, DAMON_MIN_REGION);
	regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION);
	regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION);
	regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION);
	regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION);
	regions[2].end = ALIGN(last_vma->vm_end, DAMON_MIN_REGION);

	return 0;
}

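/*
 * Get the three regions in the given target (task)
 *
 * Returns 0 on success, negative error code otherwise.
 */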
static int damon_va_three_regions(struct damon_target *t,
				struct damon_addr_range regions[3])
{
	struct mm_struct *mm;
	int rc;

	mm = damon_get_mm(t);
	if (!mm)
		return -EINVAL;

	mmap_read_lock(mm);
	rc = __damon_va_three_regions(mm->mmap, regions);
	mmap_read_unlock(mm);

	mmput(mm);
	return rc;
}

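/*
 * Initialize the monitoring target regions for the given target (task)
 *
 * t	the given target
 *
 * Because only a small number of portions of the entire address space are
 * actually mapped and accessed, monitoring the unmapped regions is wasteful.
 * Tracking every individual mapping, on the other hand, could incur a high
 * overhead when mappings change frequently or are numerous.  The adaptive
 * regions adjustment mechanism can tolerate small unmapped areas by simply
 * identifying them as regions with no access, so only the huge unmapped
 * areas need to be excluded from the monitoring targets.
 *
 * For this reason, the mappings are converted into three distinct regions
 * that cover every mapped area of the address space, chosen so that the two
 * gaps between the three regions are the two biggest unmapped areas in the
 * address space:
 *
 *     [mappings[0]->start, big_two_unmapped_areas[0]->start)
 *     [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start)
 *     [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end)
 *
 * In the usual memory map of a process, the gap between the heap and the
 * uppermost mmap()-ed region, and the gap between the lowermost mmap()-ed
 * region and the stack, are the two biggest unmapped regions:
 *
 *   <heap>
 *   <BIG UNMAPPED REGION 1>
 *   <uppermost mmap()-ed region>
 *   (other mmap()-ed regions and small unmapped regions)
 *   <lowermost mmap()-ed region>
 *   <BIG UNMAPPED REGION 2>
 *   <stack>
 */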
static void __damon_va_init_regions(struct damon_ctx *ctx,
				     struct damon_target *t)
{
	struct damon_target *ti;
	struct damon_region *r;
	struct damon_addr_range regions[3];
	unsigned long sz = 0, nr_pieces;
	int i, tidx = 0;

	if (damon_va_three_regions(t, regions)) {
		damon_for_each_target(ti, ctx) {
			if (ti == t)
				break;
			tidx++;
		}
		pr_debug("Failed to get three regions of %dth target\n", tidx);
		return;
	}

	for (i = 0; i < 3; i++)
		sz += regions[i].end - regions[i].start;
	if (ctx->min_nr_regions)
		sz /= ctx->min_nr_regions;
	if (sz < DAMON_MIN_REGION)
		sz = DAMON_MIN_REGION;

	/* Set the initial three regions of the target */
	for (i = 0; i < 3; i++) {
		r = damon_new_region(regions[i].start, regions[i].end);
		if (!r) {
			pr_err("%d'th init region creation failed\n", i);
			return;
		}
		damon_add_region(r, t);

		nr_pieces = (regions[i].end - regions[i].start) / sz;
		damon_va_evenly_split_region(t, r, nr_pieces);
	}
}

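/* Initialize the monitoring target regions of every target (task) */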
static void damon_va_init(struct damon_ctx *ctx)
{
	struct damon_target *t;

	damon_for_each_target(t, ctx) {
		/* the user may set the target regions as they want */
		if (!damon_nr_regions(t))
			__damon_va_init_regions(ctx, t);
	}
}

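/*
 * Update the monitoring target regions for the current memory mappings
 */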
static void damon_va_update(struct damon_ctx *ctx)
{
	struct damon_addr_range three_regions[3];
	struct damon_target *t;

	damon_for_each_target(t, ctx) {
		if (damon_va_three_regions(t, three_regions))
			continue;
		damon_set_regions(t, three_regions, 3);
	}
}

static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
		unsigned long next, struct mm_walk *walk)
{
	pte_t *pte;
	spinlock_t *ptl;

	if (pmd_huge(*pmd)) {
		ptl = pmd_lock(walk->mm, pmd);
		if (pmd_huge(*pmd)) {
			damon_pmdp_mkold(pmd, walk->mm, addr);
			spin_unlock(ptl);
			return 0;
		}
		spin_unlock(ptl);
	}

	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		return 0;
	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	if (!pte_present(*pte))
		goto out;
	damon_ptep_mkold(pte, walk->mm, addr);
out:
	pte_unmap_unlock(pte, ptl);
	return 0;
}

#ifdef CONFIG_HUGETLB_PAGE
static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
				struct vm_area_struct *vma, unsigned long addr)
{
	bool referenced = false;
	pte_t entry = huge_ptep_get(pte);
	struct page *page = pte_page(entry);

	get_page(page);

	if (pte_young(entry)) {
		referenced = true;
		entry = pte_mkold(entry);
		set_huge_pte_at(mm, addr, pte, entry);
	}

#ifdef CONFIG_MMU_NOTIFIER
	if (mmu_notifier_clear_young(mm, addr,
				     addr + huge_page_size(hstate_vma(vma))))
		referenced = true;
#endif /* CONFIG_MMU_NOTIFIER */

	if (referenced)
		set_page_young(page);

	set_page_idle(page);
	put_page(page);
}

static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
				     unsigned long addr, unsigned long end,
				     struct mm_walk *walk)
{
	struct hstate *h = hstate_vma(walk->vma);
	spinlock_t *ptl;
	pte_t entry;

	ptl = huge_pte_lock(h, walk->mm, pte);
	entry = huge_ptep_get(pte);
	if (!pte_present(entry))
		goto out;

	damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr);

out:
	spin_unlock(ptl);
	return 0;
}
#else
#define damon_mkold_hugetlb_entry NULL
#endif /* CONFIG_HUGETLB_PAGE */

static const struct mm_walk_ops damon_mkold_ops = {
	.pmd_entry = damon_mkold_pmd_entry,
	.hugetlb_entry = damon_mkold_hugetlb_entry,
};

static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
{
	mmap_read_lock(mm);
	walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
	mmap_read_unlock(mm);
}

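/*
 * Functions for the access checking of the regions
 */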
static void __damon_va_prepare_access_check(struct damon_ctx *ctx,
			struct mm_struct *mm, struct damon_region *r)
{
	r->sampling_addr = damon_rand(r->ar.start, r->ar.end);

	damon_va_mkold(mm, r->sampling_addr);
}

static void damon_va_prepare_access_checks(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct mm_struct *mm;
	struct damon_region *r;

	damon_for_each_target(t, ctx) {
		mm = damon_get_mm(t);
		if (!mm)
			continue;
		damon_for_each_region(r, t)
			__damon_va_prepare_access_check(ctx, mm, r);
		mmput(mm);
	}
}

struct damon_young_walk_private {
	unsigned long *page_sz;
	bool young;
};

static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
		unsigned long next, struct mm_walk *walk)
{
	pte_t *pte;
	spinlock_t *ptl;
	struct page *page;
	struct damon_young_walk_private *priv = walk->private;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (pmd_huge(*pmd)) {
		ptl = pmd_lock(walk->mm, pmd);
		if (!pmd_huge(*pmd)) {
			spin_unlock(ptl);
			goto regular_page;
		}
		page = damon_get_page(pmd_pfn(*pmd));
		if (!page)
			goto huge_out;
		if (pmd_young(*pmd) || !page_is_idle(page) ||
					mmu_notifier_test_young(walk->mm,
						addr)) {
			*priv->page_sz = HPAGE_PMD_SIZE;
			priv->young = true;
		}
		put_page(page);
huge_out:
		spin_unlock(ptl);
		return 0;
	}

regular_page:
#endif	/* CONFIG_TRANSPARENT_HUGEPAGE */

	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		return -EINVAL;
	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	if (!pte_present(*pte))
		goto out;
	page = damon_get_page(pte_pfn(*pte));
	if (!page)
		goto out;
	if (pte_young(*pte) || !page_is_idle(page) ||
			mmu_notifier_test_young(walk->mm, addr)) {
		*priv->page_sz = PAGE_SIZE;
		priv->young = true;
	}
	put_page(page);
out:
	pte_unmap_unlock(pte, ptl);
	return 0;
}

#ifdef CONFIG_HUGETLB_PAGE
static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
				     unsigned long addr, unsigned long end,
				     struct mm_walk *walk)
{
	struct damon_young_walk_private *priv = walk->private;
	struct hstate *h = hstate_vma(walk->vma);
	struct page *page;
	spinlock_t *ptl;
	pte_t entry;

	ptl = huge_pte_lock(h, walk->mm, pte);
	entry = huge_ptep_get(pte);
	if (!pte_present(entry))
		goto out;

	page = pte_page(entry);
	get_page(page);

	if (pte_young(entry) || !page_is_idle(page) ||
	    mmu_notifier_test_young(walk->mm, addr)) {
		*priv->page_sz = huge_page_size(h);
		priv->young = true;
	}

	put_page(page);

out:
	spin_unlock(ptl);
	return 0;
}
#else
#define damon_young_hugetlb_entry NULL
#endif /* CONFIG_HUGETLB_PAGE */

static const struct mm_walk_ops damon_young_ops = {
	.pmd_entry = damon_young_pmd_entry,
	.hugetlb_entry = damon_young_hugetlb_entry,
};

static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
		unsigned long *page_sz)
{
	struct damon_young_walk_private arg = {
		.page_sz = page_sz,
		.young = false,
	};

	mmap_read_lock(mm);
	walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
	mmap_read_unlock(mm);
	return arg.young;
}

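/*
 * Check whether the region was accessed after the last preparation
 *
 * mm	'mm_struct' for the given virtual address space
 * r	the region to be checked
 */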
static void __damon_va_check_access(struct damon_ctx *ctx,
			       struct mm_struct *mm, struct damon_region *r)
{
	static struct mm_struct *last_mm;
	static unsigned long last_addr;
	static unsigned long last_page_sz = PAGE_SIZE;
	static bool last_accessed;

	/* If the region is in the last checked page, reuse the result */
	if (mm == last_mm && (ALIGN_DOWN(last_addr, last_page_sz) ==
				ALIGN_DOWN(r->sampling_addr, last_page_sz))) {
		if (last_accessed)
			r->nr_accesses++;
		return;
	}

	last_accessed = damon_va_young(mm, r->sampling_addr, &last_page_sz);
	if (last_accessed)
		r->nr_accesses++;

	last_mm = mm;
	last_addr = r->sampling_addr;
}

static unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct mm_struct *mm;
	struct damon_region *r;
	unsigned int max_nr_accesses = 0;

	damon_for_each_target(t, ctx) {
		mm = damon_get_mm(t);
		if (!mm)
			continue;
		damon_for_each_region(r, t) {
			__damon_va_check_access(ctx, mm, r);
			max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
		}
		mmput(mm);
	}

	return max_nr_accesses;
}

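/*
 * Functions for the target validity check and cleanup
 */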
static bool damon_va_target_valid(void *target)
{
	struct damon_target *t = target;
	struct task_struct *task;

	task = damon_get_task_struct(t);
	if (task) {
		put_task_struct(task);
		return true;
	}

	return false;
}

#ifndef CONFIG_ADVISE_SYSCALLS
static unsigned long damos_madvise(struct damon_target *target,
		struct damon_region *r, int behavior)
{
	return 0;
}
#else
static unsigned long damos_madvise(struct damon_target *target,
		struct damon_region *r, int behavior)
{
	struct mm_struct *mm;
	unsigned long start = PAGE_ALIGN(r->ar.start);
	unsigned long len = PAGE_ALIGN(r->ar.end - r->ar.start);
	unsigned long applied;

	mm = damon_get_mm(target);
	if (!mm)
		return 0;

	applied = do_madvise(mm, start, len, behavior) ? 0 : len;
	mmput(mm);

	return applied;
}
#endif	/* CONFIG_ADVISE_SYSCALLS */

static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		struct damos *scheme)
{
	int madv_action;

	switch (scheme->action) {
	case DAMOS_WILLNEED:
		madv_action = MADV_WILLNEED;
		break;
	case DAMOS_COLD:
		madv_action = MADV_COLD;
		break;
	case DAMOS_PAGEOUT:
		madv_action = MADV_PAGEOUT;
		break;
	case DAMOS_HUGEPAGE:
		madv_action = MADV_HUGEPAGE;
		break;
	case DAMOS_NOHUGEPAGE:
		madv_action = MADV_NOHUGEPAGE;
		break;
	case DAMOS_STAT:
		return 0;
	default:
		return 0;
	}

	return damos_madvise(t, r, madv_action);
}

static int damon_va_scheme_score(struct damon_ctx *context,
		struct damon_target *t, struct damon_region *r,
		struct damos *scheme)
{

	switch (scheme->action) {
	case DAMOS_PAGEOUT:
		return damon_pageout_score(context, r, scheme);
	default:
		break;
	}

	return DAMOS_MAX_SCORE;
}

static int __init damon_va_initcall(void)
{
	struct damon_operations ops = {
		.id = DAMON_OPS_VADDR,
		.init = damon_va_init,
		.update = damon_va_update,
		.prepare_access_checks = damon_va_prepare_access_checks,
		.check_accesses = damon_va_check_accesses,
		.reset_aggregated = NULL,
		.target_valid = damon_va_target_valid,
		.cleanup = NULL,
		.apply_scheme = damon_va_apply_scheme,
		.get_scheme_score = damon_va_scheme_score,
	};

	struct damon_operations ops_fvaddr = ops;
	int err;

	/* Don't set the monitoring target regions for the entire mapping */
	ops_fvaddr.id = DAMON_OPS_FVADDR;
	ops_fvaddr.init = NULL;
	ops_fvaddr.update = NULL;

	err = damon_register_ops(&ops);
	if (err)
		return err;
	return damon_register_ops(&ops_fvaddr);
}

subsys_initcall(damon_va_initcall);

#include "vaddr-test.h"