// SPDX-License-Identifier: GPL-2.0
/*
 * Data Access Monitor
 *
 * Author: SeongJae Park <sjpark@amazon.de>
 */

#define pr_fmt(fmt) "damon: " fmt

#include <linux/damon.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>

#define CREATE_TRACE_POINTS
#include <trace/events/damon.h>

#ifdef CONFIG_DAMON_KUNIT_TEST
#undef DAMON_MIN_REGION
#define DAMON_MIN_REGION 1
#endif

static DEFINE_MUTEX(damon_lock);
static int nr_running_ctxs;
static bool running_exclusive_ctxs;

static DEFINE_MUTEX(damon_ops_lock);
static struct damon_operations damon_registered_ops[NR_DAMON_OPS];

/* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */
static bool __damon_is_registered_ops(enum damon_ops_id id)
{
    struct damon_operations empty_ops = {};

    if (!memcmp(&empty_ops, &damon_registered_ops[id], sizeof(empty_ops)))
        return false;
    return true;
}
/**
 * damon_is_registered_ops() - Check if a given damon_operations is registered.
 * @id: Id of the damon_operations to check.
 *
 * Return: true if the ops is set, false otherwise.
 */
bool damon_is_registered_ops(enum damon_ops_id id)
{
    bool registered;

    if (id >= NR_DAMON_OPS)
        return false;
    mutex_lock(&damon_ops_lock);
    registered = __damon_is_registered_ops(id);
    mutex_unlock(&damon_ops_lock);
    return registered;
}
/**
 * damon_register_ops() - Register a monitoring operations set to DAMON.
 * @ops:    monitoring operations set to register.
 *
 * This function registers a monitoring operations set having a valid &struct
 * damon_operations->id, so that others can find and use it later.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_register_ops(struct damon_operations *ops)
{
    int err = 0;

    if (ops->id >= NR_DAMON_OPS)
        return -EINVAL;
    mutex_lock(&damon_ops_lock);
    /* Fail for already registered ops */
    if (__damon_is_registered_ops(ops->id)) {
        err = -EINVAL;
        goto out;
    }
    damon_registered_ops[ops->id] = *ops;
out:
    mutex_unlock(&damon_ops_lock);
    return err;
}
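
/*
 * Example usage (an illustrative sketch only; 'my_ops', 'my_init' and
 * 'my_cleanup' are hypothetical names, not part of this file).  A monitoring
 * operations provider could register itself like below, assuming the id slot
 * is not already taken:
 *
 *    static struct damon_operations my_ops = {
 *        .id = DAMON_OPS_PADDR,
 *        .init = my_init,
 *        .cleanup = my_cleanup,
 *    };
 *
 *    if (damon_register_ops(&my_ops))
 *        pr_err("ops registration failed\n");
 */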

/**
 * damon_select_ops() - Select a monitoring operations to use with the context.
 * @ctx:    monitoring context to use the operations.
 * @id:     id of the registered monitoring operations to select.
 *
 * This function finds the registered monitoring operations set of @id and
 * makes @ctx use it.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id)
{
    int err = 0;

    if (id >= NR_DAMON_OPS)
        return -EINVAL;

    mutex_lock(&damon_ops_lock);
    if (!__damon_is_registered_ops(id))
        err = -EINVAL;
    else
        ctx->ops = damon_registered_ops[id];
    mutex_unlock(&damon_ops_lock);
    return err;
}
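
/*
 * Example usage (an illustrative sketch only): binding a freshly created
 * context to the registered virtual address spaces monitoring operations:
 *
 *    struct damon_ctx *ctx = damon_new_ctx();
 *
 *    if (ctx && damon_select_ops(ctx, DAMON_OPS_VADDR))
 *        pr_err("no operations set registered for the id\n");
 */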

/*
 * Construct a damon_region struct
 *
 * Returns the pointer to the new struct on success, or NULL otherwise
 */
struct damon_region *damon_new_region(unsigned long start, unsigned long end)
{
    struct damon_region *region;

    region = kmalloc(sizeof(*region), GFP_KERNEL);
    if (!region)
        return NULL;

    region->ar.start = start;
    region->ar.end = end;
    region->nr_accesses = 0;
    INIT_LIST_HEAD(&region->list);

    region->age = 0;
    region->last_nr_accesses = 0;

    return region;
}

void damon_add_region(struct damon_region *r, struct damon_target *t)
{
    list_add_tail(&r->list, &t->regions_list);
    t->nr_regions++;
}

static void damon_del_region(struct damon_region *r, struct damon_target *t)
{
    list_del(&r->list);
    t->nr_regions--;
}

static void damon_free_region(struct damon_region *r)
{
    kfree(r);
}

void damon_destroy_region(struct damon_region *r, struct damon_target *t)
{
    damon_del_region(r, t);
    damon_free_region(r);
}

/*
 * Check whether a region is intersecting an address range
 *
 * Returns true if it is.
 */
static bool damon_intersect(struct damon_region *r,
        struct damon_addr_range *re)
{
    return !(r->ar.end <= re->start || re->end <= r->ar.start);
}

/*
 * damon_set_regions() - Set regions of a target for given address ranges.
 * @t:      the given target.
 * @ranges: array of new monitoring target ranges.
 * @nr_ranges:  length of @ranges.
 *
 * This function adds new regions to, or modifies existing regions of, a
 * monitoring target to fit in the given ranges.
 *
 * Return: 0 on success, or negative error code otherwise.
 */
int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
        unsigned int nr_ranges)
{
    struct damon_region *r, *next;
    unsigned int i;

    /* Remove regions which are not in the new ranges */
    damon_for_each_region_safe(r, next, t) {
        for (i = 0; i < nr_ranges; i++) {
            if (damon_intersect(r, &ranges[i]))
                break;
        }
        if (i == nr_ranges)
            damon_destroy_region(r, t);
    }

    /* Add new regions or resize existing regions to fit in the ranges */
    for (i = 0; i < nr_ranges; i++) {
        struct damon_region *first = NULL, *last, *newr;
        struct damon_addr_range *range;

        range = &ranges[i];
        /* Get the first/last regions intersecting with the range */
        damon_for_each_region(r, t) {
            if (damon_intersect(r, range)) {
                if (!first)
                    first = r;
                last = r;
            }
            if (r->ar.start >= range->end)
                break;
        }
        if (!first) {
            /* no region intersects with this range */
            newr = damon_new_region(
                    ALIGN_DOWN(range->start,
                        DAMON_MIN_REGION),
                    ALIGN(range->end, DAMON_MIN_REGION));
            if (!newr)
                return -ENOMEM;
            damon_insert_region(newr, damon_prev_region(r), r, t);
        } else {
            /* resize intersecting regions to fit in this range */
            first->ar.start = ALIGN_DOWN(range->start,
                    DAMON_MIN_REGION);
            last->ar.end = ALIGN(range->end, DAMON_MIN_REGION);
        }
    }
    return 0;
}
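
/*
 * Example usage (an illustrative sketch only; 't' is an existing target and
 * the addresses are arbitrary): restricting the target to two ranges.
 * Existing regions outside the ranges are destroyed, and intersecting ones
 * are resized to fit:
 *
 *    struct damon_addr_range ranges[] = {
 *        {.start = 0x100000000, .end = 0x200000000},
 *        {.start = 0x400000000, .end = 0x500000000},
 *    };
 *
 *    if (damon_set_regions(t, ranges, 2))
 *        pr_err("failed setting the regions\n");
 */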

struct damos *damon_new_scheme(
        unsigned long min_sz_region, unsigned long max_sz_region,
        unsigned int min_nr_accesses, unsigned int max_nr_accesses,
        unsigned int min_age_region, unsigned int max_age_region,
        enum damos_action action, struct damos_quota *quota,
        struct damos_watermarks *wmarks)
{
    struct damos *scheme;

    scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
    if (!scheme)
        return NULL;
    scheme->min_sz_region = min_sz_region;
    scheme->max_sz_region = max_sz_region;
    scheme->min_nr_accesses = min_nr_accesses;
    scheme->max_nr_accesses = max_nr_accesses;
    scheme->min_age_region = min_age_region;
    scheme->max_age_region = max_age_region;
    scheme->action = action;
    scheme->stat = (struct damos_stat){};
    INIT_LIST_HEAD(&scheme->list);

    scheme->quota.ms = quota->ms;
    scheme->quota.sz = quota->sz;
    scheme->quota.reset_interval = quota->reset_interval;
    scheme->quota.weight_sz = quota->weight_sz;
    scheme->quota.weight_nr_accesses = quota->weight_nr_accesses;
    scheme->quota.weight_age = quota->weight_age;
    scheme->quota.total_charged_sz = 0;
    scheme->quota.total_charged_ns = 0;
    scheme->quota.esz = 0;
    scheme->quota.charged_sz = 0;
    scheme->quota.charged_from = 0;
    scheme->quota.charge_target_from = NULL;
    scheme->quota.charge_addr_from = 0;

    scheme->wmarks.metric = wmarks->metric;
    scheme->wmarks.interval = wmarks->interval;
    scheme->wmarks.high = wmarks->high;
    scheme->wmarks.mid = wmarks->mid;
    scheme->wmarks.low = wmarks->low;
    scheme->wmarks.activated = true;

    return scheme;
}
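
/*
 * Example usage (an illustrative sketch only; 'ctx' is assumed to be an
 * initialized context): a scheme that pages out regions of any size that
 * showed no access for at least 100 aggregation intervals, with no quota
 * and no watermark-based deactivation:
 *
 *    struct damos_quota quota = {};
 *    struct damos_watermarks wmarks = {.metric = DAMOS_WMARK_NONE};
 *    struct damos *s;
 *
 *    s = damon_new_scheme(0, ULONG_MAX, 0, 0, 100, UINT_MAX,
 *            DAMOS_PAGEOUT, &quota, &wmarks);
 *    if (s)
 *        damon_add_scheme(ctx, s);
 */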

void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
{
    list_add_tail(&s->list, &ctx->schemes);
}

static void damon_del_scheme(struct damos *s)
{
    list_del(&s->list);
}

static void damon_free_scheme(struct damos *s)
{
    kfree(s);
}

void damon_destroy_scheme(struct damos *s)
{
    damon_del_scheme(s);
    damon_free_scheme(s);
}

/*
 * Construct a damon_target struct
 *
 * Returns the pointer to the new struct on success, or NULL otherwise
 */
struct damon_target *damon_new_target(void)
{
    struct damon_target *t;

    t = kmalloc(sizeof(*t), GFP_KERNEL);
    if (!t)
        return NULL;

    t->pid = NULL;
    t->nr_regions = 0;
    INIT_LIST_HEAD(&t->regions_list);

    return t;
}

void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
{
    list_add_tail(&t->list, &ctx->adaptive_targets);
}

bool damon_targets_empty(struct damon_ctx *ctx)
{
    return list_empty(&ctx->adaptive_targets);
}

static void damon_del_target(struct damon_target *t)
{
    list_del(&t->list);
}

void damon_free_target(struct damon_target *t)
{
    struct damon_region *r, *next;

    damon_for_each_region_safe(r, next, t)
        damon_free_region(r);
    kfree(t);
}

void damon_destroy_target(struct damon_target *t)
{
    damon_del_target(t);
    damon_free_target(t);
}

unsigned int damon_nr_regions(struct damon_target *t)
{
    return t->nr_regions;
}

struct damon_ctx *damon_new_ctx(void)
{
    struct damon_ctx *ctx;

    ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
    if (!ctx)
        return NULL;

    ctx->sample_interval = 5 * 1000;
    ctx->aggr_interval = 100 * 1000;
    ctx->ops_update_interval = 60 * 1000 * 1000;

    ktime_get_coarse_ts64(&ctx->last_aggregation);
    ctx->last_ops_update = ctx->last_aggregation;

    mutex_init(&ctx->kdamond_lock);

    ctx->min_nr_regions = 10;
    ctx->max_nr_regions = 1000;

    INIT_LIST_HEAD(&ctx->adaptive_targets);
    INIT_LIST_HEAD(&ctx->schemes);

    return ctx;
}

static void damon_destroy_targets(struct damon_ctx *ctx)
{
    struct damon_target *t, *next_t;

    if (ctx->ops.cleanup) {
        ctx->ops.cleanup(ctx);
        return;
    }

    damon_for_each_target_safe(t, next_t, ctx)
        damon_destroy_target(t);
}

void damon_destroy_ctx(struct damon_ctx *ctx)
{
    struct damos *s, *next_s;

    damon_destroy_targets(ctx);

    damon_for_each_scheme_safe(s, next_s, ctx)
        damon_destroy_scheme(s);

    kfree(ctx);
}

/**
 * damon_set_attrs() - Set attributes for the monitoring.
 * @ctx:        monitoring context
 * @sample_int:     time interval between samplings
 * @aggr_int:       time interval between aggregations
 * @ops_upd_int:    time interval between monitoring operations updates
 * @min_nr_reg:     minimal number of regions
 * @max_nr_reg:     maximum number of regions
 *
 * This function should not be called while the kdamond is running.
 * Every time interval is in microseconds.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
            unsigned long aggr_int, unsigned long ops_upd_int,
            unsigned long min_nr_reg, unsigned long max_nr_reg)
{
    if (min_nr_reg < 3)
        return -EINVAL;
    if (min_nr_reg > max_nr_reg)
        return -EINVAL;

    ctx->sample_interval = sample_int;
    ctx->aggr_interval = aggr_int;
    ctx->ops_update_interval = ops_upd_int;
    ctx->min_nr_regions = min_nr_reg;
    ctx->max_nr_regions = max_nr_reg;

    return 0;
}
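
/*
 * Example usage (an illustrative sketch only; 'ctx' is assumed to be an
 * initialized context): applying the same values that damon_new_ctx() sets
 * as defaults, namely 5 ms sampling, 100 ms aggregation, 60 s operations
 * update, and 10 to 1000 regions:
 *
 *    if (damon_set_attrs(ctx, 5000, 100000, 60 * 1000 * 1000, 10, 1000))
 *        pr_err("invalid monitoring attributes\n");
 */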

/**
 * damon_set_schemes() - Set data access monitoring based operation schemes.
 * @ctx:    monitoring context
 * @schemes:    array of the schemes
 * @nr_schemes: number of entries in @schemes
 *
 * This function should not be called while the kdamond of the context is
 * running.
 *
 * Return: 0 on success, or negative error code otherwise.
 */
int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
            ssize_t nr_schemes)
{
    struct damos *s, *next;
    ssize_t i;

    damon_for_each_scheme_safe(s, next, ctx)
        damon_destroy_scheme(s);
    for (i = 0; i < nr_schemes; i++)
        damon_add_scheme(ctx, schemes[i]);
    return 0;
}

/**
 * damon_nr_running_ctxs() - Return number of currently running contexts.
 */
int damon_nr_running_ctxs(void)
{
    int nr_ctxs;

    mutex_lock(&damon_lock);
    nr_ctxs = nr_running_ctxs;
    mutex_unlock(&damon_lock);

    return nr_ctxs;
}

/* Returns the size upper limit for each monitoring region */
static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
{
    struct damon_target *t;
    struct damon_region *r;
    unsigned long sz = 0;

    damon_for_each_target(t, ctx) {
        damon_for_each_region(r, t)
            sz += r->ar.end - r->ar.start;
    }

    if (ctx->min_nr_regions)
        sz /= ctx->min_nr_regions;
    if (sz < DAMON_MIN_REGION)
        sz = DAMON_MIN_REGION;

    return sz;
}

static int kdamond_fn(void *data);

/*
 * __damon_start() - Starts monitoring with given context.
 * @ctx:    monitoring context
 *
 * This function should be called while damon_lock is held.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_start(struct damon_ctx *ctx)
{
    int err = -EBUSY;

    mutex_lock(&ctx->kdamond_lock);
    if (!ctx->kdamond) {
        err = 0;
        ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
                nr_running_ctxs);
        if (IS_ERR(ctx->kdamond)) {
            err = PTR_ERR(ctx->kdamond);
            ctx->kdamond = NULL;
        }
    }
    mutex_unlock(&ctx->kdamond_lock);

    return err;
}

/**
 * damon_start() - Starts the monitoring for a given group of contexts.
 * @ctxs:   an array of the pointers for contexts to start monitoring
 * @nr_ctxs:    size of @ctxs
 * @exclusive:  exclusiveness of this group of contexts
 *
 * This function starts a group of monitoring threads for a group of monitoring
 * contexts.  One thread per context is created and run in parallel.  The
 * caller should handle synchronization between the threads by itself.  If
 * @exclusive is true and a group of threads that was created by another
 * 'damon_start()' call is currently running, this function does nothing but
 * returns -EBUSY.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive)
{
    int i;
    int err = 0;

    mutex_lock(&damon_lock);
    if ((exclusive && nr_running_ctxs) ||
            (!exclusive && running_exclusive_ctxs)) {
        mutex_unlock(&damon_lock);
        return -EBUSY;
    }

    for (i = 0; i < nr_ctxs; i++) {
        err = __damon_start(ctxs[i]);
        if (err)
            break;
        nr_running_ctxs++;
    }
    if (exclusive && nr_running_ctxs)
        running_exclusive_ctxs = true;
    mutex_unlock(&damon_lock);

    return err;
}
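
/*
 * Example usage (an illustrative sketch only; 'ctx' is assumed to be a fully
 * configured context): exclusively starting a single context.  This fails
 * with -EBUSY while any other context is running:
 *
 *    struct damon_ctx *ctxs[1] = {ctx};
 *    int err = damon_start(ctxs, 1, true);
 *
 *    if (err)
 *        pr_err("start failed: %d\n", err);
 */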

/*
 * __damon_stop() - Stops monitoring of a given context.
 * @ctx:    monitoring context
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_stop(struct damon_ctx *ctx)
{
    struct task_struct *tsk;

    mutex_lock(&ctx->kdamond_lock);
    tsk = ctx->kdamond;
    if (tsk) {
        get_task_struct(tsk);
        mutex_unlock(&ctx->kdamond_lock);
        kthread_stop(tsk);
        put_task_struct(tsk);
        return 0;
    }
    mutex_unlock(&ctx->kdamond_lock);

    return -EPERM;
}

/**
 * damon_stop() - Stops the monitoring for a given group of contexts.
 * @ctxs:   an array of the pointers for contexts to stop monitoring
 * @nr_ctxs:    size of @ctxs
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
{
    int i, err = 0;

    for (i = 0; i < nr_ctxs; i++) {
        /* nr_running_ctxs is decremented in kdamond_fn */
        err = __damon_stop(ctxs[i]);
        if (err)
            break;
    }
    return err;
}

/*
 * damon_check_reset_time_interval() - Check if a time interval is elapsed.
 * @baseline:   the time to check whether the interval has elapsed since
 * @interval:   the time interval (microseconds)
 *
 * See whether the given time interval has passed since the given baseline
 * time.  If so, it also updates the baseline to the current time for the next
 * check.
 *
 * Return:  true if the time interval has passed, or false otherwise.
 */
static bool damon_check_reset_time_interval(struct timespec64 *baseline,
        unsigned long interval)
{
    struct timespec64 now;

    ktime_get_coarse_ts64(&now);
    if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
            interval * 1000)
        return false;
    *baseline = now;
    return true;
}

/*
 * Check whether it is time to flush the aggregated information
 */
static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
{
    return damon_check_reset_time_interval(&ctx->last_aggregation,
            ctx->aggr_interval);
}

/*
 * Reset the aggregated monitoring results ('nr_accesses' of each region).
 */
static void kdamond_reset_aggregated(struct damon_ctx *c)
{
    struct damon_target *t;
    unsigned int ti = 0;    /* target's index */

    damon_for_each_target(t, c) {
        struct damon_region *r;

        damon_for_each_region(r, t) {
            trace_damon_aggregated(t, ti, r, damon_nr_regions(t));
            r->last_nr_accesses = r->nr_accesses;
            r->nr_accesses = 0;
        }
        ti++;
    }
}

static void damon_split_region_at(struct damon_ctx *ctx,
        struct damon_target *t, struct damon_region *r,
        unsigned long sz_r);

static bool __damos_valid_target(struct damon_region *r, struct damos *s)
{
    unsigned long sz;

    sz = r->ar.end - r->ar.start;
    return s->min_sz_region <= sz && sz <= s->max_sz_region &&
        s->min_nr_accesses <= r->nr_accesses &&
        r->nr_accesses <= s->max_nr_accesses &&
        s->min_age_region <= r->age && r->age <= s->max_age_region;
}

static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
        struct damon_region *r, struct damos *s)
{
    bool ret = __damos_valid_target(r, s);

    if (!ret || !s->quota.esz || !c->ops.get_scheme_score)
        return ret;

    return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
}

static void damon_do_apply_schemes(struct damon_ctx *c,
                   struct damon_target *t,
                   struct damon_region *r)
{
    struct damos *s;

    damon_for_each_scheme(s, c) {
        struct damos_quota *quota = &s->quota;
        unsigned long sz = r->ar.end - r->ar.start;
        struct timespec64 begin, end;
        unsigned long sz_applied = 0;

        if (!s->wmarks.activated)
            continue;

        /* Check the quota */
        if (quota->esz && quota->charged_sz >= quota->esz)
            continue;

        /* Skip previously charged regions */
        if (quota->charge_target_from) {
            if (t != quota->charge_target_from)
                continue;
            if (r == damon_last_region(t)) {
                quota->charge_target_from = NULL;
                quota->charge_addr_from = 0;
                continue;
            }
            if (quota->charge_addr_from &&
                    r->ar.end <= quota->charge_addr_from)
                continue;

            if (quota->charge_addr_from && r->ar.start <
                    quota->charge_addr_from) {
                sz = ALIGN_DOWN(quota->charge_addr_from -
                        r->ar.start, DAMON_MIN_REGION);
                if (!sz) {
                    if (r->ar.end - r->ar.start <=
                            DAMON_MIN_REGION)
                        continue;
                    sz = DAMON_MIN_REGION;
                }
                damon_split_region_at(c, t, r, sz);
                r = damon_next_region(r);
                sz = r->ar.end - r->ar.start;
            }
            quota->charge_target_from = NULL;
            quota->charge_addr_from = 0;
        }

        if (!damos_valid_target(c, t, r, s))
            continue;

        /* Apply the scheme */
        if (c->ops.apply_scheme) {
            if (quota->esz &&
                    quota->charged_sz + sz > quota->esz) {
                sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
                        DAMON_MIN_REGION);
                if (!sz)
                    goto update_stat;
                damon_split_region_at(c, t, r, sz);
            }
            ktime_get_coarse_ts64(&begin);
            sz_applied = c->ops.apply_scheme(c, t, r, s);
            ktime_get_coarse_ts64(&end);
            quota->total_charged_ns += timespec64_to_ns(&end) -
                timespec64_to_ns(&begin);
            quota->charged_sz += sz;
            if (quota->esz && quota->charged_sz >= quota->esz) {
                quota->charge_target_from = t;
                quota->charge_addr_from = r->ar.end + 1;
            }
        }
        if (s->action != DAMOS_STAT)
            r->age = 0;

update_stat:
        s->stat.nr_tried++;
        s->stat.sz_tried += sz;
        if (sz_applied)
            s->stat.nr_applied++;
        s->stat.sz_applied += sz_applied;
    }
}

/* Shouldn't be called if quota->ms and quota->sz are zero */
static void damos_set_effective_quota(struct damos_quota *quota)
{
    unsigned long throughput;
    unsigned long esz;

    if (!quota->ms) {
        quota->esz = quota->sz;
        return;
    }

    if (quota->total_charged_ns)
        throughput = quota->total_charged_sz * 1000000 /
            quota->total_charged_ns;
    else
        throughput = PAGE_SIZE * 1024;
    esz = throughput * quota->ms;

    if (quota->sz && quota->sz < esz)
        esz = quota->sz;
    quota->esz = esz;
}
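
/*
 * Worked example (illustrative numbers only): with quota->ms of 10, if the
 * charge history says 1 MiB (1048576 bytes) of scheme application took
 * 2,000,000 ns, the estimated throughput is 1048576 * 1000000 / 2000000 =
 * 524288 bytes per millisecond, so esz becomes 524288 * 10 = 5 MiB.  If
 * quota->sz is also set and smaller, say 4 MiB, esz is capped there.
 */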

static void kdamond_apply_schemes(struct damon_ctx *c)
{
    struct damon_target *t;
    struct damon_region *r, *next_r;
    struct damos *s;

    damon_for_each_scheme(s, c) {
        struct damos_quota *quota = &s->quota;
        unsigned long cumulated_sz;
        unsigned int score, max_score = 0;

        if (!s->wmarks.activated)
            continue;

        if (!quota->ms && !quota->sz)
            continue;

        /* New charge window starts */
        if (time_after_eq(jiffies, quota->charged_from +
                    msecs_to_jiffies(
                        quota->reset_interval))) {
            if (quota->esz && quota->charged_sz >= quota->esz)
                s->stat.qt_exceeds++;
            quota->total_charged_sz += quota->charged_sz;
            quota->charged_from = jiffies;
            quota->charged_sz = 0;
            damos_set_effective_quota(quota);
        }

        if (!c->ops.get_scheme_score)
            continue;

        /* Fill up the score histogram */
        memset(quota->histogram, 0, sizeof(quota->histogram));
        damon_for_each_target(t, c) {
            damon_for_each_region(r, t) {
                if (!__damos_valid_target(r, s))
                    continue;
                score = c->ops.get_scheme_score(
                        c, t, r, s);
                quota->histogram[score] +=
                    r->ar.end - r->ar.start;
                if (score > max_score)
                    max_score = score;
            }
        }

        /* Set the min score limit */
        for (cumulated_sz = 0, score = max_score; ; score--) {
            cumulated_sz += quota->histogram[score];
            if (cumulated_sz >= quota->esz || !score)
                break;
        }
        quota->min_score = score;
    }

    damon_for_each_target(t, c) {
        damon_for_each_region_safe(r, next_r, t)
            damon_do_apply_schemes(c, t, r);
    }
}

static inline unsigned long sz_damon_region(struct damon_region *r)
{
    return r->ar.end - r->ar.start;
}

/*
 * Merge two adjacent regions into one region
 */
static void damon_merge_two_regions(struct damon_target *t,
        struct damon_region *l, struct damon_region *r)
{
    unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);

    l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
            (sz_l + sz_r);
    l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
    l->ar.end = r->ar.end;
    damon_destroy_region(r, t);
}
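
/*
 * Worked example (illustrative numbers only): merging a 12 KiB region with
 * nr_accesses of 10 into an adjacent 4 KiB region with nr_accesses of 2
 * yields a 16 KiB region with nr_accesses of
 * (2 * 4096 + 10 * 12288) / 16384 = 8; the age is combined with the same
 * size-weighted averaging.
 */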

/*
 * Merge adjacent regions having similar access frequencies
 *
 * t        target affected by this merge operation
 * thres    '->nr_accesses' diff threshold for the merge
 * sz_limit size upper limit of each region
 */
static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
                   unsigned long sz_limit)
{
    struct damon_region *r, *prev = NULL, *next;

    damon_for_each_region_safe(r, next, t) {
        if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
            r->age = 0;
        else
            r->age++;

        if (prev && prev->ar.end == r->ar.start &&
            abs(prev->nr_accesses - r->nr_accesses) <= thres &&
            sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
            damon_merge_two_regions(t, prev, r);
        else
            prev = r;
    }
}

/*
 * Merge adjacent regions having similar access frequencies
 *
 * threshold    '->nr_accesses' diff threshold for the merge
 * sz_limit size upper limit of each region
 *
 * This function merges monitoring target regions which are adjacent and whose
 * access frequencies are similar.  This is for minimizing the monitoring
 * overhead under the dynamically changing access pattern.  If a merge was
 * unnecessarily made, later 'kdamond_split_regions()' will revert it.
 */
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
                  unsigned long sz_limit)
{
    struct damon_target *t;

    damon_for_each_target(t, c)
        damon_merge_regions_of(t, threshold, sz_limit);
}

/*
 * Split a region in two
 *
 * r        the region to be split
 * sz_r     size of the first sub-region that will be made
 */
static void damon_split_region_at(struct damon_ctx *ctx,
        struct damon_target *t, struct damon_region *r,
        unsigned long sz_r)
{
    struct damon_region *new;

    new = damon_new_region(r->ar.start + sz_r, r->ar.end);
    if (!new)
        return;

    r->ar.end = new->ar.start;

    new->age = r->age;
    new->last_nr_accesses = r->last_nr_accesses;

    damon_insert_region(new, r, damon_next_region(r), t);
}

/* Split every region in the given target into 'nr_subs' regions */
static void damon_split_regions_of(struct damon_ctx *ctx,
                     struct damon_target *t, int nr_subs)
{
    struct damon_region *r, *next;
    unsigned long sz_region, sz_sub = 0;
    int i;

    damon_for_each_region_safe(r, next, t) {
        sz_region = r->ar.end - r->ar.start;

        for (i = 0; i < nr_subs - 1 &&
                sz_region > 2 * DAMON_MIN_REGION; i++) {
            /*
             * Randomly select the size of the left sub-region to be
             * at least 10% and at most 90% of the original region
             */
            sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
                    sz_region / 10, DAMON_MIN_REGION);
            /* Do not allow blank region */
            if (sz_sub == 0 || sz_sub >= sz_region)
                continue;

            damon_split_region_at(ctx, t, r, sz_sub);
            sz_region = sz_sub;
        }
    }
}
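
/*
 * Worked example (illustrative numbers only): for a 100 MiB region,
 * damon_rand(1, 10) scales the split point to 10%-100% of the size; the
 * 100% case fails the 'sz_sub >= sz_region' check above and is skipped, so
 * the front sub-region ends up 10-90 MiB, aligned down to DAMON_MIN_REGION.
 */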

/*
 * Split every target region into randomly-sized small regions
 *
 * This function splits every target region into random-sized small regions if
 * the current total number of the regions is equal to or smaller than half of
 * the user-specified maximum number of regions.  This is for maximizing the
 * monitoring accuracy under the dynamically changing access patterns.  If a
 * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
 * it.
 */
static void kdamond_split_regions(struct damon_ctx *ctx)
{
    struct damon_target *t;
    unsigned int nr_regions = 0;
    static unsigned int last_nr_regions;
    int nr_subregions = 2;

    damon_for_each_target(t, ctx)
        nr_regions += damon_nr_regions(t);

    if (nr_regions > ctx->max_nr_regions / 2)
        return;

    /* Maybe the middle of the region has different access frequency */
    if (last_nr_regions == nr_regions &&
            nr_regions < ctx->max_nr_regions / 3)
        nr_subregions = 3;

    damon_for_each_target(t, ctx)
        damon_split_regions_of(ctx, t, nr_subregions);

    last_nr_regions = nr_regions;
}

/*
 * Check whether it is time to update the monitoring operations related data
 * structures.
 *
 * Returns true if it is.
 */
static bool kdamond_need_update_operations(struct damon_ctx *ctx)
{
    return damon_check_reset_time_interval(&ctx->last_ops_update,
            ctx->ops_update_interval);
}

/*
 * Check whether the current monitoring should be stopped
 *
 * The monitoring is stopped when either the user requested to stop, or all
 * monitoring targets are invalid.
 *
 * Returns true if the current monitoring should be stopped.
 */
static bool kdamond_need_stop(struct damon_ctx *ctx)
{
    struct damon_target *t;

    if (kthread_should_stop())
        return true;

    if (!ctx->ops.target_valid)
        return false;

    damon_for_each_target(t, ctx) {
        if (ctx->ops.target_valid(t))
            return false;
    }

    return true;
}

static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric)
{
    struct sysinfo i;

    switch (metric) {
    case DAMOS_WMARK_FREE_MEM_RATE:
        si_meminfo(&i);
        return i.freeram * 1000 / i.totalram;
    default:
        break;
    }
    return -EINVAL;
}
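
/*
 * Worked example (illustrative numbers only): with 2 GiB free out of 8 GiB
 * total memory, DAMOS_WMARK_FREE_MEM_RATE evaluates to 2 * 1000 / 8 = 250.
 * That is, the watermark metric values are in per-thousand of the total.
 */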

/*
 * Returns zero if the scheme is active.  Otherwise, returns the time to wait
 * for the next watermark check, in microseconds.
 */
static unsigned long damos_wmark_wait_us(struct damos *scheme)
{
    unsigned long metric;

    if (scheme->wmarks.metric == DAMOS_WMARK_NONE)
        return 0;

    metric = damos_wmark_metric_value(scheme->wmarks.metric);
    /* higher than high watermark or lower than low watermark */
    if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
        if (scheme->wmarks.activated)
            pr_debug("deactivate a scheme (%d) for %s wmark\n",
                    scheme->action,
                    metric > scheme->wmarks.high ?
                    "high" : "low");
        scheme->wmarks.activated = false;
        return scheme->wmarks.interval;
    }

    /* inactive and higher than middle watermark */
    if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
            !scheme->wmarks.activated)
        return scheme->wmarks.interval;

    if (!scheme->wmarks.activated)
        pr_debug("activate a scheme (%d)\n", scheme->action);
    scheme->wmarks.activated = true;
    return 0;
}

static void kdamond_usleep(unsigned long usecs)
{
    /* See Documentation/timers/timers-howto.rst for the thresholds */
    if (usecs > 20 * USEC_PER_MSEC)
        schedule_timeout_idle(usecs_to_jiffies(usecs));
    else
        usleep_idle_range(usecs, usecs + 1);
}

/*
 * Returns zero when a scheme is activated.  Returns a negative error code
 * when kdamond should stop waiting and terminate instead.
 */
static int kdamond_wait_activation(struct damon_ctx *ctx)
{
    struct damos *s;
    unsigned long wait_time;
    unsigned long min_wait_time = 0;
    bool init_wait_time = false;

    while (!kdamond_need_stop(ctx)) {
        damon_for_each_scheme(s, ctx) {
            wait_time = damos_wmark_wait_us(s);
            if (!init_wait_time || wait_time < min_wait_time) {
                init_wait_time = true;
                min_wait_time = wait_time;
            }
        }
        if (!min_wait_time)
            return 0;

        kdamond_usleep(min_wait_time);

        if (ctx->callback.after_wmarks_check &&
                ctx->callback.after_wmarks_check(ctx))
            break;
    }
    return -EBUSY;
}

/*
 * The monitoring daemon that runs as a kernel thread
 */
static int kdamond_fn(void *data)
{
    struct damon_ctx *ctx = data;
    struct damon_target *t;
    struct damon_region *r, *next;
    unsigned int max_nr_accesses = 0;
    unsigned long sz_limit = 0;
    bool done = false;

    pr_debug("kdamond (%d) starts\n", current->pid);

    if (ctx->ops.init)
        ctx->ops.init(ctx);
    if (ctx->callback.before_start && ctx->callback.before_start(ctx))
        done = true;

    sz_limit = damon_region_sz_limit(ctx);

    while (!kdamond_need_stop(ctx) && !done) {
        if (kdamond_wait_activation(ctx)) {
            done = true;
            continue;
        }

        if (ctx->ops.prepare_access_checks)
            ctx->ops.prepare_access_checks(ctx);
        if (ctx->callback.after_sampling &&
                ctx->callback.after_sampling(ctx)) {
            done = true;
            continue;
        }

        kdamond_usleep(ctx->sample_interval);

        if (ctx->ops.check_accesses)
            max_nr_accesses = ctx->ops.check_accesses(ctx);

        if (kdamond_aggregate_interval_passed(ctx)) {
            kdamond_merge_regions(ctx,
                    max_nr_accesses / 10,
                    sz_limit);
            if (ctx->callback.after_aggregation &&
                    ctx->callback.after_aggregation(ctx)) {
                done = true;
                continue;
            }
            kdamond_apply_schemes(ctx);
            kdamond_reset_aggregated(ctx);
            kdamond_split_regions(ctx);
            if (ctx->ops.reset_aggregated)
                ctx->ops.reset_aggregated(ctx);
        }

        if (kdamond_need_update_operations(ctx)) {
            if (ctx->ops.update)
                ctx->ops.update(ctx);
            sz_limit = damon_region_sz_limit(ctx);
        }
    }
    damon_for_each_target(t, ctx) {
        damon_for_each_region_safe(r, next, t)
            damon_destroy_region(r, t);
    }

    if (ctx->callback.before_terminate)
        ctx->callback.before_terminate(ctx);
    if (ctx->ops.cleanup)
        ctx->ops.cleanup(ctx);

    pr_debug("kdamond (%d) finishes\n", current->pid);
    mutex_lock(&ctx->kdamond_lock);
    ctx->kdamond = NULL;
    mutex_unlock(&ctx->kdamond_lock);

    mutex_lock(&damon_lock);
    nr_running_ctxs--;
    if (!nr_running_ctxs && running_exclusive_ctxs)
        running_exclusive_ctxs = false;
    mutex_unlock(&damon_lock);

    return 0;
}

#include "core-test.h"