Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * DAMON-based page reclamation
0004  *
0005  * Author: SeongJae Park <sj@kernel.org>
0006  */
0007 
0008 #define pr_fmt(fmt) "damon-reclaim: " fmt
0009 
0010 #include <linux/damon.h>
0011 #include <linux/ioport.h>
0012 #include <linux/module.h>
0013 #include <linux/sched.h>
0014 #include <linux/workqueue.h>
0015 
0016 #ifdef MODULE_PARAM_PREFIX
0017 #undef MODULE_PARAM_PREFIX
0018 #endif
0019 #define MODULE_PARAM_PREFIX "damon_reclaim."
0020 
0021 /*
0022  * Enable or disable DAMON_RECLAIM.
0023  *
0024  * You can enable DAMON_RECLAIM by setting the value of this parameter as ``Y``.
0025  * Setting it as ``N`` disables DAMON_RECLAIM.  Note that DAMON_RECLAIM could
0026  * do no real monitoring and reclamation due to the watermarks-based activation
0027  * condition.  Refer to the descriptions of the watermarks parameters below for
0028  * this.
0029  */
0030 static bool enabled __read_mostly;
0031 
0032 /*
0033  * Make DAMON_RECLAIM read the input parameters again, except ``enabled``.
0034  *
0035  * Input parameters that are updated while DAMON_RECLAIM is running are not
0036  * applied by default.  Once this parameter is set as ``Y``, DAMON_RECLAIM reads
0037  * values of parameters except ``enabled`` again.  Once the re-reading is done,
0038  * this parameter is set as ``N``.  If invalid parameters are found during the
0039  * re-reading, DAMON_RECLAIM will be disabled.
0040  */
0041 static bool commit_inputs __read_mostly;
0042 module_param(commit_inputs, bool, 0600);
0043 
0044 /*
0045  * Time threshold for cold memory regions identification in microseconds.
0046  *
0047  * If a memory region is not accessed for this or longer time, DAMON_RECLAIM
0048  * identifies the region as cold, and reclaims it.  120 seconds by default.
0049  */
0050 static unsigned long min_age __read_mostly = 120000000;
0051 module_param(min_age, ulong, 0600);
0052 
0053 /*
0054  * Limit of time for trying the reclamation in milliseconds.
0055  *
0056  * DAMON_RECLAIM tries to use only up to this time within a time window
0057  * (quota_reset_interval_ms) for trying reclamation of cold pages.  This can be
0058  * used for limiting CPU consumption of DAMON_RECLAIM.  If the value is zero,
0059  * the limit is disabled.
0060  *
0061  * 10 ms by default.
0062  */
0063 static unsigned long quota_ms __read_mostly = 10;
0064 module_param(quota_ms, ulong, 0600);
0065 
0066 /*
0067  * Limit of size of memory for the reclamation in bytes.
0068  *
0069  * DAMON_RECLAIM charges the amount of memory which it tried to reclaim within
0070  * a time window (quota_reset_interval_ms) and ensures no more than this limit
0071  * is tried.  This can be used for limiting consumption of CPU and IO.  If this
0072  * value is zero, the limit is disabled.
0073  *
0074  * 128 MiB by default.
0075  */
0076 static unsigned long quota_sz __read_mostly = 128 * 1024 * 1024;
0077 module_param(quota_sz, ulong, 0600);
0078 
0079 /*
0080  * The time/size quota charge reset interval in milliseconds.
0081  *
0082  * The charge reset interval for the quota of time (quota_ms) and size
0083  * (quota_sz).  That is, DAMON_RECLAIM does not try reclamation for more than
0084  * quota_ms milliseconds or quota_sz bytes within quota_reset_interval_ms
0085  * milliseconds.
0086  *
0087  * 1 second by default.
0088  */
0089 static unsigned long quota_reset_interval_ms __read_mostly = 1000;
0090 module_param(quota_reset_interval_ms, ulong, 0600);
0091 
0092 /*
0093  * The watermarks check time interval in microseconds.
0094  *
0095  * Minimal time to wait before checking the watermarks, when DAMON_RECLAIM is
0096  * enabled but inactive due to its watermarks rule.  5 seconds by default.
0097  */
0098 static unsigned long wmarks_interval __read_mostly = 5000000;
0099 module_param(wmarks_interval, ulong, 0600);
0100 
0101 /*
0102  * Free memory rate (per thousand) for the high watermark.
0103  *
0104  * If free memory of the system in bytes per thousand bytes is higher than
0105  * this, DAMON_RECLAIM becomes inactive, so it does nothing but periodically
0106  * check the watermarks.  500 (50%) by default.
0107  */
0108 static unsigned long wmarks_high __read_mostly = 500;
0109 module_param(wmarks_high, ulong, 0600);
0110 
0111 /*
0112  * Free memory rate (per thousand) for the middle watermark.
0113  *
0114  * If free memory of the system in bytes per thousand bytes is between this and
0115  * the low watermark, DAMON_RECLAIM becomes active, so it starts the monitoring
0116  * and the reclaiming.  400 (40%) by default.
0117  */
0118 static unsigned long wmarks_mid __read_mostly = 400;
0119 module_param(wmarks_mid, ulong, 0600);
0120 
0121 /*
0122  * Free memory rate (per thousand) for the low watermark.
0123  *
0124  * If free memory of the system in bytes per thousand bytes is lower than this,
0125  * DAMON_RECLAIM becomes inactive, so it does nothing but periodically check
0126  * the watermarks.  In that case, the system falls back to the LRU-based page
0127  * granularity reclamation logic.  200 (20%) by default.
0128  */
0129 static unsigned long wmarks_low __read_mostly = 200;
0130 module_param(wmarks_low, ulong, 0600);
0131 
0132 /*
0133  * Sampling interval for the monitoring in microseconds.
0134  *
0135  * The sampling interval of DAMON for the cold memory monitoring.  Please refer
0136  * to the DAMON documentation for more details.  5 ms by default.
0137  */
0138 static unsigned long sample_interval __read_mostly = 5000;
0139 module_param(sample_interval, ulong, 0600);
0140 
0141 /*
0142  * Aggregation interval for the monitoring in microseconds.
0143  *
0144  * The aggregation interval of DAMON for the cold memory monitoring.  Please
0145  * refer to the DAMON documentation for more details.  100 ms by default.
0146  */
0147 static unsigned long aggr_interval __read_mostly = 100000;
0148 module_param(aggr_interval, ulong, 0600);
0149 
0150 /*
0151  * Minimum number of monitoring regions.
0152  *
0153  * The minimal number of monitoring regions of DAMON for the cold memory
0154  * monitoring.  This can be used to set the lower bound of the monitoring
0155  * quality.  But, setting this too high could result in increased monitoring
0156  * overhead.  Please refer to the DAMON documentation for more details.  10 by default.
0157  */
0158 static unsigned long min_nr_regions __read_mostly = 10;
0159 module_param(min_nr_regions, ulong, 0600);
0160 
0161 /*
0162  * Maximum number of monitoring regions.
0163  *
0164  * The maximum number of monitoring regions of DAMON for the cold memory
0165  * monitoring.  This can be used to set the upper bound of the monitoring
0166  * overhead.  However, setting this too low could result in bad monitoring quality.
0167  * Please refer to the DAMON documentation for more details.  1000 by default.
0168  */
0169 static unsigned long max_nr_regions __read_mostly = 1000;
0170 module_param(max_nr_regions, ulong, 0600);
0171 
0172 /*
0173  * Start of the target memory region in physical address.
0174  *
0175  * The start physical address of the memory region that DAMON_RECLAIM will work
0176  * on.  By default, the biggest System RAM resource is used as the region.
0177  */
0178 static unsigned long monitor_region_start __read_mostly;
0179 module_param(monitor_region_start, ulong, 0600);
0180 
0181 /*
0182  * End of the target memory region in physical address.
0183  *
0184  * The end physical address of the memory region that DAMON_RECLAIM will work
0185  * on.  By default, the biggest System RAM resource is used as the region.
0186  */
0187 static unsigned long monitor_region_end __read_mostly;
0188 module_param(monitor_region_end, ulong, 0600);
0189 
0190 /*
0191  * PID of the DAMON thread
0192  *
0193  * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread.
0194  * Else, -1.
0195  */
0196 static int kdamond_pid __read_mostly = -1;
0197 module_param(kdamond_pid, int, 0400);
0198 
0199 /*
0200  * Number of memory regions that DAMON_RECLAIM tried to reclaim.
0201  */
0202 static unsigned long nr_reclaim_tried_regions __read_mostly;
0203 module_param(nr_reclaim_tried_regions, ulong, 0400);
0204 
0205 /*
0206  * Total bytes of memory regions that DAMON_RECLAIM tried to reclaim.
0207  */
0208 static unsigned long bytes_reclaim_tried_regions __read_mostly;
0209 module_param(bytes_reclaim_tried_regions, ulong, 0400);
0210 
0211 /*
0212  * Number of memory regions that were successfully reclaimed.
0213  */
0214 static unsigned long nr_reclaimed_regions __read_mostly;
0215 module_param(nr_reclaimed_regions, ulong, 0400);
0216 
0217 /*
0218  * Total bytes of memory regions that were successfully reclaimed.
0219  */
0220 static unsigned long bytes_reclaimed_regions __read_mostly;
0221 module_param(bytes_reclaimed_regions, ulong, 0400);
0222 
0223 /*
0224  * Number of times that the time/space quota limits have been exceeded
0225  */
0226 static unsigned long nr_quota_exceeds __read_mostly;
0227 module_param(nr_quota_exceeds, ulong, 0400);
0228 
0229 static struct damon_ctx *ctx;	/* DAMON context, allocated in damon_reclaim_init() */
0230 static struct damon_target *target;	/* monitoring target added to ctx in damon_reclaim_init() */
0231 
0232 struct damon_reclaim_ram_walk_arg {
0233     unsigned long start;
0234     unsigned long end;
0235 };
0236 
0237 static int walk_system_ram(struct resource *res, void *arg)
0238 {
0239     struct damon_reclaim_ram_walk_arg *a = arg;
0240 
0241     if (a->end - a->start < resource_size(res)) {
0242         a->start = res->start;
0243         a->end = res->end;
0244     }
0245     return 0;
0246 }
0247 
0248 /*
0249  * Find biggest 'System RAM' resource and store its start and end address in
0250  * @start and @end, respectively.  If no System RAM is found, returns false.
0251  */
0252 static bool get_monitoring_region(unsigned long *start, unsigned long *end)
0253 {
0254     struct damon_reclaim_ram_walk_arg arg = {};
0255 
0256     walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram);
0257     if (arg.end <= arg.start)
0258         return false;
0259 
0260     *start = arg.start;
0261     *end = arg.end;
0262     return true;
0263 }
0264 
0265 static struct damos *damon_reclaim_new_scheme(void)
0266 {
0267     struct damos_watermarks wmarks = {
0268         .metric = DAMOS_WMARK_FREE_MEM_RATE,
0269         .interval = wmarks_interval,
0270         .high = wmarks_high,
0271         .mid = wmarks_mid,
0272         .low = wmarks_low,
0273     };
0274     struct damos_quota quota = {
0275         /*
0276          * Do not try reclamation for more than quota_ms milliseconds
0277          * or quota_sz bytes within quota_reset_interval_ms.
0278          */
0279         .ms = quota_ms,
0280         .sz = quota_sz,
0281         .reset_interval = quota_reset_interval_ms,
0282         /* Within the quota, page out older regions first. */
0283         .weight_sz = 0,
0284         .weight_nr_accesses = 0,
0285         .weight_age = 1
0286     };
0287     struct damos *scheme = damon_new_scheme(
0288             /* Find regions having PAGE_SIZE or larger size */
0289             PAGE_SIZE, ULONG_MAX,
0290             /* and not accessed at all */
0291             0, 0,
0292             /* for min_age or more micro-seconds, and */
0293             min_age / aggr_interval, UINT_MAX,
0294             /* page out those, as soon as found */
0295             DAMOS_PAGEOUT,
0296             /* under the quota. */
0297             &quota,
0298             /* (De)activate this according to the watermarks. */
0299             &wmarks);
0300 
0301     return scheme;
0302 }
0303 
0304 static int damon_reclaim_apply_parameters(void)
0305 {
0306     struct damos *scheme;
0307     struct damon_addr_range addr_range;
0308     int err = 0;
0309 
0310     err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0,
0311             min_nr_regions, max_nr_regions);
0312     if (err)
0313         return err;
0314 
0315     /* Will be freed by next 'damon_set_schemes()' below */
0316     scheme = damon_reclaim_new_scheme();
0317     if (!scheme)
0318         return -ENOMEM;
0319     err = damon_set_schemes(ctx, &scheme, 1);
0320     if (err)
0321         return err;
0322 
0323     if (monitor_region_start > monitor_region_end)
0324         return -EINVAL;
0325     if (!monitor_region_start && !monitor_region_end &&
0326             !get_monitoring_region(&monitor_region_start,
0327                 &monitor_region_end))
0328         return -EINVAL;
0329     addr_range.start = monitor_region_start;
0330     addr_range.end = monitor_region_end;
0331     return damon_set_regions(target, &addr_range, 1);
0332 }
0333 
0334 static int damon_reclaim_turn(bool on)
0335 {
0336     int err;
0337 
0338     if (!on) {
0339         err = damon_stop(&ctx, 1);
0340         if (!err)
0341             kdamond_pid = -1;
0342         return err;
0343     }
0344 
0345     err = damon_reclaim_apply_parameters();
0346     if (err)
0347         return err;
0348 
0349     err = damon_start(&ctx, 1, true);
0350     if (err)
0351         return err;
0352     kdamond_pid = ctx->kdamond->pid;
0353     return 0;
0354 }
0355 
0356 static struct delayed_work damon_reclaim_timer;
0357 static void damon_reclaim_timer_fn(struct work_struct *work)
0358 {
0359     static bool last_enabled;
0360     bool now_enabled;
0361 
0362     now_enabled = enabled;
0363     if (last_enabled != now_enabled) {
0364         if (!damon_reclaim_turn(now_enabled))
0365             last_enabled = now_enabled;
0366         else
0367             enabled = last_enabled;
0368     }
0369 }
0370 static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn);
0371 
0372 static bool damon_reclaim_initialized;
0373 
0374 static int damon_reclaim_enabled_store(const char *val,
0375         const struct kernel_param *kp)
0376 {
0377     int rc = param_set_bool(val, kp);
0378 
0379     if (rc < 0)
0380         return rc;
0381 
0382     /* system_wq might not initialized yet */
0383     if (!damon_reclaim_initialized)
0384         return rc;
0385 
0386     schedule_delayed_work(&damon_reclaim_timer, 0);
0387     return 0;
0388 }
0389 
0390 static const struct kernel_param_ops enabled_param_ops = {
0391     .set = damon_reclaim_enabled_store,
0392     .get = param_get_bool,
0393 };
0394 
0395 module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
0396 MODULE_PARM_DESC(enabled,
0397     "Enable or disable DAMON_RECLAIM (default: disabled)");
0398 
0399 static int damon_reclaim_handle_commit_inputs(void)
0400 {
0401     int err;
0402 
0403     if (!commit_inputs)
0404         return 0;
0405 
0406     err = damon_reclaim_apply_parameters();
0407     commit_inputs = false;
0408     return err;
0409 }
0410 
0411 static int damon_reclaim_after_aggregation(struct damon_ctx *c)
0412 {
0413     struct damos *s;
0414 
0415     /* update the stats parameter */
0416     damon_for_each_scheme(s, c) {
0417         nr_reclaim_tried_regions = s->stat.nr_tried;
0418         bytes_reclaim_tried_regions = s->stat.sz_tried;
0419         nr_reclaimed_regions = s->stat.nr_applied;
0420         bytes_reclaimed_regions = s->stat.sz_applied;
0421         nr_quota_exceeds = s->stat.qt_exceeds;
0422     }
0423 
0424     return damon_reclaim_handle_commit_inputs();
0425 }
0426 
/*
 * 'after_wmarks_check' callback of the DAMON context.
 *
 * Only handles a pending ``commit_inputs`` request; @c is not used.  Returns
 * the result of damon_reclaim_handle_commit_inputs().
 */
static int damon_reclaim_after_wmarks_check(struct damon_ctx *c)
{
	int err = damon_reclaim_handle_commit_inputs();

	return err;
}
0431 
0432 static int __init damon_reclaim_init(void)
0433 {
0434     ctx = damon_new_ctx();
0435     if (!ctx)
0436         return -ENOMEM;
0437 
0438     if (damon_select_ops(ctx, DAMON_OPS_PADDR)) {
0439         damon_destroy_ctx(ctx);
0440         return -EINVAL;
0441     }
0442 
0443     ctx->callback.after_wmarks_check = damon_reclaim_after_wmarks_check;
0444     ctx->callback.after_aggregation = damon_reclaim_after_aggregation;
0445 
0446     target = damon_new_target();
0447     if (!target) {
0448         damon_destroy_ctx(ctx);
0449         return -ENOMEM;
0450     }
0451     damon_add_target(ctx, target);
0452 
0453     schedule_delayed_work(&damon_reclaim_timer, 0);
0454 
0455     damon_reclaim_initialized = true;
0456     return 0;
0457 }
0458 
0459 module_init(damon_reclaim_init);