0001
0002
0003
0004
0005 #include <linux/mm.h>
0006 #include <linux/gfp.h>
0007 #include <linux/ras.h>
0008 #include <linux/kernel.h>
0009 #include <linux/workqueue.h>
0010
0011 #include <asm/mce.h>
0012
0013 #include "debugfs.h"
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
/* Every message printed from this file is prefixed with "RAS: ". */
#undef pr_fmt
#define pr_fmt(fmt)		"RAS: " fmt

/*
 * Each array element is a u64 split into three fields by the accessors
 * below:
 *
 *   bits [63 .. PAGE_SHIFT]            page frame number     (PFN())
 *   bits [PAGE_SHIFT-1 .. COUNT_BITS]  decay/generation bits (DECAY())
 *   bits [COUNT_BITS-1 .. 0]           error count           (COUNT())
 *
 * FULL_COUNT() yields decay and count together, i.e. everything below
 * the PFN.
 */
#define DECAY_BITS		2
#define DECAY_MASK		((1ULL << DECAY_BITS) - 1)

/* Number of u64 elements that fit into the single page backing the array. */
#define MAX_ELEMS		(PAGE_SIZE / sizeof(u64))

/*
 * Number of counted insertions after which cec_add_elem() triggers a
 * spring cleaning run.
 */
#define CLEAN_ELEMS		(MAX_ELEMS >> DECAY_BITS)

/* Width and masks of the count field and of decay+count combined. */
#define COUNT_BITS		(PAGE_SHIFT - DECAY_BITS)
#define COUNT_MASK		((1ULL << COUNT_BITS) - 1)
#define FULL_COUNT_MASK		(PAGE_SIZE - 1)

/* Field accessors for one array element @e. */
#define PFN(e)			((e) >> PAGE_SHIFT)
#define DECAY(e)		(((e) >> COUNT_BITS) & DECAY_MASK)
#define COUNT(e)		((unsigned int)(e) & COUNT_MASK)
#define FULL_COUNT(e)		((e) & FULL_COUNT_MASK)
0093
0094 static struct ce_array {
0095 u64 *array;
0096 unsigned int n;
0097
0098 unsigned int decay_count;
0099
0100
0101
0102
0103 u64 pfns_poisoned;
0104
0105
0106
0107 u64 ces_entered;
0108
0109
0110
0111
0112 u64 decays_done;
0113
0114
0115
0116 union {
0117 struct {
0118 __u32 disabled : 1,
0119 __resv : 31;
0120 };
0121 __u32 flags;
0122 };
0123 } ce_arr;
0124
0125 static DEFINE_MUTEX(ce_mutex);
0126 static u64 dfs_pfn;
0127
0128
0129 static u64 action_threshold = COUNT_MASK;
0130
0131
0132 #define CEC_DECAY_DEFAULT_INTERVAL 24 * 60 * 60
0133 #define CEC_DECAY_MIN_INTERVAL 1 * 60 * 60
0134 #define CEC_DECAY_MAX_INTERVAL 30 * 24 * 60 * 60
0135 static struct delayed_work cec_work;
0136 static u64 decay_interval = CEC_DECAY_DEFAULT_INTERVAL;
0137
0138
0139
0140
0141
0142 static void do_spring_cleaning(struct ce_array *ca)
0143 {
0144 int i;
0145
0146 for (i = 0; i < ca->n; i++) {
0147 u8 decay = DECAY(ca->array[i]);
0148
0149 if (!decay)
0150 continue;
0151
0152 decay--;
0153
0154 ca->array[i] &= ~(DECAY_MASK << COUNT_BITS);
0155 ca->array[i] |= (decay << COUNT_BITS);
0156 }
0157 ca->decay_count = 0;
0158 ca->decays_done++;
0159 }
0160
0161
0162
0163
0164 static void cec_mod_work(unsigned long interval)
0165 {
0166 unsigned long iv;
0167
0168 iv = interval * HZ;
0169 mod_delayed_work(system_wq, &cec_work, round_jiffies(iv));
0170 }
0171
0172 static void cec_work_fn(struct work_struct *work)
0173 {
0174 mutex_lock(&ce_mutex);
0175 do_spring_cleaning(&ce_arr);
0176 mutex_unlock(&ce_mutex);
0177
0178 cec_mod_work(decay_interval);
0179 }
0180
0181
0182
0183
0184
0185
0186 static int __find_elem(struct ce_array *ca, u64 pfn, unsigned int *to)
0187 {
0188 int min = 0, max = ca->n - 1;
0189 u64 this_pfn;
0190
0191 while (min <= max) {
0192 int i = (min + max) >> 1;
0193
0194 this_pfn = PFN(ca->array[i]);
0195
0196 if (this_pfn < pfn)
0197 min = i + 1;
0198 else if (this_pfn > pfn)
0199 max = i - 1;
0200 else if (this_pfn == pfn) {
0201 if (to)
0202 *to = i;
0203
0204 return i;
0205 }
0206 }
0207
0208
0209
0210
0211
0212
0213
0214
0215
0216
0217 if (to)
0218 *to = min;
0219
0220 return -ENOKEY;
0221 }
0222
0223 static int find_elem(struct ce_array *ca, u64 pfn, unsigned int *to)
0224 {
0225 WARN_ON(!to);
0226
0227 if (!ca->n) {
0228 *to = 0;
0229 return -ENOKEY;
0230 }
0231 return __find_elem(ca, pfn, to);
0232 }
0233
0234 static void del_elem(struct ce_array *ca, int idx)
0235 {
0236
0237 if (ca->n - (idx + 1))
0238 memmove((void *)&ca->array[idx],
0239 (void *)&ca->array[idx + 1],
0240 (ca->n - (idx + 1)) * sizeof(u64));
0241
0242 ca->n--;
0243 }
0244
0245 static u64 del_lru_elem_unlocked(struct ce_array *ca)
0246 {
0247 unsigned int min = FULL_COUNT_MASK;
0248 int i, min_idx = 0;
0249
0250 for (i = 0; i < ca->n; i++) {
0251 unsigned int this = FULL_COUNT(ca->array[i]);
0252
0253 if (min > this) {
0254 min = this;
0255 min_idx = i;
0256 }
0257 }
0258
0259 del_elem(ca, min_idx);
0260
0261 return PFN(ca->array[min_idx]);
0262 }
0263
0264
0265
0266
0267
0268 static u64 __maybe_unused del_lru_elem(void)
0269 {
0270 struct ce_array *ca = &ce_arr;
0271 u64 pfn;
0272
0273 if (!ca->n)
0274 return 0;
0275
0276 mutex_lock(&ce_mutex);
0277 pfn = del_lru_elem_unlocked(ca);
0278 mutex_unlock(&ce_mutex);
0279
0280 return pfn;
0281 }
0282
0283 static bool sanity_check(struct ce_array *ca)
0284 {
0285 bool ret = false;
0286 u64 prev = 0;
0287 int i;
0288
0289 for (i = 0; i < ca->n; i++) {
0290 u64 this = PFN(ca->array[i]);
0291
0292 if (WARN(prev > this, "prev: 0x%016llx <-> this: 0x%016llx\n", prev, this))
0293 ret = true;
0294
0295 prev = this;
0296 }
0297
0298 if (!ret)
0299 return ret;
0300
0301 pr_info("Sanity check dump:\n{ n: %d\n", ca->n);
0302 for (i = 0; i < ca->n; i++) {
0303 u64 this = PFN(ca->array[i]);
0304
0305 pr_info(" %03d: [%016llx|%03llx]\n", i, this, FULL_COUNT(ca->array[i]));
0306 }
0307 pr_info("}\n");
0308
0309 return ret;
0310 }
0311
0312
0313
0314
0315
0316
0317
0318
0319
0320
0321 static int cec_add_elem(u64 pfn)
0322 {
0323 struct ce_array *ca = &ce_arr;
0324 int count, err, ret = 0;
0325 unsigned int to = 0;
0326
0327
0328
0329
0330
0331 if (!ce_arr.array || ce_arr.disabled)
0332 return -ENODEV;
0333
0334 mutex_lock(&ce_mutex);
0335
0336 ca->ces_entered++;
0337
0338
0339 if (ca->n == MAX_ELEMS)
0340 WARN_ON(!del_lru_elem_unlocked(ca));
0341
0342 err = find_elem(ca, pfn, &to);
0343 if (err < 0) {
0344
0345
0346
0347 memmove((void *)&ca->array[to + 1],
0348 (void *)&ca->array[to],
0349 (ca->n - to) * sizeof(u64));
0350
0351 ca->array[to] = pfn << PAGE_SHIFT;
0352 ca->n++;
0353 }
0354
0355
0356 ca->array[to] |= DECAY_MASK << COUNT_BITS;
0357 ca->array[to]++;
0358
0359
0360 count = COUNT(ca->array[to]);
0361 if (count >= action_threshold) {
0362 u64 pfn = ca->array[to] >> PAGE_SHIFT;
0363
0364 if (!pfn_valid(pfn)) {
0365 pr_warn("CEC: Invalid pfn: 0x%llx\n", pfn);
0366 } else {
0367
0368 pr_err("Soft-offlining pfn: 0x%llx\n", pfn);
0369 memory_failure_queue(pfn, MF_SOFT_OFFLINE);
0370 ca->pfns_poisoned++;
0371 }
0372
0373 del_elem(ca, to);
0374
0375
0376
0377
0378
0379 ret = 1;
0380
0381 goto unlock;
0382 }
0383
0384 ca->decay_count++;
0385
0386 if (ca->decay_count >= CLEAN_ELEMS)
0387 do_spring_cleaning(ca);
0388
0389 WARN_ON_ONCE(sanity_check(ca));
0390
0391 unlock:
0392 mutex_unlock(&ce_mutex);
0393
0394 return ret;
0395 }
0396
0397 static int u64_get(void *data, u64 *val)
0398 {
0399 *val = *(u64 *)data;
0400
0401 return 0;
0402 }
0403
0404 static int pfn_set(void *data, u64 val)
0405 {
0406 *(u64 *)data = val;
0407
0408 cec_add_elem(val);
0409
0410 return 0;
0411 }
0412
0413 DEFINE_DEBUGFS_ATTRIBUTE(pfn_ops, u64_get, pfn_set, "0x%llx\n");
0414
0415 static int decay_interval_set(void *data, u64 val)
0416 {
0417 if (val < CEC_DECAY_MIN_INTERVAL)
0418 return -EINVAL;
0419
0420 if (val > CEC_DECAY_MAX_INTERVAL)
0421 return -EINVAL;
0422
0423 *(u64 *)data = val;
0424 decay_interval = val;
0425
0426 cec_mod_work(decay_interval);
0427
0428 return 0;
0429 }
0430 DEFINE_DEBUGFS_ATTRIBUTE(decay_interval_ops, u64_get, decay_interval_set, "%lld\n");
0431
0432 static int action_threshold_set(void *data, u64 val)
0433 {
0434 *(u64 *)data = val;
0435
0436 if (val > COUNT_MASK)
0437 val = COUNT_MASK;
0438
0439 action_threshold = val;
0440
0441 return 0;
0442 }
0443 DEFINE_DEBUGFS_ATTRIBUTE(action_threshold_ops, u64_get, action_threshold_set, "%lld\n");
0444
0445 static const char * const bins[] = { "00", "01", "10", "11" };
0446
0447 static int array_show(struct seq_file *m, void *v)
0448 {
0449 struct ce_array *ca = &ce_arr;
0450 int i;
0451
0452 mutex_lock(&ce_mutex);
0453
0454 seq_printf(m, "{ n: %d\n", ca->n);
0455 for (i = 0; i < ca->n; i++) {
0456 u64 this = PFN(ca->array[i]);
0457
0458 seq_printf(m, " %3d: [%016llx|%s|%03llx]\n",
0459 i, this, bins[DECAY(ca->array[i])], COUNT(ca->array[i]));
0460 }
0461
0462 seq_printf(m, "}\n");
0463
0464 seq_printf(m, "Stats:\nCEs: %llu\nofflined pages: %llu\n",
0465 ca->ces_entered, ca->pfns_poisoned);
0466
0467 seq_printf(m, "Flags: 0x%x\n", ca->flags);
0468
0469 seq_printf(m, "Decay interval: %lld seconds\n", decay_interval);
0470 seq_printf(m, "Decays: %lld\n", ca->decays_done);
0471
0472 seq_printf(m, "Action threshold: %lld\n", action_threshold);
0473
0474 mutex_unlock(&ce_mutex);
0475
0476 return 0;
0477 }
0478
0479 DEFINE_SHOW_ATTRIBUTE(array);
0480
0481 static int __init create_debugfs_nodes(void)
0482 {
0483 struct dentry *d, *pfn, *decay, *count, *array;
0484
0485 d = debugfs_create_dir("cec", ras_debugfs_dir);
0486 if (!d) {
0487 pr_warn("Error creating cec debugfs node!\n");
0488 return -1;
0489 }
0490
0491 decay = debugfs_create_file("decay_interval", S_IRUSR | S_IWUSR, d,
0492 &decay_interval, &decay_interval_ops);
0493 if (!decay) {
0494 pr_warn("Error creating decay_interval debugfs node!\n");
0495 goto err;
0496 }
0497
0498 count = debugfs_create_file("action_threshold", S_IRUSR | S_IWUSR, d,
0499 &action_threshold, &action_threshold_ops);
0500 if (!count) {
0501 pr_warn("Error creating action_threshold debugfs node!\n");
0502 goto err;
0503 }
0504
0505 if (!IS_ENABLED(CONFIG_RAS_CEC_DEBUG))
0506 return 0;
0507
0508 pfn = debugfs_create_file("pfn", S_IRUSR | S_IWUSR, d, &dfs_pfn, &pfn_ops);
0509 if (!pfn) {
0510 pr_warn("Error creating pfn debugfs node!\n");
0511 goto err;
0512 }
0513
0514 array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_fops);
0515 if (!array) {
0516 pr_warn("Error creating array debugfs node!\n");
0517 goto err;
0518 }
0519
0520 return 0;
0521
0522 err:
0523 debugfs_remove_recursive(d);
0524
0525 return 1;
0526 }
0527
0528 static int cec_notifier(struct notifier_block *nb, unsigned long val,
0529 void *data)
0530 {
0531 struct mce *m = (struct mce *)data;
0532
0533 if (!m)
0534 return NOTIFY_DONE;
0535
0536
0537 if (mce_is_memory_error(m) &&
0538 mce_is_correctable(m) &&
0539 mce_usable_address(m)) {
0540 if (!cec_add_elem(m->addr >> PAGE_SHIFT)) {
0541 m->kflags |= MCE_HANDLED_CEC;
0542 return NOTIFY_OK;
0543 }
0544 }
0545
0546 return NOTIFY_DONE;
0547 }
0548
0549 static struct notifier_block cec_nb = {
0550 .notifier_call = cec_notifier,
0551 .priority = MCE_PRIO_CEC,
0552 };
0553
0554 static int __init cec_init(void)
0555 {
0556 if (ce_arr.disabled)
0557 return -ENODEV;
0558
0559 ce_arr.array = (void *)get_zeroed_page(GFP_KERNEL);
0560 if (!ce_arr.array) {
0561 pr_err("Error allocating CE array page!\n");
0562 return -ENOMEM;
0563 }
0564
0565 if (create_debugfs_nodes()) {
0566 free_page((unsigned long)ce_arr.array);
0567 return -ENOMEM;
0568 }
0569
0570 INIT_DELAYED_WORK(&cec_work, cec_work_fn);
0571 schedule_delayed_work(&cec_work, CEC_DECAY_DEFAULT_INTERVAL);
0572
0573 mce_register_decode_chain(&cec_nb);
0574
0575 pr_info("Correctable Errors collector initialized.\n");
0576 return 0;
0577 }
0578 late_initcall(cec_init);
0579
0580 int __init parse_cec_param(char *str)
0581 {
0582 if (!str)
0583 return 0;
0584
0585 if (*str == '=')
0586 str++;
0587
0588 if (!strcmp(str, "cec_disable"))
0589 ce_arr.disabled = 1;
0590 else
0591 return 0;
0592
0593 return 1;
0594 }