// SPDX-License-Identifier: GPL-2.0
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "internal.h"

/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check event has
 * occurred. Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple cpus that share a machine check bank
 * so we need to designate a single cpu to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a cpu is offlined or brought online since
 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different cpu may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined cpu.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * CMCI storm detection backoff counter
 *
 * During a storm, we reset this counter to INITIAL_CHECK_INTERVAL if we've
 * encountered an error. If not, we decrement it by one. We signal the end of
 * the CMCI storm when it reaches 0.
 */
static DEFINE_PER_CPU(int, cmci_backoff_cnt);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

#define CMCI_THRESHOLD      1
#define CMCI_POLL_INTERVAL  (30 * HZ)
#define CMCI_STORM_INTERVAL (HZ)
#define CMCI_STORM_THRESHOLD    15

static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);

enum {
    CMCI_STORM_NONE,
    CMCI_STORM_ACTIVE,
    CMCI_STORM_SUBSIDED,
};

static atomic_t cmci_storm_on_cpus;

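/*
 * Check whether CMCI is usable on this CPU: the vendor must be Intel or
 * Zhaoxin, CMCI/CE reporting must not be disabled on the command line, the
 * local APIC must expose the CMCI LVT entry and MCG_CAP must advertise
 * MCG_CMCI_P. Also reports the number of MCA banks through *banks.
 */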
static int cmci_supported(int *banks)
{
    u64 cap;

    if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
        return 0;

    /*
     * The vendor check is not strictly needed, but the MCE
     * initialization is vendor keyed and this check makes sure
     * none of the backdoors are entered otherwise.
     */
    if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
        boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
        return 0;

    if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
        return 0;
    rdmsrl(MSR_IA32_MCG_CAP, cap);
    *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
    return !!(cap & MCG_CMCI_P);
}

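/*
 * Check whether Local Machine Check Exception (LMCE) delivery is supported
 * by the processor and has been enabled by the BIOS via IA32_FEAT_CTL.
 */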
static bool lmce_supported(void)
{
    u64 tmp;

    if (mca_cfg.lmce_disabled)
        return false;

    rdmsrl(MSR_IA32_MCG_CAP, tmp);

    /*
     * LMCE depends on recovery support in the processor. Hence both
     * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
     */
    if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
           (MCG_SER_P | MCG_LMCE_P))
        return false;

    /*
     * BIOS should indicate support for LMCE by setting bit 20 in
     * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP
     * fault.  The MSR must also be locked for LMCE_ENABLED to take effect.
     * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
     * locks the MSR in the event that it wasn't already locked by BIOS.
     */
    rdmsrl(MSR_IA32_FEAT_CTL, tmp);
    if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
        return false;

    return tmp & FEAT_CTL_LMCE_ENABLED;
}

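/*
 * Poll the CMCI-owned banks while a storm is in progress and adjust the
 * storm backoff counter. Returns true if a storm is active on this CPU
 * (i.e. the banks were polled here), false if the regular poll path
 * should handle it.
 */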
bool mce_intel_cmci_poll(void)
{
    if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
        return false;

    /*
     * Reset the counter if we've logged an error in the last poll
     * during the storm.
     */
    if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)))
        this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
    else
        this_cpu_dec(cmci_backoff_cnt);

    return true;
}

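/*
 * A CPU is about to go offline: take it out of the storm accounting so the
 * remaining CPUs can still leave storm mode.
 */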
void mce_intel_hcpu_update(unsigned long cpu)
{
    if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
        atomic_dec(&cmci_storm_on_cpus);

    per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}

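/*
 * Switch CMCI delivery between interrupt mode and poll mode by toggling
 * MCI_CTL2_CMCI_EN on every bank owned by this CPU.
 */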
static void cmci_toggle_interrupt_mode(bool on)
{
    unsigned long flags, *owned;
    int bank;
    u64 val;

    raw_spin_lock_irqsave(&cmci_discover_lock, flags);
    owned = this_cpu_ptr(mce_banks_owned);
    for_each_set_bit(bank, owned, MAX_NR_BANKS) {
        rdmsrl(MSR_IA32_MCx_CTL2(bank), val);

        if (on)
            val |= MCI_CTL2_CMCI_EN;
        else
            val &= ~MCI_CTL2_CMCI_EN;

        wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
    }
    raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

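/*
 * Called from the MCE poll timer to pick the next polling interval. While a
 * storm is active we poll every CMCI_STORM_INTERVAL; once the storm has
 * subsided on all CPUs we re-enable CMCI interrupts and fall back to the
 * normal CMCI_POLL_INTERVAL.
 */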
unsigned long cmci_intel_adjust_timer(unsigned long interval)
{
    if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
        (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
        mce_notify_irq();
        return CMCI_STORM_INTERVAL;
    }

    switch (__this_cpu_read(cmci_storm_state)) {
    case CMCI_STORM_ACTIVE:

        /*
         * We switch back to interrupt mode once the poll timer has
         * silenced itself. That means no events recorded and the timer
         * interval is back to our poll interval.
         */
        __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
        if (!atomic_sub_return(1, &cmci_storm_on_cpus))
            pr_notice("CMCI storm subsided: switching to interrupt mode\n");

        fallthrough;

    case CMCI_STORM_SUBSIDED:
        /*
         * We wait for all CPUs to go back to SUBSIDED state. When that
         * happens we switch back to interrupt mode.
         */
        if (!atomic_read(&cmci_storm_on_cpus)) {
            __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
            cmci_toggle_interrupt_mode(true);
            cmci_recheck();
        }
        return CMCI_POLL_INTERVAL;
    default:

        /* We have shiny weather. Let the poll do whatever it thinks. */
        return interval;
    }
}

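/*
 * Count CMCIs within a CMCI_STORM_INTERVAL window. When the count exceeds
 * CMCI_STORM_THRESHOLD, declare a storm: disable CMCI on the owned banks,
 * switch to polling and kick the poll timer. Returns true while a storm is
 * in progress so the interrupt handler can skip the immediate poll.
 */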
static bool cmci_storm_detect(void)
{
    unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
    unsigned long ts = __this_cpu_read(cmci_time_stamp);
    unsigned long now = jiffies;
    int r;

    if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
        return true;

    if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
        cnt++;
    } else {
        cnt = 1;
        __this_cpu_write(cmci_time_stamp, now);
    }
    __this_cpu_write(cmci_storm_cnt, cnt);

    if (cnt <= CMCI_STORM_THRESHOLD)
        return false;

    cmci_toggle_interrupt_mode(false);
    __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
    r = atomic_add_return(1, &cmci_storm_on_cpus);
    mce_timer_kick(CMCI_STORM_INTERVAL);
    this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);

    if (r == 1)
        pr_notice("CMCI storm detected: switching to poll mode\n");
    return true;
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
    if (cmci_storm_detect())
        return;

    machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks)
{
    unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
    unsigned long flags;
    int i;
    int bios_wrong_thresh = 0;

    raw_spin_lock_irqsave(&cmci_discover_lock, flags);
    for (i = 0; i < banks; i++) {
        u64 val;
        int bios_zero_thresh = 0;

        if (test_bit(i, owned))
            continue;

        /* Skip banks in firmware first mode */
        if (test_bit(i, mce_banks_ce_disabled))
            continue;

        rdmsrl(MSR_IA32_MCx_CTL2(i), val);

        /* Already owned by someone else? */
        if (val & MCI_CTL2_CMCI_EN) {
            clear_bit(i, owned);
            __clear_bit(i, this_cpu_ptr(mce_poll_banks));
            continue;
        }

        if (!mca_cfg.bios_cmci_threshold) {
            val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
            val |= CMCI_THRESHOLD;
        } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
            /*
             * If bios_cmci_threshold boot option was specified
             * but the threshold is zero, we'll try to initialize
             * it to 1.
             */
            bios_zero_thresh = 1;
            val |= CMCI_THRESHOLD;
        }

        val |= MCI_CTL2_CMCI_EN;
        wrmsrl(MSR_IA32_MCx_CTL2(i), val);
        rdmsrl(MSR_IA32_MCx_CTL2(i), val);

        /* Did the enable bit stick? -- the bank supports CMCI */
        if (val & MCI_CTL2_CMCI_EN) {
            set_bit(i, owned);
            __clear_bit(i, this_cpu_ptr(mce_poll_banks));
            /*
             * We are able to set thresholds for some banks that
             * had a threshold of 0. This means the BIOS has not
             * set the thresholds properly or does not work with
             * this boot option. Note down now and report later.
             */
            if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
                    (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
                bios_wrong_thresh = 1;
        } else {
            WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
        }
    }
    raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
    if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
        pr_info_once(
            "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
        pr_info_once(
            "bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
    }
}

/*
 * Just in case we missed an event during initialization, check
 * all of the CMCI-owned banks.
 */
void cmci_recheck(void)
{
    unsigned long flags;
    int banks;

    if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
        return;

    local_irq_save(flags);
    machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
    local_irq_restore(flags);
}

/* Caller must hold cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
    u64 val;

    if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
        return;
    rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
    val &= ~MCI_CTL2_CMCI_EN;
    wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
    __clear_bit(bank, this_cpu_ptr(mce_banks_owned));
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
    unsigned long flags;
    int i;
    int banks;

    if (!cmci_supported(&banks))
        return;
    raw_spin_lock_irqsave(&cmci_discover_lock, flags);
    for (i = 0; i < banks; i++)
        __cmci_disable_bank(i);
    raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
    int banks;

    /* Recheck banks in case CPUs don't all have the same number of banks */
    if (cmci_supported(&banks))
        cmci_discover(banks);
}

/* After a CPU went down, cycle through all the others and rediscover */
void cmci_rediscover(void)
{
    int banks;

    if (!cmci_supported(&banks))
        return;

    on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
    int banks;

    if (cmci_supported(&banks))
        cmci_discover(banks);
}

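/*
 * Disable CMCI on a single bank on this CPU and drop ownership of it, e.g.
 * when error reporting for that bank is handed over to firmware.
 */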
void cmci_disable_bank(int bank)
{
    int banks;
    unsigned long flags;

    if (!cmci_supported(&banks))
        return;

    raw_spin_lock_irqsave(&cmci_discover_lock, flags);
    __cmci_disable_bank(bank);
    raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

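/*
 * Set up the CMCI threshold interrupt vector, take ownership of the banks
 * visible to this CPU and program the CMCI LVT entry on the local APIC.
 */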
void intel_init_cmci(void)
{
    int banks;

    if (!cmci_supported(&banks))
        return;

    mce_threshold_vector = intel_threshold_interrupt;
    cmci_discover(banks);
    /*
     * For CPU #0 this runs with the APIC still disabled, but that's
     * ok because only the vector is set up. We still do another
     * check of the banks later for CPU #0 just to make sure we
     * don't miss any events.
     */
    apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
    cmci_recheck();
}

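/*
 * Enable Local MCE delivery via MCG_EXT_CTL if the CPU and firmware support
 * it and it isn't already enabled.
 */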
void intel_init_lmce(void)
{
    u64 val;

    if (!lmce_supported())
        return;

    rdmsrl(MSR_IA32_MCG_EXT_CTL, val);

    if (!(val & MCG_EXT_CTL_LMCE_EN))
        wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}

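/* Disable Local MCE delivery on this CPU again. */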
void intel_clear_lmce(void)
{
    u64 val;

    if (!lmce_supported())
        return;

    rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
    val &= ~MCG_EXT_CTL_LMCE_EN;
    wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
}

/*
 * Enable additional error logs from the integrated
 * memory controller on processors that support this.
 */
static void intel_imc_init(struct cpuinfo_x86 *c)
{
    u64 error_control;

    switch (c->x86_model) {
    case INTEL_FAM6_SANDYBRIDGE_X:
    case INTEL_FAM6_IVYBRIDGE_X:
    case INTEL_FAM6_HASWELL_X:
        if (rdmsrl_safe(MSR_ERROR_CONTROL, &error_control))
            return;
        error_control |= 2;
        wrmsrl_safe(MSR_ERROR_CONTROL, error_control);
        break;
    }
}

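/* Enable all Intel specific MCE features for this CPU. */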
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
    intel_init_cmci();
    intel_init_lmce();
    intel_imc_init(c);
}

void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
    intel_clear_lmce();
}

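/*
 * Filter out corrected errors that known hardware errata report spuriously
 * in bank 0 on the affected CPU models.
 */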
bool intel_filter_mce(struct mce *m)
{
    struct cpuinfo_x86 *c = &boot_cpu_data;

    /* MCE errata HSD131, HSM142, HSW131, BDM48 and SKX37 */
    if ((c->x86 == 6) &&
        ((c->x86_model == INTEL_FAM6_HASWELL) ||
         (c->x86_model == INTEL_FAM6_HASWELL_L) ||
         (c->x86_model == INTEL_FAM6_BROADWELL) ||
         (c->x86_model == INTEL_FAM6_HASWELL_G) ||
         (c->x86_model == INTEL_FAM6_SKYLAKE_X)) &&
        (m->bank == 0) &&
        ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
        return true;

    return false;
}