0001
0002
0003
0004
0005
0006
0007
0008
0009 #include <linux/module.h>
0010 #include <linux/acpi.h>
0011 #include <linux/cper.h>
0012 #include <linux/ratelimit.h>
0013 #include <linux/edac.h>
0014 #include <linux/ras.h>
0015 #include <asm/cpu.h>
0016 #include <asm/mce.h>
0017
0018 #include "apei/apei-internal.h"
0019 #include <ras/ras_event.h>
0020
/* _DSM revision and function index used to query the L1 directory address. */
#define EXTLOG_DSM_REV		0x0
#define EXTLOG_FN_ADDR		0x1

/* Bit in the L1 header 'flags' field that this driver sets on init and clears on exit. */
#define FLAG_OS_OPTIN		BIT(0)

/*
 * L1 directory entry layout as used by extlog_elog_entry_check():
 * bit 63 marks the entry valid, bits 51:0 hold the physical address
 * of the elog record.
 */
#define ELOG_ENTRY_VALID	(1ULL << 63)
#define EXT_ELOG_ENTRY_MASK	GENMASK_ULL(51, 0)

/* Number of bytes copied out of the elog region for a single record. */
#define ELOG_ENTRY_LEN		0x1000

/* Warning format used when request_mem_region() fails for a firmware region. */
#define EMCA_BUG \
	"Can not request iomem region <0x%016llx-0x%016llx> - eMCA disabled\n"
0032
/*
 * Header located at the start of the L1 directory (physical address
 * l1_dirbase). extlog_init() maps it once to read the sizes and bases
 * below, and the OS opt-in bit is toggled in 'flags'.
 * The layout is dictated by firmware; do not reorder or resize fields.
 */
0033 struct extlog_l1_head {
0034 u32 ver; /* not read in this file; presumably the header version - confirm against the eMCA spec */
0035 u32 hdr_len; /* not read in this file; the driver uses sizeof(struct extlog_l1_head) instead */
0036 u64 total_len; /* becomes l1_size: length of the whole L1 table that gets mapped */
0037 u64 elog_base; /* becomes elog_base: physical base of the elog region */
0038 u64 elog_len; /* becomes elog_size: length of the elog region */
0039 u32 flags; /* FLAG_OS_OPTIN is set in extlog_init() and cleared in extlog_exit() */
0040 u8 rev0[12]; /* not accessed by this file */
0041 u32 entries; /* becomes l1_percpu_entry: per-CPU stride used by ELOG_IDX() */
0042 u8 rev1[12]; /* not accessed by this file */
0043 };
0044
/*
 * UUID string passed to guid_parse() in extlog_get_l1addr().
 * Marked __initdata; its only user in this file is that __init function.
 */
0045 static u8 extlog_dsm_uuid[] __initdata = "663E35AF-CC10-41A4-88EA-5470AF055295";
0046
0047
/* Physical base and length of the elog region, read from the L1 header. */
0048 static u64 elog_base;
0049 static size_t elog_size;
/* Physical base (returned by the _DSM call) and total length of the L1 directory. */
0050 static u64 l1_dirbase;
0051 static size_t l1_size;
0052
0053
/* Mappings of the L1 directory and of the elog region created in extlog_init(). */
0054 static void __iomem *extlog_l1_addr;
0055 static void __iomem *elog_addr;
0056
/* ELOG_ENTRY_LEN-byte buffer that extlog_print() copies each record into before using it. */
0057 static void *elog_buf;
0058
/* First L1 entry (directly after the header) and the per-CPU entry stride from the header. */
0059 static u64 *l1_entry_base;
0060 static u32 l1_percpu_entry;
0061
/*
 * Index of the L1 directory entry for @bank on @cpu: each CPU owns
 * l1_percpu_entry consecutive entries, selected by cpu_physical_id().
 */
#define ELOG_IDX(cpu, bank) \
	(cpu_physical_id(cpu) * l1_percpu_entry + (bank))

/* Raw 64-bit value of L1 directory entry @idx. */
#define ELOG_ENTRY_DATA(idx) \
	(*(l1_entry_base + (idx)))

/*
 * Translate a physical address inside the elog region into a pointer
 * within the elog mapping. The argument is parenthesized so that an
 * expression such as (a | b) is translated as a whole.
 */
#define ELOG_ENTRY_ADDR(phyaddr) \
	((phyaddr) - elog_base + (u8 *)elog_addr)
0070
0071 static struct acpi_hest_generic_status *extlog_elog_entry_check(int cpu, int bank)
0072 {
0073 int idx;
0074 u64 data;
0075 struct acpi_hest_generic_status *estatus;
0076
0077 WARN_ON(cpu < 0);
0078 idx = ELOG_IDX(cpu, bank);
0079 data = ELOG_ENTRY_DATA(idx);
0080 if ((data & ELOG_ENTRY_VALID) == 0)
0081 return NULL;
0082
0083 data &= EXT_ELOG_ENTRY_MASK;
0084 estatus = (struct acpi_hest_generic_status *)ELOG_ENTRY_ADDR(data);
0085
0086
0087 if (estatus->block_status == 0)
0088 return NULL;
0089
0090 return estatus;
0091 }
0092
0093 static void __print_extlog_rcd(const char *pfx,
0094 struct acpi_hest_generic_status *estatus, int cpu)
0095 {
0096 static atomic_t seqno;
0097 unsigned int curr_seqno;
0098 char pfx_seq[64];
0099
0100 if (!pfx) {
0101 if (estatus->error_severity <= CPER_SEV_CORRECTED)
0102 pfx = KERN_INFO;
0103 else
0104 pfx = KERN_ERR;
0105 }
0106 curr_seqno = atomic_inc_return(&seqno);
0107 snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}", pfx, curr_seqno);
0108 printk("%s""Hardware error detected on CPU%d\n", pfx_seq, cpu);
0109 cper_estatus_print(pfx_seq, estatus);
0110 }
0111
0112 static int print_extlog_rcd(const char *pfx,
0113 struct acpi_hest_generic_status *estatus, int cpu)
0114 {
0115
0116 static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
0117 static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
0118 struct ratelimit_state *ratelimit;
0119
0120 if (estatus->error_severity == CPER_SEV_CORRECTED ||
0121 (estatus->error_severity == CPER_SEV_INFORMATIONAL))
0122 ratelimit = &ratelimit_corrected;
0123 else
0124 ratelimit = &ratelimit_uncorrected;
0125 if (__ratelimit(ratelimit)) {
0126 __print_extlog_rcd(pfx, estatus, cpu);
0127 return 0;
0128 }
0129
0130 return 1;
0131 }
0132
0133 static int extlog_print(struct notifier_block *nb, unsigned long val,
0134 void *data)
0135 {
0136 struct mce *mce = (struct mce *)data;
0137 int bank = mce->bank;
0138 int cpu = mce->extcpu;
0139 struct acpi_hest_generic_status *estatus, *tmp;
0140 struct acpi_hest_generic_data *gdata;
0141 const guid_t *fru_id = &guid_null;
0142 char *fru_text = "";
0143 guid_t *sec_type;
0144 static u32 err_seq;
0145
0146 estatus = extlog_elog_entry_check(cpu, bank);
0147 if (estatus == NULL || (mce->kflags & MCE_HANDLED_CEC))
0148 return NOTIFY_DONE;
0149
0150 memcpy(elog_buf, (void *)estatus, ELOG_ENTRY_LEN);
0151
0152 estatus->block_status = 0;
0153
0154 tmp = (struct acpi_hest_generic_status *)elog_buf;
0155
0156 if (!ras_userspace_consumers()) {
0157 print_extlog_rcd(NULL, tmp, cpu);
0158 goto out;
0159 }
0160
0161
0162 err_seq++;
0163 gdata = (struct acpi_hest_generic_data *)(tmp + 1);
0164 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
0165 fru_id = (guid_t *)gdata->fru_id;
0166 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
0167 fru_text = gdata->fru_text;
0168 sec_type = (guid_t *)gdata->section_type;
0169 if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
0170 struct cper_sec_mem_err *mem = (void *)(gdata + 1);
0171 if (gdata->error_data_length >= sizeof(*mem))
0172 trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
0173 (u8)gdata->error_severity);
0174 }
0175
0176 out:
0177 mce->kflags |= MCE_HANDLED_EXTLOG;
0178 return NOTIFY_OK;
0179 }
0180
0181 static bool __init extlog_get_l1addr(void)
0182 {
0183 guid_t guid;
0184 acpi_handle handle;
0185 union acpi_object *obj;
0186
0187 if (guid_parse(extlog_dsm_uuid, &guid))
0188 return false;
0189 if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
0190 return false;
0191 if (!acpi_check_dsm(handle, &guid, EXTLOG_DSM_REV, 1 << EXTLOG_FN_ADDR))
0192 return false;
0193 obj = acpi_evaluate_dsm_typed(handle, &guid, EXTLOG_DSM_REV,
0194 EXTLOG_FN_ADDR, NULL, ACPI_TYPE_INTEGER);
0195 if (!obj) {
0196 return false;
0197 } else {
0198 l1_dirbase = obj->integer.value;
0199 ACPI_FREE(obj);
0200 }
0201
0202
0203 if (l1_dirbase & ((1 << 12) - 1)) {
0204 pr_warn(FW_BUG "L1 Directory is invalid at physical %llx\n",
0205 l1_dirbase);
0206 return false;
0207 }
0208
0209 return true;
0210 }
/*
 * Notifier registered on the MCE decode chain by extlog_init() and removed
 * by extlog_exit(); invokes extlog_print() at priority MCE_PRIO_EXTLOG.
 */
0211 static struct notifier_block extlog_mce_dec = {
0212 .notifier_call = extlog_print,
0213 .priority = MCE_PRIO_EXTLOG,
0214 };
0215
0216 static int __init extlog_init(void)
0217 {
0218 struct extlog_l1_head *l1_head;
0219 void __iomem *extlog_l1_hdr;
0220 size_t l1_hdr_size;
0221 struct resource *r;
0222 u64 cap;
0223 int rc;
0224
0225 if (rdmsrl_safe(MSR_IA32_MCG_CAP, &cap) ||
0226 !(cap & MCG_ELOG_P) ||
0227 !extlog_get_l1addr())
0228 return -ENODEV;
0229
0230 rc = -EINVAL;
0231
0232 l1_hdr_size = sizeof(struct extlog_l1_head);
0233 r = request_mem_region(l1_dirbase, l1_hdr_size, "L1 DIR HDR");
0234 if (!r) {
0235 pr_warn(FW_BUG EMCA_BUG,
0236 (unsigned long long)l1_dirbase,
0237 (unsigned long long)l1_dirbase + l1_hdr_size);
0238 goto err;
0239 }
0240
0241 extlog_l1_hdr = acpi_os_map_iomem(l1_dirbase, l1_hdr_size);
0242 l1_head = (struct extlog_l1_head *)extlog_l1_hdr;
0243 l1_size = l1_head->total_len;
0244 l1_percpu_entry = l1_head->entries;
0245 elog_base = l1_head->elog_base;
0246 elog_size = l1_head->elog_len;
0247 acpi_os_unmap_iomem(extlog_l1_hdr, l1_hdr_size);
0248 release_mem_region(l1_dirbase, l1_hdr_size);
0249
0250
0251 r = request_mem_region(l1_dirbase, l1_size, "L1 Table");
0252 if (!r) {
0253 pr_warn(FW_BUG EMCA_BUG,
0254 (unsigned long long)l1_dirbase,
0255 (unsigned long long)l1_dirbase + l1_size);
0256 goto err;
0257 }
0258 extlog_l1_addr = acpi_os_map_iomem(l1_dirbase, l1_size);
0259 l1_entry_base = (u64 *)((u8 *)extlog_l1_addr + l1_hdr_size);
0260
0261
0262 r = request_mem_region(elog_base, elog_size, "Elog Table");
0263 if (!r) {
0264 pr_warn(FW_BUG EMCA_BUG,
0265 (unsigned long long)elog_base,
0266 (unsigned long long)elog_base + elog_size);
0267 goto err_release_l1_dir;
0268 }
0269 elog_addr = acpi_os_map_iomem(elog_base, elog_size);
0270
0271 rc = -ENOMEM;
0272
0273 elog_buf = kmalloc(ELOG_ENTRY_LEN, GFP_KERNEL);
0274 if (elog_buf == NULL)
0275 goto err_release_elog;
0276
0277 mce_register_decode_chain(&extlog_mce_dec);
0278
0279 ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN;
0280
0281 return 0;
0282
0283 err_release_elog:
0284 if (elog_addr)
0285 acpi_os_unmap_iomem(elog_addr, elog_size);
0286 release_mem_region(elog_base, elog_size);
0287 err_release_l1_dir:
0288 if (extlog_l1_addr)
0289 acpi_os_unmap_iomem(extlog_l1_addr, l1_size);
0290 release_mem_region(l1_dirbase, l1_size);
0291 err:
0292 pr_warn(FW_BUG "Extended error log disabled because of problems parsing f/w tables\n");
0293 return rc;
0294 }
0295
0296 static void __exit extlog_exit(void)
0297 {
0298 mce_unregister_decode_chain(&extlog_mce_dec);
0299 ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
0300 if (extlog_l1_addr)
0301 acpi_os_unmap_iomem(extlog_l1_addr, l1_size);
0302 if (elog_addr)
0303 acpi_os_unmap_iomem(elog_addr, elog_size);
0304 release_mem_region(elog_base, elog_size);
0305 release_mem_region(l1_dirbase, l1_size);
0306 kfree(elog_buf);
0307 }
0308
/* Register extlog_init()/extlog_exit() as the module's entry and exit points. */
0309 module_init(extlog_init);
0310 module_exit(extlog_exit);
0311
/* Module metadata. */
0312 MODULE_AUTHOR("Chen, Gong <gong.chen@intel.com>");
0313 MODULE_DESCRIPTION("Extended MCA Error Log Driver");
0314 MODULE_LICENSE("GPL");