Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Extended Error Log driver
0004  *
0005  * Copyright (C) 2013 Intel Corp.
0006  * Author: Chen, Gong <gong.chen@intel.com>
0007  */
0008 
0009 #include <linux/module.h>
0010 #include <linux/acpi.h>
0011 #include <linux/cper.h>
0012 #include <linux/ratelimit.h>
0013 #include <linux/edac.h>
0014 #include <linux/ras.h>
0015 #include <asm/cpu.h>
0016 #include <asm/mce.h>
0017 
0018 #include "apei/apei-internal.h"
0019 #include <ras/ras_event.h>
0020 
/* Bits of an L1 directory entry that hold the elog record's physical address */
#define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */

/* Revision and function index used for the _DSM call in extlog_get_l1addr() */
#define EXTLOG_DSM_REV      0x0
#define EXTLOG_FN_ADDR      0x1

/* L1 header flag set by extlog_init() and cleared by extlog_exit() */
#define FLAG_OS_OPTIN       BIT(0)
/* Bit tested in an L1 directory entry before its address is used */
#define ELOG_ENTRY_VALID    (1ULL<<63)
/* Number of bytes copied per record, and the size of elog_buf */
#define ELOG_ENTRY_LEN      0x1000

/* Format for the warning printed when request_mem_region() fails (start, end) */
#define EMCA_BUG \
    "Can not request iomem region <0x%016llx-0x%016llx> - eMCA disabled\n"
0032 
/*
 * Header found at the start of the L1 directory (physical address l1_dirbase).
 * extlog_init() maps it to read total_len, entries, elog_base and elog_len,
 * and treats the u64 directory entries as starting right after this header.
 * The layout is dictated by firmware, so fields must not be reordered.
 */
struct extlog_l1_head {
    u32 ver;    /* Header Version */
    u32 hdr_len;    /* Header Length */
    u64 total_len;  /* entire L1 Directory length including this header */
    u64 elog_base;  /* MCA Error Log Directory base address */
    u64 elog_len;   /* MCA Error Log Directory length */
    u32 flags;  /* bit 0 - OS/VMM Opt-in */
    u8  rev0[12];   /* not accessed by this driver; presumably reserved */
    u32 entries;    /* Valid L1 Directory entries per logical processor */
    u8  rev1[12];   /* not accessed by this driver; presumably reserved */
};
0044 
/* UUID string parsed with guid_parse() to identify the _DSM on \_SB */
static u8 extlog_dsm_uuid[] __initdata = "663E35AF-CC10-41A4-88EA-5470AF055295";

/* L1 table related physical address */
static u64 elog_base;       /* from extlog_l1_head.elog_base */
static size_t elog_size;    /* from extlog_l1_head.elog_len */
static u64 l1_dirbase;      /* value returned by the _DSM */
static size_t l1_size;      /* from extlog_l1_head.total_len */

/* L1 table related virtual address */
static void __iomem *extlog_l1_addr;    /* mapping of [l1_dirbase, +l1_size) */
static void __iomem *elog_addr;         /* mapping of [elog_base, +elog_size) */

/* ELOG_ENTRY_LEN-byte buffer that extlog_print() copies a record into */
static void *elog_buf;

/* First u64 directory entry, located right after the L1 header */
static u64 *l1_entry_base;
/* Directory entries per logical processor, from extlog_l1_head.entries */
static u32 l1_percpu_entry;
0061 
/* Index of the L1 directory entry for MCA bank @bank of logical CPU @cpu. */
#define ELOG_IDX(cpu, bank) \
    (cpu_physical_id(cpu) * l1_percpu_entry + (bank))

/* The raw 64-bit L1 directory entry stored at index @idx. */
#define ELOG_ENTRY_DATA(idx) \
    (*(l1_entry_base + (idx)))

/*
 * Translate the physical address of an elog record into a pointer inside the
 * elog table mapping.  @phyaddr is parenthesised so that an expression
 * argument (e.g. "a | b") is evaluated as a whole before the subtraction.
 */
#define ELOG_ENTRY_ADDR(phyaddr) \
    ((phyaddr) - elog_base + (u8 *)elog_addr)
0070 
0071 static struct acpi_hest_generic_status *extlog_elog_entry_check(int cpu, int bank)
0072 {
0073     int idx;
0074     u64 data;
0075     struct acpi_hest_generic_status *estatus;
0076 
0077     WARN_ON(cpu < 0);
0078     idx = ELOG_IDX(cpu, bank);
0079     data = ELOG_ENTRY_DATA(idx);
0080     if ((data & ELOG_ENTRY_VALID) == 0)
0081         return NULL;
0082 
0083     data &= EXT_ELOG_ENTRY_MASK;
0084     estatus = (struct acpi_hest_generic_status *)ELOG_ENTRY_ADDR(data);
0085 
0086     /* if no valid data in elog entry, just return */
0087     if (estatus->block_status == 0)
0088         return NULL;
0089 
0090     return estatus;
0091 }
0092 
0093 static void __print_extlog_rcd(const char *pfx,
0094                    struct acpi_hest_generic_status *estatus, int cpu)
0095 {
0096     static atomic_t seqno;
0097     unsigned int curr_seqno;
0098     char pfx_seq[64];
0099 
0100     if (!pfx) {
0101         if (estatus->error_severity <= CPER_SEV_CORRECTED)
0102             pfx = KERN_INFO;
0103         else
0104             pfx = KERN_ERR;
0105     }
0106     curr_seqno = atomic_inc_return(&seqno);
0107     snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}", pfx, curr_seqno);
0108     printk("%s""Hardware error detected on CPU%d\n", pfx_seq, cpu);
0109     cper_estatus_print(pfx_seq, estatus);
0110 }
0111 
0112 static int print_extlog_rcd(const char *pfx,
0113                 struct acpi_hest_generic_status *estatus, int cpu)
0114 {
0115     /* Not more than 2 messages every 5 seconds */
0116     static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
0117     static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
0118     struct ratelimit_state *ratelimit;
0119 
0120     if (estatus->error_severity == CPER_SEV_CORRECTED ||
0121         (estatus->error_severity == CPER_SEV_INFORMATIONAL))
0122         ratelimit = &ratelimit_corrected;
0123     else
0124         ratelimit = &ratelimit_uncorrected;
0125     if (__ratelimit(ratelimit)) {
0126         __print_extlog_rcd(pfx, estatus, cpu);
0127         return 0;
0128     }
0129 
0130     return 1;
0131 }
0132 
0133 static int extlog_print(struct notifier_block *nb, unsigned long val,
0134             void *data)
0135 {
0136     struct mce *mce = (struct mce *)data;
0137     int bank = mce->bank;
0138     int cpu = mce->extcpu;
0139     struct acpi_hest_generic_status *estatus, *tmp;
0140     struct acpi_hest_generic_data *gdata;
0141     const guid_t *fru_id = &guid_null;
0142     char *fru_text = "";
0143     guid_t *sec_type;
0144     static u32 err_seq;
0145 
0146     estatus = extlog_elog_entry_check(cpu, bank);
0147     if (estatus == NULL || (mce->kflags & MCE_HANDLED_CEC))
0148         return NOTIFY_DONE;
0149 
0150     memcpy(elog_buf, (void *)estatus, ELOG_ENTRY_LEN);
0151     /* clear record status to enable BIOS to update it again */
0152     estatus->block_status = 0;
0153 
0154     tmp = (struct acpi_hest_generic_status *)elog_buf;
0155 
0156     if (!ras_userspace_consumers()) {
0157         print_extlog_rcd(NULL, tmp, cpu);
0158         goto out;
0159     }
0160 
0161     /* log event via trace */
0162     err_seq++;
0163     gdata = (struct acpi_hest_generic_data *)(tmp + 1);
0164     if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
0165         fru_id = (guid_t *)gdata->fru_id;
0166     if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
0167         fru_text = gdata->fru_text;
0168     sec_type = (guid_t *)gdata->section_type;
0169     if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
0170         struct cper_sec_mem_err *mem = (void *)(gdata + 1);
0171         if (gdata->error_data_length >= sizeof(*mem))
0172             trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
0173                            (u8)gdata->error_severity);
0174     }
0175 
0176 out:
0177     mce->kflags |= MCE_HANDLED_EXTLOG;
0178     return NOTIFY_OK;
0179 }
0180 
0181 static bool __init extlog_get_l1addr(void)
0182 {
0183     guid_t guid;
0184     acpi_handle handle;
0185     union acpi_object *obj;
0186 
0187     if (guid_parse(extlog_dsm_uuid, &guid))
0188         return false;
0189     if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
0190         return false;
0191     if (!acpi_check_dsm(handle, &guid, EXTLOG_DSM_REV, 1 << EXTLOG_FN_ADDR))
0192         return false;
0193     obj = acpi_evaluate_dsm_typed(handle, &guid, EXTLOG_DSM_REV,
0194                       EXTLOG_FN_ADDR, NULL, ACPI_TYPE_INTEGER);
0195     if (!obj) {
0196         return false;
0197     } else {
0198         l1_dirbase = obj->integer.value;
0199         ACPI_FREE(obj);
0200     }
0201 
0202     /* Spec says L1 directory must be 4K aligned, bail out if it isn't */
0203     if (l1_dirbase & ((1 << 12) - 1)) {
0204         pr_warn(FW_BUG "L1 Directory is invalid at physical %llx\n",
0205             l1_dirbase);
0206         return false;
0207     }
0208 
0209     return true;
0210 }
/*
 * Notifier hooking extlog_print() into the MCE decode chain; registered in
 * extlog_init() and unregistered in extlog_exit().
 */
static struct notifier_block extlog_mce_dec = {
    .notifier_call  = extlog_print,
    .priority   = MCE_PRIO_EXTLOG,
};
0215 
0216 static int __init extlog_init(void)
0217 {
0218     struct extlog_l1_head *l1_head;
0219     void __iomem *extlog_l1_hdr;
0220     size_t l1_hdr_size;
0221     struct resource *r;
0222     u64 cap;
0223     int rc;
0224 
0225     if (rdmsrl_safe(MSR_IA32_MCG_CAP, &cap) ||
0226         !(cap & MCG_ELOG_P) ||
0227         !extlog_get_l1addr())
0228         return -ENODEV;
0229 
0230     rc = -EINVAL;
0231     /* get L1 header to fetch necessary information */
0232     l1_hdr_size = sizeof(struct extlog_l1_head);
0233     r = request_mem_region(l1_dirbase, l1_hdr_size, "L1 DIR HDR");
0234     if (!r) {
0235         pr_warn(FW_BUG EMCA_BUG,
0236             (unsigned long long)l1_dirbase,
0237             (unsigned long long)l1_dirbase + l1_hdr_size);
0238         goto err;
0239     }
0240 
0241     extlog_l1_hdr = acpi_os_map_iomem(l1_dirbase, l1_hdr_size);
0242     l1_head = (struct extlog_l1_head *)extlog_l1_hdr;
0243     l1_size = l1_head->total_len;
0244     l1_percpu_entry = l1_head->entries;
0245     elog_base = l1_head->elog_base;
0246     elog_size = l1_head->elog_len;
0247     acpi_os_unmap_iomem(extlog_l1_hdr, l1_hdr_size);
0248     release_mem_region(l1_dirbase, l1_hdr_size);
0249 
0250     /* remap L1 header again based on completed information */
0251     r = request_mem_region(l1_dirbase, l1_size, "L1 Table");
0252     if (!r) {
0253         pr_warn(FW_BUG EMCA_BUG,
0254             (unsigned long long)l1_dirbase,
0255             (unsigned long long)l1_dirbase + l1_size);
0256         goto err;
0257     }
0258     extlog_l1_addr = acpi_os_map_iomem(l1_dirbase, l1_size);
0259     l1_entry_base = (u64 *)((u8 *)extlog_l1_addr + l1_hdr_size);
0260 
0261     /* remap elog table */
0262     r = request_mem_region(elog_base, elog_size, "Elog Table");
0263     if (!r) {
0264         pr_warn(FW_BUG EMCA_BUG,
0265             (unsigned long long)elog_base,
0266             (unsigned long long)elog_base + elog_size);
0267         goto err_release_l1_dir;
0268     }
0269     elog_addr = acpi_os_map_iomem(elog_base, elog_size);
0270 
0271     rc = -ENOMEM;
0272     /* allocate buffer to save elog record */
0273     elog_buf = kmalloc(ELOG_ENTRY_LEN, GFP_KERNEL);
0274     if (elog_buf == NULL)
0275         goto err_release_elog;
0276 
0277     mce_register_decode_chain(&extlog_mce_dec);
0278     /* enable OS to be involved to take over management from BIOS */
0279     ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN;
0280 
0281     return 0;
0282 
0283 err_release_elog:
0284     if (elog_addr)
0285         acpi_os_unmap_iomem(elog_addr, elog_size);
0286     release_mem_region(elog_base, elog_size);
0287 err_release_l1_dir:
0288     if (extlog_l1_addr)
0289         acpi_os_unmap_iomem(extlog_l1_addr, l1_size);
0290     release_mem_region(l1_dirbase, l1_size);
0291 err:
0292     pr_warn(FW_BUG "Extended error log disabled because of problems parsing f/w tables\n");
0293     return rc;
0294 }
0295 
0296 static void __exit extlog_exit(void)
0297 {
0298     mce_unregister_decode_chain(&extlog_mce_dec);
0299     ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
0300     if (extlog_l1_addr)
0301         acpi_os_unmap_iomem(extlog_l1_addr, l1_size);
0302     if (elog_addr)
0303         acpi_os_unmap_iomem(elog_addr, elog_size);
0304     release_mem_region(elog_base, elog_size);
0305     release_mem_region(l1_dirbase, l1_size);
0306     kfree(elog_buf);
0307 }
0308 
0309 module_init(extlog_init);
0310 module_exit(extlog_exit);
0311 
0312 MODULE_AUTHOR("Chen, Gong <gong.chen@intel.com>");
0313 MODULE_DESCRIPTION("Extended MCA Error Log Driver");
0314 MODULE_LICENSE("GPL");