Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * NFIT - Machine Check Handler
0004  *
0005  * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
0006  */
0007 #include <linux/notifier.h>
0008 #include <linux/acpi.h>
0009 #include <linux/nd.h>
0010 #include <asm/mce.h>
0011 #include "nfit.h"
0012 
0013 static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
0014             void *data)
0015 {
0016     struct mce *mce = (struct mce *)data;
0017     struct acpi_nfit_desc *acpi_desc;
0018     struct nfit_spa *nfit_spa;
0019 
0020     /* We only care about uncorrectable memory errors */
0021     if (!mce_is_memory_error(mce) || mce_is_correctable(mce))
0022         return NOTIFY_DONE;
0023 
0024     /* Verify the address reported in the MCE is valid. */
0025     if (!mce_usable_address(mce))
0026         return NOTIFY_DONE;
0027 
0028     /*
0029      * mce->addr contains the physical addr accessed that caused the
0030      * machine check. We need to walk through the list of NFITs, and see
0031      * if any of them matches that address, and only then start a scrub.
0032      */
0033     mutex_lock(&acpi_desc_lock);
0034     list_for_each_entry(acpi_desc, &acpi_descs, list) {
0035         unsigned int align = 1UL << MCI_MISC_ADDR_LSB(mce->misc);
0036         struct device *dev = acpi_desc->dev;
0037         int found_match = 0;
0038 
0039         mutex_lock(&acpi_desc->init_mutex);
0040         list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
0041             struct acpi_nfit_system_address *spa = nfit_spa->spa;
0042 
0043             if (nfit_spa_type(spa) != NFIT_SPA_PM)
0044                 continue;
0045             /* find the spa that covers the mce addr */
0046             if (spa->address > mce->addr)
0047                 continue;
0048             if ((spa->address + spa->length - 1) < mce->addr)
0049                 continue;
0050             found_match = 1;
0051             dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n",
0052                 spa->range_index, spa->address, spa->length);
0053             /*
0054              * We can break at the first match because we're going
0055              * to rescan all the SPA ranges. There shouldn't be any
0056              * aliasing anyway.
0057              */
0058             break;
0059         }
0060         mutex_unlock(&acpi_desc->init_mutex);
0061 
0062         if (!found_match)
0063             continue;
0064 
0065         /* If this fails due to an -ENOMEM, there is little we can do */
0066         nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus,
0067                 ALIGN_DOWN(mce->addr, align), align);
0068         nvdimm_region_notify(nfit_spa->nd_region,
0069                 NVDIMM_REVALIDATE_POISON);
0070 
0071         if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) {
0072             /*
0073              * We can ignore an -EBUSY here because if an ARS is
0074              * already in progress, just let that be the last
0075              * authoritative one
0076              */
0077             acpi_nfit_ars_rescan(acpi_desc, 0);
0078         }
0079         mce->kflags |= MCE_HANDLED_NFIT;
0080         break;
0081     }
0082 
0083     mutex_unlock(&acpi_desc_lock);
0084     return NOTIFY_DONE;
0085 }
0086 
0087 static struct notifier_block nfit_mce_dec = {
0088     .notifier_call  = nfit_handle_mce,
0089     .priority   = MCE_PRIO_NFIT,
0090 };
0091 
0092 void nfit_mce_register(void)
0093 {
0094     mce_register_decode_chain(&nfit_mce_dec);
0095 }
0096 
0097 void nfit_mce_unregister(void)
0098 {
0099     mce_unregister_decode_chain(&nfit_mce_dec);
0100 }