0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * Copyright (C) 2001 Dave Engebretsen IBM Corporation
0004  */
0005 
0006 #include <linux/sched.h>
0007 #include <linux/interrupt.h>
0008 #include <linux/irq.h>
0009 #include <linux/of.h>
0010 #include <linux/fs.h>
0011 #include <linux/reboot.h>
0012 #include <linux/irq_work.h>
0013 
0014 #include <asm/machdep.h>
0015 #include <asm/rtas.h>
0016 #include <asm/firmware.h>
0017 #include <asm/mce.h>
0018 
0019 #include "pseries.h"
0020 
0021 static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
0022 static DEFINE_SPINLOCK(ras_log_buf_lock);
0023 
0024 static int ras_check_exception_token;
0025 
0026 #define EPOW_SENSOR_TOKEN   9
0027 #define EPOW_SENSOR_INDEX   0
0028 
0029 /* EPOW events counter variable */
0030 static int num_epow_events;
0031 
0032 static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
0033 static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
0034 static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
0035 
0036 /* RTAS pseries MCE errorlog section. */
0037 struct pseries_mc_errorlog {
0038     __be32  fru_id;
0039     __be32  proc_id;
0040     u8  error_type;
0041     /*
0042      * sub_err_type (1 byte). Bit fields depend on error_type
0043      *
0044      *   MSB0
0045      *   |
0046      *   V
0047      *   01234567
0048      *   XXXXXXXX
0049      *
0050      * For error_type == MC_ERROR_TYPE_UE
0051      *   XXXXXXXX
0052      *   X          1: Permanent or Transient UE.
0053      *    X         1: Effective address provided.
0054      *     X        1: Logical address provided.
0055      *      XX      2: Reserved.
0056      *        XXX   3: Type of UE error.
0057      *
0058      * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB
0059      *   XXXXXXXX
0060      *   X          1: Effective address provided.
0061      *    XXXXX     5: Reserved.
0062      *         XX   2: Type of SLB/ERAT/TLB error.
0063      *
0064      * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS
0065      *   XXXXXXXX
0066      *   X          1: Error causing address provided.
0067      *    XXX       3: Type of error.
0068      *       XXXX   4: Reserved.
0069      */
0070     u8  sub_err_type;
0071     u8  reserved_1[6];
0072     __be64  effective_address;
0073     __be64  logical_address;
0074 } __packed;
0075 
0076 /* RTAS pseries MCE error types */
0077 #define MC_ERROR_TYPE_UE        0x00
0078 #define MC_ERROR_TYPE_SLB       0x01
0079 #define MC_ERROR_TYPE_ERAT      0x02
0080 #define MC_ERROR_TYPE_UNKNOWN       0x03
0081 #define MC_ERROR_TYPE_TLB       0x04
0082 #define MC_ERROR_TYPE_D_CACHE       0x05
0083 #define MC_ERROR_TYPE_I_CACHE       0x07
0084 #define MC_ERROR_TYPE_CTRL_MEM_ACCESS   0x08
0085 
0086 /* RTAS pseries MCE error sub types */
0087 #define MC_ERROR_UE_INDETERMINATE       0
0088 #define MC_ERROR_UE_IFETCH          1
0089 #define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH  2
0090 #define MC_ERROR_UE_LOAD_STORE          3
0091 #define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE  4
0092 
0093 #define UE_EFFECTIVE_ADDR_PROVIDED      0x40
0094 #define UE_LOGICAL_ADDR_PROVIDED        0x20
0095 #define MC_EFFECTIVE_ADDR_PROVIDED      0x80
0096 
0097 #define MC_ERROR_SLB_PARITY     0
0098 #define MC_ERROR_SLB_MULTIHIT       1
0099 #define MC_ERROR_SLB_INDETERMINATE  2
0100 
0101 #define MC_ERROR_ERAT_PARITY        1
0102 #define MC_ERROR_ERAT_MULTIHIT      2
0103 #define MC_ERROR_ERAT_INDETERMINATE 3
0104 
0105 #define MC_ERROR_TLB_PARITY     1
0106 #define MC_ERROR_TLB_MULTIHIT       2
0107 #define MC_ERROR_TLB_INDETERMINATE  3
0108 
0109 #define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK    0
0110 #define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS  1
0111 
0112 static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
0113 {
0114     switch (mlog->error_type) {
0115     case    MC_ERROR_TYPE_UE:
0116         return (mlog->sub_err_type & 0x07);
0117     case    MC_ERROR_TYPE_SLB:
0118     case    MC_ERROR_TYPE_ERAT:
0119     case    MC_ERROR_TYPE_TLB:
0120         return (mlog->sub_err_type & 0x03);
0121     case    MC_ERROR_TYPE_CTRL_MEM_ACCESS:
0122         return (mlog->sub_err_type & 0x70) >> 4;
0123     default:
0124         return 0;
0125     }
0126 }
0127 
0128 /*
0129  * Enable hotplug interrupts late because processing them may touch other
0130  * devices or systems (e.g. hugepages) that have not been initialized at the
0131  * subsys stage.
0132  */
0133 static int __init init_ras_hotplug_IRQ(void)
0134 {
0135     struct device_node *np;
0136 
0137     /* Hotplug Events */
0138     np = of_find_node_by_path("/event-sources/hot-plug-events");
0139     if (np != NULL) {
0140         if (dlpar_workqueue_init() == 0)
0141             request_event_sources_irqs(np, ras_hotplug_interrupt,
0142                            "RAS_HOTPLUG");
0143         of_node_put(np);
0144     }
0145 
0146     return 0;
0147 }
0148 machine_late_initcall(pseries, init_ras_hotplug_IRQ);
0149 
0150 /*
0151  * Initialize handlers for the set of interrupts caused by hardware errors
0152  * and power system events.
0153  */
0154 static int __init init_ras_IRQ(void)
0155 {
0156     struct device_node *np;
0157 
0158     ras_check_exception_token = rtas_token("check-exception");
0159 
0160     /* Internal Errors */
0161     np = of_find_node_by_path("/event-sources/internal-errors");
0162     if (np != NULL) {
0163         request_event_sources_irqs(np, ras_error_interrupt,
0164                        "RAS_ERROR");
0165         of_node_put(np);
0166     }
0167 
0168     /* EPOW Events */
0169     np = of_find_node_by_path("/event-sources/epow-events");
0170     if (np != NULL) {
0171         request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW");
0172         of_node_put(np);
0173     }
0174 
0175     return 0;
0176 }
0177 machine_subsys_initcall(pseries, init_ras_IRQ);
0178 
0179 #define EPOW_SHUTDOWN_NORMAL                1
0180 #define EPOW_SHUTDOWN_ON_UPS                2
0181 #define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS    3
0182 #define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH  4
0183 
0184 static void handle_system_shutdown(char event_modifier)
0185 {
0186     switch (event_modifier) {
0187     case EPOW_SHUTDOWN_NORMAL:
0188         pr_emerg("Power off requested\n");
0189         orderly_poweroff(true);
0190         break;
0191 
0192     case EPOW_SHUTDOWN_ON_UPS:
0193         pr_emerg("Loss of system power detected. System is running on"
0194              " UPS/battery. Check RTAS error log for details\n");
0195         break;
0196 
0197     case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
0198         pr_emerg("Loss of system critical functions detected. Check"
0199              " RTAS error log for details\n");
0200         orderly_poweroff(true);
0201         break;
0202 
0203     case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
0204         pr_emerg("High ambient temperature detected. Check RTAS"
0205              " error log for details\n");
0206         orderly_poweroff(true);
0207         break;
0208 
0209     default:
0210         pr_err("Unknown power/cooling shutdown event (modifier = %d)\n",
0211             event_modifier);
0212     }
0213 }
0214 
0215 struct epow_errorlog {
0216     unsigned char sensor_value;
0217     unsigned char event_modifier;
0218     unsigned char extended_modifier;
0219     unsigned char reserved;
0220     unsigned char platform_reason;
0221 };
0222 
0223 #define EPOW_RESET          0
0224 #define EPOW_WARN_COOLING       1
0225 #define EPOW_WARN_POWER         2
0226 #define EPOW_SYSTEM_SHUTDOWN        3
0227 #define EPOW_SYSTEM_HALT        4
0228 #define EPOW_MAIN_ENCLOSURE     5
0229 #define EPOW_POWER_OFF          7
0230 
0231 static void rtas_parse_epow_errlog(struct rtas_error_log *log)
0232 {
0233     struct pseries_errorlog *pseries_log;
0234     struct epow_errorlog *epow_log;
0235     char action_code;
0236     char modifier;
0237 
0238     pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
0239     if (pseries_log == NULL)
0240         return;
0241 
0242     epow_log = (struct epow_errorlog *)pseries_log->data;
0243     action_code = epow_log->sensor_value & 0xF; /* bottom 4 bits */
0244     modifier = epow_log->event_modifier & 0xF;  /* bottom 4 bits */
0245 
0246     switch (action_code) {
0247     case EPOW_RESET:
0248         if (num_epow_events) {
0249             pr_info("Non-critical power/cooling issue cleared\n");
0250             num_epow_events--;
0251         }
0252         break;
0253 
0254     case EPOW_WARN_COOLING:
0255         pr_info("Non-critical cooling issue detected. Check RTAS error"
0256             " log for details\n");
0257         break;
0258 
0259     case EPOW_WARN_POWER:
0260         pr_info("Non-critical power issue detected. Check RTAS error"
0261             " log for details\n");
0262         break;
0263 
0264     case EPOW_SYSTEM_SHUTDOWN:
0265         handle_system_shutdown(modifier);
0266         break;
0267 
0268     case EPOW_SYSTEM_HALT:
0269         pr_emerg("Critical power/cooling issue detected. Check RTAS"
0270              " error log for details. Powering off.\n");
0271         orderly_poweroff(true);
0272         break;
0273 
0274     case EPOW_MAIN_ENCLOSURE:
0275     case EPOW_POWER_OFF:
0276         pr_emerg("System about to lose power. Check RTAS error log"
0277              " for details. Powering off immediately.\n");
0278         emergency_sync();
0279         kernel_power_off();
0280         break;
0281 
0282     default:
0283         pr_err("Unknown power/cooling event (action code = %d)\n",
0284             action_code);
0285     }
0286 
0287     /* Increment epow events counter variable */
0288     if (action_code != EPOW_RESET)
0289         num_epow_events++;
0290 }
0291 
0292 static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
0293 {
0294     struct pseries_errorlog *pseries_log;
0295     struct pseries_hp_errorlog *hp_elog;
0296 
0297     spin_lock(&ras_log_buf_lock);
0298 
0299     rtas_call(ras_check_exception_token, 6, 1, NULL,
0300           RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq),
0301           RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf),
0302           rtas_get_error_log_max());
0303 
0304     pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf,
0305                        PSERIES_ELOG_SECT_ID_HOTPLUG);
0306     hp_elog = (struct pseries_hp_errorlog *)pseries_log->data;
0307 
0308     /*
0309      * Since PCI hotplug is not currently supported on pseries, put PCI
0310      * hotplug events on the ras_log_buf to be handled by rtas_errd.
0311      */
0312     if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
0313         hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
0314         hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
0315         queue_hotplug_event(hp_elog);
0316     else
0317         log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
0318 
0319     spin_unlock(&ras_log_buf_lock);
0320     return IRQ_HANDLED;
0321 }
0322 
0323 /* Handle environmental and power warning (EPOW) interrupts. */
0324 static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
0325 {
0326     int state;
0327     int critical;
0328 
0329     rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state);
0330 
0331     if (state > 3)
0332         critical = 1;       /* Time Critical */
0333     else
0334         critical = 0;
0335 
0336     spin_lock(&ras_log_buf_lock);
0337 
0338     rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT,
0339           virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf),
0340           rtas_get_error_log_max());
0341 
0342     log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
0343 
0344     rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
0345 
0346     spin_unlock(&ras_log_buf_lock);
0347     return IRQ_HANDLED;
0348 }
0349 
0350 /*
0351  * Handle hardware error interrupts.
0352  *
0353  * RTAS check-exception is called to collect data on the exception.  If
0354  * the error is deemed recoverable, we log a warning and return.
0355  * For nonrecoverable errors, an error is logged and we stop all processing
0356  * as quickly as possible in order to prevent propagation of the failure.
0357  */
0358 static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
0359 {
0360     struct rtas_error_log *rtas_elog;
0361     int status;
0362     int fatal;
0363 
0364     spin_lock(&ras_log_buf_lock);
0365 
0366     status = rtas_call(ras_check_exception_token, 6, 1, NULL,
0367                RTAS_VECTOR_EXTERNAL_INTERRUPT,
0368                virq_to_hw(irq),
0369                RTAS_INTERNAL_ERROR, 1 /* Time Critical */,
0370                __pa(&ras_log_buf),
0371                 rtas_get_error_log_max());
0372 
0373     rtas_elog = (struct rtas_error_log *)ras_log_buf;
0374 
0375     if (status == 0 &&
0376         rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC)
0377         fatal = 1;
0378     else
0379         fatal = 0;
0380 
0381     /* format and print the extended information */
0382     log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
0383 
0384     if (fatal) {
0385         pr_emerg("Fatal hardware error detected. Check RTAS error"
0386              " log for details. Powering off immediately\n");
0387         emergency_sync();
0388         kernel_power_off();
0389     } else {
0390         pr_err("Recoverable hardware error detected\n");
0391     }
0392 
0393     spin_unlock(&ras_log_buf_lock);
0394     return IRQ_HANDLED;
0395 }
0396 
0397 /*
0398  * Some versions of FWNMI place the buffer inside the 4kB page starting at
0399  * 0x7000. Other versions place it inside the rtas buffer. We check both.
0400  * Minimum size of the buffer is 16 bytes.
0401  */
0402 #define VALID_FWNMI_BUFFER(A) \
0403     ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \
0404     (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16))))
0405 
0406 static inline struct rtas_error_log *fwnmi_get_errlog(void)
0407 {
0408     return (struct rtas_error_log *)local_paca->mce_data_buf;
0409 }
0410 
0411 static __be64 *fwnmi_get_savep(struct pt_regs *regs)
0412 {
0413     unsigned long savep_ra;
0414 
0415     /* Mask top two bits */
0416     savep_ra = regs->gpr[3] & ~(0x3UL << 62);
0417     if (!VALID_FWNMI_BUFFER(savep_ra)) {
0418         printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
0419         return NULL;
0420     }
0421 
0422     return __va(savep_ra);
0423 }
0424 
0425 /*
0426  * Get the error information for errors coming through the
0427  * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
0428  * the actual r3 if possible, and a ptr to the error log entry
0429  * will be returned if found.
0430  *
0431  * Use one buffer, mce_data_buf, per CPU to store the RTAS error log.
0432  *
0433  * The mce_data_buf does not have any locks or protection around it;
0434  * if a second machine check comes in, or a system reset is done
0435  * before we have logged the error, then we will get corruption in the
0436  * error log.  This is preferable to holding off on calling
0437  * ibm,nmi-interlock, which would result in us checkstopping if a
0438  * second machine check did come in.
0439  */
0440 static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
0441 {
0442     struct rtas_error_log *h;
0443     __be64 *savep;
0444 
0445     savep = fwnmi_get_savep(regs);
0446     if (!savep)
0447         return NULL;
0448 
0449     regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
0450 
0451     h = (struct rtas_error_log *)&savep[1];
0452     /* Use the per cpu buffer from paca to store rtas error log */
0453     memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
0454     if (!rtas_error_extended(h)) {
0455         memcpy(local_paca->mce_data_buf, h, sizeof(__u64));
0456     } else {
0457         int len, error_log_length;
0458 
0459         error_log_length = 8 + rtas_error_extended_log_length(h);
0460         len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
0461         memcpy(local_paca->mce_data_buf, h, len);
0462     }
0463 
0464     return (struct rtas_error_log *)local_paca->mce_data_buf;
0465 }
0466 
0467 /* Call this when done with the data returned by fwnmi_get_errinfo().
0468  * It will release the saved data area for other CPUs in the
0469  * partition to receive FWNMI errors.
0470  */
0471 static void fwnmi_release_errinfo(void)
0472 {
0473     struct rtas_args rtas_args;
0474     int ret;
0475 
0476     /*
0477      * On pseries, the machine check stack is limited to under 4GB, so
0478      * args can be on-stack.
0479      */
0480     rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL);
0481     ret = be32_to_cpu(rtas_args.rets[0]);
0482     if (ret != 0)
0483         printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
0484 }
0485 
0486 int pSeries_system_reset_exception(struct pt_regs *regs)
0487 {
0488 #ifdef __LITTLE_ENDIAN__
0489     /*
0490      * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try
0491      * to detect the bad SRR1 pattern here. Flip the NIP back to correct
0492      * endian for reporting purposes. Unfortunately the MSR can't be fixed,
0493      * so clear it. It will be missing MSR_RI so we won't try to recover.
0494      */
0495     if ((be64_to_cpu(regs->msr) &
0496             (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
0497              MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
0498         regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip));
0499         regs_set_return_msr(regs, 0);
0500     }
0501 #endif
0502 
0503     if (fwnmi_active) {
0504         __be64 *savep;
0505 
0506         /*
0507          * Firmware (PowerVM and KVM) saves r3 to a save area like
0508          * machine check, which is not exactly what PAPR (2.9)
0509          * suggests but there is no way to detect otherwise, so this
0510          * is the interface now.
0511          *
0512          * System resets do not save any error log or require an
0513          * "ibm,nmi-interlock" rtas call to release.
0514          */
0515 
0516         savep = fwnmi_get_savep(regs);
0517         if (savep)
0518             regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
0519     }
0520 
0521     if (smp_handle_nmi_ipi(regs))
0522         return 1;
0523 
0524     return 0; /* need to perform reset */
0525 }
0526 
0527 static int mce_handle_err_realmode(int disposition, u8 error_type)
0528 {
0529 #ifdef CONFIG_PPC_BOOK3S_64
0530     if (disposition == RTAS_DISP_NOT_RECOVERED) {
0531         switch (error_type) {
0532         case    MC_ERROR_TYPE_ERAT:
0533             flush_erat();
0534             disposition = RTAS_DISP_FULLY_RECOVERED;
0535             break;
0536         case    MC_ERROR_TYPE_SLB:
0537 #ifdef CONFIG_PPC_64S_HASH_MMU
0538             /*
0539              * Store the old SLB contents in the paca before flushing.
0540              * Print this when we go to virtual mode.
0541              * There is a chance that we may hit an MCE again if there
0542              * is a parity error on the SLB entry we are trying to read
0543              * for saving. Hence limit the SLB saving to a single
0544              * level of recursion.
0545              */
0546             if (local_paca->in_mce == 1)
0547                 slb_save_contents(local_paca->mce_faulty_slbs);
0548             flush_and_reload_slb();
0549             disposition = RTAS_DISP_FULLY_RECOVERED;
0550 #endif
0551             break;
0552         default:
0553             break;
0554         }
0555     } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
0556         /* Platform corrected itself but could be degraded */
0557         pr_err("MCE: limited recovery, system may be degraded\n");
0558         disposition = RTAS_DISP_FULLY_RECOVERED;
0559     }
0560 #endif
0561     return disposition;
0562 }
0563 
0564 static int mce_handle_err_virtmode(struct pt_regs *regs,
0565                    struct rtas_error_log *errp,
0566                    struct pseries_mc_errorlog *mce_log,
0567                    int disposition)
0568 {
0569     struct mce_error_info mce_err = { 0 };
0570     int initiator = rtas_error_initiator(errp);
0571     int severity = rtas_error_severity(errp);
0572     unsigned long eaddr = 0, paddr = 0;
0573     u8 error_type, err_sub_type;
0574 
0575     if (!mce_log)
0576         goto out;
0577 
0578     error_type = mce_log->error_type;
0579     err_sub_type = rtas_mc_error_sub_type(mce_log);
0580 
0581     if (initiator == RTAS_INITIATOR_UNKNOWN)
0582         mce_err.initiator = MCE_INITIATOR_UNKNOWN;
0583     else if (initiator == RTAS_INITIATOR_CPU)
0584         mce_err.initiator = MCE_INITIATOR_CPU;
0585     else if (initiator == RTAS_INITIATOR_PCI)
0586         mce_err.initiator = MCE_INITIATOR_PCI;
0587     else if (initiator == RTAS_INITIATOR_ISA)
0588         mce_err.initiator = MCE_INITIATOR_ISA;
0589     else if (initiator == RTAS_INITIATOR_MEMORY)
0590         mce_err.initiator = MCE_INITIATOR_MEMORY;
0591     else if (initiator == RTAS_INITIATOR_POWERMGM)
0592         mce_err.initiator = MCE_INITIATOR_POWERMGM;
0593     else
0594         mce_err.initiator = MCE_INITIATOR_UNKNOWN;
0595 
0596     if (severity == RTAS_SEVERITY_NO_ERROR)
0597         mce_err.severity = MCE_SEV_NO_ERROR;
0598     else if (severity == RTAS_SEVERITY_EVENT)
0599         mce_err.severity = MCE_SEV_WARNING;
0600     else if (severity == RTAS_SEVERITY_WARNING)
0601         mce_err.severity = MCE_SEV_WARNING;
0602     else if (severity == RTAS_SEVERITY_ERROR_SYNC)
0603         mce_err.severity = MCE_SEV_SEVERE;
0604     else if (severity == RTAS_SEVERITY_ERROR)
0605         mce_err.severity = MCE_SEV_SEVERE;
0606     else
0607         mce_err.severity = MCE_SEV_FATAL;
0608 
0609     if (severity <= RTAS_SEVERITY_ERROR_SYNC)
0610         mce_err.sync_error = true;
0611     else
0612         mce_err.sync_error = false;
0613 
0614     mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
0615     mce_err.error_class = MCE_ECLASS_UNKNOWN;
0616 
0617     switch (error_type) {
0618     case MC_ERROR_TYPE_UE:
0619         mce_err.error_type = MCE_ERROR_TYPE_UE;
0620         mce_common_process_ue(regs, &mce_err);
0621         if (mce_err.ignore_event)
0622             disposition = RTAS_DISP_FULLY_RECOVERED;
0623         switch (err_sub_type) {
0624         case MC_ERROR_UE_IFETCH:
0625             mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
0626             break;
0627         case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH:
0628             mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
0629             break;
0630         case MC_ERROR_UE_LOAD_STORE:
0631             mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
0632             break;
0633         case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE:
0634             mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
0635             break;
0636         case MC_ERROR_UE_INDETERMINATE:
0637         default:
0638             mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE;
0639             break;
0640         }
0641         if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED)
0642             eaddr = be64_to_cpu(mce_log->effective_address);
0643 
0644         if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
0645             paddr = be64_to_cpu(mce_log->logical_address);
0646         } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
0647             unsigned long pfn;
0648 
0649             pfn = addr_to_pfn(regs, eaddr);
0650             if (pfn != ULONG_MAX)
0651                 paddr = pfn << PAGE_SHIFT;
0652         }
0653 
0654         break;
0655     case MC_ERROR_TYPE_SLB:
0656         mce_err.error_type = MCE_ERROR_TYPE_SLB;
0657         switch (err_sub_type) {
0658         case MC_ERROR_SLB_PARITY:
0659             mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY;
0660             break;
0661         case MC_ERROR_SLB_MULTIHIT:
0662             mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
0663             break;
0664         case MC_ERROR_SLB_INDETERMINATE:
0665         default:
0666             mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
0667             break;
0668         }
0669         if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
0670             eaddr = be64_to_cpu(mce_log->effective_address);
0671         break;
0672     case MC_ERROR_TYPE_ERAT:
0673         mce_err.error_type = MCE_ERROR_TYPE_ERAT;
0674         switch (err_sub_type) {
0675         case MC_ERROR_ERAT_PARITY:
0676             mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY;
0677             break;
0678         case MC_ERROR_ERAT_MULTIHIT:
0679             mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
0680             break;
0681         case MC_ERROR_ERAT_INDETERMINATE:
0682         default:
0683             mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
0684             break;
0685         }
0686         if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
0687             eaddr = be64_to_cpu(mce_log->effective_address);
0688         break;
0689     case MC_ERROR_TYPE_TLB:
0690         mce_err.error_type = MCE_ERROR_TYPE_TLB;
0691         switch (err_sub_type) {
0692         case MC_ERROR_TLB_PARITY:
0693             mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY;
0694             break;
0695         case MC_ERROR_TLB_MULTIHIT:
0696             mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
0697             break;
0698         case MC_ERROR_TLB_INDETERMINATE:
0699         default:
0700             mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
0701             break;
0702         }
0703         if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
0704             eaddr = be64_to_cpu(mce_log->effective_address);
0705         break;
0706     case MC_ERROR_TYPE_D_CACHE:
0707         mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
0708         break;
0709     case MC_ERROR_TYPE_I_CACHE:
0710         mce_err.error_type = MCE_ERROR_TYPE_ICACHE;
0711         break;
0712     case MC_ERROR_TYPE_CTRL_MEM_ACCESS:
0713         mce_err.error_type = MCE_ERROR_TYPE_RA;
0714         switch (err_sub_type) {
0715         case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK:
0716             mce_err.u.ra_error_type =
0717                 MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
0718             break;
0719         case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS:
0720             mce_err.u.ra_error_type =
0721                 MCE_RA_ERROR_LOAD_STORE_FOREIGN;
0722             break;
0723         }
0724         if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
0725             eaddr = be64_to_cpu(mce_log->effective_address);
0726         break;
0727     case MC_ERROR_TYPE_UNKNOWN:
0728     default:
0729         mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
0730         break;
0731     }
0732 out:
0733     save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
0734                &mce_err, regs->nip, eaddr, paddr);
0735     return disposition;
0736 }
0737 
0738 static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
0739 {
0740     struct pseries_errorlog *pseries_log;
0741     struct pseries_mc_errorlog *mce_log = NULL;
0742     int disposition = rtas_error_disposition(errp);
0743     u8 error_type;
0744 
0745     if (!rtas_error_extended(errp))
0746         goto out;
0747 
0748     pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
0749     if (!pseries_log)
0750         goto out;
0751 
0752     mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
0753     error_type = mce_log->error_type;
0754 
0755     disposition = mce_handle_err_realmode(disposition, error_type);
0756 out:
0757     disposition = mce_handle_err_virtmode(regs, errp, mce_log,
0758                           disposition);
0759     return disposition;
0760 }
0761 
0762 /*
0763  * Process MCE rtas errlog event.
0764  */
0765 void pSeries_machine_check_log_err(void)
0766 {
0767     struct rtas_error_log *err;
0768 
0769     err = fwnmi_get_errlog();
0770     log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
0771 }
0772 
0773 /*
0774  * See if we can recover from a machine check exception.
0775  * This is only called on power4 (or above) and only via
0776  * the Firmware Non-Maskable Interrupts (fwnmi) handler
0777  * which provides the error analysis for us.
0778  *
0779  * Return 1 if corrected (or delivered a signal).
0780  * Return 0 if there is nothing we can do.
0781  */
0782 static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
0783 {
0784     int recovered = 0;
0785 
0786     if (regs_is_unrecoverable(regs)) {
0787         /* If MSR_RI isn't set, we cannot recover */
0788         pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
0789         recovered = 0;
0790     } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
0791         /* Platform corrected itself */
0792         recovered = 1;
0793     } else if (evt->severity == MCE_SEV_FATAL) {
0794         /* Fatal machine check */
0795         pr_err("Machine check interrupt is fatal\n");
0796         recovered = 0;
0797     }
0798 
0799     if (!recovered && evt->sync_error) {
0800         /*
0801          * Try to kill processes if we get a synchronous machine check
0802          * (e.g., one caused by execution of this instruction). This
0803          * will devolve into a panic if we try to kill init or are in
0804          * an interrupt etc.
0805          *
0806          * TODO: Queue up this address for hwpoisoning later.
0807          * TODO: This is not quite right for d-side machine
0808          *       checks: ->nip is not necessarily the important
0809          *       address.
0810          */
0811         if ((user_mode(regs))) {
0812             _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
0813             recovered = 1;
0814         } else if (die_will_crash()) {
0815             /*
0816              * die() would kill the kernel, so better to go via
0817              * the platform reboot code that will log the
0818              * machine check.
0819              */
0820             recovered = 0;
0821         } else {
0822             die_mce("Machine check", regs, SIGBUS);
0823             recovered = 1;
0824         }
0825     }
0826 
0827     return recovered;
0828 }
0829 
0830 /*
0831  * Handle a machine check.
0832  *
0833  * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
0834  * should be present.  If so, the handler which called us tells us if the
0835  * error was recovered (never true if RI=0).
0836  *
0837  * On hardware prior to Power 4 these exceptions were asynchronous, which
0838  * means we can't tell exactly where they occurred and so we can't recover.
0839  */
0840 int pSeries_machine_check_exception(struct pt_regs *regs)
0841 {
0842     struct machine_check_event evt;
0843 
0844     if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
0845         return 0;
0846 
0847     /* Print things out */
0848     if (evt.version != MCE_V1) {
0849         pr_err("Machine Check Exception, Unknown event version %d!\n",
0850                evt.version);
0851         return 0;
0852     }
0853     machine_check_print_event_info(&evt, user_mode(regs), false);
0854 
0855     if (recover_mce(regs, &evt))
0856         return 1;
0857 
0858     return 0;
0859 }
0860 
0861 long pseries_machine_check_realmode(struct pt_regs *regs)
0862 {
0863     struct rtas_error_log *errp;
0864     int disposition;
0865 
0866     if (fwnmi_active) {
0867         errp = fwnmi_get_errinfo(regs);
0868         /*
0869          * Call to fwnmi_release_errinfo() in real mode causes kernel
0870          * to panic. Hence we will call it as soon as we go into
0871          * virtual mode.
0872          */
0873         disposition = mce_handle_error(regs, errp);
0874 
0875         fwnmi_release_errinfo();
0876 
0877         if (disposition == RTAS_DISP_FULLY_RECOVERED)
0878             return 1;
0879     }
0880 
0881     return 0;
0882 }