0001
0002
0003
0004
0005
0006 #include <linux/sched.h>
0007 #include <linux/interrupt.h>
0008 #include <linux/irq.h>
0009 #include <linux/of.h>
0010 #include <linux/fs.h>
0011 #include <linux/reboot.h>
0012 #include <linux/irq_work.h>
0013
0014 #include <asm/machdep.h>
0015 #include <asm/rtas.h>
0016 #include <asm/firmware.h>
0017 #include <asm/mce.h>
0018
0019 #include "pseries.h"
0020
0021 static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
0022 static DEFINE_SPINLOCK(ras_log_buf_lock);
0023
0024 static int ras_check_exception_token;
0025
0026 #define EPOW_SENSOR_TOKEN 9
0027 #define EPOW_SENSOR_INDEX 0
0028
0029
0030 static int num_epow_events;
0031
0032 static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
0033 static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
0034 static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
0035
0036
0037 struct pseries_mc_errorlog {
0038 __be32 fru_id;
0039 __be32 proc_id;
0040 u8 error_type;
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070 u8 sub_err_type;
0071 u8 reserved_1[6];
0072 __be64 effective_address;
0073 __be64 logical_address;
0074 } __packed;
0075
0076
/* Values of pseries_mc_errorlog.error_type */
#define MC_ERROR_TYPE_UE			0x00
#define MC_ERROR_TYPE_SLB			0x01
#define MC_ERROR_TYPE_ERAT			0x02
#define MC_ERROR_TYPE_UNKNOWN			0x03
#define MC_ERROR_TYPE_TLB			0x04
#define MC_ERROR_TYPE_D_CACHE			0x05
#define MC_ERROR_TYPE_I_CACHE			0x07
#define MC_ERROR_TYPE_CTRL_MEM_ACCESS		0x08

/* Sub-types for MC_ERROR_TYPE_UE */
#define MC_ERROR_UE_INDETERMINATE		0
#define MC_ERROR_UE_IFETCH			1
#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH	2
#define MC_ERROR_UE_LOAD_STORE			3
#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE	4

/* "Address provided" flag bits within pseries_mc_errorlog.sub_err_type */
#define UE_EFFECTIVE_ADDR_PROVIDED		0x40
#define UE_LOGICAL_ADDR_PROVIDED		0x20
#define MC_EFFECTIVE_ADDR_PROVIDED		0x80

/* Sub-types for MC_ERROR_TYPE_SLB */
#define MC_ERROR_SLB_PARITY			0
#define MC_ERROR_SLB_MULTIHIT			1
#define MC_ERROR_SLB_INDETERMINATE		2

/* Sub-types for MC_ERROR_TYPE_ERAT */
#define MC_ERROR_ERAT_PARITY			1
#define MC_ERROR_ERAT_MULTIHIT			2
#define MC_ERROR_ERAT_INDETERMINATE		3

/* Sub-types for MC_ERROR_TYPE_TLB */
#define MC_ERROR_TLB_PARITY			1
#define MC_ERROR_TLB_MULTIHIT			2
#define MC_ERROR_TLB_INDETERMINATE		3

/* Sub-types for MC_ERROR_TYPE_CTRL_MEM_ACCESS */
#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK	0
#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS	1
0111
0112 static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
0113 {
0114 switch (mlog->error_type) {
0115 case MC_ERROR_TYPE_UE:
0116 return (mlog->sub_err_type & 0x07);
0117 case MC_ERROR_TYPE_SLB:
0118 case MC_ERROR_TYPE_ERAT:
0119 case MC_ERROR_TYPE_TLB:
0120 return (mlog->sub_err_type & 0x03);
0121 case MC_ERROR_TYPE_CTRL_MEM_ACCESS:
0122 return (mlog->sub_err_type & 0x70) >> 4;
0123 default:
0124 return 0;
0125 }
0126 }
0127
0128
0129
0130
0131
0132
0133 static int __init init_ras_hotplug_IRQ(void)
0134 {
0135 struct device_node *np;
0136
0137
0138 np = of_find_node_by_path("/event-sources/hot-plug-events");
0139 if (np != NULL) {
0140 if (dlpar_workqueue_init() == 0)
0141 request_event_sources_irqs(np, ras_hotplug_interrupt,
0142 "RAS_HOTPLUG");
0143 of_node_put(np);
0144 }
0145
0146 return 0;
0147 }
0148 machine_late_initcall(pseries, init_ras_hotplug_IRQ);
0149
0150
0151
0152
0153
0154 static int __init init_ras_IRQ(void)
0155 {
0156 struct device_node *np;
0157
0158 ras_check_exception_token = rtas_token("check-exception");
0159
0160
0161 np = of_find_node_by_path("/event-sources/internal-errors");
0162 if (np != NULL) {
0163 request_event_sources_irqs(np, ras_error_interrupt,
0164 "RAS_ERROR");
0165 of_node_put(np);
0166 }
0167
0168
0169 np = of_find_node_by_path("/event-sources/epow-events");
0170 if (np != NULL) {
0171 request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW");
0172 of_node_put(np);
0173 }
0174
0175 return 0;
0176 }
0177 machine_subsys_initcall(pseries, init_ras_IRQ);
0178
0179 #define EPOW_SHUTDOWN_NORMAL 1
0180 #define EPOW_SHUTDOWN_ON_UPS 2
0181 #define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS 3
0182 #define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH 4
0183
0184 static void handle_system_shutdown(char event_modifier)
0185 {
0186 switch (event_modifier) {
0187 case EPOW_SHUTDOWN_NORMAL:
0188 pr_emerg("Power off requested\n");
0189 orderly_poweroff(true);
0190 break;
0191
0192 case EPOW_SHUTDOWN_ON_UPS:
0193 pr_emerg("Loss of system power detected. System is running on"
0194 " UPS/battery. Check RTAS error log for details\n");
0195 break;
0196
0197 case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
0198 pr_emerg("Loss of system critical functions detected. Check"
0199 " RTAS error log for details\n");
0200 orderly_poweroff(true);
0201 break;
0202
0203 case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
0204 pr_emerg("High ambient temperature detected. Check RTAS"
0205 " error log for details\n");
0206 orderly_poweroff(true);
0207 break;
0208
0209 default:
0210 pr_err("Unknown power/cooling shutdown event (modifier = %d)\n",
0211 event_modifier);
0212 }
0213 }
0214
/* Payload of the EPOW section, as read by rtas_parse_epow_errlog(). */
struct epow_errorlog {
	unsigned char	sensor_value;		/* low nibble: action code */
	unsigned char	event_modifier;		/* low nibble: modifier */
	unsigned char	extended_modifier;
	unsigned char	reserved;
	unsigned char	platform_reason;
};

/* Action codes carried in the low nibble of epow_errorlog.sensor_value */
#define EPOW_RESET		0
#define EPOW_WARN_COOLING	1
#define EPOW_WARN_POWER		2
#define EPOW_SYSTEM_SHUTDOWN	3
#define EPOW_SYSTEM_HALT	4
#define EPOW_MAIN_ENCLOSURE	5
#define EPOW_POWER_OFF		7
0230
0231 static void rtas_parse_epow_errlog(struct rtas_error_log *log)
0232 {
0233 struct pseries_errorlog *pseries_log;
0234 struct epow_errorlog *epow_log;
0235 char action_code;
0236 char modifier;
0237
0238 pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
0239 if (pseries_log == NULL)
0240 return;
0241
0242 epow_log = (struct epow_errorlog *)pseries_log->data;
0243 action_code = epow_log->sensor_value & 0xF;
0244 modifier = epow_log->event_modifier & 0xF;
0245
0246 switch (action_code) {
0247 case EPOW_RESET:
0248 if (num_epow_events) {
0249 pr_info("Non critical power/cooling issue cleared\n");
0250 num_epow_events--;
0251 }
0252 break;
0253
0254 case EPOW_WARN_COOLING:
0255 pr_info("Non-critical cooling issue detected. Check RTAS error"
0256 " log for details\n");
0257 break;
0258
0259 case EPOW_WARN_POWER:
0260 pr_info("Non-critical power issue detected. Check RTAS error"
0261 " log for details\n");
0262 break;
0263
0264 case EPOW_SYSTEM_SHUTDOWN:
0265 handle_system_shutdown(modifier);
0266 break;
0267
0268 case EPOW_SYSTEM_HALT:
0269 pr_emerg("Critical power/cooling issue detected. Check RTAS"
0270 " error log for details. Powering off.\n");
0271 orderly_poweroff(true);
0272 break;
0273
0274 case EPOW_MAIN_ENCLOSURE:
0275 case EPOW_POWER_OFF:
0276 pr_emerg("System about to lose power. Check RTAS error log "
0277 " for details. Powering off immediately.\n");
0278 emergency_sync();
0279 kernel_power_off();
0280 break;
0281
0282 default:
0283 pr_err("Unknown power/cooling event (action code = %d)\n",
0284 action_code);
0285 }
0286
0287
0288 if (action_code != EPOW_RESET)
0289 num_epow_events++;
0290 }
0291
0292 static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
0293 {
0294 struct pseries_errorlog *pseries_log;
0295 struct pseries_hp_errorlog *hp_elog;
0296
0297 spin_lock(&ras_log_buf_lock);
0298
0299 rtas_call(ras_check_exception_token, 6, 1, NULL,
0300 RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq),
0301 RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf),
0302 rtas_get_error_log_max());
0303
0304 pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf,
0305 PSERIES_ELOG_SECT_ID_HOTPLUG);
0306 hp_elog = (struct pseries_hp_errorlog *)pseries_log->data;
0307
0308
0309
0310
0311
0312 if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
0313 hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
0314 hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
0315 queue_hotplug_event(hp_elog);
0316 else
0317 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
0318
0319 spin_unlock(&ras_log_buf_lock);
0320 return IRQ_HANDLED;
0321 }
0322
0323
0324 static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
0325 {
0326 int state;
0327 int critical;
0328
0329 rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state);
0330
0331 if (state > 3)
0332 critical = 1;
0333 else
0334 critical = 0;
0335
0336 spin_lock(&ras_log_buf_lock);
0337
0338 rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT,
0339 virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf),
0340 rtas_get_error_log_max());
0341
0342 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
0343
0344 rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
0345
0346 spin_unlock(&ras_log_buf_lock);
0347 return IRQ_HANDLED;
0348 }
0349
0350
0351
0352
0353
0354
0355
0356
0357
0358 static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
0359 {
0360 struct rtas_error_log *rtas_elog;
0361 int status;
0362 int fatal;
0363
0364 spin_lock(&ras_log_buf_lock);
0365
0366 status = rtas_call(ras_check_exception_token, 6, 1, NULL,
0367 RTAS_VECTOR_EXTERNAL_INTERRUPT,
0368 virq_to_hw(irq),
0369 RTAS_INTERNAL_ERROR, 1 ,
0370 __pa(&ras_log_buf),
0371 rtas_get_error_log_max());
0372
0373 rtas_elog = (struct rtas_error_log *)ras_log_buf;
0374
0375 if (status == 0 &&
0376 rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC)
0377 fatal = 1;
0378 else
0379 fatal = 0;
0380
0381
0382 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
0383
0384 if (fatal) {
0385 pr_emerg("Fatal hardware error detected. Check RTAS error"
0386 " log for details. Powering off immediately\n");
0387 emergency_sync();
0388 kernel_power_off();
0389 } else {
0390 pr_err("Recoverable hardware error detected\n");
0391 }
0392
0393 spin_unlock(&ras_log_buf_lock);
0394 return IRQ_HANDLED;
0395 }
0396
0397
0398
0399
0400
0401
/*
 * Accept a save-area address A when 16 bytes starting at A fit either in
 * the range [0x7000, 0x8000) or inside the RTAS region given by rtas.base
 * and rtas.size. Note that A is evaluated more than once.
 */
#define VALID_FWNMI_BUFFER(A)						\
	((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) ||			\
	 (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16))))
0405
0406 static inline struct rtas_error_log *fwnmi_get_errlog(void)
0407 {
0408 return (struct rtas_error_log *)local_paca->mce_data_buf;
0409 }
0410
0411 static __be64 *fwnmi_get_savep(struct pt_regs *regs)
0412 {
0413 unsigned long savep_ra;
0414
0415
0416 savep_ra = regs->gpr[3] & ~(0x3UL << 62);
0417 if (!VALID_FWNMI_BUFFER(savep_ra)) {
0418 printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
0419 return NULL;
0420 }
0421
0422 return __va(savep_ra);
0423 }
0424
0425
0426
0427
0428
0429
0430
0431
0432
0433
0434
0435
0436
0437
0438
0439
0440 static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
0441 {
0442 struct rtas_error_log *h;
0443 __be64 *savep;
0444
0445 savep = fwnmi_get_savep(regs);
0446 if (!savep)
0447 return NULL;
0448
0449 regs->gpr[3] = be64_to_cpu(savep[0]);
0450
0451 h = (struct rtas_error_log *)&savep[1];
0452
0453 memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
0454 if (!rtas_error_extended(h)) {
0455 memcpy(local_paca->mce_data_buf, h, sizeof(__u64));
0456 } else {
0457 int len, error_log_length;
0458
0459 error_log_length = 8 + rtas_error_extended_log_length(h);
0460 len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
0461 memcpy(local_paca->mce_data_buf, h, len);
0462 }
0463
0464 return (struct rtas_error_log *)local_paca->mce_data_buf;
0465 }
0466
0467
0468
0469
0470
0471 static void fwnmi_release_errinfo(void)
0472 {
0473 struct rtas_args rtas_args;
0474 int ret;
0475
0476
0477
0478
0479
0480 rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL);
0481 ret = be32_to_cpu(rtas_args.rets[0]);
0482 if (ret != 0)
0483 printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
0484 }
0485
0486 int pSeries_system_reset_exception(struct pt_regs *regs)
0487 {
0488 #ifdef __LITTLE_ENDIAN__
0489
0490
0491
0492
0493
0494
0495 if ((be64_to_cpu(regs->msr) &
0496 (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
0497 MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
0498 regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip));
0499 regs_set_return_msr(regs, 0);
0500 }
0501 #endif
0502
0503 if (fwnmi_active) {
0504 __be64 *savep;
0505
0506
0507
0508
0509
0510
0511
0512
0513
0514
0515
0516 savep = fwnmi_get_savep(regs);
0517 if (savep)
0518 regs->gpr[3] = be64_to_cpu(savep[0]);
0519 }
0520
0521 if (smp_handle_nmi_ipi(regs))
0522 return 1;
0523
0524 return 0;
0525 }
0526
0527 static int mce_handle_err_realmode(int disposition, u8 error_type)
0528 {
0529 #ifdef CONFIG_PPC_BOOK3S_64
0530 if (disposition == RTAS_DISP_NOT_RECOVERED) {
0531 switch (error_type) {
0532 case MC_ERROR_TYPE_ERAT:
0533 flush_erat();
0534 disposition = RTAS_DISP_FULLY_RECOVERED;
0535 break;
0536 case MC_ERROR_TYPE_SLB:
0537 #ifdef CONFIG_PPC_64S_HASH_MMU
0538
0539
0540
0541
0542
0543
0544
0545
0546 if (local_paca->in_mce == 1)
0547 slb_save_contents(local_paca->mce_faulty_slbs);
0548 flush_and_reload_slb();
0549 disposition = RTAS_DISP_FULLY_RECOVERED;
0550 #endif
0551 break;
0552 default:
0553 break;
0554 }
0555 } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
0556
0557 pr_err("MCE: limited recovery, system may be degraded\n");
0558 disposition = RTAS_DISP_FULLY_RECOVERED;
0559 }
0560 #endif
0561 return disposition;
0562 }
0563
0564 static int mce_handle_err_virtmode(struct pt_regs *regs,
0565 struct rtas_error_log *errp,
0566 struct pseries_mc_errorlog *mce_log,
0567 int disposition)
0568 {
0569 struct mce_error_info mce_err = { 0 };
0570 int initiator = rtas_error_initiator(errp);
0571 int severity = rtas_error_severity(errp);
0572 unsigned long eaddr = 0, paddr = 0;
0573 u8 error_type, err_sub_type;
0574
0575 if (!mce_log)
0576 goto out;
0577
0578 error_type = mce_log->error_type;
0579 err_sub_type = rtas_mc_error_sub_type(mce_log);
0580
0581 if (initiator == RTAS_INITIATOR_UNKNOWN)
0582 mce_err.initiator = MCE_INITIATOR_UNKNOWN;
0583 else if (initiator == RTAS_INITIATOR_CPU)
0584 mce_err.initiator = MCE_INITIATOR_CPU;
0585 else if (initiator == RTAS_INITIATOR_PCI)
0586 mce_err.initiator = MCE_INITIATOR_PCI;
0587 else if (initiator == RTAS_INITIATOR_ISA)
0588 mce_err.initiator = MCE_INITIATOR_ISA;
0589 else if (initiator == RTAS_INITIATOR_MEMORY)
0590 mce_err.initiator = MCE_INITIATOR_MEMORY;
0591 else if (initiator == RTAS_INITIATOR_POWERMGM)
0592 mce_err.initiator = MCE_INITIATOR_POWERMGM;
0593 else
0594 mce_err.initiator = MCE_INITIATOR_UNKNOWN;
0595
0596 if (severity == RTAS_SEVERITY_NO_ERROR)
0597 mce_err.severity = MCE_SEV_NO_ERROR;
0598 else if (severity == RTAS_SEVERITY_EVENT)
0599 mce_err.severity = MCE_SEV_WARNING;
0600 else if (severity == RTAS_SEVERITY_WARNING)
0601 mce_err.severity = MCE_SEV_WARNING;
0602 else if (severity == RTAS_SEVERITY_ERROR_SYNC)
0603 mce_err.severity = MCE_SEV_SEVERE;
0604 else if (severity == RTAS_SEVERITY_ERROR)
0605 mce_err.severity = MCE_SEV_SEVERE;
0606 else
0607 mce_err.severity = MCE_SEV_FATAL;
0608
0609 if (severity <= RTAS_SEVERITY_ERROR_SYNC)
0610 mce_err.sync_error = true;
0611 else
0612 mce_err.sync_error = false;
0613
0614 mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
0615 mce_err.error_class = MCE_ECLASS_UNKNOWN;
0616
0617 switch (error_type) {
0618 case MC_ERROR_TYPE_UE:
0619 mce_err.error_type = MCE_ERROR_TYPE_UE;
0620 mce_common_process_ue(regs, &mce_err);
0621 if (mce_err.ignore_event)
0622 disposition = RTAS_DISP_FULLY_RECOVERED;
0623 switch (err_sub_type) {
0624 case MC_ERROR_UE_IFETCH:
0625 mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
0626 break;
0627 case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH:
0628 mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
0629 break;
0630 case MC_ERROR_UE_LOAD_STORE:
0631 mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
0632 break;
0633 case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE:
0634 mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
0635 break;
0636 case MC_ERROR_UE_INDETERMINATE:
0637 default:
0638 mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE;
0639 break;
0640 }
0641 if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED)
0642 eaddr = be64_to_cpu(mce_log->effective_address);
0643
0644 if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
0645 paddr = be64_to_cpu(mce_log->logical_address);
0646 } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
0647 unsigned long pfn;
0648
0649 pfn = addr_to_pfn(regs, eaddr);
0650 if (pfn != ULONG_MAX)
0651 paddr = pfn << PAGE_SHIFT;
0652 }
0653
0654 break;
0655 case MC_ERROR_TYPE_SLB:
0656 mce_err.error_type = MCE_ERROR_TYPE_SLB;
0657 switch (err_sub_type) {
0658 case MC_ERROR_SLB_PARITY:
0659 mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY;
0660 break;
0661 case MC_ERROR_SLB_MULTIHIT:
0662 mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
0663 break;
0664 case MC_ERROR_SLB_INDETERMINATE:
0665 default:
0666 mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
0667 break;
0668 }
0669 if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
0670 eaddr = be64_to_cpu(mce_log->effective_address);
0671 break;
0672 case MC_ERROR_TYPE_ERAT:
0673 mce_err.error_type = MCE_ERROR_TYPE_ERAT;
0674 switch (err_sub_type) {
0675 case MC_ERROR_ERAT_PARITY:
0676 mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY;
0677 break;
0678 case MC_ERROR_ERAT_MULTIHIT:
0679 mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
0680 break;
0681 case MC_ERROR_ERAT_INDETERMINATE:
0682 default:
0683 mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
0684 break;
0685 }
0686 if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
0687 eaddr = be64_to_cpu(mce_log->effective_address);
0688 break;
0689 case MC_ERROR_TYPE_TLB:
0690 mce_err.error_type = MCE_ERROR_TYPE_TLB;
0691 switch (err_sub_type) {
0692 case MC_ERROR_TLB_PARITY:
0693 mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY;
0694 break;
0695 case MC_ERROR_TLB_MULTIHIT:
0696 mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
0697 break;
0698 case MC_ERROR_TLB_INDETERMINATE:
0699 default:
0700 mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
0701 break;
0702 }
0703 if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
0704 eaddr = be64_to_cpu(mce_log->effective_address);
0705 break;
0706 case MC_ERROR_TYPE_D_CACHE:
0707 mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
0708 break;
0709 case MC_ERROR_TYPE_I_CACHE:
0710 mce_err.error_type = MCE_ERROR_TYPE_ICACHE;
0711 break;
0712 case MC_ERROR_TYPE_CTRL_MEM_ACCESS:
0713 mce_err.error_type = MCE_ERROR_TYPE_RA;
0714 switch (err_sub_type) {
0715 case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK:
0716 mce_err.u.ra_error_type =
0717 MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
0718 break;
0719 case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS:
0720 mce_err.u.ra_error_type =
0721 MCE_RA_ERROR_LOAD_STORE_FOREIGN;
0722 break;
0723 }
0724 if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
0725 eaddr = be64_to_cpu(mce_log->effective_address);
0726 break;
0727 case MC_ERROR_TYPE_UNKNOWN:
0728 default:
0729 mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
0730 break;
0731 }
0732 out:
0733 save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
0734 &mce_err, regs->nip, eaddr, paddr);
0735 return disposition;
0736 }
0737
0738 static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
0739 {
0740 struct pseries_errorlog *pseries_log;
0741 struct pseries_mc_errorlog *mce_log = NULL;
0742 int disposition = rtas_error_disposition(errp);
0743 u8 error_type;
0744
0745 if (!rtas_error_extended(errp))
0746 goto out;
0747
0748 pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
0749 if (!pseries_log)
0750 goto out;
0751
0752 mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
0753 error_type = mce_log->error_type;
0754
0755 disposition = mce_handle_err_realmode(disposition, error_type);
0756 out:
0757 disposition = mce_handle_err_virtmode(regs, errp, mce_log,
0758 disposition);
0759 return disposition;
0760 }
0761
0762
0763
0764
0765 void pSeries_machine_check_log_err(void)
0766 {
0767 struct rtas_error_log *err;
0768
0769 err = fwnmi_get_errlog();
0770 log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
0771 }
0772
0773
0774
0775
0776
0777
0778
0779
0780
0781
0782 static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
0783 {
0784 int recovered = 0;
0785
0786 if (regs_is_unrecoverable(regs)) {
0787
0788 pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
0789 recovered = 0;
0790 } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
0791
0792 recovered = 1;
0793 } else if (evt->severity == MCE_SEV_FATAL) {
0794
0795 pr_err("Machine check interrupt is fatal\n");
0796 recovered = 0;
0797 }
0798
0799 if (!recovered && evt->sync_error) {
0800
0801
0802
0803
0804
0805
0806
0807
0808
0809
0810
0811 if ((user_mode(regs))) {
0812 _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
0813 recovered = 1;
0814 } else if (die_will_crash()) {
0815
0816
0817
0818
0819
0820 recovered = 0;
0821 } else {
0822 die_mce("Machine check", regs, SIGBUS);
0823 recovered = 1;
0824 }
0825 }
0826
0827 return recovered;
0828 }
0829
0830
0831
0832
0833
0834
0835
0836
0837
0838
0839
0840 int pSeries_machine_check_exception(struct pt_regs *regs)
0841 {
0842 struct machine_check_event evt;
0843
0844 if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
0845 return 0;
0846
0847
0848 if (evt.version != MCE_V1) {
0849 pr_err("Machine Check Exception, Unknown event version %d !\n",
0850 evt.version);
0851 return 0;
0852 }
0853 machine_check_print_event_info(&evt, user_mode(regs), false);
0854
0855 if (recover_mce(regs, &evt))
0856 return 1;
0857
0858 return 0;
0859 }
0860
0861 long pseries_machine_check_realmode(struct pt_regs *regs)
0862 {
0863 struct rtas_error_log *errp;
0864 int disposition;
0865
0866 if (fwnmi_active) {
0867 errp = fwnmi_get_errinfo(regs);
0868
0869
0870
0871
0872
0873 disposition = mce_handle_error(regs, errp);
0874
0875 fwnmi_release_errinfo();
0876
0877 if (disposition == RTAS_DISP_FULLY_RECOVERED)
0878 return 1;
0879 }
0880
0881 return 0;
0882 }