Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * PowerNV OPAL high level interfaces
0004  *
0005  * Copyright 2011 IBM Corp.
0006  */
0007 
0008 #define pr_fmt(fmt) "opal: " fmt
0009 
0010 #include <linux/printk.h>
0011 #include <linux/types.h>
0012 #include <linux/of.h>
0013 #include <linux/of_fdt.h>
0014 #include <linux/of_platform.h>
0015 #include <linux/of_address.h>
0016 #include <linux/interrupt.h>
0017 #include <linux/notifier.h>
0018 #include <linux/slab.h>
0019 #include <linux/sched.h>
0020 #include <linux/kobject.h>
0021 #include <linux/delay.h>
0022 #include <linux/memblock.h>
0023 #include <linux/kthread.h>
0024 #include <linux/freezer.h>
0025 #include <linux/kmsg_dump.h>
0026 #include <linux/console.h>
0027 #include <linux/sched/debug.h>
0028 
0029 #include <asm/machdep.h>
0030 #include <asm/opal.h>
0031 #include <asm/firmware.h>
0032 #include <asm/mce.h>
0033 #include <asm/imc-pmu.h>
0034 #include <asm/bug.h>
0035 
0036 #include "powernv.h"
0037 
/* Upper bound on the number of messages queue_replay_msg() will hold. */
#define OPAL_MSG_QUEUE_MAX 16

/* One OPAL message held back for replay; linked into msg_list. */
struct opal_msg_node {
    struct list_head    list;
    struct opal_msg     msg;
};

/*
 * msg_list_lock is held around queueing (opal_message_do_notify) and
 * replay (opal_message_notifier_register) of entries on msg_list.
 */
static DEFINE_SPINLOCK(msg_list_lock);
static LIST_HEAD(msg_list);

/* /sys/firmware/opal */
struct kobject *opal_kobj;

/*
 * Firmware location, filled in by early_init_dt_scan_opal() from the
 * "opal-base-address", "opal-entry-address" and "opal-runtime-size"
 * device-tree properties.
 */
struct opal {
    u64 base;
    u64 entry;
    u64 size;
} opal;

/*
 * One machine-check recoverable range: a fault with nip in
 * [start_addr, end_addr) is redirected to recover_addr.
 */
struct mcheck_recoverable_range {
    u64 start_addr;
    u64 end_addr;
    u64 recover_addr;
};

/* Number of entries currently on msg_list. */
static int msg_list_size;

/* Table parsed from "mcheck-recoverable-ranges", and its entry count. */
static struct mcheck_recoverable_range *mc_recoverable_range;
static int mc_recoverable_range_len;

/* The /ibm,opal device-tree node, looked up in opal_init(). */
struct device_node *opal_node;
/* Taken by __opal_put_chars() for atomic console writes. */
static DEFINE_SPINLOCK(opal_write_lock);
/* One notifier chain per OPAL message type. */
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
/* Value of "ibm,heartbeat-ms"; 0 means no kopald thread is started. */
static uint32_t opal_heartbeat;
/* The kopald polling thread, if one was started. */
static struct task_struct *kopald_tsk;
/* Buffer handed to opal_get_msg(), and its size in bytes. */
static struct opal_msg *opal_msg;
static u32 opal_msg_size __ro_after_init;
0075 
0076 void __init opal_configure_cores(void)
0077 {
0078     u64 reinit_flags = 0;
0079 
0080     /* Do the actual re-init, This will clobber all FPRs, VRs, etc...
0081      *
0082      * It will preserve non volatile GPRs and HSPRG0/1. It will
0083      * also restore HIDs and other SPRs to their original value
0084      * but it might clobber a bunch.
0085      */
0086 #ifdef __BIG_ENDIAN__
0087     reinit_flags |= OPAL_REINIT_CPUS_HILE_BE;
0088 #else
0089     reinit_flags |= OPAL_REINIT_CPUS_HILE_LE;
0090 #endif
0091 
0092     /*
0093      * POWER9 always support running hash:
0094      *  ie. Host hash  supports  hash guests
0095      *      Host radix supports  hash/radix guests
0096      */
0097     if (early_cpu_has_feature(CPU_FTR_ARCH_300)) {
0098         reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH;
0099         if (early_radix_enabled())
0100             reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX;
0101     }
0102 
0103     opal_reinit_cpus(reinit_flags);
0104 
0105     /* Restore some bits */
0106     if (cur_cpu_spec->cpu_restore)
0107         cur_cpu_spec->cpu_restore();
0108 }
0109 
0110 int __init early_init_dt_scan_opal(unsigned long node,
0111                    const char *uname, int depth, void *data)
0112 {
0113     const void *basep, *entryp, *sizep;
0114     int basesz, entrysz, runtimesz;
0115 
0116     if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
0117         return 0;
0118 
0119     basep  = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
0120     entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
0121     sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);
0122 
0123     if (!basep || !entryp || !sizep)
0124         return 1;
0125 
0126     opal.base = of_read_number(basep, basesz/4);
0127     opal.entry = of_read_number(entryp, entrysz/4);
0128     opal.size = of_read_number(sizep, runtimesz/4);
0129 
0130     pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
0131          opal.base, basep, basesz);
0132     pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%d)\n",
0133          opal.entry, entryp, entrysz);
0134     pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n",
0135          opal.size, sizep, runtimesz);
0136 
0137     if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
0138         powerpc_firmware_features |= FW_FEATURE_OPAL;
0139         pr_debug("OPAL detected !\n");
0140     } else {
0141         panic("OPAL != V3 detected, no longer supported.\n");
0142     }
0143 
0144     return 1;
0145 }
0146 
0147 int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
0148                    const char *uname, int depth, void *data)
0149 {
0150     int i, psize, size;
0151     const __be32 *prop;
0152 
0153     if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
0154         return 0;
0155 
0156     prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);
0157 
0158     if (!prop)
0159         return 1;
0160 
0161     pr_debug("Found machine check recoverable ranges.\n");
0162 
0163     /*
0164      * Calculate number of available entries.
0165      *
0166      * Each recoverable address range entry is (start address, len,
0167      * recovery address), 2 cells each for start and recovery address,
0168      * 1 cell for len, totalling 5 cells per entry.
0169      */
0170     mc_recoverable_range_len = psize / (sizeof(*prop) * 5);
0171 
0172     /* Sanity check */
0173     if (!mc_recoverable_range_len)
0174         return 1;
0175 
0176     /* Size required to hold all the entries. */
0177     size = mc_recoverable_range_len *
0178             sizeof(struct mcheck_recoverable_range);
0179 
0180     /*
0181      * Allocate a buffer to hold the MC recoverable ranges.
0182      */
0183     mc_recoverable_range = memblock_alloc(size, __alignof__(u64));
0184     if (!mc_recoverable_range)
0185         panic("%s: Failed to allocate %u bytes align=0x%lx\n",
0186               __func__, size, __alignof__(u64));
0187 
0188     for (i = 0; i < mc_recoverable_range_len; i++) {
0189         mc_recoverable_range[i].start_addr =
0190                     of_read_number(prop + (i * 5) + 0, 2);
0191         mc_recoverable_range[i].end_addr =
0192                     mc_recoverable_range[i].start_addr +
0193                     of_read_number(prop + (i * 5) + 2, 1);
0194         mc_recoverable_range[i].recover_addr =
0195                     of_read_number(prop + (i * 5) + 3, 2);
0196 
0197         pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
0198                 mc_recoverable_range[i].start_addr,
0199                 mc_recoverable_range[i].end_addr,
0200                 mc_recoverable_range[i].recover_addr);
0201     }
0202     return 1;
0203 }
0204 
0205 static int __init opal_register_exception_handlers(void)
0206 {
0207 #ifdef __BIG_ENDIAN__
0208     u64 glue;
0209 
0210     if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
0211         return -ENODEV;
0212 
0213     /* Hookup some exception handlers except machine check. We use the
0214      * fwnmi area at 0x7000 to provide the glue space to OPAL
0215      */
0216     glue = 0x7000;
0217 
0218     /*
0219      * Only ancient OPAL firmware requires this.
0220      * Specifically, firmware from FW810.00 (released June 2014)
0221      * through FW810.20 (Released October 2014).
0222      *
0223      * Check if we are running on newer (post Oct 2014) firmware that
0224      * exports the OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to
0225      * patch the HMI interrupt and we catch it directly in Linux.
0226      *
0227      * For older firmware (i.e < FW810.20), we fallback to old behavior and
0228      * let OPAL patch the HMI vector and handle it inside OPAL firmware.
0229      *
0230      * For newer firmware we catch/handle the HMI directly in Linux.
0231      */
0232     if (!opal_check_token(OPAL_HANDLE_HMI)) {
0233         pr_info("Old firmware detected, OPAL handles HMIs.\n");
0234         opal_register_exception_handler(
0235                 OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
0236                 0, glue);
0237         glue += 128;
0238     }
0239 
0240     /*
0241      * Only applicable to ancient firmware, all modern
0242      * (post March 2015/skiboot 5.0) firmware will just return
0243      * OPAL_UNSUPPORTED.
0244      */
0245     opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
0246 #endif
0247 
0248     return 0;
0249 }
0250 machine_early_initcall(powernv, opal_register_exception_handlers);
0251 
0252 static void queue_replay_msg(void *msg)
0253 {
0254     struct opal_msg_node *msg_node;
0255 
0256     if (msg_list_size < OPAL_MSG_QUEUE_MAX) {
0257         msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
0258         if (msg_node) {
0259             INIT_LIST_HEAD(&msg_node->list);
0260             memcpy(&msg_node->msg, msg, sizeof(struct opal_msg));
0261             list_add_tail(&msg_node->list, &msg_list);
0262             msg_list_size++;
0263         } else
0264             pr_warn_once("message queue no memory\n");
0265 
0266         if (msg_list_size >= OPAL_MSG_QUEUE_MAX)
0267             pr_warn_once("message queue full\n");
0268     }
0269 }
0270 
0271 static void dequeue_replay_msg(enum opal_msg_type msg_type)
0272 {
0273     struct opal_msg_node *msg_node, *tmp;
0274 
0275     list_for_each_entry_safe(msg_node, tmp, &msg_list, list) {
0276         if (be32_to_cpu(msg_node->msg.msg_type) != msg_type)
0277             continue;
0278 
0279         atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
0280                     msg_type,
0281                     &msg_node->msg);
0282 
0283         list_del(&msg_node->list);
0284         kfree(msg_node);
0285         msg_list_size--;
0286     }
0287 }
0288 
/*
 * OPAL message notifier, keyed by message type. Allows subscribers to be
 * notified of a specific message type.
 */
0293 int opal_message_notifier_register(enum opal_msg_type msg_type,
0294                     struct notifier_block *nb)
0295 {
0296     int ret;
0297     unsigned long flags;
0298 
0299     if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
0300         pr_warn("%s: Invalid arguments, msg_type:%d\n",
0301             __func__, msg_type);
0302         return -EINVAL;
0303     }
0304 
0305     spin_lock_irqsave(&msg_list_lock, flags);
0306     ret = atomic_notifier_chain_register(
0307         &opal_msg_notifier_head[msg_type], nb);
0308 
0309     /*
0310      * If the registration succeeded, replay any queued messages that came
0311      * in prior to the notifier chain registration. msg_list_lock held here
0312      * to ensure they're delivered prior to any subsequent messages.
0313      */
0314     if (ret == 0)
0315         dequeue_replay_msg(msg_type);
0316 
0317     spin_unlock_irqrestore(&msg_list_lock, flags);
0318 
0319     return ret;
0320 }
0321 EXPORT_SYMBOL_GPL(opal_message_notifier_register);
0322 
0323 int opal_message_notifier_unregister(enum opal_msg_type msg_type,
0324                      struct notifier_block *nb)
0325 {
0326     return atomic_notifier_chain_unregister(
0327             &opal_msg_notifier_head[msg_type], nb);
0328 }
0329 EXPORT_SYMBOL_GPL(opal_message_notifier_unregister);
0330 
0331 static void opal_message_do_notify(uint32_t msg_type, void *msg)
0332 {
0333     unsigned long flags;
0334     bool queued = false;
0335 
0336     spin_lock_irqsave(&msg_list_lock, flags);
0337     if (opal_msg_notifier_head[msg_type].head == NULL) {
0338         /*
0339          * Queue up the msg since no notifiers have registered
0340          * yet for this msg_type.
0341          */
0342         queue_replay_msg(msg);
0343         queued = true;
0344     }
0345     spin_unlock_irqrestore(&msg_list_lock, flags);
0346 
0347     if (queued)
0348         return;
0349 
0350     /* notify subscribers */
0351     atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
0352                     msg_type, msg);
0353 }
0354 
0355 static void opal_handle_message(void)
0356 {
0357     s64 ret;
0358     u32 type;
0359 
0360     ret = opal_get_msg(__pa(opal_msg), opal_msg_size);
0361     /* No opal message pending. */
0362     if (ret == OPAL_RESOURCE)
0363         return;
0364 
0365     /* check for errors. */
0366     if (ret) {
0367         pr_warn("%s: Failed to retrieve opal message, err=%lld\n",
0368             __func__, ret);
0369         return;
0370     }
0371 
0372     type = be32_to_cpu(opal_msg->msg_type);
0373 
0374     /* Sanity check */
0375     if (type >= OPAL_MSG_TYPE_MAX) {
0376         pr_warn_once("%s: Unknown message type: %u\n", __func__, type);
0377         return;
0378     }
0379     opal_message_do_notify(type, (void *)opal_msg);
0380 }
0381 
0382 static irqreturn_t opal_message_notify(int irq, void *data)
0383 {
0384     opal_handle_message();
0385     return IRQ_HANDLED;
0386 }
0387 
0388 static int __init opal_message_init(struct device_node *opal_node)
0389 {
0390     int ret, i, irq;
0391 
0392     ret = of_property_read_u32(opal_node, "opal-msg-size", &opal_msg_size);
0393     if (ret) {
0394         pr_notice("Failed to read opal-msg-size property\n");
0395         opal_msg_size = sizeof(struct opal_msg);
0396     }
0397 
0398     opal_msg = kmalloc(opal_msg_size, GFP_KERNEL);
0399     if (!opal_msg) {
0400         opal_msg_size = sizeof(struct opal_msg);
0401         /* Try to allocate fixed message size */
0402         opal_msg = kmalloc(opal_msg_size, GFP_KERNEL);
0403         BUG_ON(opal_msg == NULL);
0404     }
0405 
0406     for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
0407         ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
0408 
0409     irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING));
0410     if (!irq) {
0411         pr_err("%s: Can't register OPAL event irq (%d)\n",
0412                __func__, irq);
0413         return irq;
0414     }
0415 
0416     ret = request_irq(irq, opal_message_notify,
0417             IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL);
0418     if (ret) {
0419         pr_err("%s: Can't request OPAL event irq (%d)\n",
0420                __func__, ret);
0421         return ret;
0422     }
0423 
0424     return 0;
0425 }
0426 
0427 int opal_get_chars(uint32_t vtermno, char *buf, int count)
0428 {
0429     s64 rc;
0430     __be64 evt, len;
0431 
0432     if (!opal.entry)
0433         return -ENODEV;
0434     opal_poll_events(&evt);
0435     if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
0436         return 0;
0437     len = cpu_to_be64(count);
0438     rc = opal_console_read(vtermno, &len, buf);
0439     if (rc == OPAL_SUCCESS)
0440         return be64_to_cpu(len);
0441     return 0;
0442 }
0443 
0444 static int __opal_put_chars(uint32_t vtermno, const char *data, int total_len, bool atomic)
0445 {
0446     unsigned long flags = 0 /* shut up gcc */;
0447     int written;
0448     __be64 olen;
0449     s64 rc;
0450 
0451     if (!opal.entry)
0452         return -ENODEV;
0453 
0454     if (atomic)
0455         spin_lock_irqsave(&opal_write_lock, flags);
0456     rc = opal_console_write_buffer_space(vtermno, &olen);
0457     if (rc || be64_to_cpu(olen) < total_len) {
0458         /* Closed -> drop characters */
0459         if (rc)
0460             written = total_len;
0461         else
0462             written = -EAGAIN;
0463         goto out;
0464     }
0465 
0466     /* Should not get a partial write here because space is available. */
0467     olen = cpu_to_be64(total_len);
0468     rc = opal_console_write(vtermno, &olen, data);
0469     if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
0470         if (rc == OPAL_BUSY_EVENT)
0471             opal_poll_events(NULL);
0472         written = -EAGAIN;
0473         goto out;
0474     }
0475 
0476     /* Closed or other error drop */
0477     if (rc != OPAL_SUCCESS) {
0478         written = opal_error_code(rc);
0479         goto out;
0480     }
0481 
0482     written = be64_to_cpu(olen);
0483     if (written < total_len) {
0484         if (atomic) {
0485             /* Should not happen */
0486             pr_warn("atomic console write returned partial "
0487                 "len=%d written=%d\n", total_len, written);
0488         }
0489         if (!written)
0490             written = -EAGAIN;
0491     }
0492 
0493 out:
0494     if (atomic)
0495         spin_unlock_irqrestore(&opal_write_lock, flags);
0496 
0497     return written;
0498 }
0499 
0500 int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
0501 {
0502     return __opal_put_chars(vtermno, data, total_len, false);
0503 }
0504 
0505 /*
0506  * opal_put_chars_atomic will not perform partial-writes. Data will be
0507  * atomically written to the terminal or not at all. This is not strictly
0508  * true at the moment because console space can race with OPAL's console
0509  * writes.
0510  */
0511 int opal_put_chars_atomic(uint32_t vtermno, const char *data, int total_len)
0512 {
0513     return __opal_put_chars(vtermno, data, total_len, true);
0514 }
0515 
0516 static s64 __opal_flush_console(uint32_t vtermno)
0517 {
0518     s64 rc;
0519 
0520     if (!opal_check_token(OPAL_CONSOLE_FLUSH)) {
0521         __be64 evt;
0522 
0523         /*
0524          * If OPAL_CONSOLE_FLUSH is not implemented in the firmware,
0525          * the console can still be flushed by calling the polling
0526          * function while it has OPAL_EVENT_CONSOLE_OUTPUT events.
0527          */
0528         WARN_ONCE(1, "opal: OPAL_CONSOLE_FLUSH missing.\n");
0529 
0530         opal_poll_events(&evt);
0531         if (!(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT))
0532             return OPAL_SUCCESS;
0533         return OPAL_BUSY;
0534 
0535     } else {
0536         rc = opal_console_flush(vtermno);
0537         if (rc == OPAL_BUSY_EVENT) {
0538             opal_poll_events(NULL);
0539             rc = OPAL_BUSY;
0540         }
0541         return rc;
0542     }
0543 
0544 }
0545 
0546 /*
0547  * opal_flush_console spins until the console is flushed
0548  */
0549 int opal_flush_console(uint32_t vtermno)
0550 {
0551     for (;;) {
0552         s64 rc = __opal_flush_console(vtermno);
0553 
0554         if (rc == OPAL_BUSY || rc == OPAL_PARTIAL) {
0555             mdelay(1);
0556             continue;
0557         }
0558 
0559         return opal_error_code(rc);
0560     }
0561 }
0562 
0563 /*
0564  * opal_flush_chars is an hvc interface that sleeps until the console is
0565  * flushed if wait, otherwise it will return -EBUSY if the console has data,
0566  * -EAGAIN if it has data and some of it was flushed.
0567  */
0568 int opal_flush_chars(uint32_t vtermno, bool wait)
0569 {
0570     for (;;) {
0571         s64 rc = __opal_flush_console(vtermno);
0572 
0573         if (rc == OPAL_BUSY || rc == OPAL_PARTIAL) {
0574             if (wait) {
0575                 msleep(OPAL_BUSY_DELAY_MS);
0576                 continue;
0577             }
0578             if (rc == OPAL_PARTIAL)
0579                 return -EAGAIN;
0580         }
0581 
0582         return opal_error_code(rc);
0583     }
0584 }
0585 
0586 static int opal_recover_mce(struct pt_regs *regs,
0587                     struct machine_check_event *evt)
0588 {
0589     int recovered = 0;
0590 
0591     if (regs_is_unrecoverable(regs)) {
0592         /* If MSR_RI isn't set, we cannot recover */
0593         pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
0594         recovered = 0;
0595     } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
0596         /* Platform corrected itself */
0597         recovered = 1;
0598     } else if (evt->severity == MCE_SEV_FATAL) {
0599         /* Fatal machine check */
0600         pr_err("Machine check interrupt is fatal\n");
0601         recovered = 0;
0602     }
0603 
0604     if (!recovered && evt->sync_error) {
0605         /*
0606          * Try to kill processes if we get a synchronous machine check
0607          * (e.g., one caused by execution of this instruction). This
0608          * will devolve into a panic if we try to kill init or are in
0609          * an interrupt etc.
0610          *
0611          * TODO: Queue up this address for hwpoisioning later.
0612          * TODO: This is not quite right for d-side machine
0613          *       checks ->nip is not necessarily the important
0614          *       address.
0615          */
0616         if ((user_mode(regs))) {
0617             _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
0618             recovered = 1;
0619         } else if (die_will_crash()) {
0620             /*
0621              * die() would kill the kernel, so better to go via
0622              * the platform reboot code that will log the
0623              * machine check.
0624              */
0625             recovered = 0;
0626         } else {
0627             die_mce("Machine check", regs, SIGBUS);
0628             recovered = 1;
0629         }
0630     }
0631 
0632     return recovered;
0633 }
0634 
0635 void __noreturn pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
0636 {
0637     panic_flush_kmsg_start();
0638 
0639     pr_emerg("Hardware platform error: %s\n", msg);
0640     if (regs)
0641         show_regs(regs);
0642     smp_send_stop();
0643 
0644     panic_flush_kmsg_end();
0645 
0646     /*
0647      * Don't bother to shut things down because this will
0648      * xstop the system.
0649      */
0650     if (opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, msg)
0651                         == OPAL_UNSUPPORTED) {
0652         pr_emerg("Reboot type %d not supported for %s\n",
0653                 OPAL_REBOOT_PLATFORM_ERROR, msg);
0654     }
0655 
0656     /*
0657      * We reached here. There can be three possibilities:
0658      * 1. We are running on a firmware level that do not support
0659      *    opal_cec_reboot2()
0660      * 2. We are running on a firmware level that do not support
0661      *    OPAL_REBOOT_PLATFORM_ERROR reboot type.
0662      * 3. We are running on FSP based system that does not need
0663      *    opal to trigger checkstop explicitly for error analysis.
0664      *    The FSP PRD component would have already got notified
0665      *    about this error through other channels.
0666      * 4. We are running on a newer skiboot that by default does
0667      *    not cause a checkstop, drops us back to the kernel to
0668      *    extract context and state at the time of the error.
0669      */
0670 
0671     panic(msg);
0672 }
0673 
0674 int opal_machine_check(struct pt_regs *regs)
0675 {
0676     struct machine_check_event evt;
0677 
0678     if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
0679         return 0;
0680 
0681     /* Print things out */
0682     if (evt.version != MCE_V1) {
0683         pr_err("Machine Check Exception, Unknown event version %d !\n",
0684                evt.version);
0685         return 0;
0686     }
0687     machine_check_print_event_info(&evt, user_mode(regs), false);
0688 
0689     if (opal_recover_mce(regs, &evt))
0690         return 1;
0691 
0692     pnv_platform_error_reboot(regs, "Unrecoverable Machine Check exception");
0693 }
0694 
0695 /* Early hmi handler called in real mode. */
0696 int opal_hmi_exception_early(struct pt_regs *regs)
0697 {
0698     s64 rc;
0699 
0700     /*
0701      * call opal hmi handler. Pass paca address as token.
0702      * The return value OPAL_SUCCESS is an indication that there is
0703      * an HMI event generated waiting to pull by Linux.
0704      */
0705     rc = opal_handle_hmi();
0706     if (rc == OPAL_SUCCESS) {
0707         local_paca->hmi_event_available = 1;
0708         return 1;
0709     }
0710     return 0;
0711 }
0712 
0713 int opal_hmi_exception_early2(struct pt_regs *regs)
0714 {
0715     s64 rc;
0716     __be64 out_flags;
0717 
0718     /*
0719      * call opal hmi handler.
0720      * Check 64-bit flag mask to find out if an event was generated,
0721      * and whether TB is still valid or not etc.
0722      */
0723     rc = opal_handle_hmi2(&out_flags);
0724     if (rc != OPAL_SUCCESS)
0725         return 0;
0726 
0727     if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_NEW_EVENT)
0728         local_paca->hmi_event_available = 1;
0729     if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_TOD_TB_FAIL)
0730         tb_invalid = true;
0731     return 1;
0732 }
0733 
0734 /* HMI exception handler called in virtual mode when irqs are next enabled. */
0735 int opal_handle_hmi_exception(struct pt_regs *regs)
0736 {
0737     /*
0738      * Check if HMI event is available.
0739      * if Yes, then wake kopald to process them.
0740      */
0741     if (!local_paca->hmi_event_available)
0742         return 0;
0743 
0744     local_paca->hmi_event_available = 0;
0745     opal_wake_poller();
0746 
0747     return 1;
0748 }
0749 
0750 static uint64_t find_recovery_address(uint64_t nip)
0751 {
0752     int i;
0753 
0754     for (i = 0; i < mc_recoverable_range_len; i++)
0755         if ((nip >= mc_recoverable_range[i].start_addr) &&
0756             (nip < mc_recoverable_range[i].end_addr))
0757             return mc_recoverable_range[i].recover_addr;
0758     return 0;
0759 }
0760 
0761 bool opal_mce_check_early_recovery(struct pt_regs *regs)
0762 {
0763     uint64_t recover_addr = 0;
0764 
0765     if (!opal.base || !opal.size)
0766         goto out;
0767 
0768     if ((regs->nip >= opal.base) &&
0769             (regs->nip < (opal.base + opal.size)))
0770         recover_addr = find_recovery_address(regs->nip);
0771 
0772     /*
0773      * Setup regs->nip to rfi into fixup address.
0774      */
0775     if (recover_addr)
0776         regs_set_return_ip(regs, recover_addr);
0777 
0778 out:
0779     return !!recover_addr;
0780 }
0781 
0782 static int __init opal_sysfs_init(void)
0783 {
0784     opal_kobj = kobject_create_and_add("opal", firmware_kobj);
0785     if (!opal_kobj) {
0786         pr_warn("kobject_create_and_add opal failed\n");
0787         return -ENOMEM;
0788     }
0789 
0790     return 0;
0791 }
0792 
0793 static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
0794                 struct bin_attribute *bin_attr, char *buf,
0795                 loff_t off, size_t count)
0796 {
0797     return memory_read_from_buffer(buf, count, &off, bin_attr->private,
0798                        bin_attr->size);
0799 }
0800 
0801 static int opal_add_one_export(struct kobject *parent, const char *export_name,
0802                    struct device_node *np, const char *prop_name)
0803 {
0804     struct bin_attribute *attr = NULL;
0805     const char *name = NULL;
0806     u64 vals[2];
0807     int rc;
0808 
0809     rc = of_property_read_u64_array(np, prop_name, &vals[0], 2);
0810     if (rc)
0811         goto out;
0812 
0813     attr = kzalloc(sizeof(*attr), GFP_KERNEL);
0814     if (!attr) {
0815         rc = -ENOMEM;
0816         goto out;
0817     }
0818     name = kstrdup(export_name, GFP_KERNEL);
0819     if (!name) {
0820         rc = -ENOMEM;
0821         goto out;
0822     }
0823 
0824     sysfs_bin_attr_init(attr);
0825     attr->attr.name = name;
0826     attr->attr.mode = 0400;
0827     attr->read = export_attr_read;
0828     attr->private = __va(vals[0]);
0829     attr->size = vals[1];
0830 
0831     rc = sysfs_create_bin_file(parent, attr);
0832 out:
0833     if (rc) {
0834         kfree(name);
0835         kfree(attr);
0836     }
0837 
0838     return rc;
0839 }
0840 
0841 static void opal_add_exported_attrs(struct device_node *np,
0842                     struct kobject *kobj)
0843 {
0844     struct device_node *child;
0845     struct property *prop;
0846 
0847     for_each_property_of_node(np, prop) {
0848         int rc;
0849 
0850         if (!strcmp(prop->name, "name") ||
0851             !strcmp(prop->name, "phandle"))
0852             continue;
0853 
0854         rc = opal_add_one_export(kobj, prop->name, np, prop->name);
0855         if (rc) {
0856             pr_warn("Unable to add export %pOF/%s, rc = %d!\n",
0857                 np, prop->name, rc);
0858         }
0859     }
0860 
0861     for_each_child_of_node(np, child) {
0862         struct kobject *child_kobj;
0863 
0864         child_kobj = kobject_create_and_add(child->name, kobj);
0865         if (!child_kobj) {
0866             pr_err("Unable to create export dir for %pOF\n", child);
0867             continue;
0868         }
0869 
0870         opal_add_exported_attrs(child, child_kobj);
0871     }
0872 }
0873 
0874 /*
0875  * opal_export_attrs: creates a sysfs node for each property listed in
0876  * the device-tree under /ibm,opal/firmware/exports/
0877  * All new sysfs nodes are created under /opal/exports/.
0878  * This allows for reserved memory regions (e.g. HDAT) to be read.
0879  * The new sysfs nodes are only readable by root.
0880  */
0881 static void opal_export_attrs(void)
0882 {
0883     struct device_node *np;
0884     struct kobject *kobj;
0885     int rc;
0886 
0887     np = of_find_node_by_path("/ibm,opal/firmware/exports");
0888     if (!np)
0889         return;
0890 
0891     /* Create new 'exports' directory - /sys/firmware/opal/exports */
0892     kobj = kobject_create_and_add("exports", opal_kobj);
0893     if (!kobj) {
0894         pr_warn("kobject_create_and_add() of exports failed\n");
0895         return;
0896     }
0897 
0898     opal_add_exported_attrs(np, kobj);
0899 
0900     /*
0901      * NB: symbol_map existed before the generic export interface so it
0902      * lives under the top level opal_kobj.
0903      */
0904     rc = opal_add_one_export(opal_kobj, "symbol_map",
0905                  np->parent, "symbol-map");
0906     if (rc)
0907         pr_warn("Error %d creating OPAL symbols file\n", rc);
0908 
0909     of_node_put(np);
0910 }
0911 
0912 static void __init opal_dump_region_init(void)
0913 {
0914     void *addr;
0915     uint64_t size;
0916     int rc;
0917 
0918     if (!opal_check_token(OPAL_REGISTER_DUMP_REGION))
0919         return;
0920 
0921     /* Register kernel log buffer */
0922     addr = log_buf_addr_get();
0923     if (addr == NULL)
0924         return;
0925 
0926     size = log_buf_len_get();
0927     if (size == 0)
0928         return;
0929 
0930     rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
0931                        __pa(addr), size);
0932     /* Don't warn if this is just an older OPAL that doesn't
0933      * know about that call
0934      */
0935     if (rc && rc != OPAL_UNSUPPORTED)
0936         pr_warn("DUMP: Failed to register kernel log buffer. "
0937             "rc = %d\n", rc);
0938 }
0939 
0940 static void __init opal_pdev_init(const char *compatible)
0941 {
0942     struct device_node *np;
0943 
0944     for_each_compatible_node(np, NULL, compatible)
0945         of_platform_device_create(np, NULL, NULL);
0946 }
0947 
0948 static void __init opal_imc_init_dev(void)
0949 {
0950     struct device_node *np;
0951 
0952     np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT);
0953     if (np)
0954         of_platform_device_create(np, NULL, NULL);
0955 }
0956 
0957 static int kopald(void *unused)
0958 {
0959     unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1;
0960 
0961     set_freezable();
0962     do {
0963         try_to_freeze();
0964 
0965         opal_handle_events();
0966 
0967         set_current_state(TASK_INTERRUPTIBLE);
0968         if (opal_have_pending_events())
0969             __set_current_state(TASK_RUNNING);
0970         else
0971             schedule_timeout(timeout);
0972 
0973     } while (!kthread_should_stop());
0974 
0975     return 0;
0976 }
0977 
0978 void opal_wake_poller(void)
0979 {
0980     if (kopald_tsk)
0981         wake_up_process(kopald_tsk);
0982 }
0983 
0984 static void __init opal_init_heartbeat(void)
0985 {
0986     /* Old firwmware, we assume the HVC heartbeat is sufficient */
0987     if (of_property_read_u32(opal_node, "ibm,heartbeat-ms",
0988                  &opal_heartbeat) != 0)
0989         opal_heartbeat = 0;
0990 
0991     if (opal_heartbeat)
0992         kopald_tsk = kthread_run(kopald, NULL, "kopald");
0993 }
0994 
0995 static int __init opal_init(void)
0996 {
0997     struct device_node *np, *consoles, *leds;
0998     int rc;
0999 
1000     opal_node = of_find_node_by_path("/ibm,opal");
1001     if (!opal_node) {
1002         pr_warn("Device node not found\n");
1003         return -ENODEV;
1004     }
1005 
1006     /* Register OPAL consoles if any ports */
1007     consoles = of_find_node_by_path("/ibm,opal/consoles");
1008     if (consoles) {
1009         for_each_child_of_node(consoles, np) {
1010             if (!of_node_name_eq(np, "serial"))
1011                 continue;
1012             of_platform_device_create(np, NULL, NULL);
1013         }
1014         of_node_put(consoles);
1015     }
1016 
1017     /* Initialise OPAL messaging system */
1018     opal_message_init(opal_node);
1019 
1020     /* Initialise OPAL asynchronous completion interface */
1021     opal_async_comp_init();
1022 
1023     /* Initialise OPAL sensor interface */
1024     opal_sensor_init();
1025 
1026     /* Initialise OPAL hypervisor maintainence interrupt handling */
1027     opal_hmi_handler_init();
1028 
1029     /* Create i2c platform devices */
1030     opal_pdev_init("ibm,opal-i2c");
1031 
1032     /* Handle non-volatile memory devices */
1033     opal_pdev_init("pmem-region");
1034 
1035     /* Setup a heatbeat thread if requested by OPAL */
1036     opal_init_heartbeat();
1037 
1038     /* Detect In-Memory Collection counters and create devices*/
1039     opal_imc_init_dev();
1040 
1041     /* Create leds platform devices */
1042     leds = of_find_node_by_path("/ibm,opal/leds");
1043     if (leds) {
1044         of_platform_device_create(leds, "opal_leds", NULL);
1045         of_node_put(leds);
1046     }
1047 
1048     /* Initialise OPAL message log interface */
1049     opal_msglog_init();
1050 
1051     /* Create "opal" kobject under /sys/firmware */
1052     rc = opal_sysfs_init();
1053     if (rc == 0) {
1054         /* Setup dump region interface */
1055         opal_dump_region_init();
1056         /* Setup error log interface */
1057         rc = opal_elog_init();
1058         /* Setup code update interface */
1059         opal_flash_update_init();
1060         /* Setup platform dump extract interface */
1061         opal_platform_dump_init();
1062         /* Setup system parameters interface */
1063         opal_sys_param_init();
1064         /* Setup message log sysfs interface. */
1065         opal_msglog_sysfs_init();
1066         /* Add all export properties*/
1067         opal_export_attrs();
1068     }
1069 
1070     /* Initialize platform devices: IPMI backend, PRD & flash interface */
1071     opal_pdev_init("ibm,opal-ipmi");
1072     opal_pdev_init("ibm,opal-flash");
1073     opal_pdev_init("ibm,opal-prd");
1074 
1075     /* Initialise platform device: oppanel interface */
1076     opal_pdev_init("ibm,opal-oppanel");
1077 
1078     /* Initialise OPAL kmsg dumper for flushing console on panic */
1079     opal_kmsg_init();
1080 
1081     /* Initialise OPAL powercap interface */
1082     opal_powercap_init();
1083 
1084     /* Initialise OPAL Power-Shifting-Ratio interface */
1085     opal_psr_init();
1086 
1087     /* Initialise OPAL sensor groups */
1088     opal_sensor_groups_init();
1089 
1090     /* Initialise OPAL Power control interface */
1091     opal_power_control_init();
1092 
1093     /* Initialize OPAL secure variables */
1094     opal_pdev_init("ibm,secvar-backend");
1095 
1096     return 0;
1097 }
1098 machine_subsys_initcall(powernv, opal_init);
1099 
1100 void opal_shutdown(void)
1101 {
1102     long rc = OPAL_BUSY;
1103 
1104     opal_event_shutdown();
1105 
1106     /*
1107      * Then sync with OPAL which ensure anything that can
1108      * potentially write to our memory has completed such
1109      * as an ongoing dump retrieval
1110      */
1111     while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
1112         rc = opal_sync_host_reboot();
1113         if (rc == OPAL_BUSY)
1114             opal_poll_events(NULL);
1115         else
1116             mdelay(10);
1117     }
1118 
1119     /* Unregister memory dump region */
1120     if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION))
1121         opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
1122 }
1123 
1124 /* Export this so that test modules can use it */
1125 EXPORT_SYMBOL_GPL(opal_invalid_call);
1126 EXPORT_SYMBOL_GPL(opal_xscom_read);
1127 EXPORT_SYMBOL_GPL(opal_xscom_write);
1128 EXPORT_SYMBOL_GPL(opal_ipmi_send);
1129 EXPORT_SYMBOL_GPL(opal_ipmi_recv);
1130 EXPORT_SYMBOL_GPL(opal_flash_read);
1131 EXPORT_SYMBOL_GPL(opal_flash_write);
1132 EXPORT_SYMBOL_GPL(opal_flash_erase);
1133 EXPORT_SYMBOL_GPL(opal_prd_msg);
1134 EXPORT_SYMBOL_GPL(opal_check_token);
1135 
1136 /* Convert a region of vmalloc memory to an opal sg list */
1137 struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
1138                          unsigned long vmalloc_size)
1139 {
1140     struct opal_sg_list *sg, *first = NULL;
1141     unsigned long i = 0;
1142 
1143     sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
1144     if (!sg)
1145         goto nomem;
1146 
1147     first = sg;
1148 
1149     while (vmalloc_size > 0) {
1150         uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
1151         uint64_t length = min(vmalloc_size, PAGE_SIZE);
1152 
1153         sg->entry[i].data = cpu_to_be64(data);
1154         sg->entry[i].length = cpu_to_be64(length);
1155         i++;
1156 
1157         if (i >= SG_ENTRIES_PER_NODE) {
1158             struct opal_sg_list *next;
1159 
1160             next = kzalloc(PAGE_SIZE, GFP_KERNEL);
1161             if (!next)
1162                 goto nomem;
1163 
1164             sg->length = cpu_to_be64(
1165                     i * sizeof(struct opal_sg_entry) + 16);
1166             i = 0;
1167             sg->next = cpu_to_be64(__pa(next));
1168             sg = next;
1169         }
1170 
1171         vmalloc_addr += length;
1172         vmalloc_size -= length;
1173     }
1174 
1175     sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);
1176 
1177     return first;
1178 
1179 nomem:
1180     pr_err("%s : Failed to allocate memory\n", __func__);
1181     opal_free_sg_list(first);
1182     return NULL;
1183 }
1184 
1185 void opal_free_sg_list(struct opal_sg_list *sg)
1186 {
1187     while (sg) {
1188         uint64_t next = be64_to_cpu(sg->next);
1189 
1190         kfree(sg);
1191 
1192         if (next)
1193             sg = __va(next);
1194         else
1195             sg = NULL;
1196     }
1197 }
1198 
1199 int opal_error_code(int rc)
1200 {
1201     switch (rc) {
1202     case OPAL_SUCCESS:      return 0;
1203 
1204     case OPAL_PARAMETER:        return -EINVAL;
1205     case OPAL_ASYNC_COMPLETION: return -EINPROGRESS;
1206     case OPAL_BUSY:
1207     case OPAL_BUSY_EVENT:       return -EBUSY;
1208     case OPAL_NO_MEM:       return -ENOMEM;
1209     case OPAL_PERMISSION:       return -EPERM;
1210 
1211     case OPAL_UNSUPPORTED:      return -EIO;
1212     case OPAL_HARDWARE:     return -EIO;
1213     case OPAL_INTERNAL_ERROR:   return -EIO;
1214     case OPAL_TIMEOUT:      return -ETIMEDOUT;
1215     default:
1216         pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
1217         return -EIO;
1218     }
1219 }
1220 
1221 void powernv_set_nmmu_ptcr(unsigned long ptcr)
1222 {
1223     int rc;
1224 
1225     if (firmware_has_feature(FW_FEATURE_OPAL)) {
1226         rc = opal_nmmu_set_ptcr(-1UL, ptcr);
1227         if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED)
1228             pr_warn("%s: Unable to set nest mmu ptcr\n", __func__);
1229     }
1230 }
1231 
1232 EXPORT_SYMBOL_GPL(opal_poll_events);
1233 EXPORT_SYMBOL_GPL(opal_rtc_read);
1234 EXPORT_SYMBOL_GPL(opal_rtc_write);
1235 EXPORT_SYMBOL_GPL(opal_tpo_read);
1236 EXPORT_SYMBOL_GPL(opal_tpo_write);
1237 EXPORT_SYMBOL_GPL(opal_i2c_request);
1238 /* Export these symbols for PowerNV LED class driver */
1239 EXPORT_SYMBOL_GPL(opal_leds_get_ind);
1240 EXPORT_SYMBOL_GPL(opal_leds_set_ind);
1241 /* Export this symbol for PowerNV Operator Panel class driver */
1242 EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
1243 /* Export this for KVM */
1244 EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
1245 EXPORT_SYMBOL_GPL(opal_int_eoi);
1246 EXPORT_SYMBOL_GPL(opal_error_code);
1247 /* Export the below symbol for NX compression */
1248 EXPORT_SYMBOL(opal_nx_coproc_init);