Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * PowerNV setup code.
0004  *
0005  * Copyright 2011 IBM Corp.
0006  */
0007 
0008 #undef DEBUG
0009 
0010 #include <linux/cpu.h>
0011 #include <linux/errno.h>
0012 #include <linux/sched.h>
0013 #include <linux/kernel.h>
0014 #include <linux/tty.h>
0015 #include <linux/reboot.h>
0016 #include <linux/init.h>
0017 #include <linux/console.h>
0018 #include <linux/delay.h>
0019 #include <linux/irq.h>
0020 #include <linux/seq_file.h>
0021 #include <linux/of.h>
0022 #include <linux/of_fdt.h>
0023 #include <linux/interrupt.h>
0024 #include <linux/bug.h>
0025 #include <linux/pci.h>
0026 #include <linux/cpufreq.h>
0027 #include <linux/memblock.h>
0028 
0029 #include <asm/machdep.h>
0030 #include <asm/firmware.h>
0031 #include <asm/xics.h>
0032 #include <asm/xive.h>
0033 #include <asm/opal.h>
0034 #include <asm/kexec.h>
0035 #include <asm/smp.h>
0036 #include <asm/tm.h>
0037 #include <asm/setup.h>
0038 #include <asm/security_features.h>
0039 
0040 #include "powernv.h"
0041 
0042 
0043 static bool __init fw_feature_is(const char *state, const char *name,
0044               struct device_node *fw_features)
0045 {
0046     struct device_node *np;
0047     bool rc = false;
0048 
0049     np = of_get_child_by_name(fw_features, name);
0050     if (np) {
0051         rc = of_property_read_bool(np, state);
0052         of_node_put(np);
0053     }
0054 
0055     return rc;
0056 }
0057 
0058 static void __init init_fw_feat_flags(struct device_node *np)
0059 {
0060     if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np))
0061         security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
0062 
0063     if (fw_feature_is("enabled", "fw-bcctrl-serialized", np))
0064         security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
0065 
0066     if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np))
0067         security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
0068 
0069     if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np))
0070         security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
0071 
0072     if (fw_feature_is("enabled", "fw-l1d-thread-split", np))
0073         security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
0074 
0075     if (fw_feature_is("enabled", "fw-count-cache-disabled", np))
0076         security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
0077 
0078     if (fw_feature_is("enabled", "fw-count-cache-flush-bcctr2,0,0", np))
0079         security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
0080 
0081     if (fw_feature_is("enabled", "needs-count-cache-flush-on-context-switch", np))
0082         security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
0083 
0084     /*
0085      * The features below are enabled by default, so we instead look to see
0086      * if firmware has *disabled* them, and clear them if so.
0087      */
0088     if (fw_feature_is("disabled", "speculation-policy-favor-security", np))
0089         security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
0090 
0091     if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np))
0092         security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
0093 
0094     if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np))
0095         security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
0096 
0097     if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np))
0098         security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
0099 
0100     if (fw_feature_is("enabled", "no-need-l1d-flush-msr-pr-1-to-0", np))
0101         security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
0102 
0103     if (fw_feature_is("enabled", "no-need-l1d-flush-kernel-on-user-access", np))
0104         security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
0105 
0106     if (fw_feature_is("enabled", "no-need-store-drain-on-priv-state-switch", np))
0107         security_ftr_clear(SEC_FTR_STF_BARRIER);
0108 }
0109 
0110 static void __init pnv_setup_security_mitigations(void)
0111 {
0112     struct device_node *np, *fw_features;
0113     enum l1d_flush_type type;
0114     bool enable;
0115 
0116     /* Default to fallback in case fw-features are not available */
0117     type = L1D_FLUSH_FALLBACK;
0118 
0119     np = of_find_node_by_name(NULL, "ibm,opal");
0120     fw_features = of_get_child_by_name(np, "fw-features");
0121     of_node_put(np);
0122 
0123     if (fw_features) {
0124         init_fw_feat_flags(fw_features);
0125         of_node_put(fw_features);
0126 
0127         if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
0128             type = L1D_FLUSH_MTTRIG;
0129 
0130         if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
0131             type = L1D_FLUSH_ORI;
0132     }
0133 
0134     /*
0135      * The issues addressed by the entry and uaccess flush don't affect P7
0136      * or P8, so on bare metal disable them explicitly in case firmware does
0137      * not include the features to disable them. POWER9 and newer processors
0138      * should have the appropriate firmware flags.
0139      */
0140     if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p) ||
0141         pvr_version_is(PVR_POWER8E) || pvr_version_is(PVR_POWER8NVL) ||
0142         pvr_version_is(PVR_POWER8)) {
0143         security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
0144         security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
0145     }
0146 
0147     enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
0148          (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)   || \
0149           security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
0150 
0151     setup_rfi_flush(type, enable);
0152     setup_count_cache_flush();
0153 
0154     enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
0155          security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
0156     setup_entry_flush(enable);
0157 
0158     enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
0159          security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
0160     setup_uaccess_flush(enable);
0161 
0162     setup_stf_barrier();
0163 }
0164 
0165 static void __init pnv_check_guarded_cores(void)
0166 {
0167     struct device_node *dn;
0168     int bad_count = 0;
0169 
0170     for_each_node_by_type(dn, "cpu") {
0171         if (of_property_match_string(dn, "status", "bad") >= 0)
0172             bad_count++;
0173     }
0174 
0175     if (bad_count) {
0176         printk("  _     _______________\n");
0177         pr_cont(" | |   /               \\\n");
0178         pr_cont(" | |   |    WARNING!   |\n");
0179         pr_cont(" | |   |               |\n");
0180         pr_cont(" | |   | It looks like |\n");
0181         pr_cont(" |_|   |  you have %*d |\n", 3, bad_count);
0182         pr_cont("  _    | guarded cores |\n");
0183         pr_cont(" (_)   \\_______________/\n");
0184     }
0185 }
0186 
0187 static void __init pnv_setup_arch(void)
0188 {
0189     set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
0190 
0191     pnv_setup_security_mitigations();
0192 
0193     /* Initialize SMP */
0194     pnv_smp_init();
0195 
0196     /* Setup RTC and NVRAM callbacks */
0197     if (firmware_has_feature(FW_FEATURE_OPAL))
0198         opal_nvram_init();
0199 
0200     /* Enable NAP mode */
0201     powersave_nap = 1;
0202 
0203     pnv_check_guarded_cores();
0204 
0205     /* XXX PMCS */
0206 
0207     pnv_rng_init();
0208 }
0209 
0210 static void __init pnv_init(void)
0211 {
0212     /*
0213      * Initialize the LPC bus now so that legacy serial
0214      * ports can be found on it
0215      */
0216     opal_lpc_init();
0217 
0218 #ifdef CONFIG_HVC_OPAL
0219     if (firmware_has_feature(FW_FEATURE_OPAL))
0220         hvc_opal_init_early();
0221     else
0222 #endif
0223         add_preferred_console("hvc", 0, NULL);
0224 
0225 #ifdef CONFIG_PPC_64S_HASH_MMU
0226     if (!radix_enabled()) {
0227         size_t size = sizeof(struct slb_entry) * mmu_slb_size;
0228         int i;
0229 
0230         /* Allocate per cpu area to save old slb contents during MCE */
0231         for_each_possible_cpu(i) {
0232             paca_ptrs[i]->mce_faulty_slbs =
0233                     memblock_alloc_node(size,
0234                         __alignof__(struct slb_entry),
0235                         cpu_to_node(i));
0236         }
0237     }
0238 #endif
0239 }
0240 
0241 static void __init pnv_init_IRQ(void)
0242 {
0243     /* Try using a XIVE if available, otherwise use a XICS */
0244     if (!xive_native_init())
0245         xics_init();
0246 
0247     WARN_ON(!ppc_md.get_irq);
0248 }
0249 
0250 static void pnv_show_cpuinfo(struct seq_file *m)
0251 {
0252     struct device_node *root;
0253     const char *model = "";
0254 
0255     root = of_find_node_by_path("/");
0256     if (root)
0257         model = of_get_property(root, "model", NULL);
0258     seq_printf(m, "machine\t\t: PowerNV %s\n", model);
0259     if (firmware_has_feature(FW_FEATURE_OPAL))
0260         seq_printf(m, "firmware\t: OPAL\n");
0261     else
0262         seq_printf(m, "firmware\t: BML\n");
0263     of_node_put(root);
0264     if (radix_enabled())
0265         seq_printf(m, "MMU\t\t: Radix\n");
0266     else
0267         seq_printf(m, "MMU\t\t: Hash\n");
0268 }
0269 
/*
 * Common preparation for restart and power off: quiesce OPAL event
 * delivery, stop the other CPUs and hard-disable interrupts locally.
 */
static void pnv_prepare_going_down(void)
{
    /*
     * Interrupts cannot be serviced from here on, so turn off every
     * notifier coming from OPAL.
     */
    opal_event_shutdown();

    /* If a flash update is scheduled, tell the user about it. */
    opal_flash_update_print_message();

    smp_send_stop();

    hard_irq_disable();
}
0285 
0286 static void  __noreturn pnv_restart(char *cmd)
0287 {
0288     long rc;
0289 
0290     pnv_prepare_going_down();
0291 
0292     do {
0293         if (!cmd || !strlen(cmd))
0294             rc = opal_cec_reboot();
0295         else if (strcmp(cmd, "full") == 0)
0296             rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL);
0297         else if (strcmp(cmd, "mpipl") == 0)
0298             rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, NULL);
0299         else if (strcmp(cmd, "error") == 0)
0300             rc = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, NULL);
0301         else if (strcmp(cmd, "fast") == 0)
0302             rc = opal_cec_reboot2(OPAL_REBOOT_FAST, NULL);
0303         else
0304             rc = OPAL_UNSUPPORTED;
0305 
0306         if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
0307             /* Opal is busy wait for some time and retry */
0308             opal_poll_events(NULL);
0309             mdelay(10);
0310 
0311         } else  if (cmd && rc) {
0312             /* Unknown error while issuing reboot */
0313             if (rc == OPAL_UNSUPPORTED)
0314                 pr_err("Unsupported '%s' reboot.\n", cmd);
0315             else
0316                 pr_err("Unable to issue '%s' reboot. Err=%ld\n",
0317                        cmd, rc);
0318             pr_info("Forcing a cec-reboot\n");
0319             cmd = NULL;
0320             rc = OPAL_BUSY;
0321 
0322         } else if (rc != OPAL_SUCCESS) {
0323             /* Unknown error while issuing cec-reboot */
0324             pr_err("Unable to reboot. Err=%ld\n", rc);
0325         }
0326 
0327     } while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT);
0328 
0329     for (;;)
0330         opal_poll_events(NULL);
0331 }
0332 
0333 static void __noreturn pnv_power_off(void)
0334 {
0335     long rc = OPAL_BUSY;
0336 
0337     pnv_prepare_going_down();
0338 
0339     while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
0340         rc = opal_cec_power_down(0);
0341         if (rc == OPAL_BUSY_EVENT)
0342             opal_poll_events(NULL);
0343         else
0344             mdelay(10);
0345     }
0346     for (;;)
0347         opal_poll_events(NULL);
0348 }
0349 
0350 static void __noreturn pnv_halt(void)
0351 {
0352     pnv_power_off();
0353 }
0354 
/* Progress hook for the machine description; deliberately does nothing. */
static void pnv_progress(char *s, unsigned short hex)
{
    /* Both @s and @hex are ignored. */
}
0358 
/* Machine shutdown hook: quiesce PCI first, then OPAL. */
static void pnv_shutdown(void)
{
    /* Give the PCI code the chance to clear up its IODA tables */
    pnv_pci_shutdown();

    /*
     * Quiesce OPAL: all OPAL interrupts get unregistered so that none
     * fires while we kexec, and any operation that may still be doing
     * DMA (such as dump retrieval) is allowed to complete.
     */
    opal_shutdown();
}
0371 
0372 #ifdef CONFIG_KEXEC_CORE
0373 static void pnv_kexec_wait_secondaries_down(void)
0374 {
0375     int my_cpu, i, notified = -1;
0376 
0377     my_cpu = get_cpu();
0378 
0379     for_each_online_cpu(i) {
0380         uint8_t status;
0381         int64_t rc, timeout = 1000;
0382 
0383         if (i == my_cpu)
0384             continue;
0385 
0386         for (;;) {
0387             rc = opal_query_cpu_status(get_hard_smp_processor_id(i),
0388                            &status);
0389             if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED)
0390                 break;
0391             barrier();
0392             if (i != notified) {
0393                 printk(KERN_INFO "kexec: waiting for cpu %d "
0394                        "(physical %d) to enter OPAL\n",
0395                        i, paca_ptrs[i]->hw_cpu_id);
0396                 notified = i;
0397             }
0398 
0399             /*
0400              * On crash secondaries might be unreachable or hung,
0401              * so timeout if we've waited too long
0402              * */
0403             mdelay(1);
0404             if (timeout-- == 0) {
0405                 printk(KERN_ERR "kexec: timed out waiting for "
0406                        "cpu %d (physical %d) to enter OPAL\n",
0407                        i, paca_ptrs[i]->hw_cpu_id);
0408                 break;
0409             }
0410         }
0411     }
0412 }
0413 
0414 static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
0415 {
0416     u64 reinit_flags;
0417 
0418     if (xive_enabled())
0419         xive_teardown_cpu();
0420     else
0421         xics_kexec_teardown_cpu(secondary);
0422 
0423     /* On OPAL, we return all CPUs to firmware */
0424     if (!firmware_has_feature(FW_FEATURE_OPAL))
0425         return;
0426 
0427     if (secondary) {
0428         /* Return secondary CPUs to firmware on OPAL v3 */
0429         mb();
0430         get_paca()->kexec_state = KEXEC_STATE_REAL_MODE;
0431         mb();
0432 
0433         /* Return the CPU to OPAL */
0434         opal_return_cpu();
0435     } else {
0436         /* Primary waits for the secondaries to have reached OPAL */
0437         pnv_kexec_wait_secondaries_down();
0438 
0439         /* Switch XIVE back to emulation mode */
0440         if (xive_enabled())
0441             xive_shutdown();
0442 
0443         /*
0444          * We might be running as little-endian - now that interrupts
0445          * are disabled, reset the HILE bit to big-endian so we don't
0446          * take interrupts in the wrong endian later
0447          *
0448          * We reinit to enable both radix and hash on P9 to ensure
0449          * the mode used by the next kernel is always supported.
0450          */
0451         reinit_flags = OPAL_REINIT_CPUS_HILE_BE;
0452         if (cpu_has_feature(CPU_FTR_ARCH_300))
0453             reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX |
0454                 OPAL_REINIT_CPUS_MMU_HASH;
0455         opal_reinit_cpus(reinit_flags);
0456     }
0457 }
0458 #endif /* CONFIG_KEXEC_CORE */
0459 
0460 #ifdef CONFIG_MEMORY_HOTPLUG
0461 static unsigned long pnv_memory_block_size(void)
0462 {
0463     /*
0464      * We map the kernel linear region with 1GB large pages on radix. For
0465      * memory hot unplug to work our memory block size must be at least
0466      * this size.
0467      */
0468     if (radix_enabled())
0469         return radix_mem_block_size;
0470     else
0471         return 256UL * 1024 * 1024;
0472 }
0473 #endif
0474 
0475 static void __init pnv_setup_machdep_opal(void)
0476 {
0477     ppc_md.get_boot_time = opal_get_boot_time;
0478     ppc_md.restart = pnv_restart;
0479     pm_power_off = pnv_power_off;
0480     ppc_md.halt = pnv_halt;
0481     /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
0482     ppc_md.machine_check_exception = opal_machine_check;
0483     ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
0484     if (opal_check_token(OPAL_HANDLE_HMI2))
0485         ppc_md.hmi_exception_early = opal_hmi_exception_early2;
0486     else
0487         ppc_md.hmi_exception_early = opal_hmi_exception_early;
0488     ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
0489 }
0490 
0491 static int __init pnv_probe(void)
0492 {
0493     if (!of_machine_is_compatible("ibm,powernv"))
0494         return 0;
0495 
0496     if (firmware_has_feature(FW_FEATURE_OPAL))
0497         pnv_setup_machdep_opal();
0498 
0499     pr_debug("PowerNV detected !\n");
0500 
0501     pnv_init();
0502 
0503     return 1;
0504 }
0505 
0506 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
0507 void __init pnv_tm_init(void)
0508 {
0509     if (!firmware_has_feature(FW_FEATURE_OPAL) ||
0510         !pvr_version_is(PVR_POWER9) ||
0511         early_cpu_has_feature(CPU_FTR_TM))
0512         return;
0513 
0514     if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS)
0515         return;
0516 
0517     pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n");
0518     cur_cpu_spec->cpu_features |= CPU_FTR_TM;
0519     /* Make sure "normal" HTM is off (it should be) */
0520     cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM;
0521     /* Turn on no suspend mode, and HTM no SC */
0522     cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND | \
0523                         PPC_FEATURE2_HTM_NOSC;
0524     tm_suspend_disabled = true;
0525 }
0526 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
0527 
0528 /*
0529  * Returns the cpu frequency for 'cpu' in Hz. This is used by
0530  * /proc/cpuinfo
0531  */
0532 static unsigned long pnv_get_proc_freq(unsigned int cpu)
0533 {
0534     unsigned long ret_freq;
0535 
0536     ret_freq = cpufreq_get(cpu) * 1000ul;
0537 
0538     /*
0539      * If the backend cpufreq driver does not exist,
0540          * then fallback to old way of reporting the clockrate.
0541      */
0542     if (!ret_freq)
0543         ret_freq = ppc_proc_freq;
0544     return ret_freq;
0545 }
0546 
0547 static long pnv_machine_check_early(struct pt_regs *regs)
0548 {
0549     long handled = 0;
0550 
0551     if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
0552         handled = cur_cpu_spec->machine_check_early(regs);
0553 
0554     return handled;
0555 }
0556 
0557 define_machine(powernv) {
0558     .name           = "PowerNV",
0559     .probe          = pnv_probe,
0560     .setup_arch     = pnv_setup_arch,
0561     .init_IRQ       = pnv_init_IRQ,
0562     .show_cpuinfo       = pnv_show_cpuinfo,
0563     .get_proc_freq          = pnv_get_proc_freq,
0564     .discover_phbs      = pnv_pci_init,
0565     .progress       = pnv_progress,
0566     .machine_shutdown   = pnv_shutdown,
0567     .power_save             = NULL,
0568     .calibrate_decr     = generic_calibrate_decr,
0569     .machine_check_early    = pnv_machine_check_early,
0570 #ifdef CONFIG_KEXEC_CORE
0571     .kexec_cpu_down     = pnv_kexec_cpu_down,
0572 #endif
0573 #ifdef CONFIG_MEMORY_HOTPLUG
0574     .memory_block_size  = pnv_memory_block_size,
0575 #endif
0576 };