0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * PowerNV cpuidle code
0004  *
0005  * Copyright 2015 IBM Corp.
0006  */
0007 
0008 #include <linux/types.h>
0009 #include <linux/mm.h>
0010 #include <linux/slab.h>
0011 #include <linux/of.h>
0012 #include <linux/device.h>
0013 #include <linux/cpu.h>
0014 
0015 #include <asm/firmware.h>
0016 #include <asm/interrupt.h>
0017 #include <asm/machdep.h>
0018 #include <asm/opal.h>
0019 #include <asm/cputhreads.h>
0020 #include <asm/cpuidle.h>
0021 #include <asm/code-patching.h>
0022 #include <asm/smp.h>
0023 #include <asm/runlatch.h>
0024 #include <asm/dbell.h>
0025 
0026 #include "powernv.h"
0027 #include "subcore.h"
0028 
0029 /* Power ISA 3.0 allows for stop states 0x0 - 0xF */
0030 #define MAX_STOP_STATE  0xF
0031 
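/*
 * Register identifiers handed to the OPAL stop-api below: 855 is the
 * architected SPR number of the PSSCR, while 2000 is presumably a
 * pseudo-identifier used by firmware, since the MSR is not an SPR.
 */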
0032 #define P9_STOP_SPR_MSR 2000
0033 #define P9_STOP_SPR_PSSCR      855
0034 
0035 static u32 supported_cpuidle_states;
0036 struct pnv_idle_states_t *pnv_idle_states;
0037 int nr_pnv_idle_states;
0038 
0039 /*
0040  * The default stop state that will be used by the ppc_md.power_save
0041  * function on platforms that support the stop instruction.
0042  */
0043 static u64 pnv_default_stop_val;
0044 static u64 pnv_default_stop_mask;
0045 static bool default_stop_found;
0046 
0047 /*
0048  * First stop state levels at which SPR loss and timebase (TB) loss can occur.
0049  */
0050 static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
0051 static u64 deep_spr_loss_state = MAX_STOP_STATE + 1;
0052 
0053 /*
0054  * psscr value and mask of the deepest stop idle state.
0055  * Used when a cpu is offlined.
0056  */
0057 static u64 pnv_deepest_stop_psscr_val;
0058 static u64 pnv_deepest_stop_psscr_mask;
0059 static u64 pnv_deepest_stop_flag;
0060 static bool deepest_stop_found;
0061 
0062 static unsigned long power7_offline_type;
0063 
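/*
 * Deep idle states (winkle, deep stop) can lose hypervisor SPR state.
 * This programs the values that firmware's sleep/winkle (SLW) engine
 * should restore on wakeup, via the OPAL stop-api (opal_slw_set_reg).
 */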
0064 static int __init pnv_save_sprs_for_deep_states(void)
0065 {
0066     int cpu;
0067     int rc;
0068 
0069     /*
0070      * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
0071      * all cpus at boot. Read these register values on the current cpu and
0072      * use the same values for all cpus.
0073      */
0074     uint64_t lpcr_val   = mfspr(SPRN_LPCR);
0075     uint64_t hid0_val   = mfspr(SPRN_HID0);
0076     uint64_t hmeer_val  = mfspr(SPRN_HMEER);
0077     uint64_t msr_val = MSR_IDLE;
0078     uint64_t psscr_val = pnv_deepest_stop_psscr_val;
0079 
0080     for_each_present_cpu(cpu) {
0081         uint64_t pir = get_hard_smp_processor_id(cpu);
0082         uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu];
0083 
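        /*
         * HSPRG0 holds the PACA pointer in hypervisor mode; program it so
         * the wakeup path can locate this CPU's per-CPU area after a
         * state-loss wakeup.
         */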
0084         rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
0085         if (rc != 0)
0086             return rc;
0087 
0088         rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
0089         if (rc != 0)
0090             return rc;
0091 
0092         if (cpu_has_feature(CPU_FTR_ARCH_300)) {
0093             rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
0094             if (rc)
0095                 return rc;
0096 
0097             rc = opal_slw_set_reg(pir,
0098                           P9_STOP_SPR_PSSCR, psscr_val);
0099 
0100             if (rc)
0101                 return rc;
0102         }
0103 
0104         /* HIDs are per core registers */
0105         if (cpu_thread_in_core(cpu) == 0) {
0106 
0107             rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
0108             if (rc != 0)
0109                 return rc;
0110 
0111             rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
0112             if (rc != 0)
0113                 return rc;
0114 
0115             /* Only p8 needs to set extra HID registers */
0116             if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
0117                 uint64_t hid1_val = mfspr(SPRN_HID1);
0118                 uint64_t hid4_val = mfspr(SPRN_HID4);
0119                 uint64_t hid5_val = mfspr(SPRN_HID5);
0120 
0121                 rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
0122                 if (rc != 0)
0123                     return rc;
0124 
0125                 rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
0126                 if (rc != 0)
0127                     return rc;
0128 
0129                 rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
0130                 if (rc != 0)
0131                     return rc;
0132             }
0133         }
0134     }
0135 
0136     return 0;
0137 }
0138 
0139 u32 pnv_get_supported_cpuidle_states(void)
0140 {
0141     return supported_cpuidle_states;
0142 }
0143 EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
0144 
0145 static void pnv_fastsleep_workaround_apply(void *info)
0146 
0147 {
0148     int cpu = smp_processor_id();
0149     int rc;
0150     int *err = info;
0151 
0152     if (cpu_first_thread_sibling(cpu) != cpu)
0153         return;
0154 
0155     rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
0156                     OPAL_CONFIG_IDLE_APPLY);
0157     if (rc)
0158         *err = 1;
0159 }
0160 
0161 static bool power7_fastsleep_workaround_entry = true;
0162 static bool power7_fastsleep_workaround_exit = true;
0163 
0164 /*
0165  * Used to store fastsleep workaround state
0166  * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
0167  * 1 - Workaround applied once, never undone.
0168  */
0169 static u8 fastsleep_workaround_applyonce;
0170 
0171 static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
0172         struct device_attribute *attr, char *buf)
0173 {
0174     return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
0175 }
0176 
0177 static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
0178         struct device_attribute *attr, const char *buf,
0179         size_t count)
0180 {
0181     int err;
0182     u8 val;
0183 
0184     if (kstrtou8(buf, 0, &val) || val != 1)
0185         return -EINVAL;
0186 
0187     if (fastsleep_workaround_applyonce == 1)
0188         return count;
0189 
0190     /*
0191      * fastsleep_workaround_applyonce = 1 implies
0192      * fastsleep workaround needs to be left in 'applied' state on all
0193      * the cores. Do this by:
0194      * 1. Disable the 'undo' workaround in fastsleep exit path
0195      * 2. Send IPIs to all the cores which have at least one online thread
0196      * 3. Disable the 'apply' workaround in fastsleep entry path
0197      *
0198      * There is no need to send ipi to cores which have all threads
0199      * offlined, as last thread of the core entering fastsleep or deeper
0200      * state would have applied workaround.
0201      */
0202     power7_fastsleep_workaround_exit = false;
0203 
0204     cpus_read_lock();
0205     on_each_cpu(pnv_fastsleep_workaround_apply, &err, 1);
0206     cpus_read_unlock();
0207     if (err) {
0208         pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply\n");
0209         goto fail;
0210     }
0211 
0212     power7_fastsleep_workaround_entry = false;
0213 
0214     fastsleep_workaround_applyonce = 1;
0215 
0216     return count;
0217 fail:
0218     return -EIO;
0219 }
0220 
0221 static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
0222             show_fastsleep_workaround_applyonce,
0223             store_fastsleep_workaround_applyonce);
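/*
 * This attribute is created on cpu_subsys.dev_root in pnv_init_idle_states(),
 * i.e. it appears as /sys/devices/system/cpu/fastsleep_workaround_applyonce;
 * for example,
 *   echo 1 > /sys/devices/system/cpu/fastsleep_workaround_applyonce
 * applies the workaround once on all cores and never undoes it.
 */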
0224 
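/*
 * Per-core idle bookkeeping lives in the first thread's paca->idle_state:
 * one bit per thread (cleared while that thread is napping/sleeping/winkling),
 * a lock bit serialising updates to core-shared state, and a winkle count
 * used to detect when the whole core has winkled.
 */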
0225 static inline void atomic_start_thread_idle(void)
0226 {
0227     int cpu = raw_smp_processor_id();
0228     int first = cpu_first_thread_sibling(cpu);
0229     int thread_nr = cpu_thread_in_core(cpu);
0230     unsigned long *state = &paca_ptrs[first]->idle_state;
0231 
0232     clear_bit(thread_nr, state);
0233 }
0234 
0235 static inline void atomic_stop_thread_idle(void)
0236 {
0237     int cpu = raw_smp_processor_id();
0238     int first = cpu_first_thread_sibling(cpu);
0239     int thread_nr = cpu_thread_in_core(cpu);
0240     unsigned long *state = &paca_ptrs[first]->idle_state;
0241 
0242     set_bit(thread_nr, state);
0243 }
0244 
0245 static inline void atomic_lock_thread_idle(void)
0246 {
0247     int cpu = raw_smp_processor_id();
0248     int first = cpu_first_thread_sibling(cpu);
0249     unsigned long *state = &paca_ptrs[first]->idle_state;
0250 
0251     while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state)))
0252         barrier();
0253 }
0254 
0255 static inline void atomic_unlock_and_stop_thread_idle(void)
0256 {
0257     int cpu = raw_smp_processor_id();
0258     int first = cpu_first_thread_sibling(cpu);
0259     unsigned long thread = 1UL << cpu_thread_in_core(cpu);
0260     unsigned long *state = &paca_ptrs[first]->idle_state;
0261     u64 s = READ_ONCE(*state);
0262     u64 new, tmp;
0263 
0264     BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT));
0265     BUG_ON(s & thread);
0266 
0267 again:
0268     new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT;
0269     tmp = cmpxchg(state, s, new);
0270     if (unlikely(tmp != s)) {
0271         s = tmp;
0272         goto again;
0273     }
0274 }
0275 
0276 static inline void atomic_unlock_thread_idle(void)
0277 {
0278     int cpu = raw_smp_processor_id();
0279     int first = cpu_first_thread_sibling(cpu);
0280     unsigned long *state = &paca_ptrs[first]->idle_state;
0281 
0282     BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
0283     clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
0284 }
0285 
0286 /* P7 and P8 */
0287 struct p7_sprs {
0288     /* per core */
0289     u64 tscr;
0290     u64 worc;
0291 
0292     /* per subcore */
0293     u64 sdr1;
0294     u64 rpr;
0295 
0296     /* per thread */
0297     u64 lpcr;
0298     u64 hfscr;
0299     u64 fscr;
0300     u64 purr;
0301     u64 spurr;
0302     u64 dscr;
0303     u64 wort;
0304 
0305     /* per thread SPRs that get lost in shallow states */
0306     u64 amr;
0307     u64 iamr;
0308     u64 uamor;
0309     /* amor is restored to constant ~0 */
0310 };
0311 
0312 static unsigned long power7_idle_insn(unsigned long type)
0313 {
0314     int cpu = raw_smp_processor_id();
0315     int first = cpu_first_thread_sibling(cpu);
0316     unsigned long *state = &paca_ptrs[first]->idle_state;
0317     unsigned long thread = 1UL << cpu_thread_in_core(cpu);
0318     unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
0319     unsigned long srr1;
0320     bool full_winkle;
0321     struct p7_sprs sprs = {}; /* avoid false use-uninitialised */
0322     bool sprs_saved = false;
0323     int rc;
0324 
0325     if (unlikely(type != PNV_THREAD_NAP)) {
0326         atomic_lock_thread_idle();
0327 
0328         BUG_ON(!(*state & thread));
0329         *state &= ~thread;
0330 
0331         if (power7_fastsleep_workaround_entry) {
0332             if ((*state & core_thread_mask) == 0) {
0333                 rc = opal_config_cpu_idle_state(
0334                         OPAL_CONFIG_IDLE_FASTSLEEP,
0335                         OPAL_CONFIG_IDLE_APPLY);
0336                 BUG_ON(rc);
0337             }
0338         }
0339 
0340         if (type == PNV_THREAD_WINKLE) {
0341             sprs.tscr   = mfspr(SPRN_TSCR);
0342             sprs.worc   = mfspr(SPRN_WORC);
0343 
0344             sprs.sdr1   = mfspr(SPRN_SDR1);
0345             sprs.rpr    = mfspr(SPRN_RPR);
0346 
0347             sprs.lpcr   = mfspr(SPRN_LPCR);
0348             if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
0349                 sprs.hfscr  = mfspr(SPRN_HFSCR);
0350                 sprs.fscr   = mfspr(SPRN_FSCR);
0351             }
0352             sprs.purr   = mfspr(SPRN_PURR);
0353             sprs.spurr  = mfspr(SPRN_SPURR);
0354             sprs.dscr   = mfspr(SPRN_DSCR);
0355             sprs.wort   = mfspr(SPRN_WORT);
0356 
0357             sprs_saved = true;
0358 
0359             /*
0360              * Increment winkle counter and set all winkle bits if
0361              * all threads are winkling. This allows wakeup side to
0362              * distinguish between fast sleep and winkle state
0363              * loss. Fast sleep still has to resync the timebase so
0364              * this may not be a really big win.
0365              */
0366             *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
0367             if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
0368                     >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
0369                     == threads_per_core)
0370                 *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
0371             WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
0372         }
0373 
0374         atomic_unlock_thread_idle();
0375     }
0376 
0377     if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
0378         sprs.amr    = mfspr(SPRN_AMR);
0379         sprs.iamr   = mfspr(SPRN_IAMR);
0380         sprs.uamor  = mfspr(SPRN_UAMOR);
0381     }
0382 
0383     local_paca->thread_idle_state = type;
0384     srr1 = isa206_idle_insn_mayloss(type);      /* go idle */
0385     local_paca->thread_idle_state = PNV_THREAD_RUNNING;
0386 
0387     WARN_ON_ONCE(!srr1);
0388     WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
0389 
0390     if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
0391         if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
0392             /*
0393              * We don't need an isync after the mtsprs here because
0394              * the upcoming mtmsrd is execution synchronizing.
0395              */
0396             mtspr(SPRN_AMR,     sprs.amr);
0397             mtspr(SPRN_IAMR,    sprs.iamr);
0398             mtspr(SPRN_AMOR,    ~0);
0399             mtspr(SPRN_UAMOR,   sprs.uamor);
0400         }
0401     }
0402 
0403     if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
0404         hmi_exception_realmode(NULL);
0405 
0406     if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
0407         if (unlikely(type != PNV_THREAD_NAP)) {
0408             atomic_lock_thread_idle();
0409             if (type == PNV_THREAD_WINKLE) {
0410                 WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
0411                 *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
0412                 *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
0413             }
0414             atomic_unlock_and_stop_thread_idle();
0415         }
0416         return srr1;
0417     }
0418 
0419     /* HV state loss */
0420     BUG_ON(type == PNV_THREAD_NAP);
0421 
0422     atomic_lock_thread_idle();
0423 
0424     full_winkle = false;
0425     if (type == PNV_THREAD_WINKLE) {
0426         WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
0427         *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
0428         if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
0429             *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
0430             full_winkle = true;
0431             BUG_ON(!sprs_saved);
0432         }
0433     }
0434 
0435     WARN_ON(*state & thread);
0436 
0437     if ((*state & core_thread_mask) != 0)
0438         goto core_woken;
0439 
0440     /* Per-core SPRs */
0441     if (full_winkle) {
0442         mtspr(SPRN_TSCR,    sprs.tscr);
0443         mtspr(SPRN_WORC,    sprs.worc);
0444     }
0445 
0446     if (power7_fastsleep_workaround_exit) {
0447         rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
0448                         OPAL_CONFIG_IDLE_UNDO);
0449         BUG_ON(rc);
0450     }
0451 
0452     /* TB */
0453     if (opal_resync_timebase() != OPAL_SUCCESS)
0454         BUG();
0455 
0456 core_woken:
0457     if (!full_winkle)
0458         goto subcore_woken;
0459 
0460     if ((*state & local_paca->subcore_sibling_mask) != 0)
0461         goto subcore_woken;
0462 
0463     /* Per-subcore SPRs */
0464     mtspr(SPRN_SDR1,    sprs.sdr1);
0465     mtspr(SPRN_RPR,     sprs.rpr);
0466 
0467 subcore_woken:
0468     /*
0469      * isync after restoring shared SPRs and before unlocking. Unlock
0470      * only contains hwsync which does not necessarily do the right
0471      * thing for SPRs.
0472      */
0473     isync();
0474     atomic_unlock_and_stop_thread_idle();
0475 
0476     /* Fast sleep does not lose SPRs */
0477     if (!full_winkle)
0478         return srr1;
0479 
0480     /* Per-thread SPRs */
0481     mtspr(SPRN_LPCR,    sprs.lpcr);
0482     if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
0483         mtspr(SPRN_HFSCR,   sprs.hfscr);
0484         mtspr(SPRN_FSCR,    sprs.fscr);
0485     }
0486     mtspr(SPRN_PURR,    sprs.purr);
0487     mtspr(SPRN_SPURR,   sprs.spurr);
0488     mtspr(SPRN_DSCR,    sprs.dscr);
0489     mtspr(SPRN_WORT,    sprs.wort);
0490 
0491     mtspr(SPRN_SPRG3,   local_paca->sprg_vdso);
0492 
0493 #ifdef CONFIG_PPC_64S_HASH_MMU
0494     /*
0495      * The SLB has to be restored here, but it sometimes still
0496      * contains entries, so the __ variant must be used to prevent
0497      * multi hits.
0498      */
0499     __slb_restore_bolted_realmode();
0500 #endif
0501 
0502     return srr1;
0503 }
0504 
0505 extern unsigned long idle_kvm_start_guest(unsigned long srr1);
0506 
0507 #ifdef CONFIG_HOTPLUG_CPU
0508 static unsigned long power7_offline(void)
0509 {
0510     unsigned long srr1;
0511 
0512     mtmsr(MSR_IDLE);
0513 
0514 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
0515     /* Tell KVM we're entering idle. */
0516     /******************************************************/
0517     /*  N O T E   W E L L    ! ! !    N O T E   W E L L   */
0518     /* The following store to HSTATE_HWTHREAD_STATE(r13)  */
0519     /* MUST occur in real mode, i.e. with the MMU off,    */
0520     /* and the MMU must stay off until we clear this flag */
0521     /* and test HSTATE_HWTHREAD_REQ(r13) in               */
0522     /* pnv_powersave_wakeup in this file.                 */
0523     /* The reason is that another thread can switch the   */
0524     /* MMU to a guest context whenever this flag is set   */
0525     /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
0526     /* that would potentially cause this thread to start  */
0527     /* executing instructions from guest memory in        */
0528     /* hypervisor mode, leading to a host crash or data   */
0529     /* corruption, or worse.                              */
0530     /******************************************************/
0531     local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
0532 #endif
0533 
0534     __ppc64_runlatch_off();
0535     srr1 = power7_idle_insn(power7_offline_type);
0536     __ppc64_runlatch_on();
0537 
0538 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
0539     local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
0540     /* Order setting hwthread_state vs. testing hwthread_req */
0541     smp_mb();
0542     if (local_paca->kvm_hstate.hwthread_req)
0543         srr1 = idle_kvm_start_guest(srr1);
0544 #endif
0545 
0546     mtmsr(MSR_KERNEL);
0547 
0548     return srr1;
0549 }
0550 #endif
0551 
0552 void power7_idle_type(unsigned long type)
0553 {
0554     unsigned long srr1;
0555 
0556     if (!prep_irq_for_idle_irqsoff())
0557         return;
0558 
0559     mtmsr(MSR_IDLE);
0560     __ppc64_runlatch_off();
0561     srr1 = power7_idle_insn(type);
0562     __ppc64_runlatch_on();
0563     mtmsr(MSR_KERNEL);
0564 
0565     fini_irq_for_idle_irqsoff();
0566     irq_set_pending_from_srr1(srr1);
0567 }
0568 
0569 static void power7_idle(void)
0570 {
0571     if (!powersave_nap)
0572         return;
0573 
0574     power7_idle_type(PNV_THREAD_NAP);
0575 }
0576 
0577 struct p9_sprs {
0578     /* per core */
0579     u64 ptcr;
0580     u64 rpr;
0581     u64 tscr;
0582     u64 ldbar;
0583 
0584     /* per thread */
0585     u64 lpcr;
0586     u64 hfscr;
0587     u64 fscr;
0588     u64 pid;
0589     u64 purr;
0590     u64 spurr;
0591     u64 dscr;
0592     u64 ciabr;
0593 
0594     u64 mmcra;
0595     u32 mmcr0;
0596     u32 mmcr1;
0597     u64 mmcr2;
0598 
0599     /* per thread SPRs that get lost in shallow states */
0600     u64 amr;
0601     u64 iamr;
0602     u64 amor;
0603     u64 uamor;
0604 };
0605 
0606 static unsigned long power9_idle_stop(unsigned long psscr)
0607 {
0608     int cpu = raw_smp_processor_id();
0609     int first = cpu_first_thread_sibling(cpu);
0610     unsigned long *state = &paca_ptrs[first]->idle_state;
0611     unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
0612     unsigned long srr1;
0613     unsigned long pls;
0614     unsigned long mmcr0 = 0;
0615     unsigned long mmcra = 0;
0616     struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
0617     bool sprs_saved = false;
0618 
0619     if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
0620         /* EC=ESL=0 case */
0621 
0622         /*
0623          * Wake synchronously. SRESET via xscom may still cause
0624          * a 0x100 powersave wakeup with SRR1 reason!
0625          */
0626         srr1 = isa300_idle_stop_noloss(psscr);      /* go idle */
0627         if (likely(!srr1))
0628             return 0;
0629 
0630         /*
0631          * Registers not saved, can't recover!
0632          * This would be a hardware bug
0633          */
0634         BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
0635 
0636         goto out;
0637     }
0638 
0639     /* EC=ESL=1 case */
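    /*
     * Cooperate with pnv_power9_force_smt4_catch() below: if another
     * thread has asked this one not to stop (dont_stop elevated), back
     * out before going idle.
     */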
0640 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
0641     if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
0642         local_paca->requested_psscr = psscr;
0643         /* order setting requested_psscr vs testing dont_stop */
0644         smp_mb();
0645         if (atomic_read(&local_paca->dont_stop)) {
0646             local_paca->requested_psscr = 0;
0647             return 0;
0648         }
0649     }
0650 #endif
0651 
0652     if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
0653          /*
0654           * POWER9 DD2 can incorrectly set PMAO when waking up
0655           * after a state-loss idle. Saving and restoring MMCR0
0656           * over idle is a workaround.
0657           */
0658         mmcr0       = mfspr(SPRN_MMCR0);
0659     }
0660 
0661     if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
0662         sprs.lpcr   = mfspr(SPRN_LPCR);
0663         sprs.hfscr  = mfspr(SPRN_HFSCR);
0664         sprs.fscr   = mfspr(SPRN_FSCR);
0665         sprs.pid    = mfspr(SPRN_PID);
0666         sprs.purr   = mfspr(SPRN_PURR);
0667         sprs.spurr  = mfspr(SPRN_SPURR);
0668         sprs.dscr   = mfspr(SPRN_DSCR);
0669         sprs.ciabr  = mfspr(SPRN_CIABR);
0670 
0671         sprs.mmcra  = mfspr(SPRN_MMCRA);
0672         sprs.mmcr0  = mfspr(SPRN_MMCR0);
0673         sprs.mmcr1  = mfspr(SPRN_MMCR1);
0674         sprs.mmcr2  = mfspr(SPRN_MMCR2);
0675 
0676         sprs.ptcr   = mfspr(SPRN_PTCR);
0677         sprs.rpr    = mfspr(SPRN_RPR);
0678         sprs.tscr   = mfspr(SPRN_TSCR);
0679         if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
0680             sprs.ldbar = mfspr(SPRN_LDBAR);
0681 
0682         sprs_saved = true;
0683 
0684         atomic_start_thread_idle();
0685     }
0686 
0687     sprs.amr    = mfspr(SPRN_AMR);
0688     sprs.iamr   = mfspr(SPRN_IAMR);
0689     sprs.uamor  = mfspr(SPRN_UAMOR);
0690 
0691     srr1 = isa300_idle_stop_mayloss(psscr);     /* go idle */
0692 
0693 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
0694     local_paca->requested_psscr = 0;
0695 #endif
0696 
0697     psscr = mfspr(SPRN_PSSCR);
0698 
0699     WARN_ON_ONCE(!srr1);
0700     WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
0701 
0702     if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
0703         /*
0704          * We don't need an isync after the mtsprs here because the
0705          * upcoming mtmsrd is execution synchronizing.
0706          */
0707         mtspr(SPRN_AMR,     sprs.amr);
0708         mtspr(SPRN_IAMR,    sprs.iamr);
0709         mtspr(SPRN_AMOR,    ~0);
0710         mtspr(SPRN_UAMOR,   sprs.uamor);
0711 
0712         /*
0713          * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
0714          * might have been corrupted and needs flushing. We also need
0715          * to reload MMCR0 (see mmcr0 comment above).
0716          */
0717         if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
0718             asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT);
0719             mtspr(SPRN_MMCR0, mmcr0);
0720         }
0721 
0722         /*
0723          * DD2.2 and earlier need to set then clear bit 60 in MMCRA
0724          * to ensure the PMU starts running.
0725          */
0726         mmcra = mfspr(SPRN_MMCRA);
0727         mmcra |= PPC_BIT(60);
0728         mtspr(SPRN_MMCRA, mmcra);
0729         mmcra &= ~PPC_BIT(60);
0730         mtspr(SPRN_MMCRA, mmcra);
0731     }
0732 
0733     if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
0734         hmi_exception_realmode(NULL);
0735 
0736     /*
0737      * On POWER9, SRR1 bits do not match exactly as expected.
0738      * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
0739      * just always test PSSCR for SPR/TB state loss.
0740      */
0741     pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
0742     if (likely(pls < deep_spr_loss_state)) {
0743         if (sprs_saved)
0744             atomic_stop_thread_idle();
0745         goto out;
0746     }
0747 
0748     /* HV state loss */
0749     BUG_ON(!sprs_saved);
0750 
0751     atomic_lock_thread_idle();
0752 
0753     if ((*state & core_thread_mask) != 0)
0754         goto core_woken;
0755 
0756     /* Per-core SPRs */
0757     mtspr(SPRN_PTCR,    sprs.ptcr);
0758     mtspr(SPRN_RPR,     sprs.rpr);
0759     mtspr(SPRN_TSCR,    sprs.tscr);
0760 
0761     if (pls >= pnv_first_tb_loss_level) {
0762         /* TB loss */
0763         if (opal_resync_timebase() != OPAL_SUCCESS)
0764             BUG();
0765     }
0766 
0767     /*
0768      * isync after restoring shared SPRs and before unlocking. Unlock
0769      * only contains hwsync which does not necessarily do the right
0770      * thing for SPRs.
0771      */
0772     isync();
0773 
0774 core_woken:
0775     atomic_unlock_and_stop_thread_idle();
0776 
0777     /* Per-thread SPRs */
0778     mtspr(SPRN_LPCR,    sprs.lpcr);
0779     mtspr(SPRN_HFSCR,   sprs.hfscr);
0780     mtspr(SPRN_FSCR,    sprs.fscr);
0781     mtspr(SPRN_PID,     sprs.pid);
0782     mtspr(SPRN_PURR,    sprs.purr);
0783     mtspr(SPRN_SPURR,   sprs.spurr);
0784     mtspr(SPRN_DSCR,    sprs.dscr);
0785     mtspr(SPRN_CIABR,   sprs.ciabr);
0786 
0787     mtspr(SPRN_MMCRA,   sprs.mmcra);
0788     mtspr(SPRN_MMCR0,   sprs.mmcr0);
0789     mtspr(SPRN_MMCR1,   sprs.mmcr1);
0790     mtspr(SPRN_MMCR2,   sprs.mmcr2);
0791     if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
0792         mtspr(SPRN_LDBAR, sprs.ldbar);
0793 
0794     mtspr(SPRN_SPRG3,   local_paca->sprg_vdso);
0795 
0796     if (!radix_enabled())
0797         __slb_restore_bolted_realmode();
0798 
0799 out:
0800     mtmsr(MSR_KERNEL);
0801 
0802     return srr1;
0803 }
0804 
0805 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
0806 /*
0807  * This is used in working around bugs in thread reconfiguration
0808  * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
0809  * memory and the way that XER[SO] is checkpointed.
0810  * This function forces the core into SMT4 by asking
0811  * all other threads not to stop, and sending a message to any
0812  * that are in a stop state.
0813  * Must be called with preemption disabled.
0814  */
0815 void pnv_power9_force_smt4_catch(void)
0816 {
0817     int cpu, cpu0, thr;
0818     int awake_threads = 1;      /* this thread is awake */
0819     int poke_threads = 0;
0820     int need_awake = threads_per_core;
0821 
0822     cpu = smp_processor_id();
0823     cpu0 = cpu & ~(threads_per_core - 1);
0824     for (thr = 0; thr < threads_per_core; ++thr) {
0825         if (cpu != cpu0 + thr)
0826             atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
0827     }
0828     /* order setting dont_stop vs testing requested_psscr */
0829     smp_mb();
0830     for (thr = 0; thr < threads_per_core; ++thr) {
0831         if (!paca_ptrs[cpu0+thr]->requested_psscr)
0832             ++awake_threads;
0833         else
0834             poke_threads |= (1 << thr);
0835     }
0836 
0837     /* If at least 3 threads are awake, the core is in SMT4 already */
0838     if (awake_threads < need_awake) {
0839         /* We have to wake some threads; we'll use msgsnd */
0840         for (thr = 0; thr < threads_per_core; ++thr) {
0841             if (poke_threads & (1 << thr)) {
0842                 ppc_msgsnd_sync();
0843                 ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
0844                        paca_ptrs[cpu0+thr]->hw_cpu_id);
0845             }
0846         }
0847         /* now spin until at least 3 threads are awake */
0848         do {
0849             for (thr = 0; thr < threads_per_core; ++thr) {
0850                 if ((poke_threads & (1 << thr)) &&
0851                     !paca_ptrs[cpu0+thr]->requested_psscr) {
0852                     ++awake_threads;
0853                     poke_threads &= ~(1 << thr);
0854                 }
0855             }
0856         } while (awake_threads < need_awake);
0857     }
0858 }
0859 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);
0860 
0861 void pnv_power9_force_smt4_release(void)
0862 {
0863     int cpu, cpu0, thr;
0864 
0865     cpu = smp_processor_id();
0866     cpu0 = cpu & ~(threads_per_core - 1);
0867 
0868     /* clear all the dont_stop flags */
0869     for (thr = 0; thr < threads_per_core; ++thr) {
0870         if (cpu != cpu0 + thr)
0871             atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
0872     }
0873 }
0874 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
0875 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
0876 
0877 struct p10_sprs {
0878     /*
0879      * SPRs that get lost in shallow states:
0880      *
0881      * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1
0882      * isa300 idle routines restore CR, LR.
0883      * CTR is volatile
0884      * idle thread doesn't use FP or VEC
0885      * kernel doesn't use TAR
0886      * HSPRG1 is only live in HV interrupt entry
0887      * SPRG2 is only live in KVM guests, KVM handles it.
0888      */
0889 };
0890 
0891 static unsigned long power10_idle_stop(unsigned long psscr)
0892 {
0893     int cpu = raw_smp_processor_id();
0894     int first = cpu_first_thread_sibling(cpu);
0895     unsigned long *state = &paca_ptrs[first]->idle_state;
0896     unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
0897     unsigned long srr1;
0898     unsigned long pls;
0899 //  struct p10_sprs sprs = {}; /* avoid false used-uninitialised */
0900     bool sprs_saved = false;
0901 
0902     if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
0903         /* EC=ESL=0 case */
0904 
0905         /*
0906          * Wake synchronously. SRESET via xscom may still cause
0907          * a 0x100 powersave wakeup with SRR1 reason!
0908          */
0909         srr1 = isa300_idle_stop_noloss(psscr);      /* go idle */
0910         if (likely(!srr1))
0911             return 0;
0912 
0913         /*
0914          * Registers not saved, can't recover!
0915          * This would be a hardware bug
0916          */
0917         BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
0918 
0919         goto out;
0920     }
0921 
0922     /* EC=ESL=1 case */
0923     if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
0924         /* XXX: save SPRs for deep state loss here. */
0925 
0926         sprs_saved = true;
0927 
0928         atomic_start_thread_idle();
0929     }
0930 
0931     srr1 = isa300_idle_stop_mayloss(psscr);     /* go idle */
0932 
0933     psscr = mfspr(SPRN_PSSCR);
0934 
0935     WARN_ON_ONCE(!srr1);
0936     WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
0937 
0938     if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
0939         hmi_exception_realmode(NULL);
0940 
0941     /*
0942      * On POWER10, SRR1 bits do not match exactly as expected.
0943      * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
0944      * just always test PSSCR for SPR/TB state loss.
0945      */
0946     pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
0947     if (likely(pls < deep_spr_loss_state)) {
0948         if (sprs_saved)
0949             atomic_stop_thread_idle();
0950         goto out;
0951     }
0952 
0953     /* HV state loss */
0954     BUG_ON(!sprs_saved);
0955 
0956     atomic_lock_thread_idle();
0957 
0958     if ((*state & core_thread_mask) != 0)
0959         goto core_woken;
0960 
0961     /* XXX: restore per-core SPRs here */
0962 
0963     if (pls >= pnv_first_tb_loss_level) {
0964         /* TB loss */
0965         if (opal_resync_timebase() != OPAL_SUCCESS)
0966             BUG();
0967     }
0968 
0969     /*
0970      * isync after restoring shared SPRs and before unlocking. Unlock
0971      * only contains hwsync which does not necessarily do the right
0972      * thing for SPRs.
0973      */
0974     isync();
0975 
0976 core_woken:
0977     atomic_unlock_and_stop_thread_idle();
0978 
0979     /* XXX: restore per-thread SPRs here */
0980 
0981     if (!radix_enabled())
0982         __slb_restore_bolted_realmode();
0983 
0984 out:
0985     mtmsr(MSR_KERNEL);
0986 
0987     return srr1;
0988 }
0989 
0990 #ifdef CONFIG_HOTPLUG_CPU
0991 static unsigned long arch300_offline_stop(unsigned long psscr)
0992 {
0993     unsigned long srr1;
0994 
0995     if (cpu_has_feature(CPU_FTR_ARCH_31))
0996         srr1 = power10_idle_stop(psscr);
0997     else
0998         srr1 = power9_idle_stop(psscr);
0999 
1000     return srr1;
1001 }
1002 #endif
1003 
1004 void arch300_idle_type(unsigned long stop_psscr_val,
1005                       unsigned long stop_psscr_mask)
1006 {
1007     unsigned long psscr;
1008     unsigned long srr1;
1009 
1010     if (!prep_irq_for_idle_irqsoff())
1011         return;
1012 
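    /* Merge the requested state's fields into the live PSSCR value. */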
1013     psscr = mfspr(SPRN_PSSCR);
1014     psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
1015 
1016     __ppc64_runlatch_off();
1017     if (cpu_has_feature(CPU_FTR_ARCH_31))
1018         srr1 = power10_idle_stop(psscr);
1019     else
1020         srr1 = power9_idle_stop(psscr);
1021     __ppc64_runlatch_on();
1022 
1023     fini_irq_for_idle_irqsoff();
1024 
1025     irq_set_pending_from_srr1(srr1);
1026 }
1027 
1028 /*
1029  * Used for ppc_md.power_save which needs a function with no parameters
1030  */
1031 static void arch300_idle(void)
1032 {
1033     arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
1034 }
1035 
1036 #ifdef CONFIG_HOTPLUG_CPU
1037 
1038 void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
1039 {
1040     u64 pir = get_hard_smp_processor_id(cpu);
1041 
1042     mtspr(SPRN_LPCR, lpcr_val);
1043 
1044     /*
1045      * Program the LPCR via stop-api only if the deepest stop state
1046      * can lose hypervisor context.
1047      */
1048     if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
1049         opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
1050 }
1051 
1052 /*
1053  * pnv_cpu_offline: Puts the CPU into the deepest available platform
1054  * idle state when the CPU is offlined. Called with interrupts hard
1055  * disabled and no lazy irq pending.
1056  */
1057 unsigned long pnv_cpu_offline(unsigned int cpu)
1058 {
1059     unsigned long srr1;
1060 
1061     __ppc64_runlatch_off();
1062 
1063     if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
1064         unsigned long psscr;
1065 
1066         psscr = mfspr(SPRN_PSSCR);
1067         psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
1068                         pnv_deepest_stop_psscr_val;
1069         srr1 = arch300_offline_stop(psscr);
1070     } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
1071         srr1 = power7_offline();
1072     } else {
1073         /* This is the fallback method. We emulate snooze */
1074         while (!generic_check_cpu_restart(cpu)) {
1075             HMT_low();
1076             HMT_very_low();
1077         }
1078         srr1 = 0;
1079         HMT_medium();
1080     }
1081 
1082     __ppc64_runlatch_on();
1083 
1084     return srr1;
1085 }
1086 #endif
1087 
1088 /*
1089  * Power ISA 3.0 idle initialization.
1090  *
1091  * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
1092  * Register (PSSCR) to control idle behavior.
1093  *
1094  * PSSCR layout:
1095  * ----------------------------------------------------------
1096  * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
1097  * ----------------------------------------------------------
1098  * 0      4     41   42    43   44     48    54   56    60
1099  *
1100  * PSSCR key fields:
1101  *  Bits 0:3  - Power-Saving Level Status (PLS). This field indicates the
1102  *  lowest power-saving state the thread entered since stop instruction was
1103  *  last executed.
1104  *
1105  *  Bit 41 - Status Disable(SD)
1106  *  0 - Shows PLS entries
1107  *  1 - PLS entries are all 0
1108  *
1109  *  Bit 42 - Enable State Loss
1110  *  0 - No state is lost irrespective of other fields
1111  *  1 - Allows state loss
1112  *
1113  *  Bit 43 - Exit Criterion
1114  *  0 - Exit from power-save mode on any interrupt
1115  *  1 - Exit from power-save mode controlled by LPCR's PECE bits
1116  *
1117  *  Bits 44:47 - Power-Saving Level Limit
1118  *  This limits the power-saving level that can be entered into.
1119  *
1120  *  Bits 60:63 - Requested Level
1121  *  Used to specify which power-saving level must be entered on executing
1122  *  stop instruction
1123  */
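/*
 * For example, a (hypothetical) deep state with ESL = EC = 1 and
 * Requested Level 3 would use psscr_val = PSSCR_ESL | PSSCR_EC | 3
 * = 0x0000000000300003, with psscr_mask covering the same fields.
 */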
1124 
1125 int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
1126 {
1127     int err = 0;
1128 
1129     /*
1130      * psscr_mask == 0xf indicates an older firmware.
1131      * Set remaining fields of psscr to the default values.
1132      * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
1133      */
1134     if (*psscr_mask == 0xf) {
1135         *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
1136         *psscr_mask = PSSCR_HV_DEFAULT_MASK;
1137         return err;
1138     }
1139 
1140     /*
1141      * New firmware is expected to set the psscr_val bits correctly.
1142      * Validate that the following invariants are correctly maintained by
1143      * the new firmware.
1144      * - ESL bit value matches the EC bit value.
1145      * - ESL bit is set for all the deep stop states.
1146      */
1147     if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
1148         err = ERR_EC_ESL_MISMATCH;
1149     } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
1150         GET_PSSCR_ESL(*psscr_val) == 0) {
1151         err = ERR_DEEP_STATE_ESL_MISMATCH;
1152     }
1153 
1154     return err;
1155 }
1156 
1157 /*
1158  * pnv_arch300_idle_init: Initializes the default idle state, first
1159  *                        deep idle state and deepest idle state on
1160  *                        ISA 3.0 CPUs, using the idle states already
1161  *                        parsed from the /ibm,opal/power-mgt device
1162  *                        node into pnv_idle_states[].
1163  */
1167 static void __init pnv_arch300_idle_init(void)
1168 {
1169     u64 max_residency_ns = 0;
1170     int i;
1171 
1172     /* stop is not really architected, we only have p9,p10 drivers */
1173     if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9))
1174         return;
1175 
1176     /*
1177      * pnv_deepest_stop_{val,mask} should be set to values corresponding to
1178      * the deepest stop state.
1179      *
1180      * pnv_default_stop_{val,mask} should be set to values corresponding to
1181      * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state.
1182      */
1183     pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
1184     deep_spr_loss_state = MAX_STOP_STATE + 1;
1185     for (i = 0; i < nr_pnv_idle_states; i++) {
1186         int err;
1187         struct pnv_idle_states_t *state = &pnv_idle_states[i];
1188         u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
1189 
1190         /* No deep loss driver implemented for POWER10 yet */
1191         if (pvr_version_is(PVR_POWER10) &&
1192                 state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT))
1193             continue;
1194 
1195         if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
1196              (pnv_first_tb_loss_level > psscr_rl))
1197             pnv_first_tb_loss_level = psscr_rl;
1198 
1199         if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
1200              (deep_spr_loss_state > psscr_rl))
1201             deep_spr_loss_state = psscr_rl;
1202 
1203         /*
1204          * The idle code does not deal with TB loss occurring
1205          * in a shallower state than SPR loss, so force it to
1206          * behave like SPRs are lost if TB is lost. POWER9 would
1207          * never encounter this, but a POWER8 core would if it
1208          * implemented the stop instruction. So this is for forward
1209          * compatibility.
1210          */
1211         if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
1212              (deep_spr_loss_state > psscr_rl))
1213             deep_spr_loss_state = psscr_rl;
1214 
1215         err = validate_psscr_val_mask(&state->psscr_val,
1216                           &state->psscr_mask,
1217                           state->flags);
1218         if (err) {
1219             report_invalid_psscr_val(state->psscr_val, err);
1220             continue;
1221         }
1222 
1223         state->valid = true;
1224 
1225         if (max_residency_ns < state->residency_ns) {
1226             max_residency_ns = state->residency_ns;
1227             pnv_deepest_stop_psscr_val = state->psscr_val;
1228             pnv_deepest_stop_psscr_mask = state->psscr_mask;
1229             pnv_deepest_stop_flag = state->flags;
1230             deepest_stop_found = true;
1231         }
1232 
1233         if (!default_stop_found &&
1234             (state->flags & OPAL_PM_STOP_INST_FAST)) {
1235             pnv_default_stop_val = state->psscr_val;
1236             pnv_default_stop_mask = state->psscr_mask;
1237             default_stop_found = true;
1238             WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT);
1239         }
1240     }
1241 
1242     if (unlikely(!default_stop_found)) {
1243         pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
1244     } else {
1245         ppc_md.power_save = arch300_idle;
1246         pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
1247             pnv_default_stop_val, pnv_default_stop_mask);
1248     }
1249 
1250     if (unlikely(!deepest_stop_found)) {
1251         pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait");
1252     } else {
1253         pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
1254             pnv_deepest_stop_psscr_val,
1255             pnv_deepest_stop_psscr_mask);
1256     }
1257 
1258     pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n",
1259         deep_spr_loss_state);
1260 
1261     pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n",
1262         pnv_first_tb_loss_level);
1263 }
1264 
1265 static void __init pnv_disable_deep_states(void)
1266 {
1267     /*
1268      * The stop-api is unable to restore hypervisor
1269      * resources on wakeup from platform idle states which
1270      * lose full context. So disable such states.
1271      */
1272     supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
1273     pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
1274     pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
1275 
1276     if (cpu_has_feature(CPU_FTR_ARCH_300) &&
1277         (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
1278         /*
1279          * Use the default stop state for CPU-Hotplug
1280          * if available.
1281          */
1282         if (default_stop_found) {
1283             pnv_deepest_stop_psscr_val = pnv_default_stop_val;
1284             pnv_deepest_stop_psscr_mask = pnv_default_stop_mask;
1285             pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
1286                 pnv_deepest_stop_psscr_val);
1287         } else { /* Fallback to snooze loop for CPU-Hotplug */
1288             deepest_stop_found = false;
1289             pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
1290         }
1291     }
1292 }
1293 
1294 /*
1295  * Probe device tree for supported idle states
1296  */
1297 static void __init pnv_probe_idle_states(void)
1298 {
1299     int i;
1300 
1301     if (nr_pnv_idle_states < 0) {
1302         pr_warn("cpuidle-powernv: no idle states found in the DT\n");
1303         return;
1304     }
1305 
1306     if (cpu_has_feature(CPU_FTR_ARCH_300))
1307         pnv_arch300_idle_init();
1308 
1309     for (i = 0; i < nr_pnv_idle_states; i++)
1310         supported_cpuidle_states |= pnv_idle_states[i].flags;
1311 }
1312 
1313 /*
1314  * This function parses device-tree and populates all the information
1315  * into pnv_idle_states structure. It also sets up nr_pnv_idle_states
1316  * which is the number of cpuidle states discovered through device-tree.
1317  */
1318 
1319 static int __init pnv_parse_cpuidle_dt(void)
1320 {
1321     struct device_node *np;
1322     int nr_idle_states, i;
1323     int rc = 0;
1324     u32 *temp_u32;
1325     u64 *temp_u64;
1326     const char **temp_string;
1327 
1328     np = of_find_node_by_path("/ibm,opal/power-mgt");
1329     if (!np) {
1330         pr_warn("opal: PowerMgmt Node not found\n");
1331         return -ENODEV;
1332     }
1333     nr_idle_states = of_property_count_u32_elems(np,
1334                         "ibm,cpu-idle-state-flags");
1335 
1336     pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states),
1337                   GFP_KERNEL);
1338     temp_u32 = kcalloc(nr_idle_states, sizeof(u32),  GFP_KERNEL);
1339     temp_u64 = kcalloc(nr_idle_states, sizeof(u64),  GFP_KERNEL);
1340     temp_string = kcalloc(nr_idle_states, sizeof(char *),  GFP_KERNEL);
1341 
1342     if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) {
1343         pr_err("Could not allocate memory for dt parsing\n");
1344         rc = -ENOMEM;
1345         goto out;
1346     }
1347 
1348     /* Read flags */
1349     if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags",
1350                        temp_u32, nr_idle_states)) {
1351         pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
1352         rc = -EINVAL;
1353         goto out;
1354     }
1355     for (i = 0; i < nr_idle_states; i++)
1356         pnv_idle_states[i].flags = temp_u32[i];
1357 
1358     /* Read latencies */
1359     if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns",
1360                        temp_u32, nr_idle_states)) {
1361         pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
1362         rc = -EINVAL;
1363         goto out;
1364     }
1365     for (i = 0; i < nr_idle_states; i++)
1366         pnv_idle_states[i].latency_ns = temp_u32[i];
1367 
1368     /* Read residencies */
1369     if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns",
1370                        temp_u32, nr_idle_states)) {
1371         pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
1372         rc = -EINVAL;
1373         goto out;
1374     }
1375     for (i = 0; i < nr_idle_states; i++)
1376         pnv_idle_states[i].residency_ns = temp_u32[i];
1377 
1378     /* For power9 and later */
1379     if (cpu_has_feature(CPU_FTR_ARCH_300)) {
1380         /* Read pm_crtl_val */
1381         if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr",
1382                            temp_u64, nr_idle_states)) {
1383             pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
1384             rc = -EINVAL;
1385             goto out;
1386         }
1387         for (i = 0; i < nr_idle_states; i++)
1388             pnv_idle_states[i].psscr_val = temp_u64[i];
1389 
1390         /* Read pm_crtl_mask */
1391         if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask",
1392                            temp_u64, nr_idle_states)) {
1393             pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
1394             rc = -EINVAL;
1395             goto out;
1396         }
1397         for (i = 0; i < nr_idle_states; i++)
1398             pnv_idle_states[i].psscr_mask = temp_u64[i];
1399     }
1400 
1401     /*
1402      * power8 specific properties ibm,cpu-idle-state-pmicr-mask and
1403      * ibm,cpu-idle-state-pmicr-val were never used and there is no
1404      * plan to use it in near future. Hence, not parsing these properties
1405      */
1406 
1407     if (of_property_read_string_array(np, "ibm,cpu-idle-state-names",
1408                       temp_string, nr_idle_states) < 0) {
1409         pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n");
1410         rc = -EINVAL;
1411         goto out;
1412     }
1413     for (i = 0; i < nr_idle_states; i++)
1414         strlcpy(pnv_idle_states[i].name, temp_string[i],
1415             PNV_IDLE_NAME_LEN);
1416     nr_pnv_idle_states = nr_idle_states;
1417     rc = 0;
1418 out:
1419     kfree(temp_u32);
1420     kfree(temp_u64);
1421     kfree(temp_string);
1422     return rc;
1423 }
1424 
1425 static int __init pnv_init_idle_states(void)
1426 {
1427     int cpu;
1428     int rc = 0;
1429 
1430     /* Set up PACA fields */
1431     for_each_present_cpu(cpu) {
1432         struct paca_struct *p = paca_ptrs[cpu];
1433 
1434         p->idle_state = 0;
1435         if (cpu == cpu_first_thread_sibling(cpu))
1436             p->idle_state = (1 << threads_per_core) - 1;
1437 
1438         if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
1439             /* P7/P8 nap */
1440             p->thread_idle_state = PNV_THREAD_RUNNING;
1441         } else if (pvr_version_is(PVR_POWER9)) {
1442             /* P9 stop workarounds */
1443 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
1444             p->requested_psscr = 0;
1445             atomic_set(&p->dont_stop, 0);
1446 #endif
1447         }
1448     }
1449 
1450     /* In case we error out nr_pnv_idle_states will be zero */
1451     nr_pnv_idle_states = 0;
1452     supported_cpuidle_states = 0;
1453 
1454     if (cpuidle_disable != IDLE_NO_OVERRIDE)
1455         goto out;
1456     rc = pnv_parse_cpuidle_dt();
1457     if (rc)
1458         return rc;
1459     pnv_probe_idle_states();
1460 
1461     if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
1462         if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
1463             power7_fastsleep_workaround_entry = false;
1464             power7_fastsleep_workaround_exit = false;
1465         } else {
1466             /*
1467              * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
1468              * workaround is needed to use fastsleep. Provide sysfs
1469              * control to choose how this workaround has to be
1470              * applied.
1471              */
1472             device_create_file(cpu_subsys.dev_root,
1473                 &dev_attr_fastsleep_workaround_applyonce);
1474         }
1475 
1476         update_subcore_sibling_mask();
1477 
1478         if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) {
1479             ppc_md.power_save = power7_idle;
1480             power7_offline_type = PNV_THREAD_NAP;
1481         }
1482 
1483         if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) &&
1484                (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT))
1485             power7_offline_type = PNV_THREAD_WINKLE;
1486         else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) ||
1487                (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1))
1488             power7_offline_type = PNV_THREAD_SLEEP;
1489     }
1490 
1491     if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
1492         if (pnv_save_sprs_for_deep_states())
1493             pnv_disable_deep_states();
1494     }
1495 
1496 out:
1497     return 0;
1498 }
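/* Registered as a subsys-level initcall, run only on PowerNV machines. */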
1499 machine_subsys_initcall(powernv, pnv_init_idle_states);