Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  *  cpuidle-powernv - idle state cpuidle driver.
0004  *  Adapted from drivers/cpuidle/cpuidle-pseries
0005  *
0006  */
0007 
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/cpuidle.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/clockchips.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/string.h>

#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/opal.h>
#include <asm/runlatch.h>
#include <asm/cpuidle.h>
0024 
0025 /*
0026  * Expose only those Hardware idle states via the cpuidle framework
0027  * that have latency value below POWERNV_THRESHOLD_LATENCY_NS.
0028  */
0029 #define POWERNV_THRESHOLD_LATENCY_NS 200000
0030 
/* Driver descriptor registered with the cpuidle core at init time. */
static struct cpuidle_driver powernv_idle_driver = {
	.name             = "powernv_idle",
	.owner            = THIS_MODULE,
};
0035 
/* Number of states handed to cpuidle, including snooze (set in probe). */
static int max_idle_state __read_mostly;
/* Source table the driver copies its states from; see driver_init(). */
static struct cpuidle_state *cpuidle_state_table __read_mostly;

/* PSSCR value/mask pair needed to request a given "stop" idle level. */
struct stop_psscr_table {
	u64 val;
	u64 mask;
};

/* Indexed in lockstep with powernv_states[]; entries stay 0 for pre-stop states. */
static struct stop_psscr_table stop_psscr_table[CPUIDLE_STATE_MAX] __read_mostly;

/* Fallback snooze poll budget (timebase ticks) and whether timeout applies. */
static u64 default_snooze_timeout __read_mostly;
static bool snooze_timeout_en __read_mostly;
0048 
0049 static u64 get_snooze_timeout(struct cpuidle_device *dev,
0050                   struct cpuidle_driver *drv,
0051                   int index)
0052 {
0053     int i;
0054 
0055     if (unlikely(!snooze_timeout_en))
0056         return default_snooze_timeout;
0057 
0058     for (i = index + 1; i < drv->state_count; i++) {
0059         if (dev->states_usage[i].disable)
0060             continue;
0061 
0062         return drv->states[i].target_residency * tb_ticks_per_usec;
0063     }
0064 
0065     return default_snooze_timeout;
0066 }
0067 
/*
 * snooze_loop - polling "idle" state.
 *
 * Busy-waits at very low thread priority until the scheduler wants the
 * CPU back (need_resched()) or the snooze budget expires, whichever is
 * first.  Returns @index so the cpuidle core accounts residency to this
 * state.  Entered with interrupts disabled by the cpuidle core; they are
 * enabled for the duration of the poll and disabled again on exit.
 */
static int snooze_loop(struct cpuidle_device *dev,
			struct cpuidle_driver *drv,
			int index)
{
	u64 snooze_exit_time;

	/* Polling flag lets the scheduler skip the resched IPI for us. */
	set_thread_flag(TIF_POLLING_NRFLAG);

	local_irq_enable();

	snooze_exit_time = get_tb() + get_snooze_timeout(dev, drv, index);
	ppc64_runlatch_off();
	HMT_very_low();
	while (!need_resched()) {
		if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) {
			/*
			 * Task has not woken up but we are exiting the polling
			 * loop anyway. Require a barrier after polling is
			 * cleared to order subsequent test of need_resched().
			 */
			clear_thread_flag(TIF_POLLING_NRFLAG);
			smp_mb();
			break;
		}
	}

	HMT_medium();
	ppc64_runlatch_on();
	/* Redundant (no-op) if the timeout path above already cleared it. */
	clear_thread_flag(TIF_POLLING_NRFLAG);

	local_irq_disable();

	return index;
}
0102 
0103 static int nap_loop(struct cpuidle_device *dev,
0104             struct cpuidle_driver *drv,
0105             int index)
0106 {
0107     power7_idle_type(PNV_THREAD_NAP);
0108 
0109     return index;
0110 }
0111 
0112 /* Register for fastsleep only in oneshot mode of broadcast */
0113 #ifdef CONFIG_TICK_ONESHOT
0114 static int fastsleep_loop(struct cpuidle_device *dev,
0115                 struct cpuidle_driver *drv,
0116                 int index)
0117 {
0118     unsigned long old_lpcr = mfspr(SPRN_LPCR);
0119     unsigned long new_lpcr;
0120 
0121     if (unlikely(system_state < SYSTEM_RUNNING))
0122         return index;
0123 
0124     new_lpcr = old_lpcr;
0125     /* Do not exit powersave upon decrementer as we've setup the timer
0126      * offload.
0127      */
0128     new_lpcr &= ~LPCR_PECE1;
0129 
0130     mtspr(SPRN_LPCR, new_lpcr);
0131 
0132     power7_idle_type(PNV_THREAD_SLEEP);
0133 
0134     mtspr(SPRN_LPCR, old_lpcr);
0135 
0136     return index;
0137 }
0138 #endif
0139 
0140 static int stop_loop(struct cpuidle_device *dev,
0141              struct cpuidle_driver *drv,
0142              int index)
0143 {
0144     arch300_idle_type(stop_psscr_table[index].val,
0145              stop_psscr_table[index].mask);
0146     return index;
0147 }
0148 
0149 /*
0150  * States for dedicated partition case.
0151  */
/*
 * States for dedicated partition case.
 *
 * Slot 0 is always the polling snooze state; deeper states are appended
 * at boot by powernv_add_idle_states() from firmware-provided data.
 */
static struct cpuidle_state powernv_states[CPUIDLE_STATE_MAX] = {
	{ /* Snooze */
		.name = "snooze",
		.desc = "snooze",
		.exit_latency = 0,
		.target_residency = 0,
		.enter = snooze_loop },
};
0160 
0161 static int powernv_cpuidle_cpu_online(unsigned int cpu)
0162 {
0163     struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
0164 
0165     if (dev && cpuidle_get_driver()) {
0166         cpuidle_pause_and_lock();
0167         cpuidle_enable_device(dev);
0168         cpuidle_resume_and_unlock();
0169     }
0170     return 0;
0171 }
0172 
0173 static int powernv_cpuidle_cpu_dead(unsigned int cpu)
0174 {
0175     struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
0176 
0177     if (dev && cpuidle_get_driver()) {
0178         cpuidle_pause_and_lock();
0179         cpuidle_disable_device(dev);
0180         cpuidle_resume_and_unlock();
0181     }
0182     return 0;
0183 }
0184 
0185 /*
0186  * powernv_cpuidle_driver_init()
0187  */
0188 static int powernv_cpuidle_driver_init(void)
0189 {
0190     int idle_state;
0191     struct cpuidle_driver *drv = &powernv_idle_driver;
0192 
0193     drv->state_count = 0;
0194 
0195     for (idle_state = 0; idle_state < max_idle_state; ++idle_state) {
0196         /* Is the state not enabled? */
0197         if (cpuidle_state_table[idle_state].enter == NULL)
0198             continue;
0199 
0200         drv->states[drv->state_count] = /* structure copy */
0201             cpuidle_state_table[idle_state];
0202 
0203         drv->state_count += 1;
0204     }
0205 
0206     /*
0207      * On the PowerNV platform cpu_present may be less than cpu_possible in
0208      * cases when firmware detects the CPU, but it is not available to the
0209      * OS.  If CONFIG_HOTPLUG_CPU=n, then such CPUs are not hotplugable at
0210      * run time and hence cpu_devices are not created for those CPUs by the
0211      * generic topology_init().
0212      *
0213      * drv->cpumask defaults to cpu_possible_mask in
0214      * __cpuidle_driver_init().  This breaks cpuidle on PowerNV where
0215      * cpu_devices are not created for CPUs in cpu_possible_mask that
0216      * cannot be hot-added later at run time.
0217      *
0218      * Trying cpuidle_register_device() on a CPU without a cpu_device is
0219      * incorrect, so pass a correct CPU mask to the generic cpuidle driver.
0220      */
0221 
0222     drv->cpumask = (struct cpumask *)cpu_present_mask;
0223 
0224     return 0;
0225 }
0226 
0227 static inline void add_powernv_state(int index, const char *name,
0228                      unsigned int flags,
0229                      int (*idle_fn)(struct cpuidle_device *,
0230                             struct cpuidle_driver *,
0231                             int),
0232                      unsigned int target_residency,
0233                      unsigned int exit_latency,
0234                      u64 psscr_val, u64 psscr_mask)
0235 {
0236     strlcpy(powernv_states[index].name, name, CPUIDLE_NAME_LEN);
0237     strlcpy(powernv_states[index].desc, name, CPUIDLE_NAME_LEN);
0238     powernv_states[index].flags = flags;
0239     powernv_states[index].target_residency = target_residency;
0240     powernv_states[index].exit_latency = exit_latency;
0241     powernv_states[index].enter = idle_fn;
0242     /* For power8 and below psscr_* will be 0 */
0243     stop_psscr_table[index].val = psscr_val;
0244     stop_psscr_table[index].mask = psscr_mask;
0245 }
0246 
/* Provided by platform code (arch/powerpc/platforms/powernv/idle.c). */
extern u32 pnv_get_supported_cpuidle_states(void);

/*
 * powernv_add_idle_states - append firmware-described states to
 * powernv_states[] after the statically defined snooze state.
 *
 * Walks pnv_idle_states[] (size nr_pnv_idle_states, both defined by
 * platform code), filtering out states that are unsupported, too slow,
 * or invalid, and registers each survivor as Nap, FastSleep, or a stop
 * state.  Returns the total number of usable states, including snooze
 * (so at least 1).
 *
 * NOTE(review): the else-if chain below is deliberately spliced across
 * the CONFIG_TICK_ONESHOT #ifdef so that sleep/timebase-stopping states
 * only exist when oneshot broadcast is available — keep the #ifdef and
 * the bare "else if"/"else" tokens exactly paired when editing.
 */
static int powernv_add_idle_states(void)
{
	int nr_idle_states = 1; /* Snooze */
	int dt_idle_states;
	u32 has_stop_states = 0;
	int i;
	u32 supported_flags = pnv_get_supported_cpuidle_states();


	/* Currently we have snooze statically defined */
	if (nr_pnv_idle_states <= 0) {
		pr_warn("cpuidle-powernv : Only Snooze is available\n");
		goto out;
	}

	/* TODO: Count only states which are eligible for cpuidle */
	dt_idle_states = nr_pnv_idle_states;

	/*
	 * Since snooze is used as first idle state, max idle states allowed is
	 * CPUIDLE_STATE_MAX -1
	 */
	if (nr_pnv_idle_states > CPUIDLE_STATE_MAX - 1) {
		pr_warn("cpuidle-powernv: discovered idle states more than allowed");
		dt_idle_states = CPUIDLE_STATE_MAX - 1;
	}

	/*
	 * If the idle states use stop instruction, probe for psscr values
	 * and psscr mask which are necessary to specify required stop level.
	 */
	has_stop_states = (pnv_idle_states[0].flags &
			   (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP));

	for (i = 0; i < dt_idle_states; i++) {
		unsigned int exit_latency, target_residency;
		bool stops_timebase = false;
		struct pnv_idle_states_t *state = &pnv_idle_states[i];

		/*
		 * Skip the platform idle state whose flag isn't in
		 * the supported_cpuidle_states flag mask.
		 */
		if ((state->flags & supported_flags) != state->flags)
			continue;
		/*
		 * If an idle state has exit latency beyond
		 * POWERNV_THRESHOLD_LATENCY_NS then don't use it
		 * in cpu-idle.
		 */
		if (state->latency_ns > POWERNV_THRESHOLD_LATENCY_NS)
			continue;
		/*
		 * Firmware passes residency and latency values in ns.
		 * cpuidle expects it in us.
		 */
		exit_latency = DIV_ROUND_UP(state->latency_ns, 1000);
		target_residency = DIV_ROUND_UP(state->residency_ns, 1000);

		/* Stop states not validated by platform code are unusable. */
		if (has_stop_states && !(state->valid))
				continue;

		if (state->flags & OPAL_PM_TIMEBASE_STOP)
			stops_timebase = true;

		if (state->flags & OPAL_PM_NAP_ENABLED) {
			/* Add NAP state */
			add_powernv_state(nr_idle_states, "Nap",
					  CPUIDLE_FLAG_NONE, nap_loop,
					  target_residency, exit_latency, 0, 0);
		} else if (has_stop_states && !stops_timebase) {
			add_powernv_state(nr_idle_states, state->name,
					  CPUIDLE_FLAG_NONE, stop_loop,
					  target_residency, exit_latency,
					  state->psscr_val,
					  state->psscr_mask);
		}

		/*
		 * All cpuidle states with CPUIDLE_FLAG_TIMER_STOP set must come
		 * within this config dependency check.
		 */
#ifdef CONFIG_TICK_ONESHOT
		else if (state->flags & OPAL_PM_SLEEP_ENABLED ||
			 state->flags & OPAL_PM_SLEEP_ENABLED_ER1) {
			/* Add FASTSLEEP state */
			add_powernv_state(nr_idle_states, "FastSleep",
					  CPUIDLE_FLAG_TIMER_STOP,
					  fastsleep_loop,
					  target_residency, exit_latency, 0, 0);
		} else if (has_stop_states && stops_timebase) {
			add_powernv_state(nr_idle_states, state->name,
					  CPUIDLE_FLAG_TIMER_STOP, stop_loop,
					  target_residency, exit_latency,
					  state->psscr_val,
					  state->psscr_mask);
		}
#endif
		else
			continue;
		nr_idle_states++;
	}
out:
	return nr_idle_states;
}
0353 
0354 /*
0355  * powernv_idle_probe()
0356  * Choose state table for shared versus dedicated partition
0357  */
0358 static int powernv_idle_probe(void)
0359 {
0360     if (cpuidle_disable != IDLE_NO_OVERRIDE)
0361         return -ENODEV;
0362 
0363     if (firmware_has_feature(FW_FEATURE_OPAL)) {
0364         cpuidle_state_table = powernv_states;
0365         /* Device tree can indicate more idle states */
0366         max_idle_state = powernv_add_idle_states();
0367         default_snooze_timeout = TICK_USEC * tb_ticks_per_usec;
0368         if (max_idle_state > 1)
0369             snooze_timeout_en = true;
0370     } else
0371         return -ENODEV;
0372 
0373     return 0;
0374 }
0375 
0376 static int __init powernv_processor_idle_init(void)
0377 {
0378     int retval;
0379 
0380     retval = powernv_idle_probe();
0381     if (retval)
0382         return retval;
0383 
0384     powernv_cpuidle_driver_init();
0385     retval = cpuidle_register(&powernv_idle_driver, NULL);
0386     if (retval) {
0387         printk(KERN_DEBUG "Registration of powernv driver failed.\n");
0388         return retval;
0389     }
0390 
0391     retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
0392                        "cpuidle/powernv:online",
0393                        powernv_cpuidle_cpu_online, NULL);
0394     WARN_ON(retval < 0);
0395     retval = cpuhp_setup_state_nocalls(CPUHP_CPUIDLE_DEAD,
0396                        "cpuidle/powernv:dead", NULL,
0397                        powernv_cpuidle_cpu_dead);
0398     WARN_ON(retval < 0);
0399     printk(KERN_DEBUG "powernv_idle_driver registered\n");
0400     return 0;
0401 }
0402 
0403 device_initcall(powernv_processor_idle_init);