Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * intel_idle.c - native hardware idle loop for modern Intel processors
0004  *
0005  * Copyright (c) 2013 - 2020, Intel Corporation.
0006  * Len Brown <len.brown@intel.com>
0007  * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
0008  */
0009 
0010 /*
0011  * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
0012  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
0013  * make Linux more efficient on these processors, as intel_idle knows
0014  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
0015  */
0016 
0017 /*
0018  * Design Assumptions
0019  *
0020  * All CPUs have same idle states as boot CPU
0021  *
0022  * Chipset BM_STS (bus master status) bit is a NOP
0023  *  for preventing entry into deep C-states
0024  *
0025  * CPU will flush caches as needed when entering a C-state via MWAIT
0026  *  (in contrast to entering ACPI C3, in which case the WBINVD
0027  *  instruction needs to be executed to flush the caches)
0028  */
0029 
0030 /*
0031  * Known limitations
0032  *
0033  * ACPI has a .suspend hack to turn off deep C-states during suspend
0034  * to avoid complications with the lapic timer workaround.
0035  * Have not seen issues with suspend, but may need same workaround here.
0036  *
0037  */
0038 
0039 /* un-comment DEBUG to enable pr_debug() statements */
0040 /* #define DEBUG */
0041 
0042 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0043 
0044 #include <linux/acpi.h>
0045 #include <linux/kernel.h>
0046 #include <linux/cpuidle.h>
0047 #include <linux/tick.h>
0048 #include <trace/events/power.h>
0049 #include <linux/sched.h>
0050 #include <linux/sched/smt.h>
0051 #include <linux/notifier.h>
0052 #include <linux/cpu.h>
0053 #include <linux/moduleparam.h>
0054 #include <asm/cpu_device_id.h>
0055 #include <asm/intel-family.h>
0056 #include <asm/nospec-branch.h>
0057 #include <asm/mwait.h>
0058 #include <asm/msr.h>
0059 #include <asm/fpu/api.h>
0060 
0061 #define INTEL_IDLE_VERSION "0.5.1"
0062 
0063 static struct cpuidle_driver intel_idle_driver = {
0064     .name = "intel_idle",
0065     .owner = THIS_MODULE,
0066 };
0067 /* intel_idle.max_cstate=0 disables driver */
0068 static int max_cstate = CPUIDLE_STATE_MAX - 1;
0069 static unsigned int disabled_states_mask;
0070 static unsigned int preferred_states_mask;
0071 
0072 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
0073 
0074 static unsigned long auto_demotion_disable_flags;
0075 
/*
 * What to do with the "C1E promotion" setting. The default is to preserve
 * whatever is already configured.
 */
static enum {
    C1E_PROMOTION_PRESERVE,     /* leave the current setting alone */
    C1E_PROMOTION_ENABLE,
    C1E_PROMOTION_DISABLE,
} c1e_promotion = C1E_PROMOTION_PRESERVE;
0081 
/*
 * Per-CPU-model driver configuration: which idle-state table to use plus a
 * handful of model-specific quirk switches.
 */
struct idle_cpu {
    /* Idle-state table for this model (one of the *_cstates arrays). */
    struct cpuidle_state *state_table;

    /*
     * Hardware C-state auto-demotion is not always the best choice;
     * this holds the enable bits that should be cleared.
     */
    unsigned long auto_demotion_disable_flags;
    /* NOTE(review): presumably the Bay Trail variant of the above - confirm. */
    bool byt_auto_demotion_disable_flag;
    /* NOTE(review): presumably requests C1E promotion be turned off - confirm. */
    bool disable_promotion_to_c1e;
    /* NOTE(review): presumably "consult ACPI _CST for this model" - confirm. */
    bool use_acpi;
};
0094 
0095 static const struct idle_cpu *icpu __initdata;
0096 static struct cpuidle_state *cpuidle_state_table __initdata;
0097 
0098 static unsigned int mwait_substates __initdata;
0099 
/*
 * Driver-private idle-state flags, occupying bits 14-17 of the state's
 * 'flags' word.
 */

/*
 * Turn interrupts on before entering the C-state. For some platforms and
 * some C-states this can measurably reduce interrupt latency.
 */
#define CPUIDLE_FLAG_IRQ_ENABLE BIT(14)

/* Keep this state enabled by default even when ACPI _CST does not list it. */
#define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15)

/*
 * Turn IBRS off across idle (when KERNEL_IBRS is in effect). Mutually
 * exclusive with CPUIDLE_FLAG_IRQ_ENABLE.
 */
#define CPUIDLE_FLAG_IBRS BIT(16)

/* Initialize the large xstate before entering the C6 state. */
#define CPUIDLE_FLAG_INIT_XSTATE BIT(17)
0121 
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state:
 *
 *   flg2MWAIT(flags) - extract the 8-bit hint from bits 31:24 of @flags.
 *   MWAIT2flg(eax)   - place the low 8 bits of @eax into bits 31:24.
 *
 * Both macro arguments are fully parenthesized so that compound expressions
 * (e.g. "a | b") are masked as a whole, and the mask in MWAIT2flg() is
 * unsigned so that hints >= 0x80 are not shifted into the sign bit of an
 * int (which would be undefined behaviour).
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFFU) << 24)
0131 
0132 static __always_inline int __intel_idle(struct cpuidle_device *dev,
0133                     struct cpuidle_driver *drv, int index)
0134 {
0135     struct cpuidle_state *state = &drv->states[index];
0136     unsigned long eax = flg2MWAIT(state->flags);
0137     unsigned long ecx = 1; /* break on interrupt flag */
0138 
0139     mwait_idle_with_hints(eax, ecx);
0140 
0141     return index;
0142 }
0143 
0144 /**
0145  * intel_idle - Ask the processor to enter the given idle state.
0146  * @dev: cpuidle device of the target CPU.
0147  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
0148  * @index: Target idle state index.
0149  *
0150  * Use the MWAIT instruction to notify the processor that the CPU represented by
0151  * @dev is idle and it can try to enter the idle state corresponding to @index.
0152  *
0153  * If the local APIC timer is not known to be reliable in the target idle state,
0154  * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
0155  *
0156  * Must be called under local_irq_disable().
0157  */
0158 static __cpuidle int intel_idle(struct cpuidle_device *dev,
0159                 struct cpuidle_driver *drv, int index)
0160 {
0161     return __intel_idle(dev, drv, index);
0162 }
0163 
0164 static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
0165                     struct cpuidle_driver *drv, int index)
0166 {
0167     int ret;
0168 
0169     raw_local_irq_enable();
0170     ret = __intel_idle(dev, drv, index);
0171 
0172     /*
0173      * The lockdep hardirqs state may be changed to 'on' with timer
0174      * tick interrupt followed by __do_softirq(). Use local_irq_disable()
0175      * to keep the hardirqs state correct.
0176      */
0177     local_irq_disable();
0178 
0179     return ret;
0180 }
0181 
0182 static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
0183                      struct cpuidle_driver *drv, int index)
0184 {
0185     bool smt_active = sched_smt_active();
0186     u64 spec_ctrl = spec_ctrl_current();
0187     int ret;
0188 
0189     if (smt_active)
0190         wrmsrl(MSR_IA32_SPEC_CTRL, 0);
0191 
0192     ret = __intel_idle(dev, drv, index);
0193 
0194     if (smt_active)
0195         wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
0196 
0197     return ret;
0198 }
0199 
0200 static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
0201                        struct cpuidle_driver *drv, int index)
0202 {
0203     fpu_idle_fpregs();
0204     return __intel_idle(dev, drv, index);
0205 }
0206 
0207 /**
0208  * intel_idle_s2idle - Ask the processor to enter the given idle state.
0209  * @dev: cpuidle device of the target CPU.
0210  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
0211  * @index: Target idle state index.
0212  *
0213  * Use the MWAIT instruction to notify the processor that the CPU represented by
0214  * @dev is idle and it can try to enter the idle state corresponding to @index.
0215  *
0216  * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
0217  * scheduler tick and suspended scheduler clock on the target CPU.
0218  */
0219 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
0220                        struct cpuidle_driver *drv, int index)
0221 {
0222     unsigned long ecx = 1; /* break on interrupt flag */
0223     struct cpuidle_state *state = &drv->states[index];
0224     unsigned long eax = flg2MWAIT(state->flags);
0225 
0226     if (state->flags & CPUIDLE_FLAG_INIT_XSTATE)
0227         fpu_idle_fpregs();
0228 
0229     mwait_idle_with_hints(eax, ecx);
0230 
0231     return 0;
0232 }
0233 
0234 /*
0235  * States are indexed by the cstate number,
0236  * which is also the index into the MWAIT hint array.
0237  * Thus C0 is a dummy.
0238  */
0239 static struct cpuidle_state nehalem_cstates[] __initdata = {
0240     {
0241         .name = "C1",
0242         .desc = "MWAIT 0x00",
0243         .flags = MWAIT2flg(0x00),
0244         .exit_latency = 3,
0245         .target_residency = 6,
0246         .enter = &intel_idle,
0247         .enter_s2idle = intel_idle_s2idle, },
0248     {
0249         .name = "C1E",
0250         .desc = "MWAIT 0x01",
0251         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
0252         .exit_latency = 10,
0253         .target_residency = 20,
0254         .enter = &intel_idle,
0255         .enter_s2idle = intel_idle_s2idle, },
0256     {
0257         .name = "C3",
0258         .desc = "MWAIT 0x10",
0259         .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
0260         .exit_latency = 20,
0261         .target_residency = 80,
0262         .enter = &intel_idle,
0263         .enter_s2idle = intel_idle_s2idle, },
0264     {
0265         .name = "C6",
0266         .desc = "MWAIT 0x20",
0267         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
0268         .exit_latency = 200,
0269         .target_residency = 800,
0270         .enter = &intel_idle,
0271         .enter_s2idle = intel_idle_s2idle, },
0272     {
0273         .enter = NULL }
0274 };
0275 
0276 static struct cpuidle_state snb_cstates[] __initdata = {
0277     {
0278         .name = "C1",
0279         .desc = "MWAIT 0x00",
0280         .flags = MWAIT2flg(0x00),
0281         .exit_latency = 2,
0282         .target_residency = 2,
0283         .enter = &intel_idle,
0284         .enter_s2idle = intel_idle_s2idle, },
0285     {
0286         .name = "C1E",
0287         .desc = "MWAIT 0x01",
0288         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
0289         .exit_latency = 10,
0290         .target_residency = 20,
0291         .enter = &intel_idle,
0292         .enter_s2idle = intel_idle_s2idle, },
0293     {
0294         .name = "C3",
0295         .desc = "MWAIT 0x10",
0296         .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
0297         .exit_latency = 80,
0298         .target_residency = 211,
0299         .enter = &intel_idle,
0300         .enter_s2idle = intel_idle_s2idle, },
0301     {
0302         .name = "C6",
0303         .desc = "MWAIT 0x20",
0304         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
0305         .exit_latency = 104,
0306         .target_residency = 345,
0307         .enter = &intel_idle,
0308         .enter_s2idle = intel_idle_s2idle, },
0309     {
0310         .name = "C7",
0311         .desc = "MWAIT 0x30",
0312         .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
0313         .exit_latency = 109,
0314         .target_residency = 345,
0315         .enter = &intel_idle,
0316         .enter_s2idle = intel_idle_s2idle, },
0317     {
0318         .enter = NULL }
0319 };
0320 
0321 static struct cpuidle_state byt_cstates[] __initdata = {
0322     {
0323         .name = "C1",
0324         .desc = "MWAIT 0x00",
0325         .flags = MWAIT2flg(0x00),
0326         .exit_latency = 1,
0327         .target_residency = 1,
0328         .enter = &intel_idle,
0329         .enter_s2idle = intel_idle_s2idle, },
0330     {
0331         .name = "C6N",
0332         .desc = "MWAIT 0x58",
0333         .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
0334         .exit_latency = 300,
0335         .target_residency = 275,
0336         .enter = &intel_idle,
0337         .enter_s2idle = intel_idle_s2idle, },
0338     {
0339         .name = "C6S",
0340         .desc = "MWAIT 0x52",
0341         .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
0342         .exit_latency = 500,
0343         .target_residency = 560,
0344         .enter = &intel_idle,
0345         .enter_s2idle = intel_idle_s2idle, },
0346     {
0347         .name = "C7",
0348         .desc = "MWAIT 0x60",
0349         .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
0350         .exit_latency = 1200,
0351         .target_residency = 4000,
0352         .enter = &intel_idle,
0353         .enter_s2idle = intel_idle_s2idle, },
0354     {
0355         .name = "C7S",
0356         .desc = "MWAIT 0x64",
0357         .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
0358         .exit_latency = 10000,
0359         .target_residency = 20000,
0360         .enter = &intel_idle,
0361         .enter_s2idle = intel_idle_s2idle, },
0362     {
0363         .enter = NULL }
0364 };
0365 
0366 static struct cpuidle_state cht_cstates[] __initdata = {
0367     {
0368         .name = "C1",
0369         .desc = "MWAIT 0x00",
0370         .flags = MWAIT2flg(0x00),
0371         .exit_latency = 1,
0372         .target_residency = 1,
0373         .enter = &intel_idle,
0374         .enter_s2idle = intel_idle_s2idle, },
0375     {
0376         .name = "C6N",
0377         .desc = "MWAIT 0x58",
0378         .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
0379         .exit_latency = 80,
0380         .target_residency = 275,
0381         .enter = &intel_idle,
0382         .enter_s2idle = intel_idle_s2idle, },
0383     {
0384         .name = "C6S",
0385         .desc = "MWAIT 0x52",
0386         .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
0387         .exit_latency = 200,
0388         .target_residency = 560,
0389         .enter = &intel_idle,
0390         .enter_s2idle = intel_idle_s2idle, },
0391     {
0392         .name = "C7",
0393         .desc = "MWAIT 0x60",
0394         .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
0395         .exit_latency = 1200,
0396         .target_residency = 4000,
0397         .enter = &intel_idle,
0398         .enter_s2idle = intel_idle_s2idle, },
0399     {
0400         .name = "C7S",
0401         .desc = "MWAIT 0x64",
0402         .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
0403         .exit_latency = 10000,
0404         .target_residency = 20000,
0405         .enter = &intel_idle,
0406         .enter_s2idle = intel_idle_s2idle, },
0407     {
0408         .enter = NULL }
0409 };
0410 
0411 static struct cpuidle_state ivb_cstates[] __initdata = {
0412     {
0413         .name = "C1",
0414         .desc = "MWAIT 0x00",
0415         .flags = MWAIT2flg(0x00),
0416         .exit_latency = 1,
0417         .target_residency = 1,
0418         .enter = &intel_idle,
0419         .enter_s2idle = intel_idle_s2idle, },
0420     {
0421         .name = "C1E",
0422         .desc = "MWAIT 0x01",
0423         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
0424         .exit_latency = 10,
0425         .target_residency = 20,
0426         .enter = &intel_idle,
0427         .enter_s2idle = intel_idle_s2idle, },
0428     {
0429         .name = "C3",
0430         .desc = "MWAIT 0x10",
0431         .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
0432         .exit_latency = 59,
0433         .target_residency = 156,
0434         .enter = &intel_idle,
0435         .enter_s2idle = intel_idle_s2idle, },
0436     {
0437         .name = "C6",
0438         .desc = "MWAIT 0x20",
0439         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
0440         .exit_latency = 80,
0441         .target_residency = 300,
0442         .enter = &intel_idle,
0443         .enter_s2idle = intel_idle_s2idle, },
0444     {
0445         .name = "C7",
0446         .desc = "MWAIT 0x30",
0447         .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
0448         .exit_latency = 87,
0449         .target_residency = 300,
0450         .enter = &intel_idle,
0451         .enter_s2idle = intel_idle_s2idle, },
0452     {
0453         .enter = NULL }
0454 };
0455 
0456 static struct cpuidle_state ivt_cstates[] __initdata = {
0457     {
0458         .name = "C1",
0459         .desc = "MWAIT 0x00",
0460         .flags = MWAIT2flg(0x00),
0461         .exit_latency = 1,
0462         .target_residency = 1,
0463         .enter = &intel_idle,
0464         .enter_s2idle = intel_idle_s2idle, },
0465     {
0466         .name = "C1E",
0467         .desc = "MWAIT 0x01",
0468         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
0469         .exit_latency = 10,
0470         .target_residency = 80,
0471         .enter = &intel_idle,
0472         .enter_s2idle = intel_idle_s2idle, },
0473     {
0474         .name = "C3",
0475         .desc = "MWAIT 0x10",
0476         .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
0477         .exit_latency = 59,
0478         .target_residency = 156,
0479         .enter = &intel_idle,
0480         .enter_s2idle = intel_idle_s2idle, },
0481     {
0482         .name = "C6",
0483         .desc = "MWAIT 0x20",
0484         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
0485         .exit_latency = 82,
0486         .target_residency = 300,
0487         .enter = &intel_idle,
0488         .enter_s2idle = intel_idle_s2idle, },
0489     {
0490         .enter = NULL }
0491 };
0492 
0493 static struct cpuidle_state ivt_cstates_4s[] __initdata = {
0494     {
0495         .name = "C1",
0496         .desc = "MWAIT 0x00",
0497         .flags = MWAIT2flg(0x00),
0498         .exit_latency = 1,
0499         .target_residency = 1,
0500         .enter = &intel_idle,
0501         .enter_s2idle = intel_idle_s2idle, },
0502     {
0503         .name = "C1E",
0504         .desc = "MWAIT 0x01",
0505         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
0506         .exit_latency = 10,
0507         .target_residency = 250,
0508         .enter = &intel_idle,
0509         .enter_s2idle = intel_idle_s2idle, },
0510     {
0511         .name = "C3",
0512         .desc = "MWAIT 0x10",
0513         .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
0514         .exit_latency = 59,
0515         .target_residency = 300,
0516         .enter = &intel_idle,
0517         .enter_s2idle = intel_idle_s2idle, },
0518     {
0519         .name = "C6",
0520         .desc = "MWAIT 0x20",
0521         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
0522         .exit_latency = 84,
0523         .target_residency = 400,
0524         .enter = &intel_idle,
0525         .enter_s2idle = intel_idle_s2idle, },
0526     {
0527         .enter = NULL }
0528 };
0529 
0530 static struct cpuidle_state ivt_cstates_8s[] __initdata = {
0531     {
0532         .name = "C1",
0533         .desc = "MWAIT 0x00",
0534         .flags = MWAIT2flg(0x00),
0535         .exit_latency = 1,
0536         .target_residency = 1,
0537         .enter = &intel_idle,
0538         .enter_s2idle = intel_idle_s2idle, },
0539     {
0540         .name = "C1E",
0541         .desc = "MWAIT 0x01",
0542         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
0543         .exit_latency = 10,
0544         .target_residency = 500,
0545         .enter = &intel_idle,
0546         .enter_s2idle = intel_idle_s2idle, },
0547     {
0548         .name = "C3",
0549         .desc = "MWAIT 0x10",
0550         .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
0551         .exit_latency = 59,
0552         .target_residency = 600,
0553         .enter = &intel_idle,
0554         .enter_s2idle = intel_idle_s2idle, },
0555     {
0556         .name = "C6",
0557         .desc = "MWAIT 0x20",
0558         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
0559         .exit_latency = 88,
0560         .target_residency = 700,
0561         .enter = &intel_idle,
0562         .enter_s2idle = intel_idle_s2idle, },
0563     {
0564         .enter = NULL }
0565 };
0566 
0567 static struct cpuidle_state hsw_cstates[] __initdata = {
0568     {
0569         .name = "C1",
0570         .desc = "MWAIT 0x00",
0571         .flags = MWAIT2flg(0x00),
0572         .exit_latency = 2,
0573         .target_residency = 2,
0574         .enter = &intel_idle,
0575         .enter_s2idle = intel_idle_s2idle, },
0576     {
0577         .name = "C1E",
0578         .desc = "MWAIT 0x01",
0579         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
0580         .exit_latency = 10,
0581         .target_residency = 20,
0582         .enter = &intel_idle,
0583         .enter_s2idle = intel_idle_s2idle, },
0584     {
0585         .name = "C3",
0586         .desc = "MWAIT 0x10",
0587         .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
0588         .exit_latency = 33,
0589         .target_residency = 100,
0590         .enter = &intel_idle,
0591         .enter_s2idle = intel_idle_s2idle, },
0592     {
0593         .name = "C6",
0594         .desc = "MWAIT 0x20",
0595         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
0596         .exit_latency = 133,
0597         .target_residency = 400,
0598         .enter = &intel_idle,
0599         .enter_s2idle = intel_idle_s2idle, },
0600     {
0601         .name = "C7s",
0602         .desc = "MWAIT 0x32",
0603         .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
0604         .exit_latency = 166,
0605         .target_residency = 500,
0606         .enter = &intel_idle,
0607         .enter_s2idle = intel_idle_s2idle, },
0608     {
0609         .name = "C8",
0610         .desc = "MWAIT 0x40",
0611         .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
0612         .exit_latency = 300,
0613         .target_residency = 900,
0614         .enter = &intel_idle,
0615         .enter_s2idle = intel_idle_s2idle, },
0616     {
0617         .name = "C9",
0618         .desc = "MWAIT 0x50",
0619         .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
0620         .exit_latency = 600,
0621         .target_residency = 1800,
0622         .enter = &intel_idle,
0623         .enter_s2idle = intel_idle_s2idle, },
0624     {
0625         .name = "C10",
0626         .desc = "MWAIT 0x60",
0627         .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
0628         .exit_latency = 2600,
0629         .target_residency = 7700,
0630         .enter = &intel_idle,
0631         .enter_s2idle = intel_idle_s2idle, },
0632     {
0633         .enter = NULL }
0634 };
0635 static struct cpuidle_state bdw_cstates[] __initdata = {
0636     {
0637         .name = "C1",
0638         .desc = "MWAIT 0x00",
0639         .flags = MWAIT2flg(0x00),
0640         .exit_latency = 2,
0641         .target_residency = 2,
0642         .enter = &intel_idle,
0643         .enter_s2idle = intel_idle_s2idle, },
0644     {
0645         .name = "C1E",
0646         .desc = "MWAIT 0x01",
0647         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
0648         .exit_latency = 10,
0649         .target_residency = 20,
0650         .enter = &intel_idle,
0651         .enter_s2idle = intel_idle_s2idle, },
0652     {
0653         .name = "C3",
0654         .desc = "MWAIT 0x10",
0655         .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
0656         .exit_latency = 40,
0657         .target_residency = 100,
0658         .enter = &intel_idle,
0659         .enter_s2idle = intel_idle_s2idle, },
0660     {
0661         .name = "C6",
0662         .desc = "MWAIT 0x20",
0663         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
0664         .exit_latency = 133,
0665         .target_residency = 400,
0666         .enter = &intel_idle,
0667         .enter_s2idle = intel_idle_s2idle, },
0668     {
0669         .name = "C7s",
0670         .desc = "MWAIT 0x32",
0671         .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
0672         .exit_latency = 166,
0673         .target_residency = 500,
0674         .enter = &intel_idle,
0675         .enter_s2idle = intel_idle_s2idle, },
0676     {
0677         .name = "C8",
0678         .desc = "MWAIT 0x40",
0679         .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
0680         .exit_latency = 300,
0681         .target_residency = 900,
0682         .enter = &intel_idle,
0683         .enter_s2idle = intel_idle_s2idle, },
0684     {
0685         .name = "C9",
0686         .desc = "MWAIT 0x50",
0687         .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
0688         .exit_latency = 600,
0689         .target_residency = 1800,
0690         .enter = &intel_idle,
0691         .enter_s2idle = intel_idle_s2idle, },
0692     {
0693         .name = "C10",
0694         .desc = "MWAIT 0x60",
0695         .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
0696         .exit_latency = 2600,
0697         .target_residency = 7700,
0698         .enter = &intel_idle,
0699         .enter_s2idle = intel_idle_s2idle, },
0700     {
0701         .enter = NULL }
0702 };
0703 
0704 static struct cpuidle_state skl_cstates[] __initdata = {
0705     {
0706         .name = "C1",
0707         .desc = "MWAIT 0x00",
0708         .flags = MWAIT2flg(0x00),
0709         .exit_latency = 2,
0710         .target_residency = 2,
0711         .enter = &intel_idle,
0712         .enter_s2idle = intel_idle_s2idle, },
0713     {
0714         .name = "C1E",
0715         .desc = "MWAIT 0x01",
0716         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
0717         .exit_latency = 10,
0718         .target_residency = 20,
0719         .enter = &intel_idle,
0720         .enter_s2idle = intel_idle_s2idle, },
0721     {
0722         .name = "C3",
0723         .desc = "MWAIT 0x10",
0724         .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
0725         .exit_latency = 70,
0726         .target_residency = 100,
0727         .enter = &intel_idle,
0728         .enter_s2idle = intel_idle_s2idle, },
0729     {
0730         .name = "C6",
0731         .desc = "MWAIT 0x20",
0732         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
0733         .exit_latency = 85,
0734         .target_residency = 200,
0735         .enter = &intel_idle,
0736         .enter_s2idle = intel_idle_s2idle, },
0737     {
0738         .name = "C7s",
0739         .desc = "MWAIT 0x33",
0740         .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
0741         .exit_latency = 124,
0742         .target_residency = 800,
0743         .enter = &intel_idle,
0744         .enter_s2idle = intel_idle_s2idle, },
0745     {
0746         .name = "C8",
0747         .desc = "MWAIT 0x40",
0748         .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
0749         .exit_latency = 200,
0750         .target_residency = 800,
0751         .enter = &intel_idle,
0752         .enter_s2idle = intel_idle_s2idle, },
0753     {
0754         .name = "C9",
0755         .desc = "MWAIT 0x50",
0756         .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
0757         .exit_latency = 480,
0758         .target_residency = 5000,
0759         .enter = &intel_idle,
0760         .enter_s2idle = intel_idle_s2idle, },
0761     {
0762         .name = "C10",
0763         .desc = "MWAIT 0x60",
0764         .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
0765         .exit_latency = 890,
0766         .target_residency = 5000,
0767         .enter = &intel_idle,
0768         .enter_s2idle = intel_idle_s2idle, },
0769     {
0770         .enter = NULL }
0771 };
0772 
0773 static struct cpuidle_state skx_cstates[] __initdata = {
0774     {
0775         .name = "C1",
0776         .desc = "MWAIT 0x00",
0777         .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
0778         .exit_latency = 2,
0779         .target_residency = 2,
0780         .enter = &intel_idle,
0781         .enter_s2idle = intel_idle_s2idle, },
0782     {
0783         .name = "C1E",
0784         .desc = "MWAIT 0x01",
0785         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
0786         .exit_latency = 10,
0787         .target_residency = 20,
0788         .enter = &intel_idle,
0789         .enter_s2idle = intel_idle_s2idle, },
0790     {
0791         .name = "C6",
0792         .desc = "MWAIT 0x20",
0793         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
0794         .exit_latency = 133,
0795         .target_residency = 600,
0796         .enter = &intel_idle,
0797         .enter_s2idle = intel_idle_s2idle, },
0798     {
0799         .enter = NULL }
0800 };
0801 
0802 static struct cpuidle_state icx_cstates[] __initdata = {
0803     {
0804         .name = "C1",
0805         .desc = "MWAIT 0x00",
0806         .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
0807         .exit_latency = 1,
0808         .target_residency = 1,
0809         .enter = &intel_idle,
0810         .enter_s2idle = intel_idle_s2idle, },
0811     {
0812         .name = "C1E",
0813         .desc = "MWAIT 0x01",
0814         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
0815         .exit_latency = 4,
0816         .target_residency = 4,
0817         .enter = &intel_idle,
0818         .enter_s2idle = intel_idle_s2idle, },
0819     {
0820         .name = "C6",
0821         .desc = "MWAIT 0x20",
0822         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
0823         .exit_latency = 170,
0824         .target_residency = 600,
0825         .enter = &intel_idle,
0826         .enter_s2idle = intel_idle_s2idle, },
0827     {
0828         .enter = NULL }
0829 };
0830 
0831 /*
0832  * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
0833  * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
0834  * But in this case there is effectively no C1, because C1 requests are
0835  * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
0836  * and C1E requests end up with C1, so there is effectively no C1E.
0837  *
0838  * By default we enable C1E and disable C1 by marking it with
0839  * 'CPUIDLE_FLAG_UNUSABLE'.
0840  */
0841 static struct cpuidle_state adl_cstates[] __initdata = {
0842     {
0843         .name = "C1",
0844         .desc = "MWAIT 0x00",
0845         .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
0846         .exit_latency = 1,
0847         .target_residency = 1,
0848         .enter = &intel_idle,
0849         .enter_s2idle = intel_idle_s2idle, },
0850     {
0851         .name = "C1E",
0852         .desc = "MWAIT 0x01",
0853         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
0854         .exit_latency = 2,
0855         .target_residency = 4,
0856         .enter = &intel_idle,
0857         .enter_s2idle = intel_idle_s2idle, },
0858     {
0859         .name = "C6",
0860         .desc = "MWAIT 0x20",
0861         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
0862         .exit_latency = 220,
0863         .target_residency = 600,
0864         .enter = &intel_idle,
0865         .enter_s2idle = intel_idle_s2idle, },
0866     {
0867         .name = "C8",
0868         .desc = "MWAIT 0x40",
0869         .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
0870         .exit_latency = 280,
0871         .target_residency = 800,
0872         .enter = &intel_idle,
0873         .enter_s2idle = intel_idle_s2idle, },
0874     {
0875         .name = "C10",
0876         .desc = "MWAIT 0x60",
0877         .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
0878         .exit_latency = 680,
0879         .target_residency = 2000,
0880         .enter = &intel_idle,
0881         .enter_s2idle = intel_idle_s2idle, },
0882     {
0883         .enter = NULL }
0884 };
0885 
0886 static struct cpuidle_state adl_l_cstates[] __initdata = {
0887     {
0888         .name = "C1",
0889         .desc = "MWAIT 0x00",
0890         .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
0891         .exit_latency = 1,
0892         .target_residency = 1,
0893         .enter = &intel_idle,
0894         .enter_s2idle = intel_idle_s2idle, },
0895     {
0896         .name = "C1E",
0897         .desc = "MWAIT 0x01",
0898         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
0899         .exit_latency = 2,
0900         .target_residency = 4,
0901         .enter = &intel_idle,
0902         .enter_s2idle = intel_idle_s2idle, },
0903     {
0904         .name = "C6",
0905         .desc = "MWAIT 0x20",
0906         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
0907         .exit_latency = 170,
0908         .target_residency = 500,
0909         .enter = &intel_idle,
0910         .enter_s2idle = intel_idle_s2idle, },
0911     {
0912         .name = "C8",
0913         .desc = "MWAIT 0x40",
0914         .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
0915         .exit_latency = 200,
0916         .target_residency = 600,
0917         .enter = &intel_idle,
0918         .enter_s2idle = intel_idle_s2idle, },
0919     {
0920         .name = "C10",
0921         .desc = "MWAIT 0x60",
0922         .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
0923         .exit_latency = 230,
0924         .target_residency = 700,
0925         .enter = &intel_idle,
0926         .enter_s2idle = intel_idle_s2idle, },
0927     {
0928         .enter = NULL }
0929 };
0930 
0931 static struct cpuidle_state spr_cstates[] __initdata = {
0932     {
0933         .name = "C1",
0934         .desc = "MWAIT 0x00",
0935         .flags = MWAIT2flg(0x00),
0936         .exit_latency = 1,
0937         .target_residency = 1,
0938         .enter = &intel_idle,
0939         .enter_s2idle = intel_idle_s2idle, },
0940     {
0941         .name = "C1E",
0942         .desc = "MWAIT 0x01",
0943         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
0944         .exit_latency = 2,
0945         .target_residency = 4,
0946         .enter = &intel_idle,
0947         .enter_s2idle = intel_idle_s2idle, },
0948     {
0949         .name = "C6",
0950         .desc = "MWAIT 0x20",
0951         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
0952                        CPUIDLE_FLAG_INIT_XSTATE,
0953         .exit_latency = 290,
0954         .target_residency = 800,
0955         .enter = &intel_idle,
0956         .enter_s2idle = intel_idle_s2idle, },
0957     {
0958         .enter = NULL }
0959 };
0960 
0961 static struct cpuidle_state atom_cstates[] __initdata = {
0962     {
0963         .name = "C1E",
0964         .desc = "MWAIT 0x00",
0965         .flags = MWAIT2flg(0x00),
0966         .exit_latency = 10,
0967         .target_residency = 20,
0968         .enter = &intel_idle,
0969         .enter_s2idle = intel_idle_s2idle, },
0970     {
0971         .name = "C2",
0972         .desc = "MWAIT 0x10",
0973         .flags = MWAIT2flg(0x10),
0974         .exit_latency = 20,
0975         .target_residency = 80,
0976         .enter = &intel_idle,
0977         .enter_s2idle = intel_idle_s2idle, },
0978     {
0979         .name = "C4",
0980         .desc = "MWAIT 0x30",
0981         .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
0982         .exit_latency = 100,
0983         .target_residency = 400,
0984         .enter = &intel_idle,
0985         .enter_s2idle = intel_idle_s2idle, },
0986     {
0987         .name = "C6",
0988         .desc = "MWAIT 0x52",
0989         .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
0990         .exit_latency = 140,
0991         .target_residency = 560,
0992         .enter = &intel_idle,
0993         .enter_s2idle = intel_idle_s2idle, },
0994     {
0995         .enter = NULL }
0996 };
0997 static struct cpuidle_state tangier_cstates[] __initdata = {
0998     {
0999         .name = "C1",
1000         .desc = "MWAIT 0x00",
1001         .flags = MWAIT2flg(0x00),
1002         .exit_latency = 1,
1003         .target_residency = 4,
1004         .enter = &intel_idle,
1005         .enter_s2idle = intel_idle_s2idle, },
1006     {
1007         .name = "C4",
1008         .desc = "MWAIT 0x30",
1009         .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
1010         .exit_latency = 100,
1011         .target_residency = 400,
1012         .enter = &intel_idle,
1013         .enter_s2idle = intel_idle_s2idle, },
1014     {
1015         .name = "C6",
1016         .desc = "MWAIT 0x52",
1017         .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
1018         .exit_latency = 140,
1019         .target_residency = 560,
1020         .enter = &intel_idle,
1021         .enter_s2idle = intel_idle_s2idle, },
1022     {
1023         .name = "C7",
1024         .desc = "MWAIT 0x60",
1025         .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1026         .exit_latency = 1200,
1027         .target_residency = 4000,
1028         .enter = &intel_idle,
1029         .enter_s2idle = intel_idle_s2idle, },
1030     {
1031         .name = "C9",
1032         .desc = "MWAIT 0x64",
1033         .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
1034         .exit_latency = 10000,
1035         .target_residency = 20000,
1036         .enter = &intel_idle,
1037         .enter_s2idle = intel_idle_s2idle, },
1038     {
1039         .enter = NULL }
1040 };
1041 static struct cpuidle_state avn_cstates[] __initdata = {
1042     {
1043         .name = "C1",
1044         .desc = "MWAIT 0x00",
1045         .flags = MWAIT2flg(0x00),
1046         .exit_latency = 2,
1047         .target_residency = 2,
1048         .enter = &intel_idle,
1049         .enter_s2idle = intel_idle_s2idle, },
1050     {
1051         .name = "C6",
1052         .desc = "MWAIT 0x51",
1053         .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
1054         .exit_latency = 15,
1055         .target_residency = 45,
1056         .enter = &intel_idle,
1057         .enter_s2idle = intel_idle_s2idle, },
1058     {
1059         .enter = NULL }
1060 };
1061 static struct cpuidle_state knl_cstates[] __initdata = {
1062     {
1063         .name = "C1",
1064         .desc = "MWAIT 0x00",
1065         .flags = MWAIT2flg(0x00),
1066         .exit_latency = 1,
1067         .target_residency = 2,
1068         .enter = &intel_idle,
1069         .enter_s2idle = intel_idle_s2idle },
1070     {
1071         .name = "C6",
1072         .desc = "MWAIT 0x10",
1073         .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
1074         .exit_latency = 120,
1075         .target_residency = 500,
1076         .enter = &intel_idle,
1077         .enter_s2idle = intel_idle_s2idle },
1078     {
1079         .enter = NULL }
1080 };
1081 
1082 static struct cpuidle_state bxt_cstates[] __initdata = {
1083     {
1084         .name = "C1",
1085         .desc = "MWAIT 0x00",
1086         .flags = MWAIT2flg(0x00),
1087         .exit_latency = 2,
1088         .target_residency = 2,
1089         .enter = &intel_idle,
1090         .enter_s2idle = intel_idle_s2idle, },
1091     {
1092         .name = "C1E",
1093         .desc = "MWAIT 0x01",
1094         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1095         .exit_latency = 10,
1096         .target_residency = 20,
1097         .enter = &intel_idle,
1098         .enter_s2idle = intel_idle_s2idle, },
1099     {
1100         .name = "C6",
1101         .desc = "MWAIT 0x20",
1102         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1103         .exit_latency = 133,
1104         .target_residency = 133,
1105         .enter = &intel_idle,
1106         .enter_s2idle = intel_idle_s2idle, },
1107     {
1108         .name = "C7s",
1109         .desc = "MWAIT 0x31",
1110         .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
1111         .exit_latency = 155,
1112         .target_residency = 155,
1113         .enter = &intel_idle,
1114         .enter_s2idle = intel_idle_s2idle, },
1115     {
1116         .name = "C8",
1117         .desc = "MWAIT 0x40",
1118         .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
1119         .exit_latency = 1000,
1120         .target_residency = 1000,
1121         .enter = &intel_idle,
1122         .enter_s2idle = intel_idle_s2idle, },
1123     {
1124         .name = "C9",
1125         .desc = "MWAIT 0x50",
1126         .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
1127         .exit_latency = 2000,
1128         .target_residency = 2000,
1129         .enter = &intel_idle,
1130         .enter_s2idle = intel_idle_s2idle, },
1131     {
1132         .name = "C10",
1133         .desc = "MWAIT 0x60",
1134         .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1135         .exit_latency = 10000,
1136         .target_residency = 10000,
1137         .enter = &intel_idle,
1138         .enter_s2idle = intel_idle_s2idle, },
1139     {
1140         .enter = NULL }
1141 };
1142 
1143 static struct cpuidle_state dnv_cstates[] __initdata = {
1144     {
1145         .name = "C1",
1146         .desc = "MWAIT 0x00",
1147         .flags = MWAIT2flg(0x00),
1148         .exit_latency = 2,
1149         .target_residency = 2,
1150         .enter = &intel_idle,
1151         .enter_s2idle = intel_idle_s2idle, },
1152     {
1153         .name = "C1E",
1154         .desc = "MWAIT 0x01",
1155         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1156         .exit_latency = 10,
1157         .target_residency = 20,
1158         .enter = &intel_idle,
1159         .enter_s2idle = intel_idle_s2idle, },
1160     {
1161         .name = "C6",
1162         .desc = "MWAIT 0x20",
1163         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1164         .exit_latency = 50,
1165         .target_residency = 500,
1166         .enter = &intel_idle,
1167         .enter_s2idle = intel_idle_s2idle, },
1168     {
1169         .enter = NULL }
1170 };
1171 
1172 /*
1173  * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
1174  * C6, and this is indicated in the CPUID mwait leaf.
1175  */
1176 static struct cpuidle_state snr_cstates[] __initdata = {
1177     {
1178         .name = "C1",
1179         .desc = "MWAIT 0x00",
1180         .flags = MWAIT2flg(0x00),
1181         .exit_latency = 2,
1182         .target_residency = 2,
1183         .enter = &intel_idle,
1184         .enter_s2idle = intel_idle_s2idle, },
1185     {
1186         .name = "C1E",
1187         .desc = "MWAIT 0x01",
1188         .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1189         .exit_latency = 15,
1190         .target_residency = 25,
1191         .enter = &intel_idle,
1192         .enter_s2idle = intel_idle_s2idle, },
1193     {
1194         .name = "C6",
1195         .desc = "MWAIT 0x20",
1196         .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1197         .exit_latency = 130,
1198         .target_residency = 500,
1199         .enter = &intel_idle,
1200         .enter_s2idle = intel_idle_s2idle, },
1201     {
1202         .enter = NULL }
1203 };
1204 
/*
 * Per-CPU-model driver parameters.
 *
 * Each idle_cpu descriptor names the C-state table to use (.state_table)
 * plus optional model-specific settings, and is attached to one or more
 * CPU models through intel_idle_ids[].
 *
 * .use_acpi: when set (or when the use_acpi module parameter forces it),
 * intel_idle_init_cstates_icpu() marks a state CPUIDLE_FLAG_OFF by default
 * if intel_idle_off_by_default() reports that its MWAIT hint is not listed
 * in ACPI _CST, unless the state carries CPUIDLE_FLAG_ALWAYS_ENABLE.
 *
 * NOTE(review): the auto-demotion and C1E-promotion fields are consumed
 * outside this part of the file; see their users for exact semantics.
 */
static const struct idle_cpu idle_cpu_nehalem __initconst = {
    .state_table = nehalem_cstates,
    .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
    .disable_promotion_to_c1e = true,
};

/* Same as idle_cpu_nehalem, but additionally consults ACPI _CST. */
static const struct idle_cpu idle_cpu_nhx __initconst = {
    .state_table = nehalem_cstates,
    .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
    .disable_promotion_to_c1e = true,
    .use_acpi = true,
};

static const struct idle_cpu idle_cpu_atom __initconst = {
    .state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier __initconst = {
    .state_table = tangier_cstates,
};

/* Shares atom_cstates with idle_cpu_atom but sets an auto-demotion flag. */
static const struct idle_cpu idle_cpu_lincroft __initconst = {
    .state_table = atom_cstates,
    .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb __initconst = {
    .state_table = snb_cstates,
    .disable_promotion_to_c1e = true,
};

/* Same as idle_cpu_snb, but additionally consults ACPI _CST. */
static const struct idle_cpu idle_cpu_snx __initconst = {
    .state_table = snb_cstates,
    .disable_promotion_to_c1e = true,
    .use_acpi = true,
};

static const struct idle_cpu idle_cpu_byt __initconst = {
    .state_table = byt_cstates,
    .disable_promotion_to_c1e = true,
    .byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht __initconst = {
    .state_table = cht_cstates,
    .disable_promotion_to_c1e = true,
    .byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb __initconst = {
    .state_table = ivb_cstates,
    .disable_promotion_to_c1e = true,
};

/*
 * ivt_cstates is only the default table here: ivt_idle_state_table_update()
 * may switch cpuidle_state_table to the 4- or 8-socket variant.
 */
static const struct idle_cpu idle_cpu_ivt __initconst = {
    .state_table = ivt_cstates,
    .disable_promotion_to_c1e = true,
    .use_acpi = true,
};

static const struct idle_cpu idle_cpu_hsw __initconst = {
    .state_table = hsw_cstates,
    .disable_promotion_to_c1e = true,
};

/* Same as idle_cpu_hsw, but additionally consults ACPI _CST. */
static const struct idle_cpu idle_cpu_hsx __initconst = {
    .state_table = hsw_cstates,
    .disable_promotion_to_c1e = true,
    .use_acpi = true,
};

static const struct idle_cpu idle_cpu_bdw __initconst = {
    .state_table = bdw_cstates,
    .disable_promotion_to_c1e = true,
};

/* Same as idle_cpu_bdw, but additionally consults ACPI _CST. */
static const struct idle_cpu idle_cpu_bdx __initconst = {
    .state_table = bdw_cstates,
    .disable_promotion_to_c1e = true,
    .use_acpi = true,
};

static const struct idle_cpu idle_cpu_skl __initconst = {
    .state_table = skl_cstates,
    .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx __initconst = {
    .state_table = skx_cstates,
    .disable_promotion_to_c1e = true,
    .use_acpi = true,
};

static const struct idle_cpu idle_cpu_icx __initconst = {
    .state_table = icx_cstates,
    .disable_promotion_to_c1e = true,
    .use_acpi = true,
};

/*
 * The Alder Lake descriptors set no C1E-promotion field;
 * adl_idle_state_table_update() assigns c1e_promotion directly.
 */
static const struct idle_cpu idle_cpu_adl __initconst = {
    .state_table = adl_cstates,
};

static const struct idle_cpu idle_cpu_adl_l __initconst = {
    .state_table = adl_l_cstates,
};

static const struct idle_cpu idle_cpu_spr __initconst = {
    .state_table = spr_cstates,
    .disable_promotion_to_c1e = true,
    .use_acpi = true,
};

static const struct idle_cpu idle_cpu_avn __initconst = {
    .state_table = avn_cstates,
    .disable_promotion_to_c1e = true,
    .use_acpi = true,
};

static const struct idle_cpu idle_cpu_knl __initconst = {
    .state_table = knl_cstates,
    .use_acpi = true,
};

static const struct idle_cpu idle_cpu_bxt __initconst = {
    .state_table = bxt_cstates,
    .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv __initconst = {
    .state_table = dnv_cstates,
    .disable_promotion_to_c1e = true,
    .use_acpi = true,
};

static const struct idle_cpu idle_cpu_snr __initconst = {
    .state_table = snr_cstates,
    .disable_promotion_to_c1e = true,
    .use_acpi = true,
};
1345 
/*
 * CPU model match table: associates each supported Intel CPU model with
 * the idle_cpu descriptor carrying its C-state table and settings.
 * The empty {} entry terminates the table.
 */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
    X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,      &idle_cpu_nhx),
    X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,     &idle_cpu_nehalem),
    X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,       &idle_cpu_nehalem),
    X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,        &idle_cpu_nehalem),
    X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,     &idle_cpu_nhx),
    X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,      &idle_cpu_nhx),
    X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,    &idle_cpu_atom),
    X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,    &idle_cpu_lincroft),
    X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,     &idle_cpu_nhx),
    X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,     &idle_cpu_snb),
    X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,   &idle_cpu_snx),
    X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,   &idle_cpu_atom),
    X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &idle_cpu_byt),
    X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier),
    X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,    &idle_cpu_cht),
    X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,       &idle_cpu_ivb),
    X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,     &idle_cpu_ivt),
    X86_MATCH_INTEL_FAM6_MODEL(HASWELL,     &idle_cpu_hsw),
    X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,       &idle_cpu_hsx),
    X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,       &idle_cpu_hsw),
    X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,       &idle_cpu_hsw),
    X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,   &idle_cpu_avn),
    X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,       &idle_cpu_bdw),
    X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,     &idle_cpu_bdw),
    X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,     &idle_cpu_bdx),
    X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,     &idle_cpu_bdx),
    X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,       &idle_cpu_skl),
    X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,     &idle_cpu_skl),
    X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,      &idle_cpu_skl),
    X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,        &idle_cpu_skl),
    X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,       &idle_cpu_skx),
    X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,       &idle_cpu_icx),
    X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,       &idle_cpu_icx),
    X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,       &idle_cpu_adl),
    X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,     &idle_cpu_adl_l),
    X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &idle_cpu_spr),
    X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,    &idle_cpu_knl),
    X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,    &idle_cpu_knl),
    X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,   &idle_cpu_bxt),
    X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,  &idle_cpu_bxt),
    X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv),
    X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,  &idle_cpu_snr),
    {}
};
1391 
/*
 * Generic match entry: Intel vendor, family 6, with X86_FEATURE_MWAIT and
 * no driver data (NULL).
 * NOTE(review): presumably the fallback for CPUs without a model-specific
 * entry in intel_idle_ids[] - confirm against the init code.
 */
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
    X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
    {}
};
1396 
1397 static bool __init intel_idle_max_cstate_reached(int cstate)
1398 {
1399     if (cstate + 1 > max_cstate) {
1400         pr_info("max_cstate %d reached\n", max_cstate);
1401         return true;
1402     }
1403     return false;
1404 }
1405 
1406 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
1407 {
1408     unsigned long eax = flg2MWAIT(state->flags);
1409 
1410     if (boot_cpu_has(X86_FEATURE_ARAT))
1411         return false;
1412 
1413     /*
1414      * Switch over to one-shot tick broadcast if the target C-state
1415      * is deeper than C1.
1416      */
1417     return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
1418 }
1419 
1420 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1421 #include <acpi/processor.h>
1422 
/*
 * "no_acpi" module parameter: when set, intel_idle_acpi_cst_extract()
 * returns 'false' without evaluating _CST.
 */
static bool no_acpi __read_mostly;
module_param(no_acpi, bool, 0444);
MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");

/*
 * "use_acpi" module parameter: treated like idle_cpu.use_acpi being set
 * when intel_idle_init_cstates_icpu() decides which states are off by
 * default.
 */
static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
module_param_named(use_acpi, force_use_acpi, bool, 0444);
MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");

/*
 * _CST data filled in by intel_idle_acpi_cst_extract(); its count is reset
 * to 0 there when no usable _CST was found.
 */
static struct acpi_processor_power acpi_state_table __initdata;
1432 
1433 /**
1434  * intel_idle_cst_usable - Check if the _CST information can be used.
1435  *
1436  * Check if all of the C-states listed by _CST in the max_cstate range are
1437  * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1438  */
1439 static bool __init intel_idle_cst_usable(void)
1440 {
1441     int cstate, limit;
1442 
1443     limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1444               acpi_state_table.count);
1445 
1446     for (cstate = 1; cstate < limit; cstate++) {
1447         struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1448 
1449         if (cx->entry_method != ACPI_CSTATE_FFH)
1450             return false;
1451     }
1452 
1453     return true;
1454 }
1455 
1456 static bool __init intel_idle_acpi_cst_extract(void)
1457 {
1458     unsigned int cpu;
1459 
1460     if (no_acpi) {
1461         pr_debug("Not allowed to use ACPI _CST\n");
1462         return false;
1463     }
1464 
1465     for_each_possible_cpu(cpu) {
1466         struct acpi_processor *pr = per_cpu(processors, cpu);
1467 
1468         if (!pr)
1469             continue;
1470 
1471         if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
1472             continue;
1473 
1474         acpi_state_table.count++;
1475 
1476         if (!intel_idle_cst_usable())
1477             continue;
1478 
1479         if (!acpi_processor_claim_cst_control())
1480             break;
1481 
1482         return true;
1483     }
1484 
1485     acpi_state_table.count = 0;
1486     pr_debug("ACPI _CST not found or not usable\n");
1487     return false;
1488 }
1489 
1490 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
1491 {
1492     int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1493 
1494     /*
1495      * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1496      * the interesting states are ACPI_CSTATE_FFH.
1497      */
1498     for (cstate = 1; cstate < limit; cstate++) {
1499         struct acpi_processor_cx *cx;
1500         struct cpuidle_state *state;
1501 
1502         if (intel_idle_max_cstate_reached(cstate - 1))
1503             break;
1504 
1505         cx = &acpi_state_table.states[cstate];
1506 
1507         state = &drv->states[drv->state_count++];
1508 
1509         snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
1510         strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
1511         state->exit_latency = cx->latency;
1512         /*
1513          * For C1-type C-states use the same number for both the exit
1514          * latency and target residency, because that is the case for
1515          * C1 in the majority of the static C-states tables above.
1516          * For the other types of C-states, however, set the target
1517          * residency to 3 times the exit latency which should lead to
1518          * a reasonable balance between energy-efficiency and
1519          * performance in the majority of interesting cases.
1520          */
1521         state->target_residency = cx->latency;
1522         if (cx->type > ACPI_STATE_C1)
1523             state->target_residency *= 3;
1524 
1525         state->flags = MWAIT2flg(cx->address);
1526         if (cx->type > ACPI_STATE_C2)
1527             state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
1528 
1529         if (disabled_states_mask & BIT(cstate))
1530             state->flags |= CPUIDLE_FLAG_OFF;
1531 
1532         if (intel_idle_state_needs_timer_stop(state))
1533             state->flags |= CPUIDLE_FLAG_TIMER_STOP;
1534 
1535         state->enter = intel_idle;
1536         state->enter_s2idle = intel_idle_s2idle;
1537     }
1538 }
1539 
1540 static bool __init intel_idle_off_by_default(u32 mwait_hint)
1541 {
1542     int cstate, limit;
1543 
1544     /*
1545      * If there are no _CST C-states, do not disable any C-states by
1546      * default.
1547      */
1548     if (!acpi_state_table.count)
1549         return false;
1550 
1551     limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1552     /*
1553      * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1554      * the interesting states are ACPI_CSTATE_FFH.
1555      */
1556     for (cstate = 1; cstate < limit; cstate++) {
1557         if (acpi_state_table.states[cstate].address == mwait_hint)
1558             return false;
1559     }
1560     return true;
1561 }
1562 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1563 #define force_use_acpi  (false)
1564 
/*
 * Stubs used when CONFIG_ACPI_PROCESSOR_CSTATE is not set: no _CST is ever
 * extracted, no ACPI-based states are added, and no state is turned off by
 * default.
 */
static inline bool intel_idle_acpi_cst_extract(void) { return false; }
static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
1568 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1569 
1570 /**
1571  * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
1572  *
1573  * Tune IVT multi-socket targets.
1574  * Assumption: num_sockets == (max_package_num + 1).
1575  */
1576 static void __init ivt_idle_state_table_update(void)
1577 {
1578     /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1579     int cpu, package_num, num_sockets = 1;
1580 
1581     for_each_online_cpu(cpu) {
1582         package_num = topology_physical_package_id(cpu);
1583         if (package_num + 1 > num_sockets) {
1584             num_sockets = package_num + 1;
1585 
1586             if (num_sockets > 4) {
1587                 cpuidle_state_table = ivt_cstates_8s;
1588                 return;
1589             }
1590         }
1591     }
1592 
1593     if (num_sockets > 2)
1594         cpuidle_state_table = ivt_cstates_4s;
1595 
1596     /* else, 1 and 2 socket systems use default ivt_cstates */
1597 }
1598 
1599 /**
1600  * irtl_2_usec - IRTL to microseconds conversion.
1601  * @irtl: IRTL MSR value.
1602  *
1603  * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1604  */
1605 static unsigned long long __init irtl_2_usec(unsigned long long irtl)
1606 {
1607     static const unsigned int irtl_ns_units[] __initconst = {
1608         1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1609     };
1610     unsigned long long ns;
1611 
1612     if (!irtl)
1613         return 0;
1614 
1615     ns = irtl_ns_units[(irtl >> 10) & 0x7];
1616 
1617     return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1618 }
1619 
1620 /**
1621  * bxt_idle_state_table_update - Fix up the Broxton idle states table.
1622  *
1623  * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
1624  * definitive maximum latency and use the same value for target_residency.
1625  */
1626 static void __init bxt_idle_state_table_update(void)
1627 {
1628     unsigned long long msr;
1629     unsigned int usec;
1630 
1631     rdmsrl(MSR_PKGC6_IRTL, msr);
1632     usec = irtl_2_usec(msr);
1633     if (usec) {
1634         bxt_cstates[2].exit_latency = usec;
1635         bxt_cstates[2].target_residency = usec;
1636     }
1637 
1638     rdmsrl(MSR_PKGC7_IRTL, msr);
1639     usec = irtl_2_usec(msr);
1640     if (usec) {
1641         bxt_cstates[3].exit_latency = usec;
1642         bxt_cstates[3].target_residency = usec;
1643     }
1644 
1645     rdmsrl(MSR_PKGC8_IRTL, msr);
1646     usec = irtl_2_usec(msr);
1647     if (usec) {
1648         bxt_cstates[4].exit_latency = usec;
1649         bxt_cstates[4].target_residency = usec;
1650     }
1651 
1652     rdmsrl(MSR_PKGC9_IRTL, msr);
1653     usec = irtl_2_usec(msr);
1654     if (usec) {
1655         bxt_cstates[5].exit_latency = usec;
1656         bxt_cstates[5].target_residency = usec;
1657     }
1658 
1659     rdmsrl(MSR_PKGC10_IRTL, msr);
1660     usec = irtl_2_usec(msr);
1661     if (usec) {
1662         bxt_cstates[6].exit_latency = usec;
1663         bxt_cstates[6].target_residency = usec;
1664     }
1665 
1666 }
1667 
1668 /**
1669  * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
1670  *
1671  * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
1672  */
1673 static void __init sklh_idle_state_table_update(void)
1674 {
1675     unsigned long long msr;
1676     unsigned int eax, ebx, ecx, edx;
1677 
1678 
1679     /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1680     if (max_cstate <= 7)
1681         return;
1682 
1683     /* if PC10 not present in CPUID.MWAIT.EDX */
1684     if ((mwait_substates & (0xF << 28)) == 0)
1685         return;
1686 
1687     rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1688 
1689     /* PC10 is not enabled in PKG C-state limit */
1690     if ((msr & 0xF) != 8)
1691         return;
1692 
1693     ecx = 0;
1694     cpuid(7, &eax, &ebx, &ecx, &edx);
1695 
1696     /* if SGX is present */
1697     if (ebx & (1 << 2)) {
1698 
1699         rdmsrl(MSR_IA32_FEAT_CTL, msr);
1700 
1701         /* if SGX is enabled */
1702         if (msr & (1 << 18))
1703             return;
1704     }
1705 
1706     skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C8-SKL */
1707     skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C9-SKL */
1708 }
1709 
1710 /**
1711  * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
1712  * idle states table.
1713  */
1714 static void __init skx_idle_state_table_update(void)
1715 {
1716     unsigned long long msr;
1717 
1718     rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1719 
1720     /*
1721      * 000b: C0/C1 (no package C-state support)
1722      * 001b: C2
1723      * 010b: C6 (non-retention)
1724      * 011b: C6 (retention)
1725      * 111b: No Package C state limits.
1726      */
1727     if ((msr & 0x7) < 2) {
1728         /*
1729          * Uses the CC6 + PC0 latency and 3 times of
1730          * latency for target_residency if the PC6
1731          * is disabled in BIOS. This is consistent
1732          * with how intel_idle driver uses _CST
1733          * to set the target_residency.
1734          */
1735         skx_cstates[2].exit_latency = 92;
1736         skx_cstates[2].target_residency = 276;
1737     }
1738 }
1739 
1740 /**
1741  * adl_idle_state_table_update - Adjust AlderLake idle states table.
1742  */
1743 static void __init adl_idle_state_table_update(void)
1744 {
1745     /* Check if user prefers C1 over C1E. */
1746     if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
1747         cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
1748         cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
1749 
1750         /* Disable C1E by clearing the "C1E promotion" bit. */
1751         c1e_promotion = C1E_PROMOTION_DISABLE;
1752         return;
1753     }
1754 
1755     /* Make sure C1E is enabled by default */
1756     c1e_promotion = C1E_PROMOTION_ENABLE;
1757 }
1758 
1759 /**
1760  * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
1761  */
1762 static void __init spr_idle_state_table_update(void)
1763 {
1764     unsigned long long msr;
1765 
1766     /*
1767      * By default, the C6 state assumes the worst-case scenario of package
1768      * C6. However, if PC6 is disabled, we update the numbers to match
1769      * core C6.
1770      */
1771     rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1772 
1773     /* Limit value 2 and above allow for PC6. */
1774     if ((msr & 0x7) < 2) {
1775         spr_cstates[2].exit_latency = 190;
1776         spr_cstates[2].target_residency = 600;
1777     }
1778 }
1779 
1780 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
1781 {
1782     unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
1783     unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
1784                     MWAIT_SUBSTATE_MASK;
1785 
1786     /* Ignore the C-state if there are NO sub-states in CPUID for it. */
1787     if (num_substates == 0)
1788         return false;
1789 
1790     if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1791         mark_tsc_unstable("TSC halts in idle states deeper than C2");
1792 
1793     return true;
1794 }
1795 
1796 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
1797 {
1798     int cstate;
1799 
1800     switch (boot_cpu_data.x86_model) {
1801     case INTEL_FAM6_IVYBRIDGE_X:
1802         ivt_idle_state_table_update();
1803         break;
1804     case INTEL_FAM6_ATOM_GOLDMONT:
1805     case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1806         bxt_idle_state_table_update();
1807         break;
1808     case INTEL_FAM6_SKYLAKE:
1809         sklh_idle_state_table_update();
1810         break;
1811     case INTEL_FAM6_SKYLAKE_X:
1812         skx_idle_state_table_update();
1813         break;
1814     case INTEL_FAM6_SAPPHIRERAPIDS_X:
1815         spr_idle_state_table_update();
1816         break;
1817     case INTEL_FAM6_ALDERLAKE:
1818     case INTEL_FAM6_ALDERLAKE_L:
1819         adl_idle_state_table_update();
1820         break;
1821     }
1822 
1823     for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1824         unsigned int mwait_hint;
1825 
1826         if (intel_idle_max_cstate_reached(cstate))
1827             break;
1828 
1829         if (!cpuidle_state_table[cstate].enter &&
1830             !cpuidle_state_table[cstate].enter_s2idle)
1831             break;
1832 
1833         /* If marked as unusable, skip this state. */
1834         if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
1835             pr_debug("state %s is disabled\n",
1836                  cpuidle_state_table[cstate].name);
1837             continue;
1838         }
1839 
1840         mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1841         if (!intel_idle_verify_cstate(mwait_hint))
1842             continue;
1843 
1844         /* Structure copy. */
1845         drv->states[drv->state_count] = cpuidle_state_table[cstate];
1846 
1847         if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
1848             drv->states[drv->state_count].enter = intel_idle_irq;
1849 
1850         if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
1851             cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
1852             WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE);
1853             drv->states[drv->state_count].enter = intel_idle_ibrs;
1854         }
1855 
1856         if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_INIT_XSTATE)
1857             drv->states[drv->state_count].enter = intel_idle_xstate;
1858 
1859         if ((disabled_states_mask & BIT(drv->state_count)) ||
1860             ((icpu->use_acpi || force_use_acpi) &&
1861              intel_idle_off_by_default(mwait_hint) &&
1862              !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
1863             drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
1864 
1865         if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
1866             drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
1867 
1868         drv->state_count++;
1869     }
1870 
1871     if (icpu->byt_auto_demotion_disable_flag) {
1872         wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1873         wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1874     }
1875 }
1876 
1877 /**
1878  * intel_idle_cpuidle_driver_init - Create the list of available idle states.
1879  * @drv: cpuidle driver structure to initialize.
1880  */
1881 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
1882 {
1883     cpuidle_poll_state_init(drv);
1884 
1885     if (disabled_states_mask & BIT(0))
1886         drv->states[0].flags |= CPUIDLE_FLAG_OFF;
1887 
1888     drv->state_count = 1;
1889 
1890     if (icpu)
1891         intel_idle_init_cstates_icpu(drv);
1892     else
1893         intel_idle_init_cstates_acpi(drv);
1894 }
1895 
1896 static void auto_demotion_disable(void)
1897 {
1898     unsigned long long msr_bits;
1899 
1900     rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1901     msr_bits &= ~auto_demotion_disable_flags;
1902     wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1903 }
1904 
1905 static void c1e_promotion_enable(void)
1906 {
1907     unsigned long long msr_bits;
1908 
1909     rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1910     msr_bits |= 0x2;
1911     wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1912 }
1913 
1914 static void c1e_promotion_disable(void)
1915 {
1916     unsigned long long msr_bits;
1917 
1918     rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1919     msr_bits &= ~0x2;
1920     wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1921 }
1922 
1923 /**
1924  * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
1925  * @cpu: CPU to initialize.
1926  *
1927  * Register a cpuidle device object for @cpu and update its MSRs in accordance
1928  * with the processor model flags.
1929  */
1930 static int intel_idle_cpu_init(unsigned int cpu)
1931 {
1932     struct cpuidle_device *dev;
1933 
1934     dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1935     dev->cpu = cpu;
1936 
1937     if (cpuidle_register_device(dev)) {
1938         pr_debug("cpuidle_register_device %d failed!\n", cpu);
1939         return -EIO;
1940     }
1941 
1942     if (auto_demotion_disable_flags)
1943         auto_demotion_disable();
1944 
1945     if (c1e_promotion == C1E_PROMOTION_ENABLE)
1946         c1e_promotion_enable();
1947     else if (c1e_promotion == C1E_PROMOTION_DISABLE)
1948         c1e_promotion_disable();
1949 
1950     return 0;
1951 }
1952 
1953 static int intel_idle_cpu_online(unsigned int cpu)
1954 {
1955     struct cpuidle_device *dev;
1956 
1957     if (!boot_cpu_has(X86_FEATURE_ARAT))
1958         tick_broadcast_enable();
1959 
1960     /*
1961      * Some systems can hotplug a cpu at runtime after
1962      * the kernel has booted, we have to initialize the
1963      * driver in this case
1964      */
1965     dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1966     if (!dev->registered)
1967         return intel_idle_cpu_init(cpu);
1968 
1969     return 0;
1970 }
1971 
1972 /**
1973  * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
1974  */
1975 static void __init intel_idle_cpuidle_devices_uninit(void)
1976 {
1977     int i;
1978 
1979     for_each_online_cpu(i)
1980         cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
1981 }
1982 
1983 static int __init intel_idle_init(void)
1984 {
1985     const struct x86_cpu_id *id;
1986     unsigned int eax, ebx, ecx;
1987     int retval;
1988 
1989     /* Do not load intel_idle at all for now if idle= is passed */
1990     if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1991         return -ENODEV;
1992 
1993     if (max_cstate == 0) {
1994         pr_debug("disabled\n");
1995         return -EPERM;
1996     }
1997 
1998     id = x86_match_cpu(intel_idle_ids);
1999     if (id) {
2000         if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
2001             pr_debug("Please enable MWAIT in BIOS SETUP\n");
2002             return -ENODEV;
2003         }
2004     } else {
2005         id = x86_match_cpu(intel_mwait_ids);
2006         if (!id)
2007             return -ENODEV;
2008     }
2009 
2010     if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
2011         return -ENODEV;
2012 
2013     cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
2014 
2015     if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
2016         !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
2017         !mwait_substates)
2018             return -ENODEV;
2019 
2020     pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
2021 
2022     icpu = (const struct idle_cpu *)id->driver_data;
2023     if (icpu) {
2024         cpuidle_state_table = icpu->state_table;
2025         auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
2026         if (icpu->disable_promotion_to_c1e)
2027             c1e_promotion = C1E_PROMOTION_DISABLE;
2028         if (icpu->use_acpi || force_use_acpi)
2029             intel_idle_acpi_cst_extract();
2030     } else if (!intel_idle_acpi_cst_extract()) {
2031         return -ENODEV;
2032     }
2033 
2034     pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
2035          boot_cpu_data.x86_model);
2036 
2037     intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
2038     if (!intel_idle_cpuidle_devices)
2039         return -ENOMEM;
2040 
2041     intel_idle_cpuidle_driver_init(&intel_idle_driver);
2042 
2043     retval = cpuidle_register_driver(&intel_idle_driver);
2044     if (retval) {
2045         struct cpuidle_driver *drv = cpuidle_get_driver();
2046         printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
2047                drv ? drv->name : "none");
2048         goto init_driver_fail;
2049     }
2050 
2051     retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
2052                    intel_idle_cpu_online, NULL);
2053     if (retval < 0)
2054         goto hp_setup_fail;
2055 
2056     pr_debug("Local APIC timer is reliable in %s\n",
2057          boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
2058 
2059     return 0;
2060 
2061 hp_setup_fail:
2062     intel_idle_cpuidle_devices_uninit();
2063     cpuidle_unregister_driver(&intel_idle_driver);
2064 init_driver_fail:
2065     free_percpu(intel_idle_cpuidle_devices);
2066     return retval;
2067 
2068 }
2069 device_initcall(intel_idle_init);
2070 
2071 /*
2072  * We are not really modular, but we used to support that.  Meaning we also
2073  * support "intel_idle.max_cstate=..." at boot and also a read-only export of
2074  * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
2075  * is the easiest way (currently) to continue doing that.
2076  */
2077 module_param(max_cstate, int, 0444);
2078 /*
2079  * The positions of the bits that are set in this number are the indices of the
2080  * idle states to be disabled by default (as reflected by the names of the
2081  * corresponding idle state directories in sysfs, "state0", "state1" ...
2082  * "state<i>" ..., where <i> is the index of the given state).
2083  */
2084 module_param_named(states_off, disabled_states_mask, uint, 0444);
2085 MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
2086 /*
2087  * Some platforms come with mutually exclusive C-states, so that if one is
2088  * enabled, the other C-states must not be used. Example: C1 and C1E on
2089  * Sapphire Rapids platform. This parameter allows for selecting the
2090  * preferred C-states among the groups of mutually exclusive C-states - the
2091  * selected C-states will be registered, the other C-states from the mutually
2092  * exclusive group won't be registered. If the platform has no mutually
2093  * exclusive C-states, this parameter has no effect.
2094  */
2095 module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
2096 MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");