// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_pstate.c: Native P state management for Intel processors
 *
 * (C) Copyright 2012 Intel Corporation
 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/sched/cpufreq.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/acpi.h>
#include <linux/vmalloc.h>
#include <linux/pm_qos.h>
#include <trace/events/power.h>

#include <asm/div64.h>
#include <asm/msr.h>
#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>
#include <asm/intel-family.h>
#include "../drivers/thermal/intel/thermal_interrupt.h"

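/*
 * A note on units (an editorial addition): the sampling interval and the
 * transition latency below are in nanoseconds, while the two transition
 * delays are in microseconds, as they feed the cpufreq core's
 * transition_delay_us.
 */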
#define INTEL_PSTATE_SAMPLING_INTERVAL	(10 * NSEC_PER_MSEC)

#define INTEL_CPUFREQ_TRANSITION_LATENCY	20000
#define INTEL_CPUFREQ_TRANSITION_DELAY_HWP	5000
#define INTEL_CPUFREQ_TRANSITION_DELAY		500

#ifdef CONFIG_ACPI
#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>
#endif

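/*
 * Fixed-point helpers: plain fixed-point values carry FRAC_BITS fractional
 * bits, so int_tofp(1) == 256 and mul_fp(int_tofp(3), int_tofp(2)) yields
 * int_tofp(6).  The "ext" variants below add EXT_BITS more fractional bits
 * for extra precision when averaging performance ratios.
 */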
#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)

#define ONE_EIGHTH_FP ((int64_t)1 << (FRAC_BITS - 3))

#define EXT_BITS 6
#define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
#define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS)
#define int_ext_tofp(X) ((int64_t)(X) << EXT_FRAC_BITS)

static inline int32_t mul_fp(int32_t x, int32_t y)
{
	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
}

static inline int32_t div_fp(s64 x, s64 y)
{
	return div64_s64((int64_t)x << FRAC_BITS, y);
}

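/* Round a FRAC_BITS fixed-point value up to the nearest integer. */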
static inline int ceiling_fp(int32_t x)
{
	int mask, ret;

	ret = fp_toint(x);
	mask = (1 << FRAC_BITS) - 1;
	if (x & mask)
		ret += 1;
	return ret;
}

static inline u64 mul_ext_fp(u64 x, u64 y)
{
	return (x * y) >> EXT_FRAC_BITS;
}

static inline u64 div_ext_fp(u64 x, u64 y)
{
	return div64_u64(x << EXT_FRAC_BITS, y);
}

/**
 * struct sample -	Store performance sample
 * @core_avg_perf:	Ratio of APERF/MPERF which is the actual average
 *			performance during last sample period
 * @busy_scaled:	Scaled busy value which is used to calculate next
 *			P state. This can be different than core_avg_perf
 *			to account for cpu idle period
 * @aperf:		Difference of actual performance frequency clock count
 *			read from APERF MSR between last and current sample
 * @mperf:		Difference of maximum performance frequency clock count
 *			read from MPERF MSR between last and current sample
 * @tsc:		Difference in time stamp counter between last and
 *			current sample
 * @time:		Current time from scheduler
 *
 * This structure is used in the cpudata structure to store performance sample
 * data for choosing next P State.
 */
struct sample {
	int32_t core_avg_perf;
	int32_t busy_scaled;
	u64 aperf;
	u64 mperf;
	u64 tsc;
	u64 time;
};

/**
 * struct pstate_data - Store P state data
 * @current_pstate:	Current requested P state
 * @min_pstate:		Min P state possible for this platform
 * @max_pstate:		Max P state possible for this platform
 * @max_pstate_physical: This is physical Max P state for a processor
 *			This can be higher than the max_pstate which can
 *			be limited by platform thermal design power limits
 * @perf_ctl_scaling:	PERF_CTL P-state to frequency scaling factor
 * @scaling:		Scaling factor between performance and frequency
 * @turbo_pstate:	Max Turbo P state possible for this platform
 * @min_freq:		@min_pstate frequency in cpufreq units
 * @max_freq:		@max_pstate frequency in cpufreq units
 * @turbo_freq:		@turbo_pstate frequency in cpufreq units
 *
 * Stores the per cpu model P state limits and current P state.
 */
struct pstate_data {
	int	current_pstate;
	int	min_pstate;
	int	max_pstate;
	int	max_pstate_physical;
	int	perf_ctl_scaling;
	int	scaling;
	int	turbo_pstate;
	unsigned int min_freq;
	unsigned int max_freq;
	unsigned int turbo_freq;
};

/**
 * struct vid_data -	Stores voltage information data
 * @min:		VID data for this platform corresponding to
 *			the lowest P state
 * @max:		VID data corresponding to the highest P State.
 * @turbo:		VID data for turbo P state
 * @ratio:		Ratio of (vid max - vid min) /
 *			(max P state - Min P State)
 *
 * Stores the voltage data for DVFS (Dynamic Voltage and Frequency Scaling)
 * This data is used in Atom platforms, where in addition to target P state,
 * the voltage data needs to be specified to select next P State.
 */
struct vid_data {
	int min;
	int max;
	int turbo;
	int32_t ratio;
};

/**
 * struct global_params - Global parameters, mostly tunable via sysfs.
 * @no_turbo:		Whether or not to use turbo P-states.
 * @turbo_disabled:	Whether or not turbo P-states are available at all,
 *			based on the MSR_IA32_MISC_ENABLE value and whether or
 *			not the maximum reported turbo P-state is different from
 *			the maximum reported non-turbo one.
 * @turbo_disabled_mf:	The @turbo_disabled value reflected by cpuinfo.max_freq.
 * @min_perf_pct:	Minimum capacity limit in percent of the maximum turbo
 *			P-state capacity.
 * @max_perf_pct:	Maximum capacity limit in percent of the maximum turbo
 *			P-state capacity.
 */
struct global_params {
	bool no_turbo;
	bool turbo_disabled;
	bool turbo_disabled_mf;
	int max_perf_pct;
	int min_perf_pct;
};

/**
 * struct cpudata -	Per CPU instance data storage
 * @cpu:		CPU number for this instance data
 * @policy:		CPUFreq policy value
 * @update_util:	CPUFreq utility callback information
 * @update_util_set:	CPUFreq utility callback is set
 * @iowait_boost:	iowait-related boost fraction
 * @last_update:	Time of the last update.
 * @pstate:		Stores P state limits for this CPU
 * @vid:		Stores VID limits for this CPU
 * @last_sample_time:	Last Sample time
 * @aperf_mperf_shift:	APERF vs MPERF counting frequency difference
 * @prev_aperf:		Last APERF value read from APERF MSR
 * @prev_mperf:		Last MPERF value read from MPERF MSR
 * @prev_tsc:		Last timestamp counter (TSC) value
 * @prev_cummulative_iowait: IO Wait time difference from last and
 *			current sample
 * @sample:		Storage for storing last Sample data
 * @min_perf_ratio:	Minimum capacity in terms of PERF or HWP ratios
 * @max_perf_ratio:	Maximum capacity in terms of PERF or HWP ratios
 * @acpi_perf_data:	Stores ACPI perf information read from _PSS
 * @valid_pss_table:	Set to true for valid ACPI _PSS entries found
 * @epp_powersave:	Last saved HWP energy performance preference
 *			(EPP) or energy performance bias (EPB),
 *			when policy switched to performance
 * @epp_policy:		Last saved policy used to set EPP/EPB
 * @epp_default:	Power on default HWP energy performance
 *			preference/bias
 * @epp_cached:		Cached HWP energy-performance preference value
 * @hwp_req_cached:	Cached value of the last HWP Request MSR
 * @hwp_cap_cached:	Cached value of the last HWP Capabilities MSR
 * @last_io_update:	Last time when IO wake flag was set
 * @sched_flags:	Store scheduler flags for possible cross CPU update
 * @hwp_boost_min:	Last HWP boosted min performance
 * @suspended:		Whether or not the driver has been suspended.
 * @hwp_notify_work:	workqueue for HWP notifications.
 *
 * This structure stores per CPU instance data for all CPUs.
 */
struct cpudata {
	int cpu;

	unsigned int policy;
	struct update_util_data update_util;
	bool update_util_set;

	struct pstate_data pstate;
	struct vid_data vid;

	u64 last_update;
	u64 last_sample_time;
	u64 aperf_mperf_shift;
	u64 prev_aperf;
	u64 prev_mperf;
	u64 prev_tsc;
	u64 prev_cummulative_iowait;
	struct sample sample;
	int32_t min_perf_ratio;
	int32_t max_perf_ratio;
#ifdef CONFIG_ACPI
	struct acpi_processor_performance acpi_perf_data;
	bool valid_pss_table;
#endif
	unsigned int iowait_boost;
	s16 epp_powersave;
	s16 epp_policy;
	s16 epp_default;
	s16 epp_cached;
	u64 hwp_req_cached;
	u64 hwp_cap_cached;
	u64 last_io_update;
	unsigned int sched_flags;
	u32 hwp_boost_min;
	bool suspended;
	struct delayed_work hwp_notify_work;
};

static struct cpudata **all_cpu_data;

/**
 * struct pstate_funcs - Per CPU model specific callbacks
 * @get_max:		Callback to get maximum non turbo effective P state
 * @get_max_physical:	Callback to get maximum non turbo physical P state
 * @get_min:		Callback to get minimum P state
 * @get_turbo:		Callback to get turbo P state
 * @get_scaling:	Callback to get frequency scaling factor
 * @get_cpu_scaling:	Get frequency scaling factor for a given cpu
 * @get_aperf_mperf_shift: Callback to get the APERF vs MPERF
 *			frequency difference
 * @get_val:		Callback to convert P state to actual MSR write value
 * @get_vid:		Callback to get VID data for Atom platforms
 *
 * Core and Atom CPU models have different way to get P State limits. This
 * structure is used to store those callbacks.
 */
struct pstate_funcs {
	int (*get_max)(void);
	int (*get_max_physical)(void);
	int (*get_min)(void);
	int (*get_turbo)(void);
	int (*get_scaling)(void);
	int (*get_cpu_scaling)(int cpu);
	int (*get_aperf_mperf_shift)(void);
	u64 (*get_val)(struct cpudata*, int pstate);
	void (*get_vid)(struct cpudata *);
};

static struct pstate_funcs pstate_funcs __read_mostly;

static int hwp_active __read_mostly;
static int hwp_mode_bdw __read_mostly;
static bool per_cpu_limits __read_mostly;
static bool hwp_boost __read_mostly;

static struct cpufreq_driver *intel_pstate_driver __read_mostly;

#ifdef CONFIG_ACPI
static bool acpi_ppc;
#endif

static struct global_params global;

static DEFINE_MUTEX(intel_pstate_driver_lock);
static DEFINE_MUTEX(intel_pstate_limits_lock);

#ifdef CONFIG_ACPI

static bool intel_pstate_acpi_pm_profile_server(void)
{
	if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
	    acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
		return true;

	return false;
}

static bool intel_pstate_get_ppc_enable_status(void)
{
	if (intel_pstate_acpi_pm_profile_server())
		return true;

	return acpi_ppc;
}

#ifdef CONFIG_ACPI_CPPC_LIB

/* The work item is needed to avoid CPU hotplug locking issues */
static void intel_pstate_sched_itmt_work_fn(struct work_struct *work)
{
	sched_set_itmt_support();
}

static DECLARE_WORK(sched_itmt_work, intel_pstate_sched_itmt_work_fn);

#define CPPC_MAX_PERF	U8_MAX

static void intel_pstate_set_itmt_prio(int cpu)
{
	struct cppc_perf_caps cppc_perf;
	static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
	int ret;

	ret = cppc_get_perf_caps(cpu, &cppc_perf);
	if (ret)
		return;

	/*
	 * On some systems with overclocking enabled, CPPC.highest_perf is
	 * hardcoded to 0xff, so it cannot be used to enable ITMT.  In that
	 * case fall back to the HWP highest performance from the cached
	 * HWP capabilities.
	 */
	if (cppc_perf.highest_perf == CPPC_MAX_PERF)
		cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));

	/*
	 * The priorities can be set regardless of whether or not
	 * sched_set_itmt_support(true) has been called and it is valid to
	 * update them at any time after it has been called.
	 */
	sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu);

	if (max_highest_perf <= min_highest_perf) {
		if (cppc_perf.highest_perf > max_highest_perf)
			max_highest_perf = cppc_perf.highest_perf;

		if (cppc_perf.highest_perf < min_highest_perf)
			min_highest_perf = cppc_perf.highest_perf;

		if (max_highest_perf > min_highest_perf) {
			/*
			 * This code can be run during CPU online under the
			 * CPU hotplug locks, so sched_set_itmt_support()
			 * cannot be called from here.  Queue up a work item
			 * to invoke it.
			 */
			schedule_work(&sched_itmt_work);
		}
	}
}

static int intel_pstate_get_cppc_guaranteed(int cpu)
{
	struct cppc_perf_caps cppc_perf;
	int ret;

	ret = cppc_get_perf_caps(cpu, &cppc_perf);
	if (ret)
		return ret;

	if (cppc_perf.guaranteed_perf)
		return cppc_perf.guaranteed_perf;

	return cppc_perf.nominal_perf;
}

static u32 intel_pstate_cppc_nominal(int cpu)
{
	u64 nominal_perf;

	if (cppc_get_nominal_perf(cpu, &nominal_perf))
		return 0;

	return nominal_perf;
}
#else
static inline void intel_pstate_set_itmt_prio(int cpu)
{
}
#endif

static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;
	int ret;
	int i;

	if (hwp_active) {
		intel_pstate_set_itmt_prio(policy->cpu);
		return;
	}

	if (!intel_pstate_get_ppc_enable_status())
		return;

	cpu = all_cpu_data[policy->cpu];

	ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
						  policy->cpu);
	if (ret)
		return;

	/*
	 * Check if the control value in _PSS is for PERF_CTL MSR, which should
	 * guarantee that the states returned by it map to the states in our
	 * list directly.
	 */
	if (cpu->acpi_perf_data.control_register.space_id !=
				ACPI_ADR_SPACE_FIXED_HARDWARE)
		goto err;

	/*
	 * If there is only one entry in _PSS, simply ignore _PSS and continue
	 * as usual without taking it into account.
	 */
	if (cpu->acpi_perf_data.state_count < 2)
		goto err;

	pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu);
	for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
		pr_debug("     %cP%d: %u MHz, %u mW, 0x%x\n",
			 (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
			 (u32) cpu->acpi_perf_data.states[i].core_frequency,
			 (u32) cpu->acpi_perf_data.states[i].power,
			 (u32) cpu->acpi_perf_data.states[i].control);
	}

	/*
	 * The _PSS table doesn't contain whole turbo frequency range.
	 * This just contains +1 MHZ above the max non turbo frequency,
	 * with control value corresponding to max turbo ratio. But
	 * when cpufreq set policy is called, it will call with this
	 * max frequency, which will cause a reduced performance as
	 * this driver uses real max turbo frequency as the max
	 * frequency. So correct this frequency in _PSS table to
	 * correct max turbo frequency based on the turbo state.
	 * Also need to convert to MHz as _PSS freq is in MHz.
	 */
	if (!global.turbo_disabled)
		cpu->acpi_perf_data.states[0].core_frequency =
					policy->cpuinfo.max_freq / 1000;
	cpu->valid_pss_table = true;
	pr_debug("_PPC limits will be enforced\n");

	return;

 err:
	cpu->valid_pss_table = false;
	acpi_processor_unregister_performance(policy->cpu);
}

static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;

	cpu = all_cpu_data[policy->cpu];
	if (!cpu->valid_pss_table)
		return;

	acpi_processor_unregister_performance(policy->cpu);
}
#else
static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{
}

static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
{
}

static inline bool intel_pstate_acpi_pm_profile_server(void)
{
	return false;
}
#endif /* CONFIG_ACPI */

#ifndef CONFIG_ACPI_CPPC_LIB
static inline int intel_pstate_get_cppc_guaranteed(int cpu)
{
	return -ENOTSUPP;
}
#endif /* CONFIG_ACPI_CPPC_LIB */

/**
 * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels.
 * @cpu: Target CPU.
 *
 * On hybrid processors, HWP may expose more performance levels than there are
 * P-states accessible through the PERF_CTL interface.  If that happens, the
 * scaling factor between HWP performance levels and CPU frequency will be less
 * than the scaling factor between P-state values and CPU frequency.
 *
 * In that case, adjust the CPU parameters used in computations accordingly.
 */
static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
{
	int perf_ctl_max_phys = cpu->pstate.max_pstate_physical;
	int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
	int perf_ctl_turbo = pstate_funcs.get_turbo();
	int turbo_freq = perf_ctl_turbo * perf_ctl_scaling;
	int scaling = cpu->pstate.scaling;

	pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
	pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, pstate_funcs.get_max());
	pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo);
	pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling);
	pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate);
	pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate);
	pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);

	/*
	 * If the product of the HWP performance scaling factor and the HWP_CAP
	 * highest performance is greater than the maximum turbo frequency
	 * corresponding to the pstate_funcs.get_turbo() return value, the
	 * scaling factor is too high, so recompute it to make the HWP_CAP
	 * highest performance correspond to the maximum turbo frequency.
	 */
	cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * scaling;
	if (turbo_freq < cpu->pstate.turbo_freq) {
		cpu->pstate.turbo_freq = turbo_freq;
		scaling = DIV_ROUND_UP(turbo_freq, cpu->pstate.turbo_pstate);
		cpu->pstate.scaling = scaling;

		pr_debug("CPU%d: refined HWP-to-frequency scaling factor: %d\n",
			 cpu->cpu, scaling);
	}

	cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling,
					 perf_ctl_scaling);

	cpu->pstate.max_pstate_physical =
			DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling,
				     scaling);

	cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;
	/*
	 * Cast the min P-state value retrieved via pstate_funcs.get_min() to
	 * the effective range of HWP performance levels.
	 */
	cpu->pstate.min_pstate = DIV_ROUND_UP(cpu->pstate.min_freq, scaling);
}

static inline void update_turbo_state(void)
{
	u64 misc_en;
	struct cpudata *cpu;

	cpu = all_cpu_data[0];
	rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
	global.turbo_disabled =
		(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
		 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}

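/*
 * Lowest admissible value of the min_perf_pct limit: the minimum P-state
 * expressed as a percentage of the turbo P-state, or 0 if the latter is
 * not known yet.
 */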
static int min_perf_pct_min(void)
{
	struct cpudata *cpu = all_cpu_data[0];
	int turbo_pstate = cpu->pstate.turbo_pstate;

	return turbo_pstate ?
		(cpu->pstate.min_pstate * 100 / turbo_pstate) : 0;
}

static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
{
	u64 epb;
	int ret;

	if (!boot_cpu_has(X86_FEATURE_EPB))
		return -ENXIO;

	ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
	if (ret)
		return (s16)ret;

	return (s16)(epb & 0x0f);
}

static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data)
{
	s16 epp;

	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
		/*
		 * When hwp_req_data is 0, it means that the caller has not
		 * read MSR_HWP_REQUEST, so read it here and extract the EPP
		 * from the fresh value.
		 */
		if (!hwp_req_data) {
			epp = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST,
					    &hwp_req_data);
			if (epp)
				return epp;
		}
		epp = (hwp_req_data >> 24) & 0xff;
	} else {
		/* When there is no EPP present, HWP uses EPB settings */
		epp = intel_pstate_get_epb(cpu_data);
	}

	return epp;
}

static int intel_pstate_set_epb(int cpu, s16 pref)
{
	u64 epb;
	int ret;

	if (!boot_cpu_has(X86_FEATURE_EPB))
		return -ENXIO;

	ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
	if (ret)
		return ret;

	epb = (epb & ~0x0f) | pref;
	wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb);

	return 0;
}

/*
 * EPP/EPB display strings corresponding to EPP index in the
 * energy_perf_strings[]
 *	index		String
 *-------------------------------------
 *	0		default
 *	1		performance
 *	2		balance_performance
 *	3		balance_power
 *	4		power
 */
enum energy_perf_value_index {
	EPP_INDEX_DEFAULT = 0,
	EPP_INDEX_PERFORMANCE,
	EPP_INDEX_BALANCE_PERFORMANCE,
	EPP_INDEX_BALANCE_POWERSAVE,
	EPP_INDEX_POWERSAVE,
};

static const char * const energy_perf_strings[] = {
	[EPP_INDEX_DEFAULT] = "default",
	[EPP_INDEX_PERFORMANCE] = "performance",
	[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
	[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
	[EPP_INDEX_POWERSAVE] = "power",
	NULL
};
static unsigned int epp_values[] = {
	[EPP_INDEX_DEFAULT] = 0, /* Unused index */
	[EPP_INDEX_PERFORMANCE] = HWP_EPP_PERFORMANCE,
	[EPP_INDEX_BALANCE_PERFORMANCE] = HWP_EPP_BALANCE_PERFORMANCE,
	[EPP_INDEX_BALANCE_POWERSAVE] = HWP_EPP_BALANCE_POWERSAVE,
	[EPP_INDEX_POWERSAVE] = HWP_EPP_POWERSAVE,
};

static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data, int *raw_epp)
{
	s16 epp;
	int index = -EINVAL;

	*raw_epp = 0;
	epp = intel_pstate_get_epp(cpu_data, 0);
	if (epp < 0)
		return epp;

	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
		if (epp == epp_values[EPP_INDEX_PERFORMANCE])
			return EPP_INDEX_PERFORMANCE;
		if (epp == epp_values[EPP_INDEX_BALANCE_PERFORMANCE])
			return EPP_INDEX_BALANCE_PERFORMANCE;
		if (epp == epp_values[EPP_INDEX_BALANCE_POWERSAVE])
			return EPP_INDEX_BALANCE_POWERSAVE;
		if (epp == epp_values[EPP_INDEX_POWERSAVE])
			return EPP_INDEX_POWERSAVE;
		*raw_epp = epp;
		return 0;
	} else if (boot_cpu_has(X86_FEATURE_EPB)) {
		/*
		 * Range:
		 *	0x00-0x03	:	Performance
		 *	0x04-0x07	:	Balance performance
		 *	0x08-0x0B	:	Balance power
		 *	0x0C-0x0F	:	Power
		 * The EPB is a 4 bit value, but our ranges restrict the
		 * value which can be set. Here only using top two bits
		 * effectively.
		 */
		index = (epp >> 2) + 1;
	}

	return index;
}

static int intel_pstate_set_epp(struct cpudata *cpu, u32 epp)
{
	int ret;

	/*
	 * Use the cached HWP Request MSR value, because in the active mode the
	 * register itself may be updated by intel_pstate_hwp_boost_up() or
	 * intel_pstate_hwp_boost_down() at any time.
	 */
	u64 value = READ_ONCE(cpu->hwp_req_cached);

	value &= ~GENMASK_ULL(31, 24);
	value |= (u64)epp << 24;
	/*
	 * The only other updater of hwp_req_cached in the active mode,
	 * intel_pstate_hwp_set(), is called under the same lock as this
	 * function, so it cannot run in parallel with the update below.
	 */
	WRITE_ONCE(cpu->hwp_req_cached, value);
	ret = wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
	if (!ret)
		cpu->epp_cached = epp;

	return ret;
}

static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
					      int pref_index, bool use_raw,
					      u32 raw_epp)
{
	int epp = -EINVAL;
	int ret;

	if (!pref_index)
		epp = cpu_data->epp_default;

	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
		if (use_raw)
			epp = raw_epp;
		else if (epp == -EINVAL)
			epp = epp_values[pref_index];

		/*
		 * To avoid confusion, refuse to set EPP to any values different
		 * from 0 (performance) if the current policy is "performance",
		 * because those values would be overridden.
		 */
		if (epp > 0 && cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
			return -EBUSY;

		ret = intel_pstate_set_epp(cpu_data, epp);
	} else {
		if (epp == -EINVAL)
			epp = (pref_index - 1) << 2;
		ret = intel_pstate_set_epb(cpu_data->cpu, epp);
	}

	return ret;
}

static ssize_t show_energy_performance_available_preferences(
				struct cpufreq_policy *policy, char *buf)
{
	int i = 0;
	int ret = 0;

	while (energy_perf_strings[i] != NULL)
		ret += sprintf(&buf[ret], "%s ", energy_perf_strings[i++]);

	ret += sprintf(&buf[ret], "\n");

	return ret;
}

cpufreq_freq_attr_ro(energy_performance_available_preferences);

static struct cpufreq_driver intel_pstate;

static ssize_t store_energy_performance_preference(
		struct cpufreq_policy *policy, const char *buf, size_t count)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	char str_preference[21];
	bool raw = false;
	ssize_t ret;
	u32 epp = 0;

	ret = sscanf(buf, "%20s", str_preference);
	if (ret != 1)
		return -EINVAL;

	ret = match_string(energy_perf_strings, -1, str_preference);
	if (ret < 0) {
		if (!boot_cpu_has(X86_FEATURE_HWP_EPP))
			return ret;

		ret = kstrtouint(buf, 10, &epp);
		if (ret)
			return ret;

		if (epp > 255)
			return -EINVAL;

		raw = true;
	}

	/*
	 * This function runs with the policy R/W semaphore held, which
	 * guarantees that the driver pointer will not change while it is
	 * running.
	 */
	if (!intel_pstate_driver)
		return -EAGAIN;

	mutex_lock(&intel_pstate_limits_lock);

	if (intel_pstate_driver == &intel_pstate) {
		ret = intel_pstate_set_energy_pref_index(cpu, ret, raw, epp);
	} else {
		/*
		 * In the passive mode the governor needs to be stopped on the
		 * target CPU before the EPP update and restarted after it,
		 * which is super-heavy-weight, so make sure it is worth doing
		 * upfront.
		 */
		if (!raw)
			epp = ret ? epp_values[ret] : cpu->epp_default;

		if (cpu->epp_cached != epp) {
			int err;

			cpufreq_stop_governor(policy);
			ret = intel_pstate_set_epp(cpu, epp);
			err = cpufreq_start_governor(policy);
			if (!ret)
				ret = err;
		}
	}

	mutex_unlock(&intel_pstate_limits_lock);

	return ret ?: count;
}

static ssize_t show_energy_performance_preference(
				struct cpufreq_policy *policy, char *buf)
{
	struct cpudata *cpu_data = all_cpu_data[policy->cpu];
	int preference, raw_epp;

	preference = intel_pstate_get_energy_pref_index(cpu_data, &raw_epp);
	if (preference < 0)
		return preference;

	if (raw_epp)
		return sprintf(buf, "%d\n", raw_epp);
	else
		return sprintf(buf, "%s\n", energy_perf_strings[preference]);
}

cpufreq_freq_attr_rw(energy_performance_preference);

static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	int ratio, freq;

	ratio = intel_pstate_get_cppc_guaranteed(policy->cpu);
	if (ratio <= 0) {
		u64 cap;

		rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap);
		ratio = HWP_GUARANTEED_PERF(cap);
	}

	freq = ratio * cpu->pstate.scaling;
	if (cpu->pstate.scaling != cpu->pstate.perf_ctl_scaling)
		freq = rounddown(freq, cpu->pstate.perf_ctl_scaling);

	return sprintf(buf, "%d\n", freq);
}

cpufreq_freq_attr_ro(base_frequency);

static struct freq_attr *hwp_cpufreq_attrs[] = {
	&energy_performance_preference,
	&energy_performance_available_preferences,
	&base_frequency,
	NULL,
};

static void __intel_pstate_get_hwp_cap(struct cpudata *cpu)
{
	u64 cap;

	rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap);
	WRITE_ONCE(cpu->hwp_cap_cached, cap);
	cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(cap);
	cpu->pstate.turbo_pstate = HWP_HIGHEST_PERF(cap);
}

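/*
 * Refresh the cached HWP capabilities and recompute the max and turbo
 * frequencies.  On parts where the HWP scaling factor differs from the
 * PERF_CTL one, round the frequencies down to PERF_CTL granularity so they
 * stay consistent with the rest of the frequency arithmetic.
 */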
static void intel_pstate_get_hwp_cap(struct cpudata *cpu)
{
	int scaling = cpu->pstate.scaling;

	__intel_pstate_get_hwp_cap(cpu);

	cpu->pstate.max_freq = cpu->pstate.max_pstate * scaling;
	cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * scaling;
	if (scaling != cpu->pstate.perf_ctl_scaling) {
		int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;

		cpu->pstate.max_freq = rounddown(cpu->pstate.max_freq,
						 perf_ctl_scaling);
		cpu->pstate.turbo_freq = rounddown(cpu->pstate.turbo_freq,
						   perf_ctl_scaling);
	}
}

static void intel_pstate_hwp_set(unsigned int cpu)
{
	struct cpudata *cpu_data = all_cpu_data[cpu];
	int max, min;
	u64 value;
	s16 epp;

	max = cpu_data->max_perf_ratio;
	min = cpu_data->min_perf_ratio;

	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
		min = max;

	rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);

	value &= ~HWP_MIN_PERF(~0L);
	value |= HWP_MIN_PERF(min);

	value &= ~HWP_MAX_PERF(~0L);
	value |= HWP_MAX_PERF(max);

	if (cpu_data->epp_policy == cpu_data->policy)
		goto skip_epp;

	cpu_data->epp_policy = cpu_data->policy;

	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
		epp = intel_pstate_get_epp(cpu_data, value);
		cpu_data->epp_powersave = epp;
		/* If the EPP read failed, then don't try to set it */
		if (epp < 0)
			goto skip_epp;

		epp = 0;
	} else {
		/* skip setting EPP, when saved value is invalid */
		if (cpu_data->epp_powersave < 0)
			goto skip_epp;

		/*
		 * No need to restore EPP when it is not zero.  This means:
		 *  - the policy is not changed
		 *  - the user has updated it manually
		 *  - there was an error reading EPB
		 */
		epp = intel_pstate_get_epp(cpu_data, value);
		if (epp)
			goto skip_epp;

		epp = cpu_data->epp_powersave;
	}
	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
		value &= ~GENMASK_ULL(31, 24);
		value |= (u64)epp << 24;
	} else {
		intel_pstate_set_epb(cpu, epp);
	}
skip_epp:
	WRITE_ONCE(cpu_data->hwp_req_cached, value);
	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
}

static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata);

static void intel_pstate_hwp_offline(struct cpudata *cpu)
{
	u64 value = READ_ONCE(cpu->hwp_req_cached);
	int min_perf;

	intel_pstate_disable_hwp_interrupt(cpu);

	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
		/*
		 * In case the EPP has been set to "performance" by the
		 * active mode "performance" scaling algorithm, replace that
		 * temporary value with the cached EPP one.
		 */
		value &= ~GENMASK_ULL(31, 24);
		value |= HWP_ENERGY_PERF_PREFERENCE(cpu->epp_cached);
		/*
		 * However, make sure that EPP will be set to "performance" when
		 * the CPU is brought back online again and the "performance"
		 * scaling algorithm is still in effect.
		 */
		cpu->epp_policy = CPUFREQ_POLICY_UNKNOWN;
	}

	/*
	 * Clear the desired perf field in the cached HWP request value to
	 * prevent nonzero desired values from being leaked into the active
	 * mode.
	 */
	value &= ~HWP_DESIRED_PERF(~0L);
	WRITE_ONCE(cpu->hwp_req_cached, value);

	value &= ~GENMASK_ULL(31, 0);
	min_perf = HWP_LOWEST_PERF(READ_ONCE(cpu->hwp_cap_cached));

	/* Set hwp_max = hwp_min */
	value |= HWP_MAX_PERF(min_perf);
	value |= HWP_MIN_PERF(min_perf);

	/* Set EPP to min */
	if (boot_cpu_has(X86_FEATURE_HWP_EPP))
		value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);

	wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
}

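/*
 * Tri-state for the "energy efficiency" (EE) bit in MSR_IA32_POWER_CTL: the
 * initial value 0 means the bit has not been changed via sysfs yet, in which
 * case there is nothing to restore on resume.
 */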
#define POWER_CTL_EE_ENABLE	1
#define POWER_CTL_EE_DISABLE	2

static int power_ctl_ee_state;

static void set_power_ctl_ee_state(bool input)
{
	u64 power_ctl;

	mutex_lock(&intel_pstate_driver_lock);
	rdmsrl(MSR_IA32_POWER_CTL, power_ctl);
	if (input) {
		power_ctl &= ~BIT(MSR_IA32_POWER_CTL_BIT_EE);
		power_ctl_ee_state = POWER_CTL_EE_ENABLE;
	} else {
		power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE);
		power_ctl_ee_state = POWER_CTL_EE_DISABLE;
	}
	wrmsrl(MSR_IA32_POWER_CTL, power_ctl);
	mutex_unlock(&intel_pstate_driver_lock);
}

static void intel_pstate_hwp_enable(struct cpudata *cpudata);

static void intel_pstate_hwp_reenable(struct cpudata *cpu)
{
	intel_pstate_hwp_enable(cpu);
	wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, READ_ONCE(cpu->hwp_req_cached));
}

static int intel_pstate_suspend(struct cpufreq_policy *policy)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];

	pr_debug("CPU %d suspending\n", cpu->cpu);

	cpu->suspended = true;

	/* disable HWP interrupt and cancel any pending work */
	intel_pstate_disable_hwp_interrupt(cpu);

	return 0;
}

static int intel_pstate_resume(struct cpufreq_policy *policy)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];

	pr_debug("CPU %d resuming\n", cpu->cpu);

	/* Only restore if the system default is changed */
	if (power_ctl_ee_state == POWER_CTL_EE_ENABLE)
		set_power_ctl_ee_state(true);
	else if (power_ctl_ee_state == POWER_CTL_EE_DISABLE)
		set_power_ctl_ee_state(false);

	if (cpu->suspended && hwp_active) {
		mutex_lock(&intel_pstate_limits_lock);

		/* Re-enable HWP, because "online" has not done that. */
		intel_pstate_hwp_reenable(cpu);

		mutex_unlock(&intel_pstate_limits_lock);
	}

	cpu->suspended = false;

	return 0;
}

static void intel_pstate_update_policies(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		cpufreq_update_policy(cpu);
}

static void __intel_pstate_update_max_freq(struct cpudata *cpudata,
					   struct cpufreq_policy *policy)
{
	policy->cpuinfo.max_freq = global.turbo_disabled_mf ?
			cpudata->pstate.max_freq : cpudata->pstate.turbo_freq;
	refresh_frequency_limits(policy);
}

static void intel_pstate_update_max_freq(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);

	if (!policy)
		return;

	__intel_pstate_update_max_freq(all_cpu_data[cpu], policy);

	cpufreq_cpu_release(policy);
}

static void intel_pstate_update_limits(unsigned int cpu)
{
	mutex_lock(&intel_pstate_driver_lock);

	update_turbo_state();
	/*
	 * If turbo has been turned on or off globally, the policy limits for
	 * all CPUs need to be updated to reflect that.
	 */
	if (global.turbo_disabled_mf != global.turbo_disabled) {
		global.turbo_disabled_mf = global.turbo_disabled;
		arch_set_max_freq_ratio(global.turbo_disabled);
		for_each_possible_cpu(cpu)
			intel_pstate_update_max_freq(cpu);
	} else {
		cpufreq_update_policy(cpu);
	}

	mutex_unlock(&intel_pstate_driver_lock);
}

/************************** sysfs begin ************************/
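/* Generate a trivial sysfs show() callback for a field of the global limits. */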
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct kobj_attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", global.object);		\
	}

static ssize_t intel_pstate_show_status(char *buf);
static int intel_pstate_update_status(const char *buf, size_t size);

static ssize_t show_status(struct kobject *kobj,
			   struct kobj_attribute *attr, char *buf)
{
	ssize_t ret;

	mutex_lock(&intel_pstate_driver_lock);
	ret = intel_pstate_show_status(buf);
	mutex_unlock(&intel_pstate_driver_lock);

	return ret;
}

static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
			    const char *buf, size_t count)
{
	char *p = memchr(buf, '\n', count);
	int ret;

	mutex_lock(&intel_pstate_driver_lock);
	ret = intel_pstate_update_status(buf, p ? p - buf : count);
	mutex_unlock(&intel_pstate_driver_lock);

	return ret < 0 ? ret : count;
}

static ssize_t show_turbo_pct(struct kobject *kobj,
			      struct kobj_attribute *attr, char *buf)
{
	struct cpudata *cpu;
	int total, no_turbo, turbo_pct;
	uint32_t turbo_fp;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	cpu = all_cpu_data[0];

	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
	no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
	turbo_fp = div_fp(no_turbo, total);
	turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));

	mutex_unlock(&intel_pstate_driver_lock);

	return sprintf(buf, "%u\n", turbo_pct);
}

static ssize_t show_num_pstates(struct kobject *kobj,
				struct kobj_attribute *attr, char *buf)
{
	struct cpudata *cpu;
	int total;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	cpu = all_cpu_data[0];
	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;

	mutex_unlock(&intel_pstate_driver_lock);

	return sprintf(buf, "%u\n", total);
}

static ssize_t show_no_turbo(struct kobject *kobj,
			     struct kobj_attribute *attr, char *buf)
{
	ssize_t ret;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	update_turbo_state();
	if (global.turbo_disabled)
		ret = sprintf(buf, "%u\n", global.turbo_disabled);
	else
		ret = sprintf(buf, "%u\n", global.no_turbo);

	mutex_unlock(&intel_pstate_driver_lock);

	return ret;
}

static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
			      const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	mutex_lock(&intel_pstate_limits_lock);

	update_turbo_state();
	if (global.turbo_disabled) {
		pr_notice_once("Turbo disabled by BIOS or unavailable on processor\n");
		mutex_unlock(&intel_pstate_limits_lock);
		mutex_unlock(&intel_pstate_driver_lock);
		return -EPERM;
	}

	global.no_turbo = clamp_t(int, input, 0, 1);

	if (global.no_turbo) {
		struct cpudata *cpu = all_cpu_data[0];
		int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate;

		/* Squash the global minimum into the permitted range. */
		if (global.min_perf_pct > pct)
			global.min_perf_pct = pct;
	}

	mutex_unlock(&intel_pstate_limits_lock);

	intel_pstate_update_policies();
	arch_set_max_freq_ratio(global.no_turbo);

	mutex_unlock(&intel_pstate_driver_lock);

	return count;
}

static void update_qos_request(enum freq_qos_req_type type)
{
	struct freq_qos_request *req;
	struct cpufreq_policy *policy;
	int i;

	for_each_possible_cpu(i) {
		struct cpudata *cpu = all_cpu_data[i];
		unsigned int freq, perf_pct;

		policy = cpufreq_cpu_get(i);
		if (!policy)
			continue;

		req = policy->driver_data;
		cpufreq_cpu_put(policy);

		if (!req)
			continue;

		if (hwp_active)
			intel_pstate_get_hwp_cap(cpu);

		if (type == FREQ_QOS_MIN) {
			perf_pct = global.min_perf_pct;
		} else {
			req++;
			perf_pct = global.max_perf_pct;
		}

		freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * perf_pct, 100);

		if (freq_qos_update_request(req, freq) < 0)
			pr_warn("Failed to update freq constraint: CPU%d\n", i);
	}
}

static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	mutex_lock(&intel_pstate_limits_lock);

	global.max_perf_pct = clamp_t(int, input, global.min_perf_pct, 100);

	mutex_unlock(&intel_pstate_limits_lock);

	if (intel_pstate_driver == &intel_pstate)
		intel_pstate_update_policies();
	else
		update_qos_request(FREQ_QOS_MAX);

	mutex_unlock(&intel_pstate_driver_lock);

	return count;
}

static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	mutex_lock(&intel_pstate_limits_lock);

	global.min_perf_pct = clamp_t(int, input,
				      min_perf_pct_min(), global.max_perf_pct);

	mutex_unlock(&intel_pstate_limits_lock);

	if (intel_pstate_driver == &intel_pstate)
		intel_pstate_update_policies();
	else
		update_qos_request(FREQ_QOS_MIN);

	mutex_unlock(&intel_pstate_driver_lock);

	return count;
}

static ssize_t show_hwp_dynamic_boost(struct kobject *kobj,
				      struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", hwp_boost);
}

static ssize_t store_hwp_dynamic_boost(struct kobject *a,
				       struct kobj_attribute *b,
				       const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = kstrtouint(buf, 10, &input);
	if (ret)
		return ret;

	mutex_lock(&intel_pstate_driver_lock);
	hwp_boost = !!input;
	intel_pstate_update_policies();
	mutex_unlock(&intel_pstate_driver_lock);

	return count;
}

static ssize_t show_energy_efficiency(struct kobject *kobj, struct kobj_attribute *attr,
				      char *buf)
{
	u64 power_ctl;
	int enable;

	rdmsrl(MSR_IA32_POWER_CTL, power_ctl);
	enable = !!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE));
	return sprintf(buf, "%d\n", !enable);
}

static ssize_t store_energy_efficiency(struct kobject *a, struct kobj_attribute *b,
				       const char *buf, size_t count)
{
	bool input;
	int ret;

	ret = kstrtobool(buf, &input);
	if (ret)
		return ret;

	set_power_ctl_ee_state(input);

	return count;
}

show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

define_one_global_rw(status);
define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
define_one_global_ro(turbo_pct);
define_one_global_ro(num_pstates);
define_one_global_rw(hwp_dynamic_boost);
define_one_global_rw(energy_efficiency);

static struct attribute *intel_pstate_attributes[] = {
	&status.attr,
	&no_turbo.attr,
	NULL
};

static const struct attribute_group intel_pstate_attr_group = {
	.attrs = intel_pstate_attributes,
};

static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[];

static struct kobject *intel_pstate_kobject;

static void __init intel_pstate_sysfs_expose_params(void)
{
	int rc;

	intel_pstate_kobject = kobject_create_and_add("intel_pstate",
						&cpu_subsys.dev_root->kobj);
	if (WARN_ON(!intel_pstate_kobject))
		return;

	rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
	if (WARN_ON(rc))
		return;

	if (!boot_cpu_has(X86_FEATURE_HYBRID_CPU)) {
		rc = sysfs_create_file(intel_pstate_kobject, &turbo_pct.attr);
		WARN_ON(rc);

		rc = sysfs_create_file(intel_pstate_kobject, &num_pstates.attr);
		WARN_ON(rc);
	}

	/*
	 * If per cpu limits are enforced there are no global limits, so
	 * return without creating max/min_perf_pct attributes
	 */
	if (per_cpu_limits)
		return;

	rc = sysfs_create_file(intel_pstate_kobject, &max_perf_pct.attr);
	WARN_ON(rc);

	rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
	WARN_ON(rc);

	if (x86_match_cpu(intel_pstate_cpu_ee_disable_ids)) {
		rc = sysfs_create_file(intel_pstate_kobject, &energy_efficiency.attr);
		WARN_ON(rc);
	}
}

static void __init intel_pstate_sysfs_remove(void)
{
	if (!intel_pstate_kobject)
		return;

	sysfs_remove_group(intel_pstate_kobject, &intel_pstate_attr_group);

	if (!boot_cpu_has(X86_FEATURE_HYBRID_CPU)) {
		sysfs_remove_file(intel_pstate_kobject, &num_pstates.attr);
		sysfs_remove_file(intel_pstate_kobject, &turbo_pct.attr);
	}

	if (!per_cpu_limits) {
		sysfs_remove_file(intel_pstate_kobject, &max_perf_pct.attr);
		sysfs_remove_file(intel_pstate_kobject, &min_perf_pct.attr);

		if (x86_match_cpu(intel_pstate_cpu_ee_disable_ids))
			sysfs_remove_file(intel_pstate_kobject, &energy_efficiency.attr);
	}

	kobject_put(intel_pstate_kobject);
}

static void intel_pstate_sysfs_expose_hwp_dynamic_boost(void)
{
	int rc;

	if (!hwp_active)
		return;

	rc = sysfs_create_file(intel_pstate_kobject, &hwp_dynamic_boost.attr);
	WARN_ON_ONCE(rc);
}

static void intel_pstate_sysfs_hide_hwp_dynamic_boost(void)
{
	if (!hwp_active)
		return;

	sysfs_remove_file(intel_pstate_kobject, &hwp_dynamic_boost.attr);
}

/************************** sysfs end ************************/

static void intel_pstate_notify_work(struct work_struct *work)
{
	struct cpudata *cpudata =
		container_of(to_delayed_work(work), struct cpudata, hwp_notify_work);
	struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu);

	if (policy) {
		intel_pstate_get_hwp_cap(cpudata);
		__intel_pstate_update_max_freq(cpudata, policy);

		cpufreq_cpu_release(policy);
	}

	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
}

static DEFINE_SPINLOCK(hwp_notify_lock);
static cpumask_t hwp_intr_enable_mask;

void notify_hwp_interrupt(void)
{
	unsigned int this_cpu = smp_processor_id();
	struct cpudata *cpudata;
	unsigned long flags;
	u64 value;

	if (!READ_ONCE(hwp_active) || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
		return;

	rdmsrl_safe(MSR_HWP_STATUS, &value);
	if (!(value & 0x01))
		return;

	spin_lock_irqsave(&hwp_notify_lock, flags);

	if (!cpumask_test_cpu(this_cpu, &hwp_intr_enable_mask))
		goto ack_intr;

	/*
	 * Currently all_cpu_data is never freed, so it cannot be NULL here,
	 * but keep the check for safety against future changes.
	 */
	if (unlikely(!READ_ONCE(all_cpu_data)))
		goto ack_intr;

	/*
	 * Likewise, the per-CPU data cannot go away while the interrupt is
	 * enabled for this CPU, but check it anyway to be safe.
	 */
	cpudata = READ_ONCE(all_cpu_data[this_cpu]);
	if (unlikely(!cpudata))
		goto ack_intr;

	schedule_delayed_work(&cpudata->hwp_notify_work, msecs_to_jiffies(10));

	spin_unlock_irqrestore(&hwp_notify_lock, flags);

	return;

ack_intr:
	wrmsrl_safe(MSR_HWP_STATUS, 0);
	spin_unlock_irqrestore(&hwp_notify_lock, flags);
}

static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
{
	unsigned long flags;

	if (!boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
		return;

	/* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);

	spin_lock_irqsave(&hwp_notify_lock, flags);
	if (cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask))
		cancel_delayed_work(&cpudata->hwp_notify_work);
	spin_unlock_irqrestore(&hwp_notify_lock, flags);
}

static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
{
	/* Enable HWP notification interrupt for guaranteed performance change */
	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
		unsigned long flags;

		spin_lock_irqsave(&hwp_notify_lock, flags);
		INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
		cpumask_set_cpu(cpudata->cpu, &hwp_intr_enable_mask);
		spin_unlock_irqrestore(&hwp_notify_lock, flags);

		/* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01);
		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
	}
}

static void intel_pstate_update_epp_defaults(struct cpudata *cpudata)
{
	cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);

	/*
	 * If this CPU generation doesn't call for a change in the
	 * balance_perf EPP, return.
	 */
	if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE)
		return;

	/*
	 * If the power-up EPP is something other than the chipset default
	 * 0x80 and is
	 *  - more performance oriented than 0x80 (default balance_perf EPP)
	 *  - but less performance oriented than performance EPP
	 * then use it as the new balance_perf EPP.
	 */
	if (cpudata->epp_default < HWP_EPP_BALANCE_PERFORMANCE &&
	    cpudata->epp_default > HWP_EPP_PERFORMANCE) {
		epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = cpudata->epp_default;
		return;
	}

	/*
	 * Otherwise use the hard coded per-generation value to update the
	 * balance_perf and default EPP.
	 */
	cpudata->epp_default = epp_values[EPP_INDEX_BALANCE_PERFORMANCE];
	intel_pstate_set_epp(cpudata, cpudata->epp_default);
}

static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
	/* First disable HWP notification interrupt till we activate again */
	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);

	wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);

	intel_pstate_enable_hwp_interrupt(cpudata);

	if (cpudata->epp_default >= 0)
		return;

	intel_pstate_update_epp_defaults(cpudata);
}

static int atom_get_min_pstate(void)
{
	u64 value;

	rdmsrl(MSR_ATOM_CORE_RATIOS, value);
	return (value >> 8) & 0x7F;
}

static int atom_get_max_pstate(void)
{
	u64 value;

	rdmsrl(MSR_ATOM_CORE_RATIOS, value);
	return (value >> 16) & 0x7F;
}

static int atom_get_turbo_pstate(void)
{
	u64 value;

	rdmsrl(MSR_ATOM_CORE_TURBO_RATIOS, value);
	return value & 0x7F;
}

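/*
 * Build the PERF_CTL value for an Atom P-state: the ratio goes in bits 15:8,
 * bit 32 disengages turbo when "no_turbo" is set while turbo is available,
 * and the low byte carries the voltage ID (VID) interpolated between the
 * min and max VIDs for the requested ratio.
 */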
static u64 atom_get_val(struct cpudata *cpudata, int pstate)
{
	u64 val;
	int32_t vid_fp;
	u32 vid;

	val = (u64)pstate << 8;
	if (global.no_turbo && !global.turbo_disabled)
		val |= (u64)1 << 32;

	vid_fp = cpudata->vid.min + mul_fp(
		int_tofp(pstate - cpudata->pstate.min_pstate),
		cpudata->vid.ratio);

	vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
	vid = ceiling_fp(vid_fp);

	if (pstate > cpudata->pstate.max_pstate)
		vid = cpudata->vid.turbo;

	return val | vid;
}

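/*
 * Atom P-state ratios scale with the bus clock (BCLK); read the BCLK
 * selector from MSR_FSB_FREQ and translate it to a frequency in kHz.
 */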
static int silvermont_get_scaling(void)
{
	u64 value;
	int i;

	static int silvermont_freq_table[] = {
		83300, 100000, 133300, 116700, 80000};

	rdmsrl(MSR_FSB_FREQ, value);
	i = value & 0x7;
	WARN_ON(i > 4);

	return silvermont_freq_table[i];
}

static int airmont_get_scaling(void)
{
	u64 value;
	int i;

	static int airmont_freq_table[] = {
		83300, 100000, 133300, 116700, 80000,
		93300, 90000, 88900, 87500};

	rdmsrl(MSR_FSB_FREQ, value);
	i = value & 0xF;
	WARN_ON(i > 8);

	return airmont_freq_table[i];
}

static void atom_get_vid(struct cpudata *cpudata)
{
	u64 value;

	rdmsrl(MSR_ATOM_CORE_VIDS, value);
	cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
	cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
	cpudata->vid.ratio = div_fp(
		cpudata->vid.max - cpudata->vid.min,
		int_tofp(cpudata->pstate.max_pstate -
			 cpudata->pstate.min_pstate));

	rdmsrl(MSR_ATOM_CORE_TURBO_VIDS, value);
	cpudata->vid.turbo = value & 0x7f;
}

static int core_get_min_pstate(void)
{
	u64 value;

	rdmsrl(MSR_PLATFORM_INFO, value);
	return (value >> 40) & 0xFF;
}

static int core_get_max_pstate_physical(void)
{
	u64 value;

	rdmsrl(MSR_PLATFORM_INFO, value);
	return (value >> 8) & 0xFF;
}

static int core_get_tdp_ratio(u64 plat_info)
{
	/* Check how many TDP levels are present */
	if (plat_info & 0x600000000) {
		u64 tdp_ctrl;
		u64 tdp_ratio;
		int tdp_msr;
		int err;

		/* Get the TDP level (0, 1, 2) to get ratios */
		err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
		if (err)
			return err;

		/* TDP MSRs are continuous starting at 0x648 */
		tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03);
		err = rdmsrl_safe(tdp_msr, &tdp_ratio);
		if (err)
			return err;

		/* For level 1 and 2, bits[23:16] contain the ratio */
		if (tdp_ctrl & 0x03)
			tdp_ratio >>= 16;

		tdp_ratio &= 0xff; /* ratios are only 8 bits long */
		pr_debug("tdp_ratio %x\n", (int)tdp_ratio);

		return (int)tdp_ratio;
	}

	return -ENXIO;
}

static int core_get_max_pstate(void)
{
	u64 tar;
	u64 plat_info;
	int max_pstate;
	int tdp_ratio;
	int err;

	rdmsrl(MSR_PLATFORM_INFO, plat_info);
	max_pstate = (plat_info >> 8) & 0xFF;

	tdp_ratio = core_get_tdp_ratio(plat_info);
	if (tdp_ratio <= 0)
		return max_pstate;

	if (hwp_active) {
		/* Turbo activation ratio is not used on HWP platforms */
		return tdp_ratio;
	}

	err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
	if (!err) {
		int tar_levels;

		/* Do some sanity checking for safety */
		tar_levels = tar & 0xff;
		if (tdp_ratio - 1 == tar_levels) {
			max_pstate = tar_levels;
			pr_debug("max_pstate=TAC %x\n", max_pstate);
		}
	}

	return max_pstate;
}

static int core_get_turbo_pstate(void)
{
	u64 value;
	int nont, ret;

	rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
	nont = core_get_max_pstate();
	ret = (value) & 255;
	if (ret <= nont)
		ret = nont;
	return ret;
}

static inline int core_get_scaling(void)
{
	return 100000;
}

static u64 core_get_val(struct cpudata *cpudata, int pstate)
{
	u64 val;

	val = (u64)pstate << 8;
	if (global.no_turbo && !global.turbo_disabled)
		val |= (u64)1 << 32;

	return val;
}

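/*
 * On Xeon Phi (KNL), the MPERF counter advances at a lower rate than the
 * TSC; shifting MPERF left by 10 bits puts it back on the TSC scale in the
 * busy-fraction computation in get_target_pstate().
 */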
static int knl_get_aperf_mperf_shift(void)
{
	return 10;
}

static int knl_get_turbo_pstate(void)
{
	u64 value;
	int nont, ret;

	rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
	nont = core_get_max_pstate();
	ret = (((value) >> 8) & 0xFF);
	if (ret <= nont)
		ret = nont;
	return ret;
}

#ifdef CONFIG_ACPI_CPPC_LIB
static u32 hybrid_ref_perf;

static int hybrid_get_cpu_scaling(int cpu)
{
	return DIV_ROUND_UP(core_get_scaling() * hybrid_ref_perf,
			    intel_pstate_cppc_nominal(cpu));
}

static void intel_pstate_cppc_set_cpu_scaling(void)
{
	u32 min_nominal_perf = U32_MAX;
	int cpu;

	for_each_present_cpu(cpu) {
		u32 nominal_perf = intel_pstate_cppc_nominal(cpu);

		if (nominal_perf && nominal_perf < min_nominal_perf)
			min_nominal_perf = nominal_perf;
	}

	if (min_nominal_perf < U32_MAX) {
		hybrid_ref_perf = min_nominal_perf;
		pstate_funcs.get_cpu_scaling = hybrid_get_cpu_scaling;
	}
}
#else
static inline void intel_pstate_cppc_set_cpu_scaling(void)
{
}
#endif /* CONFIG_ACPI_CPPC_LIB */

static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
{
	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
	cpu->pstate.current_pstate = pstate;
	/*
	 * Generally, there is no guarantee that this code will always run on
	 * the CPU being updated, so force the register update to run on the
	 * right CPU.
	 */
	wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
		      pstate_funcs.get_val(cpu, pstate));
}

static void intel_pstate_set_min_pstate(struct cpudata *cpu)
{
	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
}

static void intel_pstate_max_within_limits(struct cpudata *cpu)
{
	int pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio);

	update_turbo_state();
	intel_pstate_set_pstate(cpu, pstate);
}

static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
	int perf_ctl_max_phys = pstate_funcs.get_max_physical();
	int perf_ctl_scaling = pstate_funcs.get_scaling();

	cpu->pstate.min_pstate = pstate_funcs.get_min();
	cpu->pstate.max_pstate_physical = perf_ctl_max_phys;
	cpu->pstate.perf_ctl_scaling = perf_ctl_scaling;

	if (hwp_active && !hwp_mode_bdw) {
		__intel_pstate_get_hwp_cap(cpu);

		if (pstate_funcs.get_cpu_scaling) {
			cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu);
			if (cpu->pstate.scaling != perf_ctl_scaling)
				intel_pstate_hybrid_hwp_adjust(cpu);
		} else {
			cpu->pstate.scaling = perf_ctl_scaling;
		}
	} else {
		cpu->pstate.scaling = perf_ctl_scaling;
		cpu->pstate.max_pstate = pstate_funcs.get_max();
		cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
	}

	if (cpu->pstate.scaling == perf_ctl_scaling) {
		cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;
		cpu->pstate.max_freq = cpu->pstate.max_pstate * perf_ctl_scaling;
		cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * perf_ctl_scaling;
	}

	if (pstate_funcs.get_aperf_mperf_shift)
		cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();

	if (pstate_funcs.get_vid)
		pstate_funcs.get_vid(cpu);

	intel_pstate_set_min_pstate(cpu);
}

/*
 * A long hold time will keep the high performance limits in place for a long
 * time, which negatively impacts perf/watt for some workloads, like
 * specpower. 3ms is based on experiments on some workloads.
 */
static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;

static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
{
	u64 hwp_req = READ_ONCE(cpu->hwp_req_cached);
	u64 hwp_cap = READ_ONCE(cpu->hwp_cap_cached);
	u32 max_limit = (hwp_req & 0xff00) >> 8;
	u32 min_limit = (hwp_req & 0xff);
	u32 boost_level1;

	/*
	 * Cases to consider (User changes via sysfs or boot time):
	 * If, P0 (Turbo max) = P1 (Guaranteed max) = min:
	 *	No boost, return.
	 * If, P0 (Turbo max) > P1 (Guaranteed max) = min:
	 *     Should result in one level boost only for P0.
	 * If, P0 (Turbo max) = P1 (Guaranteed max) > min:
	 *     Should result in two level boost:
	 *         (min + p1)/2 and P1.
	 * If, P0 (Turbo max) > P1 (Guaranteed max) > min:
	 *     Should result in three level boost:
	 *        (min + p1)/2, P1 and P0.
	 */

	/* If max and min are equal or already at max, nothing to boost */
	if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit)
		return;

	if (!cpu->hwp_boost_min)
		cpu->hwp_boost_min = min_limit;

	/* level at half way mark between min and guaranteed */
	boost_level1 = (HWP_GUARANTEED_PERF(hwp_cap) + min_limit) >> 1;

	if (cpu->hwp_boost_min < boost_level1)
		cpu->hwp_boost_min = boost_level1;
	else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(hwp_cap))
		cpu->hwp_boost_min = HWP_GUARANTEED_PERF(hwp_cap);
	else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(hwp_cap) &&
		 max_limit != HWP_GUARANTEED_PERF(hwp_cap))
		cpu->hwp_boost_min = max_limit;
	else
		return;

	hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min;
	wrmsrl(MSR_HWP_REQUEST, hwp_req);
	cpu->last_update = cpu->sample.time;
}

static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
{
	if (cpu->hwp_boost_min) {
		bool expired;

		/* Check if we are idle for hold time to boost down */
		expired = time_after64(cpu->sample.time, cpu->last_update +
				       hwp_boost_hold_time_ns);
		if (expired) {
			wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached);
			cpu->hwp_boost_min = 0;
		}
	}
	cpu->last_update = cpu->sample.time;
}

static inline void intel_pstate_update_util_hwp_local(struct cpudata *cpu,
						      u64 time)
{
	cpu->sample.time = time;

	if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) {
		bool do_io = false;

		cpu->sched_flags = 0;
		/*
		 * Set iowait_boost flag and update time. Since the IO WAIT
		 * flag is set all the time, we can't just conclude that some
		 * IO-bound activity is scheduled on this CPU from a single
		 * occurrence. Only if we receive at least two in two
		 * consecutive ticks do we treat this as a boost candidate.
		 */
		if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
			do_io = true;

		cpu->last_io_update = time;

		if (do_io)
			intel_pstate_hwp_boost_up(cpu);

	} else {
		intel_pstate_hwp_boost_down(cpu);
	}
}

static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
						u64 time, unsigned int flags)
{
	struct cpudata *cpu = container_of(data, struct cpudata, update_util);

	cpu->sched_flags |= flags;

	if (smp_processor_id() == cpu->cpu)
		intel_pstate_update_util_hwp_local(cpu, time);
}

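/* core_avg_perf = delta(APERF) / delta(MPERF) in EXT_FRAC_BITS fixed point. */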
static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
{
	struct sample *sample = &cpu->sample;

	sample->core_avg_perf = div_ext_fp(sample->aperf, sample->mperf);
}

static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
{
	u64 aperf, mperf;
	unsigned long flags;
	u64 tsc;

	local_irq_save(flags);
	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	tsc = rdtsc();
	if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) {
		local_irq_restore(flags);
		return false;
	}
	local_irq_restore(flags);

	cpu->last_sample_time = cpu->sample.time;
	cpu->sample.time = time;
	cpu->sample.aperf = aperf;
	cpu->sample.mperf = mperf;
	cpu->sample.tsc = tsc;
	cpu->sample.aperf -= cpu->prev_aperf;
	cpu->sample.mperf -= cpu->prev_mperf;
	cpu->sample.tsc -= cpu->prev_tsc;

	cpu->prev_aperf = aperf;
	cpu->prev_mperf = mperf;
	cpu->prev_tsc = tsc;

	/*
	 * First time this function is invoked in a given cycle, all of the
	 * previous sample data fields are equal to zero or stale and they must
	 * be populated with meaningful numbers for things to work, so assume
	 * that sample.time will always be reset before setting the utilization
	 * update hook and make the caller skip the sample then.
	 */
	if (cpu->last_sample_time) {
		intel_pstate_calc_avg_perf(cpu);
		return true;
	}
	return false;
}
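/*
 * Average effective frequency over the last sample: the APERF/MPERF ratio
 * times the base frequency (cpu_khz), in kHz.
 */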
static inline int32_t get_avg_frequency(struct cpudata *cpu)
{
	return mul_ext_fp(cpu->sample.core_avg_perf, cpu_khz);
}

static inline int32_t get_avg_pstate(struct cpudata *cpu)
{
	return mul_ext_fp(cpu->pstate.max_pstate_physical,
			  cpu->sample.core_avg_perf);
}

static inline int32_t get_target_pstate(struct cpudata *cpu)
{
	struct sample *sample = &cpu->sample;
	int32_t busy_frac;
	int target, avg_pstate;

	busy_frac = div_fp(sample->mperf << cpu->aperf_mperf_shift,
			   sample->tsc);

	if (busy_frac < cpu->iowait_boost)
		busy_frac = cpu->iowait_boost;

	sample->busy_scaled = busy_frac * 100;

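	/*
	 * Map the busy fraction onto the available P-state range
	 * proportionally, with 25% headroom (target += target >> 2) so that
	 * a fully busy CPU is asked to run slightly above its nominal
	 * ceiling.
	 */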
	target = global.no_turbo || global.turbo_disabled ?
		cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
	target += target >> 2;
	target = mul_fp(target, busy_frac);
	if (target < cpu->pstate.min_pstate)
		target = cpu->pstate.min_pstate;

	/*
	 * If the average P-state during the previous cycle was higher than the
	 * current target, add 50% of the difference to the target to reduce
	 * possible performance oscillations and offset possible performance
	 * loss related to moving the workload from one CPU to another within
	 * a package/module.
	 */
	avg_pstate = get_avg_pstate(cpu);
	if (avg_pstate > target)
		target += (avg_pstate - target) >> 1;

	return target;
}

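/* Clamp a target P-state to the per-CPU limits derived from the policy min/max. */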
static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
{
	int min_pstate = max(cpu->pstate.min_pstate, cpu->min_perf_ratio);
	int max_pstate = max(min_pstate, cpu->max_perf_ratio);

	return clamp_t(int, pstate, min_pstate, max_pstate);
}

static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
{
	if (pstate == cpu->pstate.current_pstate)
		return;

	cpu->pstate.current_pstate = pstate;
	wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
}

static void intel_pstate_adjust_pstate(struct cpudata *cpu)
{
	int from = cpu->pstate.current_pstate;
	struct sample *sample;
	int target_pstate;

	update_turbo_state();

	target_pstate = get_target_pstate(cpu);
	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
	trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu);
	intel_pstate_update_pstate(cpu, target_pstate);

	sample = &cpu->sample;
	trace_pstate_sample(mul_ext_fp(100, sample->core_avg_perf),
		fp_toint(sample->busy_scaled),
		from,
		cpu->pstate.current_pstate,
		sample->mperf,
		sample->aperf,
		sample->tsc,
		get_avg_frequency(cpu),
		fp_toint(cpu->iowait_boost * 100));
}

static void intel_pstate_update_util(struct update_util_data *data, u64 time,
				     unsigned int flags)
{
	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
	u64 delta_ns;

	/* Don't allow remote callbacks */
	if (smp_processor_id() != cpu->cpu)
		return;

	delta_ns = time - cpu->last_update;
	if (flags & SCHED_CPUFREQ_IOWAIT) {
		/* Start over if the CPU may have been idle. */
		if (delta_ns > TICK_NSEC) {
			cpu->iowait_boost = ONE_EIGHTH_FP;
		} else if (cpu->iowait_boost >= ONE_EIGHTH_FP) {
			cpu->iowait_boost <<= 1;
			if (cpu->iowait_boost > int_tofp(1))
				cpu->iowait_boost = int_tofp(1);
		} else {
			cpu->iowait_boost = ONE_EIGHTH_FP;
		}
	} else if (cpu->iowait_boost) {
		/* Clear iowait_boost if the CPU may have been idle. */
		if (delta_ns > TICK_NSEC)
			cpu->iowait_boost = 0;
		else
			cpu->iowait_boost >>= 1;
	}
2343 cpu->last_update = time;
2344 delta_ns = time - cpu->sample.time;
2345 if ((s64)delta_ns < INTEL_PSTATE_SAMPLING_INTERVAL)
2346 return;
2347
2348 if (intel_pstate_sample(cpu, time))
2349 intel_pstate_adjust_pstate(cpu);
2350 }
2351
2352 static struct pstate_funcs core_funcs = {
2353 .get_max = core_get_max_pstate,
2354 .get_max_physical = core_get_max_pstate_physical,
2355 .get_min = core_get_min_pstate,
2356 .get_turbo = core_get_turbo_pstate,
2357 .get_scaling = core_get_scaling,
2358 .get_val = core_get_val,
2359 };
2360
2361 static const struct pstate_funcs silvermont_funcs = {
2362 .get_max = atom_get_max_pstate,
2363 .get_max_physical = atom_get_max_pstate,
2364 .get_min = atom_get_min_pstate,
2365 .get_turbo = atom_get_turbo_pstate,
2366 .get_val = atom_get_val,
2367 .get_scaling = silvermont_get_scaling,
2368 .get_vid = atom_get_vid,
2369 };
2370
2371 static const struct pstate_funcs airmont_funcs = {
2372 .get_max = atom_get_max_pstate,
2373 .get_max_physical = atom_get_max_pstate,
2374 .get_min = atom_get_min_pstate,
2375 .get_turbo = atom_get_turbo_pstate,
2376 .get_val = atom_get_val,
2377 .get_scaling = airmont_get_scaling,
2378 .get_vid = atom_get_vid,
2379 };
2380
2381 static const struct pstate_funcs knl_funcs = {
2382 .get_max = core_get_max_pstate,
2383 .get_max_physical = core_get_max_pstate_physical,
2384 .get_min = core_get_min_pstate,
2385 .get_turbo = knl_get_turbo_pstate,
2386 .get_aperf_mperf_shift = knl_get_aperf_mperf_shift,
2387 .get_scaling = core_get_scaling,
2388 .get_val = core_get_val,
2389 };
2390
2391 #define X86_MATCH(model, policy) \
2392 X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \
2393 X86_FEATURE_APERFMPERF, &policy)
2394
2395 static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
2396 X86_MATCH(SANDYBRIDGE, core_funcs),
2397 X86_MATCH(SANDYBRIDGE_X, core_funcs),
2398 X86_MATCH(ATOM_SILVERMONT, silvermont_funcs),
2399 X86_MATCH(IVYBRIDGE, core_funcs),
2400 X86_MATCH(HASWELL, core_funcs),
2401 X86_MATCH(BROADWELL, core_funcs),
2402 X86_MATCH(IVYBRIDGE_X, core_funcs),
2403 X86_MATCH(HASWELL_X, core_funcs),
2404 X86_MATCH(HASWELL_L, core_funcs),
2405 X86_MATCH(HASWELL_G, core_funcs),
2406 X86_MATCH(BROADWELL_G, core_funcs),
2407 X86_MATCH(ATOM_AIRMONT, airmont_funcs),
2408 X86_MATCH(SKYLAKE_L, core_funcs),
2409 X86_MATCH(BROADWELL_X, core_funcs),
2410 X86_MATCH(SKYLAKE, core_funcs),
2411 X86_MATCH(BROADWELL_D, core_funcs),
2412 X86_MATCH(XEON_PHI_KNL, knl_funcs),
2413 X86_MATCH(XEON_PHI_KNM, knl_funcs),
2414 X86_MATCH(ATOM_GOLDMONT, core_funcs),
2415 X86_MATCH(ATOM_GOLDMONT_PLUS, core_funcs),
2416 X86_MATCH(SKYLAKE_X, core_funcs),
2417 X86_MATCH(COMETLAKE, core_funcs),
2418 X86_MATCH(ICELAKE_X, core_funcs),
2419 {}
2420 };
2421 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
2422
2423 static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
2424 X86_MATCH(BROADWELL_D, core_funcs),
2425 X86_MATCH(BROADWELL_X, core_funcs),
2426 X86_MATCH(SKYLAKE_X, core_funcs),
2427 X86_MATCH(ICELAKE_X, core_funcs),
2428 X86_MATCH(SAPPHIRERAPIDS_X, core_funcs),
2429 {}
2430 };
2431
2432 static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
2433 X86_MATCH(KABYLAKE, core_funcs),
2434 {}
2435 };
2436
2437 static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = {
2438 X86_MATCH(SKYLAKE_X, core_funcs),
2439 X86_MATCH(SKYLAKE, core_funcs),
2440 {}
2441 };
2442
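/*
 * Look up, or allocate on first use, the per-CPU data for @cpunum and
 * bring HWP back into a usable state if it is active.
 */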
2443 static int intel_pstate_init_cpu(unsigned int cpunum)
2444 {
2445 struct cpudata *cpu;
2446
2447 cpu = all_cpu_data[cpunum];
2448
2449 if (!cpu) {
2450 cpu = kzalloc(sizeof(*cpu), GFP_KERNEL);
2451 if (!cpu)
2452 return -ENOMEM;
2453
2454 WRITE_ONCE(all_cpu_data[cpunum], cpu);
2455
2456 cpu->cpu = cpunum;
2457
2458 cpu->epp_default = -EINVAL;
2459
2460 if (hwp_active) {
2461 const struct x86_cpu_id *id;
2462
2463 intel_pstate_hwp_enable(cpu);
2464
2465 id = x86_match_cpu(intel_pstate_hwp_boost_ids);
2466 if (id && intel_pstate_acpi_pm_profile_server())
2467 hwp_boost = true;
2468 }
2469 } else if (hwp_active) {
2470 /*
2471  * Re-enable HWP in case this happens after a resume from ACPI
2472  * S3 if the CPU was offline during the whole system/resume
2473  * cycle.
2474  */
2475 intel_pstate_hwp_reenable(cpu);
2476 }
2477
2478 cpu->epp_powersave = -EINVAL;
2479 cpu->epp_policy = 0;
2480
2481 intel_pstate_get_cpu_pstates(cpu);
2482
2483 pr_debug("controlling: cpu %d\n", cpunum);
2484
2485 return 0;
2486 }
2487
2488 static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
2489 {
2490 struct cpudata *cpu = all_cpu_data[cpu_num];
2491
2492 if (hwp_active && !hwp_boost)
2493 return;
2494
2495 if (cpu->update_util_set)
2496 return;
2497
2498 /* Prevent intel_pstate_update_util() from using stale data. */
2499 cpu->sample.time = 0;
2500 cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
2501 (hwp_active ?
2502 intel_pstate_update_util_hwp :
2503 intel_pstate_update_util));
2504 cpu->update_util_set = true;
2505 }
2506
2507 static void intel_pstate_clear_update_util_hook(unsigned int cpu)
2508 {
2509 struct cpudata *cpu_data = all_cpu_data[cpu];
2510
2511 if (!cpu_data->update_util_set)
2512 return;
2513
2514 cpufreq_remove_update_util_hook(cpu);
2515 cpu_data->update_util_set = false;
2516 synchronize_rcu();
2517 }
2518
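/* Highest frequency the policy may use, depending on turbo availability. */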
2519 static int intel_pstate_get_max_freq(struct cpudata *cpu)
2520 {
2521 return global.turbo_disabled || global.no_turbo ?
2522 cpu->pstate.max_freq : cpu->pstate.turbo_freq;
2523 }
2524
2525 static void intel_pstate_update_perf_limits(struct cpudata *cpu,
2526 unsigned int policy_min,
2527 unsigned int policy_max)
2528 {
2529 int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
2530 int32_t max_policy_perf, min_policy_perf;
2531
2532 max_policy_perf = policy_max / perf_ctl_scaling;
2533 if (policy_max == policy_min) {
2534 min_policy_perf = max_policy_perf;
2535 } else {
2536 min_policy_perf = policy_min / perf_ctl_scaling;
2537 min_policy_perf = clamp_t(int32_t, min_policy_perf,
2538 0, max_policy_perf);
2539 }
2540
2541 /*
2542  * HWP needs some special consideration, because HWP_REQUEST uses
2543  * abstract values to represent performance rather than pure ratios.
2544  */
2545 if (hwp_active && cpu->pstate.scaling != perf_ctl_scaling) {
2546 int scaling = cpu->pstate.scaling;
2547 int freq;
2548
2549 freq = max_policy_perf * perf_ctl_scaling;
2550 max_policy_perf = DIV_ROUND_UP(freq, scaling);
2551 freq = min_policy_perf * perf_ctl_scaling;
2552 min_policy_perf = DIV_ROUND_UP(freq, scaling);
2553 }
2554
2555 pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n",
2556 cpu->cpu, min_policy_perf, max_policy_perf);
2557
2558 /* Normalize user input to [min_perf, max_perf] */
2559 if (per_cpu_limits) {
2560 cpu->min_perf_ratio = min_policy_perf;
2561 cpu->max_perf_ratio = max_policy_perf;
2562 } else {
2563 int turbo_max = cpu->pstate.turbo_pstate;
2564 int32_t global_min, global_max;
2565
2566 /* Global limits are in percent of the maximum turbo P-state. */
2567 global_max = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100);
2568 global_min = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100);
2569 global_min = clamp_t(int32_t, global_min, 0, global_max);
2570
2571 pr_debug("cpu:%d global_min:%d global_max:%d\n", cpu->cpu,
2572 global_min, global_max);
2573
2574 cpu->min_perf_ratio = max(min_policy_perf, global_min);
2575 cpu->min_perf_ratio = min(cpu->min_perf_ratio, max_policy_perf);
2576 cpu->max_perf_ratio = min(max_policy_perf, global_max);
2577 cpu->max_perf_ratio = max(min_policy_perf, cpu->max_perf_ratio);
2578
2579 /* Make sure min_perf <= max_perf */
2580 cpu->min_perf_ratio = min(cpu->min_perf_ratio,
2581 cpu->max_perf_ratio);
2582
2583 }
2584 pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", cpu->cpu,
2585 cpu->max_perf_ratio,
2586 cpu->min_perf_ratio);
2587 }
2588
2589 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
2590 {
2591 struct cpudata *cpu;
2592
2593 if (!policy->cpuinfo.max_freq)
2594 return -ENODEV;
2595
2596 pr_debug("set_policy cpuinfo.max %u policy->max %u\n",
2597 policy->cpuinfo.max_freq, policy->max);
2598
2599 cpu = all_cpu_data[policy->cpu];
2600 cpu->policy = policy->policy;
2601
2602 mutex_lock(&intel_pstate_limits_lock);
2603
2604 intel_pstate_update_perf_limits(cpu, policy->min, policy->max);
2605
2606 if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
2607 /*
2608  * NOHZ_FULL CPUs need this as the governor callback may not
2609  * be invoked on them.
2610  */
2611 intel_pstate_clear_update_util_hook(policy->cpu);
2612 intel_pstate_max_within_limits(cpu);
2613 } else {
2614 intel_pstate_set_update_util_hook(policy->cpu);
2615 }
2616
2617 if (hwp_active) {
2618 /*
2619  * If hwp_boost was active before and has just been turned off
2620  * dynamically, the update util hook registered for it needs to
2621  * be cleared here.
2622  */
2623 if (!hwp_boost)
2624 intel_pstate_clear_update_util_hook(policy->cpu);
2625 intel_pstate_hwp_set(policy->cpu);
2626 }
2627
2628 mutex_unlock(&intel_pstate_limits_lock);
2629
2630 return 0;
2631 }
2632
2633 static void intel_pstate_adjust_policy_max(struct cpudata *cpu,
2634 struct cpufreq_policy_data *policy)
2635 {
2636 if (!hwp_active &&
2637 cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
2638 policy->max < policy->cpuinfo.max_freq &&
2639 policy->max > cpu->pstate.max_freq) {
2640 pr_debug("policy->max > max non turbo frequency\n");
2641 policy->max = policy->cpuinfo.max_freq;
2642 }
2643 }
2644
2645 static void intel_pstate_verify_cpu_policy(struct cpudata *cpu,
2646 struct cpufreq_policy_data *policy)
2647 {
2648 int max_freq;
2649
2650 update_turbo_state();
2651 if (hwp_active) {
2652 intel_pstate_get_hwp_cap(cpu);
2653 max_freq = global.no_turbo || global.turbo_disabled ?
2654 cpu->pstate.max_freq : cpu->pstate.turbo_freq;
2655 } else {
2656 max_freq = intel_pstate_get_max_freq(cpu);
2657 }
2658 cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, max_freq);
2659
2660 intel_pstate_adjust_policy_max(cpu, policy);
2661 }
2662
2663 static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
2664 {
2665 intel_pstate_verify_cpu_policy(all_cpu_data[policy->cpu], policy);
2666
2667 return 0;
2668 }
2669
2670 static int intel_cpufreq_cpu_offline(struct cpufreq_policy *policy)
2671 {
2672 struct cpudata *cpu = all_cpu_data[policy->cpu];
2673
2674 pr_debug("CPU %d going offline\n", cpu->cpu);
2675
2676 if (cpu->suspended)
2677 return 0;
2678
2679 /*
2680  * If the CPU is an SMT thread and it goes offline with the performance
2681  * settings different from the minimum, it will prevent its sibling
2682  * from getting to lower performance levels, so force the minimum
2683  * performance on CPU offline to prevent that from happening.
2684  */
2685 if (hwp_active)
2686 intel_pstate_hwp_offline(cpu);
2687 else
2688 intel_pstate_set_min_pstate(cpu);
2689
2690 intel_pstate_exit_perf_limits(policy);
2691
2692 return 0;
2693 }
2694
2695 static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
2696 {
2697 struct cpudata *cpu = all_cpu_data[policy->cpu];
2698
2699 pr_debug("CPU %d going online\n", cpu->cpu);
2700
2701 intel_pstate_init_acpi_perf_limits(policy);
2702
2703 if (hwp_active) {
2704 /*
2705  * Re-enable HWP and clear the "suspended" flag to let "resume"
2706  * know that it will work if set properly.
2707  */
2708 intel_pstate_hwp_reenable(cpu);
2709 cpu->suspended = false;
2710 }
2711
2712 return 0;
2713 }
2714
2715 static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
2716 {
2717 intel_pstate_clear_update_util_hook(policy->cpu);
2718
2719 return intel_cpufreq_cpu_offline(policy);
2720 }
2721
2722 static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
2723 {
2724 pr_debug("CPU %d exiting\n", policy->cpu);
2725
2726 policy->fast_switch_possible = false;
2727
2728 return 0;
2729 }
2730
2731 static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
2732 {
2733 struct cpudata *cpu;
2734 int rc;
2735
2736 rc = intel_pstate_init_cpu(policy->cpu);
2737 if (rc)
2738 return rc;
2739
2740 cpu = all_cpu_data[policy->cpu];
2741
2742 cpu->max_perf_ratio = 0xFF;
2743 cpu->min_perf_ratio = 0;
2744
2745 /* cpuinfo and default policy values */
2746 policy->cpuinfo.min_freq = cpu->pstate.min_freq;
2747 update_turbo_state();
2748 global.turbo_disabled_mf = global.turbo_disabled;
2749 policy->cpuinfo.max_freq = global.turbo_disabled ?
2750 cpu->pstate.max_freq : cpu->pstate.turbo_freq;
2751
2752 policy->min = policy->cpuinfo.min_freq;
2753 policy->max = policy->cpuinfo.max_freq;
2754
2755 intel_pstate_init_acpi_perf_limits(policy);
2756
2757 policy->fast_switch_possible = true;
2758
2759 return 0;
2760 }
2761
2762 static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
2763 {
2764 int ret = __intel_pstate_cpu_init(policy);
2765
2766 if (ret)
2767 return ret;
2768
2769 /*
2770  * Set the policy to powersave to provide a valid fallback value in case
2771  * the default cpufreq governor is neither powersave nor performance.
2772  */
2773 policy->policy = CPUFREQ_POLICY_POWERSAVE;
2774
2775 if (hwp_active) {
2776 struct cpudata *cpu = all_cpu_data[policy->cpu];
2777
2778 cpu->epp_cached = intel_pstate_get_epp(cpu, 0);
2779 }
2780
2781 return 0;
2782 }
2783
2784 static struct cpufreq_driver intel_pstate = {
2785 .flags = CPUFREQ_CONST_LOOPS,
2786 .verify = intel_pstate_verify_policy,
2787 .setpolicy = intel_pstate_set_policy,
2788 .suspend = intel_pstate_suspend,
2789 .resume = intel_pstate_resume,
2790 .init = intel_pstate_cpu_init,
2791 .exit = intel_pstate_cpu_exit,
2792 .offline = intel_pstate_cpu_offline,
2793 .online = intel_pstate_cpu_online,
2794 .update_limits = intel_pstate_update_limits,
2795 .name = "intel_pstate",
2796 };
2797
2798 static int intel_cpufreq_verify_policy(struct cpufreq_policy_data *policy)
2799 {
2800 struct cpudata *cpu = all_cpu_data[policy->cpu];
2801
2802 intel_pstate_verify_cpu_policy(cpu, policy);
2803 intel_pstate_update_perf_limits(cpu, policy->min, policy->max);
2804
2805 return 0;
2806 }
2807
2808 /*
2809  * Use of trace in passive mode:
2810  *
2811  * In passive mode the trace core_busy field (also known as the
2812  * performance field, and labelled as such on the graphs; also known as
2813  * core_avg_perf) is not needed and so is re-assigned to indicate if the
2814  * driver call was via the normal or fast switch path. Various graphs
2815  * output from the intel_pstate_tracer.py utility that include core_busy
2816  * (or performance or core_avg_perf) have a fixed y-axis from 0 to 100%,
2817  * so we use 10 to indicate the normal path through the driver, and
2818  * 90 to indicate the fast switch path through the driver.
2819  * The scaled_busy field is not used, and is set to 0.
2820  */
2821 #define INTEL_PSTATE_TRACE_TARGET 10
2822 #define INTEL_PSTATE_TRACE_FAST_SWITCH 90
2823
2824 static void intel_cpufreq_trace(struct cpudata *cpu, unsigned int trace_type, int old_pstate)
2825 {
2826 struct sample *sample;
2827
2828 if (!trace_pstate_sample_enabled())
2829 return;
2830
2831 if (!intel_pstate_sample(cpu, ktime_get()))
2832 return;
2833
2834 sample = &cpu->sample;
2835 trace_pstate_sample(trace_type,
2836 0,
2837 old_pstate,
2838 cpu->pstate.current_pstate,
2839 sample->mperf,
2840 sample->aperf,
2841 sample->tsc,
2842 get_avg_frequency(cpu),
2843 fp_toint(cpu->iowait_boost * 100));
2844 }
2845
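/*
 * Read-modify-write the cached HWP_REQUEST value: substitute the new
 * minimum, maximum and desired performance levels and write the result
 * to the MSR, locally for fast switching or via an IPI otherwise. The
 * MSR write is skipped entirely if nothing has changed.
 */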
2846 static void intel_cpufreq_hwp_update(struct cpudata *cpu, u32 min, u32 max,
2847 u32 desired, bool fast_switch)
2848 {
2849 u64 prev = READ_ONCE(cpu->hwp_req_cached), value = prev;
2850
2851 value &= ~HWP_MIN_PERF(~0L);
2852 value |= HWP_MIN_PERF(min);
2853
2854 value &= ~HWP_MAX_PERF(~0L);
2855 value |= HWP_MAX_PERF(max);
2856
2857 value &= ~HWP_DESIRED_PERF(~0L);
2858 value |= HWP_DESIRED_PERF(desired);
2859
2860 if (value == prev)
2861 return;
2862
2863 WRITE_ONCE(cpu->hwp_req_cached, value);
2864 if (fast_switch)
2865 wrmsrl(MSR_HWP_REQUEST, value);
2866 else
2867 wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
2868 }
2869
2870 static void intel_cpufreq_perf_ctl_update(struct cpudata *cpu,
2871 u32 target_pstate, bool fast_switch)
2872 {
2873 if (fast_switch)
2874 wrmsrl(MSR_IA32_PERF_CTL,
2875 pstate_funcs.get_val(cpu, target_pstate));
2876 else
2877 wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
2878 pstate_funcs.get_val(cpu, target_pstate));
2879 }
2880
2881 static int intel_cpufreq_update_pstate(struct cpufreq_policy *policy,
2882 int target_pstate, bool fast_switch)
2883 {
2884 struct cpudata *cpu = all_cpu_data[policy->cpu];
2885 int old_pstate = cpu->pstate.current_pstate;
2886
2887 target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
2888 if (hwp_active) {
2889 int max_pstate = policy->strict_target ?
2890 target_pstate : cpu->max_perf_ratio;
2891
2892 intel_cpufreq_hwp_update(cpu, target_pstate, max_pstate, 0,
2893 fast_switch);
2894 } else if (target_pstate != old_pstate) {
2895 intel_cpufreq_perf_ctl_update(cpu, target_pstate, fast_switch);
2896 }
2897
2898 cpu->pstate.current_pstate = target_pstate;
2899
2900 intel_cpufreq_trace(cpu, fast_switch ? INTEL_PSTATE_TRACE_FAST_SWITCH :
2901 INTEL_PSTATE_TRACE_TARGET, old_pstate);
2902
2903 return target_pstate;
2904 }
2905
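/*
 * Passive-mode ->target callback: convert the requested frequency to a
 * P-state according to the relation (RELATION_L rounds up, RELATION_H
 * rounds down, anything else picks the closest) and apply it within the
 * usual cpufreq transition notifications.
 */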
2906 static int intel_cpufreq_target(struct cpufreq_policy *policy,
2907 unsigned int target_freq,
2908 unsigned int relation)
2909 {
2910 struct cpudata *cpu = all_cpu_data[policy->cpu];
2911 struct cpufreq_freqs freqs;
2912 int target_pstate;
2913
2914 update_turbo_state();
2915
2916 freqs.old = policy->cur;
2917 freqs.new = target_freq;
2918
2919 cpufreq_freq_transition_begin(policy, &freqs);
2920
2921 switch (relation) {
2922 case CPUFREQ_RELATION_L:
2923 target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling);
2924 break;
2925 case CPUFREQ_RELATION_H:
2926 target_pstate = freqs.new / cpu->pstate.scaling;
2927 break;
2928 default:
2929 target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling);
2930 break;
2931 }
2932
2933 target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, false);
2934
2935 freqs.new = target_pstate * cpu->pstate.scaling;
2936
2937 cpufreq_freq_transition_end(policy, &freqs, false);
2938
2939 return 0;
2940 }
2941
2942 static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
2943 unsigned int target_freq)
2944 {
2945 struct cpudata *cpu = all_cpu_data[policy->cpu];
2946 int target_pstate;
2947
2948 update_turbo_state();
2949
2950 target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
2951
2952 target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true);
2953
2954 return target_pstate * cpu->pstate.scaling;
2955 }
2956
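/*
 * Fast schedutil path (HWP only): scale the abstract min_perf and
 * target_perf values against @capacity to HWP performance levels, clamp
 * them to the driver limits, and write them out in a single HWP_REQUEST
 * update with the desired-performance field set.
 */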
2957 static void intel_cpufreq_adjust_perf(unsigned int cpunum,
2958 unsigned long min_perf,
2959 unsigned long target_perf,
2960 unsigned long capacity)
2961 {
2962 struct cpudata *cpu = all_cpu_data[cpunum];
2963 u64 hwp_cap = READ_ONCE(cpu->hwp_cap_cached);
2964 int old_pstate = cpu->pstate.current_pstate;
2965 int cap_pstate, min_pstate, max_pstate, target_pstate;
2966
2967 update_turbo_state();
2968 cap_pstate = global.turbo_disabled ? HWP_GUARANTEED_PERF(hwp_cap) :
2969 HWP_HIGHEST_PERF(hwp_cap);
2970
2971 /* Optimization: Avoid unnecessary divisions. */
2972
2973 target_pstate = cap_pstate;
2974 if (target_perf < capacity)
2975 target_pstate = DIV_ROUND_UP(cap_pstate * target_perf, capacity);
2976
2977 min_pstate = cap_pstate;
2978 if (min_perf < capacity)
2979 min_pstate = DIV_ROUND_UP(cap_pstate * min_perf, capacity);
2980
2981 if (min_pstate < cpu->pstate.min_pstate)
2982 min_pstate = cpu->pstate.min_pstate;
2983
2984 if (min_pstate < cpu->min_perf_ratio)
2985 min_pstate = cpu->min_perf_ratio;
2986
2987 max_pstate = min(cap_pstate, cpu->max_perf_ratio);
2988 if (max_pstate < min_pstate)
2989 max_pstate = min_pstate;
2990
2991 target_pstate = clamp_t(int, target_pstate, min_pstate, max_pstate);
2992
2993 intel_cpufreq_hwp_update(cpu, min_pstate, max_pstate, target_pstate, true);
2994
2995 cpu->pstate.current_pstate = target_pstate;
2996 intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_FAST_SWITCH, old_pstate);
2997 }
2998
2999 static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
3000 {
3001 struct freq_qos_request *req;
3002 struct cpudata *cpu;
3003 struct device *dev;
3004 int ret, freq;
3005
3006 dev = get_cpu_device(policy->cpu);
3007 if (!dev)
3008 return -ENODEV;
3009
3010 ret = __intel_pstate_cpu_init(policy);
3011 if (ret)
3012 return ret;
3013
3014 policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY;
3015
3016 policy->cur = policy->cpuinfo.min_freq;
3017
3018 req = kcalloc(2, sizeof(*req), GFP_KERNEL);
3019 if (!req) {
3020 ret = -ENOMEM;
3021 goto pstate_exit;
3022 }
3023
3024 cpu = all_cpu_data[policy->cpu];
3025
3026 if (hwp_active) {
3027 u64 value;
3028
3029 policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY_HWP;
3030
3031 intel_pstate_get_hwp_cap(cpu);
3032
3033 rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value);
3034 WRITE_ONCE(cpu->hwp_req_cached, value);
3035
3036 cpu->epp_cached = intel_pstate_get_epp(cpu, value);
3037 } else {
3038 policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY;
3039 }
3040
3041 freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * global.min_perf_pct, 100);
3042
3043 ret = freq_qos_add_request(&policy->constraints, req, FREQ_QOS_MIN,
3044 freq);
3045 if (ret < 0) {
3046 dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
3047 goto free_req;
3048 }
3049
3050 freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * global.max_perf_pct, 100);
3051
3052 ret = freq_qos_add_request(&policy->constraints, req + 1, FREQ_QOS_MAX,
3053 freq);
3054 if (ret < 0) {
3055 dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
3056 goto remove_min_req;
3057 }
3058
3059 policy->driver_data = req;
3060
3061 return 0;
3062
3063 remove_min_req:
3064 freq_qos_remove_request(req);
3065 free_req:
3066 kfree(req);
3067 pstate_exit:
3068 intel_pstate_exit_perf_limits(policy);
3069
3070 return ret;
3071 }
3072
3073 static int intel_cpufreq_cpu_exit(struct cpufreq_policy *policy)
3074 {
3075 struct freq_qos_request *req;
3076
3077 req = policy->driver_data;
3078
3079 freq_qos_remove_request(req + 1);
3080 freq_qos_remove_request(req);
3081 kfree(req);
3082
3083 return intel_pstate_cpu_exit(policy);
3084 }
3085
3086 static int intel_cpufreq_suspend(struct cpufreq_policy *policy)
3087 {
3088 intel_pstate_suspend(policy);
3089
3090 if (hwp_active) {
3091 struct cpudata *cpu = all_cpu_data[policy->cpu];
3092 u64 value = READ_ONCE(cpu->hwp_req_cached);
3093
3094 /*
3095  * Clear the desired perf field in MSR_HWP_REQUEST in case
3096  * intel_cpufreq_adjust_perf() is in use and the last value
3097  * written by it may not be suitable.
3098  */
3099 value &= ~HWP_DESIRED_PERF(~0L);
3100 wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
3101 WRITE_ONCE(cpu->hwp_req_cached, value);
3102 }
3103
3104 return 0;
3105 }
3106
3107 static struct cpufreq_driver intel_cpufreq = {
3108 .flags = CPUFREQ_CONST_LOOPS,
3109 .verify = intel_cpufreq_verify_policy,
3110 .target = intel_cpufreq_target,
3111 .fast_switch = intel_cpufreq_fast_switch,
3112 .init = intel_cpufreq_cpu_init,
3113 .exit = intel_cpufreq_cpu_exit,
3114 .offline = intel_cpufreq_cpu_offline,
3115 .online = intel_pstate_cpu_online,
3116 .suspend = intel_cpufreq_suspend,
3117 .resume = intel_pstate_resume,
3118 .update_limits = intel_pstate_update_limits,
3119 .name = "intel_cpufreq",
3120 };
3121
3122 static struct cpufreq_driver *default_driver;
3123
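/*
 * Tear down all per-CPU data. The hwp_notify_lock is taken around
 * freeing each cpudata so that the HWP notification handler cannot use
 * a pointer that is going away.
 */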
3124 static void intel_pstate_driver_cleanup(void)
3125 {
3126 unsigned int cpu;
3127
3128 cpus_read_lock();
3129 for_each_online_cpu(cpu) {
3130 if (all_cpu_data[cpu]) {
3131 if (intel_pstate_driver == &intel_pstate)
3132 intel_pstate_clear_update_util_hook(cpu);
3133
3134 spin_lock(&hwp_notify_lock);
3135 kfree(all_cpu_data[cpu]);
3136 WRITE_ONCE(all_cpu_data[cpu], NULL);
3137 spin_unlock(&hwp_notify_lock);
3138 }
3139 }
3140 cpus_read_unlock();
3141
3142 intel_pstate_driver = NULL;
3143 }
3144
3145 static int intel_pstate_register_driver(struct cpufreq_driver *driver)
3146 {
3147 int ret;
3148
3149 if (driver == &intel_pstate)
3150 intel_pstate_sysfs_expose_hwp_dynamic_boost();
3151
3152 memset(&global, 0, sizeof(global));
3153 global.max_perf_pct = 100;
3154
3155 intel_pstate_driver = driver;
3156 ret = cpufreq_register_driver(intel_pstate_driver);
3157 if (ret) {
3158 intel_pstate_driver_cleanup();
3159 return ret;
3160 }
3161
3162 global.min_perf_pct = min_perf_pct_min();
3163
3164 return 0;
3165 }
3166
3167 static ssize_t intel_pstate_show_status(char *buf)
3168 {
3169 if (!intel_pstate_driver)
3170 return sprintf(buf, "off\n");
3171
3172 return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ?
3173 "active" : "passive");
3174 }
3175
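/*
 * Handle writes to the global "status" sysfs attribute: "off" unregisters
 * the current driver (not allowed while HWP is active), while "active"
 * and "passive" switch between the intel_pstate and intel_cpufreq
 * drivers, unregistering the other one first if necessary.
 */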
3176 static int intel_pstate_update_status(const char *buf, size_t size)
3177 {
3178 if (size == 3 && !strncmp(buf, "off", size)) {
3179 if (!intel_pstate_driver)
3180 return -EINVAL;
3181
3182 if (hwp_active)
3183 return -EBUSY;
3184
3185 cpufreq_unregister_driver(intel_pstate_driver);
3186 intel_pstate_driver_cleanup();
3187 return 0;
3188 }
3189
3190 if (size == 6 && !strncmp(buf, "active", size)) {
3191 if (intel_pstate_driver) {
3192 if (intel_pstate_driver == &intel_pstate)
3193 return 0;
3194
3195 cpufreq_unregister_driver(intel_pstate_driver);
3196 }
3197
3198 return intel_pstate_register_driver(&intel_pstate);
3199 }
3200
3201 if (size == 7 && !strncmp(buf, "passive", size)) {
3202 if (intel_pstate_driver) {
3203 if (intel_pstate_driver == &intel_cpufreq)
3204 return 0;
3205
3206 cpufreq_unregister_driver(intel_pstate_driver);
3207 intel_pstate_sysfs_hide_hwp_dynamic_boost();
3208 }
3209
3210 return intel_pstate_register_driver(&intel_cpufreq);
3211 }
3212
3213 return -EINVAL;
3214 }
3215
3216 static int no_load __initdata;
3217 static int no_hwp __initdata;
3218 static int hwp_only __initdata;
3219 static unsigned int force_load __initdata;
3220
3221 static int __init intel_pstate_msrs_not_valid(void)
3222 {
3223 if (!pstate_funcs.get_max() ||
3224 !pstate_funcs.get_min() ||
3225 !pstate_funcs.get_turbo())
3226 return -ENODEV;
3227
3228 return 0;
3229 }
3230
3231 static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
3232 {
3233 pstate_funcs.get_max = funcs->get_max;
3234 pstate_funcs.get_max_physical = funcs->get_max_physical;
3235 pstate_funcs.get_min = funcs->get_min;
3236 pstate_funcs.get_turbo = funcs->get_turbo;
3237 pstate_funcs.get_scaling = funcs->get_scaling;
3238 pstate_funcs.get_val = funcs->get_val;
3239 pstate_funcs.get_vid = funcs->get_vid;
3240 pstate_funcs.get_aperf_mperf_shift = funcs->get_aperf_mperf_shift;
3241 }
3242
3243 #ifdef CONFIG_ACPI
3244
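/*
 * Heuristics for detecting platforms where the firmware expects to
 * manage P-states itself: look for ACPI _PSS and PCCH objects and for
 * _PPC support on the processors.
 */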
3245 static bool __init intel_pstate_no_acpi_pss(void)
3246 {
3247 int i;
3248
3249 for_each_possible_cpu(i) {
3250 acpi_status status;
3251 union acpi_object *pss;
3252 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
3253 struct acpi_processor *pr = per_cpu(processors, i);
3254
3255 if (!pr)
3256 continue;
3257
3258 status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
3259 if (ACPI_FAILURE(status))
3260 continue;
3261
3262 pss = buffer.pointer;
3263 if (pss && pss->type == ACPI_TYPE_PACKAGE) {
3264 kfree(pss);
3265 return false;
3266 }
3267
3268 kfree(pss);
3269 }
3270
3271 pr_debug("ACPI _PSS not found\n");
3272 return true;
3273 }
3274
3275 static bool __init intel_pstate_no_acpi_pcch(void)
3276 {
3277 acpi_status status;
3278 acpi_handle handle;
3279
3280 status = acpi_get_handle(NULL, "\\_SB", &handle);
3281 if (ACPI_FAILURE(status))
3282 goto not_found;
3283
3284 if (acpi_has_method(handle, "PCCH"))
3285 return false;
3286
3287 not_found:
3288 pr_debug("ACPI PCCH not found\n");
3289 return true;
3290 }
3291
3292 static bool __init intel_pstate_has_acpi_ppc(void)
3293 {
3294 int i;
3295
3296 for_each_possible_cpu(i) {
3297 struct acpi_processor *pr = per_cpu(processors, i);
3298
3299 if (!pr)
3300 continue;
3301 if (acpi_has_method(pr->handle, "_PPC"))
3302 return true;
3303 }
3304 pr_debug("ACPI _PPC not found\n");
3305 return false;
3306 }
3307
3308 enum {
3309 PSS,
3310 PPC,
3311 };
3312
3313 /* Hardware vendor-specific info that has its own power management modes */
3314 static struct acpi_platform_list plat_info[] __initdata = {
3315 {"HP ", "ProLiant", 0, ACPI_SIG_FADT, all_versions, NULL, PSS},
3316 {"ORACLE", "X4-2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
3317 {"ORACLE", "X4-2L ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
3318 {"ORACLE", "X4-2B ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
3319 {"ORACLE", "X3-2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
3320 {"ORACLE", "X3-2L ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
3321 {"ORACLE", "X3-2B ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
3322 {"ORACLE", "X4470M2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
3323 {"ORACLE", "X4270M3 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
3324 {"ORACLE", "X4270M2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
3325 {"ORACLE", "X4170M2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
3326 {"ORACLE", "X4170 M3", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
3327 {"ORACLE", "X4275 M3", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
3328 {"ORACLE", "X6-2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
3329 {"ORACLE", "Sudbury ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
3330 { }
3331 };
3332
3333 #define BITMASK_OOB (BIT(8) | BIT(18))
3334
3335 static bool __init intel_pstate_platform_pwr_mgmt_exists(void)
3336 {
3337 const struct x86_cpu_id *id;
3338 u64 misc_pwr;
3339 int idx;
3340
3341 id = x86_match_cpu(intel_pstate_cpu_oob_ids);
3342 if (id) {
3343 rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
3344 if (misc_pwr & BITMASK_OOB) {
3345 pr_debug("Bit 8 or 18 in the MISC_PWR_MGMT MSR set\n");
3346 pr_debug("P states are controlled in Out of Band mode by the firmware/hardware\n");
3347 return true;
3348 }
3349 }
3350
3351 idx = acpi_match_platform_list(plat_info);
3352 if (idx < 0)
3353 return false;
3354
3355 switch (plat_info[idx].data) {
3356 case PSS:
3357 if (!intel_pstate_no_acpi_pss())
3358 return false;
3359
3360 return intel_pstate_no_acpi_pcch();
3361 case PPC:
3362 return intel_pstate_has_acpi_ppc() && !force_load;
3363 }
3364
3365 return false;
3366 }
3367
3368 static void intel_pstate_request_control_from_smm(void)
3369 {
3370 /*
3371  * It may be unsafe to request P-states control from SMM if _PPC support
3372  * has not been enabled.
3373  */
3374 if (acpi_ppc)
3375 acpi_processor_pstate_control();
3376 }
3377 #else
3378 static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
3379 static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
3380 static inline void intel_pstate_request_control_from_smm(void) {}
3381 #endif
3382
3383 #define INTEL_PSTATE_HWP_BROADWELL 0x01
3384
3385 #define X86_MATCH_HWP(model, hwp_mode) \
3386 X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \
3387 X86_FEATURE_HWP, hwp_mode)
3388
3389 static const struct x86_cpu_id hwp_support_ids[] __initconst = {
3390 X86_MATCH_HWP(BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL),
3391 X86_MATCH_HWP(BROADWELL_D, INTEL_PSTATE_HWP_BROADWELL),
3392 X86_MATCH_HWP(ANY, 0),
3393 {}
3394 };
3395
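/* Whether HWP has already been enabled, e.g. by the firmware. */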
3396 static bool intel_pstate_hwp_is_enabled(void)
3397 {
3398 u64 value;
3399
3400 rdmsrl(MSR_PM_ENABLE, value);
3401 return !!(value & 0x1);
3402 }
3403
3404 static const struct x86_cpu_id intel_epp_balance_perf[] = {
3405 /*
3406  * Use 102 as the balance-performance EPP: the highest suggested
3407  * value that can still result in one-core turbo frequency on
3408  * AlderLake mobile CPUs.
3409  */
3410 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 102),
3411 {}
3412 };
3413
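/*
 * Driver entry point: pick the callback table for the running CPU model,
 * decide between the active (intel_pstate) and passive (intel_cpufreq)
 * drivers, bail out if the platform firmware manages P-states itself,
 * and finally allocate the per-CPU pointer array, expose the sysfs
 * interface and register the selected driver.
 */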
3414 static int __init intel_pstate_init(void)
3415 {
3416 static struct cpudata **_all_cpu_data;
3417 const struct x86_cpu_id *id;
3418 int rc;
3419
3420 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
3421 return -ENODEV;
3422
3423 id = x86_match_cpu(hwp_support_ids);
3424 if (id) {
3425 bool hwp_forced = intel_pstate_hwp_is_enabled();
3426
3427 if (hwp_forced)
3428 pr_info("HWP enabled by BIOS\n");
3429 else if (no_load)
3430 return -ENODEV;
3431
3432 copy_cpu_funcs(&core_funcs);
3433 /*
3434  * Avoid enabling HWP for processors without EPP support,
3435  * because that means incomplete HWP implementation which is a
3436  * corner case and supporting it is generally problematic.
3437  *
3438  * If HWP is enabled already, though, there is no choice but to
3439  * deal with it.
3440  */
3441 if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) || hwp_forced) {
3442 WRITE_ONCE(hwp_active, 1);
3443 hwp_mode_bdw = id->driver_data;
3444 intel_pstate.attr = hwp_cpufreq_attrs;
3445 intel_cpufreq.attr = hwp_cpufreq_attrs;
3446 intel_cpufreq.flags |= CPUFREQ_NEED_UPDATE_LIMITS;
3447 intel_cpufreq.adjust_perf = intel_cpufreq_adjust_perf;
3448 if (!default_driver)
3449 default_driver = &intel_pstate;
3450
3451 if (boot_cpu_has(X86_FEATURE_HYBRID_CPU))
3452 intel_pstate_cppc_set_cpu_scaling();
3453
3454 goto hwp_cpu_matched;
3455 }
3456 pr_info("HWP not enabled\n");
3457 } else {
3458 if (no_load)
3459 return -ENODEV;
3460
3461 id = x86_match_cpu(intel_pstate_cpu_ids);
3462 if (!id) {
3463 pr_info("CPU model not supported\n");
3464 return -ENODEV;
3465 }
3466
3467 copy_cpu_funcs((struct pstate_funcs *)id->driver_data);
3468 }
3469
3470 if (intel_pstate_msrs_not_valid()) {
3471 pr_info("Invalid MSRs\n");
3472 return -ENODEV;
3473 }
3474
3475 if (!default_driver)
3476 default_driver = &intel_cpufreq;
3477
3478 hwp_cpu_matched:
3479 /*
3480  * The Intel pstate driver will be ignored if the platform
3481  * firmware has its own power management modes.
3482  */
3483 if (intel_pstate_platform_pwr_mgmt_exists()) {
3484 pr_info("P-states controlled by the platform\n");
3485 return -ENODEV;
3486 }
3487
3488 if (!hwp_active && hwp_only)
3489 return -ENOTSUPP;
3490
3491 pr_info("Intel P-state driver initializing\n");
3492
3493 _all_cpu_data = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
3494 if (!_all_cpu_data)
3495 return -ENOMEM;
3496
3497 WRITE_ONCE(all_cpu_data, _all_cpu_data);
3498
3499 intel_pstate_request_control_from_smm();
3500
3501 intel_pstate_sysfs_expose_params();
3502
3503 if (hwp_active) {
3504 const struct x86_cpu_id *id = x86_match_cpu(intel_epp_balance_perf);
3505
3506 if (id)
3507 epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = id->driver_data;
3508 }
3509
3510 mutex_lock(&intel_pstate_driver_lock);
3511 rc = intel_pstate_register_driver(default_driver);
3512 mutex_unlock(&intel_pstate_driver_lock);
3513 if (rc) {
3514 intel_pstate_sysfs_remove();
3515 return rc;
3516 }
3517
3518 if (hwp_active) {
3519 const struct x86_cpu_id *id;
3520
3521 id = x86_match_cpu(intel_pstate_cpu_ee_disable_ids);
3522 if (id) {
3523 set_power_ctl_ee_state(false);
3524 pr_info("Disabling energy efficiency optimization\n");
3525 }
3526
3527 pr_info("HWP enabled\n");
3528 } else if (boot_cpu_has(X86_FEATURE_HYBRID_CPU)) {
3529 pr_warn("Problematic setup: Hybrid processor with disabled HWP\n");
3530 }
3531
3532 return 0;
3533 }
3534 device_initcall(intel_pstate_init);
3535
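/*
 * Parse the intel_pstate= kernel command line options: "disable",
 * "active", "passive", "no_hwp", "force", "hwp_only",
 * "per_cpu_perf_limits" and (with ACPI) "support_acpi_ppc".
 */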
3536 static int __init intel_pstate_setup(char *str)
3537 {
3538 if (!str)
3539 return -EINVAL;
3540
3541 if (!strcmp(str, "disable"))
3542 no_load = 1;
3543 else if (!strcmp(str, "active"))
3544 default_driver = &intel_pstate;
3545 else if (!strcmp(str, "passive"))
3546 default_driver = &intel_cpufreq;
3547
3548 if (!strcmp(str, "no_hwp"))
3549 no_hwp = 1;
3550
3551 if (!strcmp(str, "force"))
3552 force_load = 1;
3553 if (!strcmp(str, "hwp_only"))
3554 hwp_only = 1;
3555 if (!strcmp(str, "per_cpu_perf_limits"))
3556 per_cpu_limits = true;
3557
3558 #ifdef CONFIG_ACPI
3559 if (!strcmp(str, "support_acpi_ppc"))
3560 acpi_ppc = true;
3561 #endif
3562
3563 return 0;
3564 }
3565 early_param("intel_pstate", intel_pstate_setup);
3566
3567 MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
3568 MODULE_DESCRIPTION("'intel_pstate' - P-state driver for Intel Core processors");
3569 MODULE_LICENSE("GPL");