// SPDX-License-Identifier: GPL-2.0-only
/*
 * x86 APERF/MPERF KHz calculation for
 * /sys/.../cpufreq/scaling_cur_freq
 *
 * Copyright (C) 2017 Intel Corp.
 * Author: Len Brown <len.brown@intel.com>
 */
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/ktime.h>
#include <linux/math64.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/sched/isolation.h>
#include <linux/sched/topology.h>
#include <linux/smp.h>
#include <linux/syscore_ops.h>

#include <asm/cpu.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>

#include "cpu.h"
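
/*
 * Per-CPU APERF/MPERF snapshot, updated from the scheduler tick. acnt/mcnt
 * hold the counter deltas of the last tick period and are published under
 * the seqcount together with last_update, so arch_freq_get_on_cpu() can
 * read a consistent triple from another CPU.
 */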
struct aperfmperf {
	seqcount_t	seq;
	unsigned long	last_update;
	u64		acnt;
	u64		mcnt;
	u64		aperf;
	u64		mperf;
};

static DEFINE_PER_CPU_SHARED_ALIGNED(struct aperfmperf, cpu_samples) = {
	.seq = SEQCNT_ZERO(cpu_samples.seq)
};
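
/*
 * Snapshot the current APERF/MPERF values so the first tick delta is
 * computed against a sane reference. Also used as the syscore resume
 * callback below, since the counters do not carry across suspend.
 */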
static void init_counter_refs(void)
{
	u64 aperf, mperf;

	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);

	this_cpu_write(cpu_samples.aperf, aperf);
	this_cpu_write(cpu_samples.mperf, mperf);
}

#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
/*
 * APERF/MPERF frequency ratio computation.
 *
 * The scheduler wants to do frequency invariant accounting and needs a <1
 * ratio to account for the 'current' frequency, corresponding to
 * freq_curr / freq_max.
 *
 * Since the frequency on x86 is controlled by the hardware and a P-state
 * setting is little more than a request/hint, the effective frequency has
 * to be observed via the APERF/MPERF MSRs over a sampling interval:
 *
 *	freq_curr = freq_base * delta_APERF / delta_MPERF
 *
 * freq_max is taken from the turbo ratio reported by the model specific
 * MSRs parsed below, or from the base ratio when turbo is disabled.
 */
DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);

static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;

void arch_set_max_freq_ratio(bool turbo_disabled)
{
	arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
					       arch_turbo_freq_ratio;
}
EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
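
/*
 * MSR_IA32_MISC_ENABLE_TURBO_DISABLE is set, typically by firmware, when
 * turbo has been disabled on this system.
 */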
static bool __init turbo_disabled(void)
{
	u64 misc_en;
	int err;

	err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
	if (err)
		return false;

	return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
}
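
/*
 * Silvermont Atom: the guaranteed (base) ratio sits in bits 21:16 of
 * MSR_ATOM_CORE_RATIOS, the turbo ratio in bits 5:0 of
 * MSR_ATOM_CORE_TURBO_RATIOS.
 */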
static bool __init slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
{
	int err;

	err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
	if (err)
		return false;

	err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
	if (err)
		return false;

	*base_freq = (*base_freq >> 16) & 0x3F;
	*turbo_freq = *turbo_freq & 0x3F;

	return true;
}
#define X86_MATCH(model)					\
	X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6,		\
		INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
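
/*
 * The models below use a non-standard MSR_TURBO_RATIO_LIMIT layout and
 * need the model specific parsers further down.
 */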
static const struct x86_cpu_id has_knl_turbo_ratio_limits[] __initconst = {
	X86_MATCH(XEON_PHI_KNL),
	X86_MATCH(XEON_PHI_KNM),
	{}
};

static const struct x86_cpu_id has_skx_turbo_ratio_limits[] __initconst = {
	X86_MATCH(SKYLAKE_X),
	{}
};

static const struct x86_cpu_id has_glm_turbo_ratio_limits[] __initconst = {
	X86_MATCH(ATOM_GOLDMONT),
	X86_MATCH(ATOM_GOLDMONT_D),
	X86_MATCH(ATOM_GOLDMONT_PLUS),
	{}
};
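
/*
 * Xeon Phi (KNL/KNM): MSR_TURBO_RATIO_LIMIT encodes the top turbo ratio
 * in bits 15:8 and 3-bit deltas for the larger core-count groups above
 * it. Walk the groups, subtracting each delta, until num_delta_fratio
 * populated groups have been consumed.
 */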
static bool __init knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
					  int num_delta_fratio)
{
	int fratio, delta_fratio, found;
	int err, i;
	u64 msr;

	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
	if (err)
		return false;

	*base_freq = (*base_freq >> 8) & 0xFF;

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
	if (err)
		return false;

	fratio = (msr >> 8) & 0xFF;
	i = 16;
	found = 0;
	do {
		if (found >= num_delta_fratio) {
			*turbo_freq = fratio;
			return true;
		}

		delta_fratio = (msr >> (i + 5)) & 0x7;

		if (delta_fratio) {
			found += 1;
			fratio -= delta_fratio;
		}

		i += 8;
	} while (i < 64);

	return true;
}
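
/*
 * Skylake-X and Goldmont: MSR_TURBO_RATIO_LIMIT1 holds 8-bit core counts
 * per group and MSR_TURBO_RATIO_LIMIT the matching 8-bit turbo ratios;
 * return the ratio of the first group covering at least @size cores.
 */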
static bool __init skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
{
	u64 ratios, counts;
	u32 group_size;
	int err, i;

	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
	if (err)
		return false;

	*base_freq = (*base_freq >> 8) & 0xFF;

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
	if (err)
		return false;

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
	if (err)
		return false;

	for (i = 0; i < 64; i += 8) {
		group_size = (counts >> i) & 0xFF;
		if (group_size >= size) {
			*turbo_freq = (ratios >> i) & 0xFF;
			return true;
		}
	}

	return false;
}
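
/*
 * All other big-core models: base ratio from MSR_PLATFORM_INFO bits 15:8,
 * turbo ratio from the 4-core group of MSR_TURBO_RATIO_LIMIT with the
 * 1-core group as fallback.
 */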
static bool __init core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
{
	u64 msr;
	int err;

	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
	if (err)
		return false;

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
	if (err)
		return false;

	*base_freq = (*base_freq >> 8) & 0xFF;	/* max P state */
	*turbo_freq = (msr >> 24) & 0xFF;	/* 4C turbo    */

	/* The CPU may have less than 4 cores */
	if (!*turbo_freq)
		*turbo_freq = msr & 0xFF;	/* 1C turbo    */

	return true;
}
static bool __init intel_set_max_freq_ratio(void)
{
	u64 base_freq, turbo_freq;
	u64 turbo_ratio;

	if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
		goto out;

	if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
	    skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
		goto out;

	if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
	    knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
		goto out;

	if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
	    skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
		goto out;

	if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
		goto out;

	return false;

out:
	/*
	 * Some hypervisors advertise X86_FEATURE_APERFMPERF
	 * but then fill all MSRs with zeroes.
	 * Some CPUs have turbo boost but don't declare any turbo ratio
	 * in MSR_TURBO_RATIO_LIMIT.
	 */
	if (!base_freq || !turbo_freq) {
		pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
		return false;
	}

	turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
	if (!turbo_ratio) {
		pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
		return false;
	}

	arch_turbo_freq_ratio = turbo_ratio;
	arch_set_max_freq_ratio(turbo_disabled());

	return true;
}
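
/*
 * Example: a 24 base ratio (2400 MHz) and a 36 turbo ratio (3600 MHz)
 * yield arch_turbo_freq_ratio = 36 * 1024 / 24 = 1536.
 */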
#ifdef CONFIG_PM_SLEEP
/*
 * The counters do not carry across suspend: re-snapshot the reference
 * values on resume so the next tick sees sane deltas.
 */
static struct syscore_ops freq_invariance_syscore_ops = {
	.resume = init_counter_refs,
};

static void register_freq_invariance_syscore_ops(void)
{
	register_syscore_ops(&freq_invariance_syscore_ops);
}
#else
static inline void register_freq_invariance_syscore_ops(void) {}
#endif
static void freq_invariance_enable(void)
{
	if (static_branch_unlikely(&arch_scale_freq_key)) {
		WARN_ON_ONCE(1);
		return;
	}
	static_branch_enable(&arch_scale_freq_key);
	register_freq_invariance_syscore_ops();
	pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
}
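
/*
 * Allows a cpufreq driver (e.g. intel_pstate on HWP systems) to supply
 * the turbo/base performance ratio directly instead of relying on the
 * MSR parsing above.
 */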
void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled)
{
	arch_turbo_freq_ratio = ratio;
	arch_set_max_freq_ratio(turbo_disabled);
	freq_invariance_enable();
}
static void __init bp_init_freq_invariance(void)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return;

	if (intel_set_max_freq_ratio())
		freq_invariance_enable();
}
static void disable_freq_invariance_workfn(struct work_struct *work)
{
	static_branch_disable(&arch_scale_freq_key);
}

static DECLARE_WORK(disable_freq_invariance_work,
		    disable_freq_invariance_workfn);

DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
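
/*
 * Per tick, the scale factor published for this CPU is:
 *
 *	arch_freq_scale = (acnt << 2 * SCHED_CAPACITY_SHIFT) /
 *			  (mcnt * arch_max_freq_ratio)
 *
 * i.e. 1024 * (delta_APERF / delta_MPERF) / (arch_max_freq_ratio / 1024),
 * clamped to SCHED_CAPACITY_SCALE. E.g. running at half the maximum ratio
 * yields ~512.
 */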
static void scale_freq_tick(u64 acnt, u64 mcnt)
{
	u64 freq_scale;

	if (!arch_scale_freq_invariant())
		return;

	if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
		goto error;

	if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
		goto error;

	freq_scale = div64_u64(acnt, mcnt);
	if (!freq_scale)
		goto error;

	if (freq_scale > SCHED_CAPACITY_SCALE)
		freq_scale = SCHED_CAPACITY_SCALE;

	this_cpu_write(arch_freq_scale, freq_scale);
	return;

error:
	pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
	schedule_work(&disable_freq_invariance_work);
}
#else
static inline void bp_init_freq_invariance(void) { }
static inline void scale_freq_tick(u64 acnt, u64 mcnt) { }
#endif /* CONFIG_X86_64 && CONFIG_SMP */
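
/*
 * Called from the scheduler tick: read APERF/MPERF, publish the deltas
 * for arch_freq_get_on_cpu() under the seqcount, and feed them to the
 * frequency invariance machinery.
 */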
void arch_scale_freq_tick(void)
{
	struct aperfmperf *s = this_cpu_ptr(&cpu_samples);
	u64 acnt, mcnt, aperf, mperf;

	if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
		return;

	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	acnt = aperf - s->aperf;
	mcnt = mperf - s->mperf;

	s->aperf = aperf;
	s->mperf = mperf;

	raw_write_seqcount_begin(&s->seq);
	s->last_update = jiffies;
	s->acnt = acnt;
	s->mcnt = mcnt;
	raw_write_seqcount_end(&s->seq);

	scale_freq_tick(acnt, mcnt);
}

/*
 * Samples older than 20ms (HZ/50 ticks) are stale: the tick has not run
 * recently, so the CPU is idle or isolated and the cached counts say
 * nothing about the current frequency.
 */
#define MAX_SAMPLE_AGE	((unsigned long)HZ / 50)
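
/*
 * Report the effective frequency in kHz:
 *
 *	freq = cpu_khz * delta_APERF / delta_MPERF
 *
 * e.g. cpu_khz = 2400000 with an APERF/MPERF delta ratio of 3/2 reports
 * 3600000 kHz. Falls back to cpufreq's cached value, or cpu_khz, when no
 * fresh sample is available.
 */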
unsigned int arch_freq_get_on_cpu(int cpu)
{
	struct aperfmperf *s = per_cpu_ptr(&cpu_samples, cpu);
	unsigned int seq, freq;
	unsigned long last;
	u64 acnt, mcnt;

	if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
		goto fallback;

	do {
		seq = raw_read_seqcount_begin(&s->seq);
		last = s->last_update;
		acnt = s->acnt;
		mcnt = s->mcnt;
	} while (read_seqcount_retry(&s->seq, seq));

	/*
	 * Bail on an invalid count and when the last update was too long
	 * ago, which covers idle and NOHZ full CPUs.
	 */
	if (!mcnt || (jiffies - last) > MAX_SAMPLE_AGE)
		goto fallback;

	return div64_u64((cpu_khz * acnt), mcnt);

fallback:
	freq = cpufreq_quick_get(cpu);
	return freq ? freq : cpu_khz;
}
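
/* Boot CPU: take the initial counter snapshot and set up invariance. */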
static int __init bp_init_aperfmperf(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
		return 0;

	init_counter_refs();
	bp_init_freq_invariance();
	return 0;
}
early_initcall(bp_init_aperfmperf);
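
/* Secondary CPUs: snapshot the counters when the CPU is brought up. */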
void ap_init_aperfmperf(void)
{
	if (cpu_feature_enabled(X86_FEATURE_APERFMPERF))
		init_counter_refs();
}