// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/drivers/thermal/cpufreq_cooling.c
 *
 *  Copyright (C) 2012	Samsung Electronics Co., Ltd(http://www.samsung.com)
 *
 *  Copyright (C) 2012-2018 Linaro Limited.
 *
 *  Authors:	Amit Daniel <amit.kachhap@linaro.org>
 *		Viresh Kumar <viresh.kumar@linaro.org>
 *
 */
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpu_cooling.h>
#include <linux/device.h>
#include <linux/energy_model.h>
#include <linux/err.h>
#include <linux/export.h>
#include <linux/pm_opp.h>
#include <linux/pm_qos.h>
#include <linux/slab.h>
#include <linux/thermal.h>
#include <linux/units.h>

#include <trace/events/thermal.h>
/*
 * Cooling state <-> CPUFreq frequency
 *
 * Cooling states are translated to frequencies throughout this driver and this
 * is the relation between them.
 *
 * Highest cooling state corresponds to lowest possible frequency.
 *
 * i.e.
 *	level 0 --> 1st Max Freq
 *	level 1 --> 2nd Max Freq
 *	...
 */

/**
 * struct time_in_idle - Idle time stats
 * @time: previous reading of the absolute time that this cpu was idle
 * @timestamp: wall time of the last invocation of get_cpu_idle_time_us()
 */
struct time_in_idle {
	u64 time;
	u64 timestamp;
};
/**
 * struct cpufreq_cooling_device - data for cooling device with cpufreq
 * @last_load: load measured by the latest call to
 *	cpufreq_get_requested_power()
 * @cpufreq_state: integer value representing the current state of cpufreq
 *	cooling devices.
 * @max_level: maximum cooling level. One less than total number of valid
 *	cpufreq frequencies.
 * @em: Reference on the Energy Model of the device
 * @policy: cpufreq policy.
 * @cooling_ops: cpufreq callbacks to thermal cooling device ops
 * @idle_time: idle time stats (only used on !CONFIG_SMP)
 * @qos_req: PM QoS constraint to apply
 *
 * This structure is required for keeping information of each registered
 * cpufreq_cooling_device.
 */
struct cpufreq_cooling_device {
	u32 last_load;
	unsigned int cpufreq_state;
	unsigned int max_level;
	struct em_perf_domain *em;
	struct cpufreq_policy *policy;
	struct thermal_cooling_device_ops cooling_ops;
#ifndef CONFIG_SMP
	struct time_in_idle *idle_time;
#endif
	struct freq_qos_request qos_req;
};
#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
/**
 * get_level: Find the level for a particular frequency
 * @cpufreq_cdev: cpufreq_cdev for which the property is required
 * @freq: Frequency
 *
 * Return: level corresponding to the frequency.
 */
static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
			       unsigned int freq)
{
	int i;

	for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
		if (freq > cpufreq_cdev->em->table[i].frequency)
			break;
	}

	return cpufreq_cdev->max_level - i - 1;
}
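
/*
 * cpu_freq_to_power() - power drawn at @freq and 100% load, in milliwatts.
 *
 * The EM table is sorted by ascending frequency, so after the scan below
 * table[i + 1] is the lowest performance state whose frequency is at least
 * @freq.  The EM stores power in microwatts; scale it down to milliwatts.
 */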
static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
			     u32 freq)
{
	unsigned long power_mw;
	int i;

	for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
		if (freq > cpufreq_cdev->em->table[i].frequency)
			break;
	}

	power_mw = cpufreq_cdev->em->table[i + 1].power;
	power_mw /= MICROWATT_PER_MILLIWATT;

	return power_mw;
}
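
/*
 * cpu_power_to_freq() - highest frequency whose full-load power fits within
 * the @power milliwatt budget.
 *
 * Walk the EM table from the highest performance state downwards, stopping
 * at the first state whose full-load power fits the budget.  If none of
 * states max_level..1 fit, the lowest state (index 0) is returned without
 * being checked.
 */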
static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
			     u32 power)
{
	unsigned long em_power_mw;
	int i;

	for (i = cpufreq_cdev->max_level; i > 0; i--) {
		/* Convert EM power to milli-Watts to make safe comparison */
		em_power_mw = cpufreq_cdev->em->table[i].power;
		em_power_mw /= MICROWATT_PER_MILLIWATT;
		if (power >= em_power_mw)
			break;
	}

	return cpufreq_cdev->em->table[i].frequency;
}
/**
 * get_load() - get load for a cpu
 * @cpufreq_cdev: struct cpufreq_cooling_device for the cpu
 * @cpu: cpu number
 * @cpu_idx: index of the cpu in time_in_idle array
 *
 * Return: The average load of cpu @cpu in percentage since this
 * function was last called.
 */
#ifdef CONFIG_SMP
static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu,
		    int cpu_idx)
{
	unsigned long util = sched_cpu_util(cpu);

	return (util * 100) / arch_scale_cpu_capacity(cpu);
}
#else /* !CONFIG_SMP */
static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu,
		    int cpu_idx)
{
	u32 load;
	u64 now, now_idle, delta_time, delta_idle;
	struct time_in_idle *idle_time = &cpufreq_cdev->idle_time[cpu_idx];
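
	/*
	 * Idle-time based load: the busy fraction of wall time since the
	 * last call, i.e. 100 * (delta_time - delta_idle) / delta_time,
	 * clamped to 0 if the idle delta ever exceeds the elapsed time.
	 */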
	now_idle = get_cpu_idle_time(cpu, &now, 0);
	delta_idle = now_idle - idle_time->time;
	delta_time = now - idle_time->timestamp;

	if (delta_time <= delta_idle)
		load = 0;
	else
		load = div64_u64(100 * (delta_time - delta_idle), delta_time);

	idle_time->time = now_idle;
	idle_time->timestamp = now;

	return load;
}
#endif /* CONFIG_SMP */
/**
 * get_dynamic_power() - calculate the dynamic power
 * @cpufreq_cdev:	&cpufreq_cooling_device for this cdev
 * @freq:	current frequency
 *
 * Return: the dynamic power consumed by the cpus described by
 * @cpufreq_cdev.
 */
static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_cdev,
			     unsigned long freq)
{
	u32 raw_cpu_power;

	raw_cpu_power = cpu_freq_to_power(cpufreq_cdev, freq);
	return (raw_cpu_power * cpufreq_cdev->last_load) / 100;
}
/**
 * cpufreq_get_requested_power() - get the current power
 * @cdev:	&thermal_cooling_device pointer
 * @power:	pointer in which to store the resulting power
 *
 * Calculate the current power consumption of the cpus in milliwatts
 * and store it in @power.  This function should actually calculate
 * the requested power, but it's hard to get the frequency that
 * cpufreq would have assigned if there were no thermal limits.
 * Instead, we calculate the current power on the assumption that the
 * immediate future will look like the immediate past.
 *
 * Return: 0 on success, this function doesn't fail.
 */
static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
				       u32 *power)
{
	unsigned long freq;
	int i = 0, cpu;
	u32 total_load = 0;
	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
	struct cpufreq_policy *policy = cpufreq_cdev->policy;

	freq = cpufreq_quick_get(policy->cpu);
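
	/*
	 * Sum the load of every CPU in the policy; offline CPUs contribute
	 * zero.  The aggregate is cached in last_load so that
	 * cpufreq_power2state() can later normalise a power budget against
	 * the load observed here.
	 */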
	for_each_cpu(cpu, policy->related_cpus) {
		u32 load;

		if (cpu_online(cpu))
			load = get_load(cpufreq_cdev, cpu, i);
		else
			load = 0;

		total_load += load;
	}

	cpufreq_cdev->last_load = total_load;

	*power = get_dynamic_power(cpufreq_cdev, freq);

	trace_thermal_power_cpu_get_power_simple(policy->cpu, *power);

	return 0;
}
/**
 * cpufreq_state2power() - convert a cpu cdev state to power consumed
 * @cdev:	&thermal_cooling_device pointer
 * @state:	cooling device state to be converted
 * @power:	pointer in which to store the resulting power
 *
 * Convert cooling device state @state into power consumption in
 * milliwatts assuming 100% load.  Store the calculated power in
 * @power.
 *
 * Return: 0 on success, -EINVAL if the cooling device state is bigger
 * than maximum allowed.
 */
static int cpufreq_state2power(struct thermal_cooling_device *cdev,
			       unsigned long state, u32 *power)
{
	unsigned int freq, num_cpus, idx;
	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;

	/* Request state should be less than max_level */
	if (state > cpufreq_cdev->max_level)
		return -EINVAL;

	num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);

	idx = cpufreq_cdev->max_level - state;
	freq = cpufreq_cdev->em->table[idx].frequency;
	*power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;

	return 0;
}
/**
 * cpufreq_power2state() - convert power to a cooling device state
 * @cdev:	&thermal_cooling_device pointer
 * @power:	power in milliwatts to be converted
 * @state:	pointer in which to store the resulting state
 *
 * Calculate a cooling device state for the cpus described by @cdev
 * that would allow them to consume at most @power mW and store it in
 * @state.  Note that this calculation depends on external factors
 * such as the CPUs load.  Calling this function with the same power
 * as input can yield different cooling device states depending on those
 * external factors.
 *
 * Return: 0 on success, this function doesn't fail.
 */
static int cpufreq_power2state(struct thermal_cooling_device *cdev,
			       u32 power, unsigned long *state)
{
	unsigned int target_freq;
	u32 last_load, normalised_power;
	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
	struct cpufreq_policy *policy = cpufreq_cdev->policy;
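
	/*
	 * The EM table stores power at 100% load, while @power is a budget
	 * for the load last observed.  Rescale the budget to the
	 * corresponding full-load power before looking up a frequency,
	 * guarding against a zero last_load.
	 */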
	last_load = cpufreq_cdev->last_load ?: 1;
	normalised_power = (power * 100) / last_load;
	target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power);

	*state = get_level(cpufreq_cdev, target_freq);
	trace_thermal_power_cpu_limit(policy->related_cpus, target_freq, *state,
				      power);
	return 0;
}
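
/*
 * em_is_sane() - sanity-check an Energy Model against the cpufreq policy.
 *
 * The power-allocator callbacks above index the EM table by cooling level,
 * so the EM must not be artificial, must span exactly the CPUs of the
 * policy, and must provide one performance state per cooling level.
 */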
static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev,
			      struct em_perf_domain *em)
{
	struct cpufreq_policy *policy;
	unsigned int nr_levels;

	if (!em || em_is_artificial(em))
		return false;

	policy = cpufreq_cdev->policy;
	if (!cpumask_equal(policy->related_cpus, em_span_cpus(em))) {
		pr_err("The span of pd %*pbl is misaligned with cpufreq policy %*pbl\n",
		       cpumask_pr_args(em_span_cpus(em)),
		       cpumask_pr_args(policy->related_cpus));
		return false;
	}

	nr_levels = cpufreq_cdev->max_level + 1;
	if (em_pd_nr_perf_states(em) != nr_levels) {
		pr_err("The number of performance states in pd %*pbl (%u) doesn't match the number of cooling levels (%u)\n",
		       cpumask_pr_args(em_span_cpus(em)),
		       em_pd_nr_perf_states(em), nr_levels);
		return false;
	}

	return true;
}
#endif /* CONFIG_THERMAL_GOV_POWER_ALLOCATOR */
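
/*
 * On SMP the load is derived from the scheduler's utilization signal, so
 * no per-CPU idle-time bookkeeping is needed and these helpers are no-ops;
 * on !SMP they manage the idle_time array sampled by get_load().
 */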
0342 #ifdef CONFIG_SMP
0343 static inline int allocate_idle_time(struct cpufreq_cooling_device *cpufreq_cdev)
0344 {
0345 return 0;
0346 }
0347
0348 static inline void free_idle_time(struct cpufreq_cooling_device *cpufreq_cdev)
0349 {
0350 }
0351 #else
0352 static int allocate_idle_time(struct cpufreq_cooling_device *cpufreq_cdev)
0353 {
0354 unsigned int num_cpus = cpumask_weight(cpufreq_cdev->policy->related_cpus);
0355
0356 cpufreq_cdev->idle_time = kcalloc(num_cpus,
0357 sizeof(*cpufreq_cdev->idle_time),
0358 GFP_KERNEL);
0359 if (!cpufreq_cdev->idle_time)
0360 return -ENOMEM;
0361
0362 return 0;
0363 }
0364
0365 static void free_idle_time(struct cpufreq_cooling_device *cpufreq_cdev)
0366 {
0367 kfree(cpufreq_cdev->idle_time);
0368 cpufreq_cdev->idle_time = NULL;
0369 }
0370 #endif
static unsigned int get_state_freq(struct cpufreq_cooling_device *cpufreq_cdev,
				   unsigned long state)
{
	struct cpufreq_policy *policy;
	unsigned long idx;

#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
	/* Use the Energy Model table if available */
	if (cpufreq_cdev->em) {
		idx = cpufreq_cdev->max_level - state;
		return cpufreq_cdev->em->table[idx].frequency;
	}
#endif

	/* Otherwise, fallback on the CPUFreq table */
	policy = cpufreq_cdev->policy;
	if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
		idx = cpufreq_cdev->max_level - state;
	else
		idx = state;

	return policy->freq_table[idx].frequency;
}
/**
 * cpufreq_get_max_state - callback function to get the max cooling state.
 * @cdev: thermal cooling device pointer.
 * @state: fill this variable with the max cooling state.
 *
 * Callback for the thermal cooling device to return the cpufreq
 * max cooling state.
 *
 * Return: 0 on success, an error code otherwise.
 */
static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;

	*state = cpufreq_cdev->max_level;
	return 0;
}
/**
 * cpufreq_get_cur_state - callback function to get the current cooling state.
 * @cdev: thermal cooling device pointer.
 * @state: fill this variable with the current cooling state.
 *
 * Callback for the thermal cooling device to return the cpufreq
 * current cooling state.
 *
 * Return: 0 on success, an error code otherwise.
 */
static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;

	*state = cpufreq_cdev->cpufreq_state;

	return 0;
}
/**
 * cpufreq_set_cur_state - callback function to set the current cooling state.
 * @cdev: thermal cooling device pointer.
 * @state: set this variable to the current cooling state.
 *
 * Callback for the thermal cooling device to change the cpufreq
 * current cooling state.
 *
 * Return: 0 on success, an error code otherwise.
 */
static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long state)
{
	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
	struct cpumask *cpus;
	unsigned int frequency;
	int ret;

	/* Request state should be less than max_level */
	if (state > cpufreq_cdev->max_level)
		return -EINVAL;

	/* Check if the old cooling action is same as new cooling action */
	if (cpufreq_cdev->cpufreq_state == state)
		return 0;

	frequency = get_state_freq(cpufreq_cdev, state);
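
	/*
	 * freq_qos_update_request() returns 1 if the effective constraint
	 * value has changed, 0 if it hasn't, and a negative error code on
	 * failure, so any non-negative value means the request was applied.
	 */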
	ret = freq_qos_update_request(&cpufreq_cdev->qos_req, frequency);
	if (ret >= 0) {
		cpufreq_cdev->cpufreq_state = state;
		cpus = cpufreq_cdev->policy->related_cpus;
		arch_update_thermal_pressure(cpus, frequency);
		ret = 0;
	}

	return ret;
}
/**
 * __cpufreq_cooling_register - helper function to create cpufreq cooling device
 * @np: a valid struct device_node to the cooling device tree node
 * @policy: cpufreq policy
 * @em: Energy Model of the device
 *
 * This interface function registers the cpufreq cooling device with the name
 * "cpufreq-%s". This API can support multiple instances of cpufreq
 * cooling devices. It also gives the opportunity to link the cooling device
 * with a device tree node, in order to bind it via the thermal DT code.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * on failure, it returns a corresponding ERR_PTR().
 */
static struct thermal_cooling_device *
__cpufreq_cooling_register(struct device_node *np,
			struct cpufreq_policy *policy,
			struct em_perf_domain *em)
{
	struct thermal_cooling_device *cdev;
	struct cpufreq_cooling_device *cpufreq_cdev;
	unsigned int i;
	struct device *dev;
	int ret;
	struct thermal_cooling_device_ops *cooling_ops;
	char *name;

	/* Validate the policy before dereferencing policy->cpu */
	if (IS_ERR_OR_NULL(policy)) {
		pr_err("%s: cpufreq policy isn't valid: %p\n", __func__, policy);
		return ERR_PTR(-EINVAL);
	}

	dev = get_cpu_device(policy->cpu);
	if (unlikely(!dev)) {
		pr_warn("No cpu device for cpu %d\n", policy->cpu);
		return ERR_PTR(-ENODEV);
	}
	i = cpufreq_table_count_valid_entries(policy);
	if (!i) {
		pr_debug("%s: CPUFreq table not found or has no valid entries\n",
			 __func__);
		return ERR_PTR(-ENODEV);
	}

	cpufreq_cdev = kzalloc(sizeof(*cpufreq_cdev), GFP_KERNEL);
	if (!cpufreq_cdev)
		return ERR_PTR(-ENOMEM);

	cpufreq_cdev->policy = policy;

	ret = allocate_idle_time(cpufreq_cdev);
	if (ret) {
		cdev = ERR_PTR(ret);
		goto free_cdev;
	}

	/* max_level is an index, not a counter */
	cpufreq_cdev->max_level = i - 1;

	cooling_ops = &cpufreq_cdev->cooling_ops;
	cooling_ops->get_max_state = cpufreq_get_max_state;
	cooling_ops->get_cur_state = cpufreq_get_cur_state;
	cooling_ops->set_cur_state = cpufreq_set_cur_state;
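
	/*
	 * Wire up the power-allocator extensions only when a usable Energy
	 * Model is available; otherwise a sorted cpufreq frequency table is
	 * required for the plain state <-> frequency mapping.
	 */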
#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
	if (em_is_sane(cpufreq_cdev, em)) {
		cpufreq_cdev->em = em;
		cooling_ops->get_requested_power = cpufreq_get_requested_power;
		cooling_ops->state2power = cpufreq_state2power;
		cooling_ops->power2state = cpufreq_power2state;
	} else
#endif
	if (policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED) {
		pr_err("%s: unsorted frequency tables are not supported\n",
		       __func__);
		cdev = ERR_PTR(-EINVAL);
		goto free_idle_time;
	}
	ret = freq_qos_add_request(&policy->constraints,
				   &cpufreq_cdev->qos_req, FREQ_QOS_MAX,
				   get_state_freq(cpufreq_cdev, 0));
	if (ret < 0) {
		pr_err("%s: Failed to add freq constraint (%d)\n", __func__,
		       ret);
		cdev = ERR_PTR(ret);
		goto free_idle_time;
	}

	cdev = ERR_PTR(-ENOMEM);
	name = kasprintf(GFP_KERNEL, "cpufreq-%s", dev_name(dev));
	if (!name)
		goto remove_qos_req;

	cdev = thermal_of_cooling_device_register(np, name, cpufreq_cdev,
						  cooling_ops);
	kfree(name);

	if (IS_ERR(cdev))
		goto remove_qos_req;

	return cdev;

remove_qos_req:
	freq_qos_remove_request(&cpufreq_cdev->qos_req);
free_idle_time:
	free_idle_time(cpufreq_cdev);
free_cdev:
	kfree(cpufreq_cdev);
	return cdev;
}
/**
 * cpufreq_cooling_register - function to create cpufreq cooling device.
 * @policy: cpufreq policy
 *
 * This interface function registers the cpufreq cooling device with the name
 * "cpufreq-%s". This API can support multiple instances of cpufreq cooling
 * devices.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * on failure, it returns a corresponding ERR_PTR().
 */
struct thermal_cooling_device *
cpufreq_cooling_register(struct cpufreq_policy *policy)
{
	return __cpufreq_cooling_register(NULL, policy, NULL);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
/**
 * of_cpufreq_cooling_register - function to create cpufreq cooling device.
 * @policy: cpufreq policy
 *
 * This interface function registers the cpufreq cooling device with the name
 * "cpufreq-%s". This API can support multiple instances of cpufreq cooling
 * devices. Using this API, the cpufreq cooling device will be linked to the
 * device tree node provided.
 *
 * Using this function, the cooling device will implement the power
 * extensions by using the Energy Model (if present).  The cpus must have
 * registered their OPPs using the OPP library.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * and NULL on failure.
 */
struct thermal_cooling_device *
of_cpufreq_cooling_register(struct cpufreq_policy *policy)
{
	struct device_node *np = of_get_cpu_node(policy->cpu, NULL);
	struct thermal_cooling_device *cdev = NULL;

	if (!np) {
		pr_err("cpufreq_cooling: OF node not available for cpu%d\n",
		       policy->cpu);
		return NULL;
	}
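
	/*
	 * Only CPUs whose device tree node declares a #cooling-cells
	 * property are meant to be bound as cooling devices; for anything
	 * else, quietly return NULL.
	 */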
	if (of_find_property(np, "#cooling-cells", NULL)) {
		struct em_perf_domain *em = em_cpu_get(policy->cpu);

		cdev = __cpufreq_cooling_register(np, policy, em);
		if (IS_ERR(cdev)) {
			pr_err("cpufreq_cooling: cpu%d failed to register as cooling device: %ld\n",
			       policy->cpu, PTR_ERR(cdev));
			cdev = NULL;
		}
	}

	of_node_put(np);
	return cdev;
}
EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
/**
 * cpufreq_cooling_unregister - function to remove cpufreq cooling device.
 * @cdev: thermal cooling device pointer.
 *
 * This interface function unregisters the "cpufreq-%s" cooling device.
 */
void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
{
	struct cpufreq_cooling_device *cpufreq_cdev;

	if (!cdev)
		return;

	cpufreq_cdev = cdev->devdata;

	thermal_cooling_device_unregister(cdev);
	freq_qos_remove_request(&cpufreq_cdev->qos_req);
	free_idle_time(cpufreq_cdev);
	kfree(cpufreq_cdev);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);
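
/*
 * Typical usage, as a sketch: a cpufreq driver that wants its CPUs to act
 * as passive cooling devices registers from its ->ready() callback and
 * cleans up on ->exit().  "data" is a hypothetical driver-private struct
 * holding the returned pointer:
 *
 *	data->cdev = of_cpufreq_cooling_register(policy);
 *
 *	cpufreq_cooling_unregister(data->cdev);
 */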