/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_ENERGY_MODEL_H
#define _LINUX_ENERGY_MODEL_H
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/jump_label.h>
#include <linux/kobject.h>
#include <linux/rcupdate.h>
#include <linux/sched/cpufreq.h>
#include <linux/sched/topology.h>
#include <linux/types.h>

/**
 * struct em_perf_state - Performance state of a performance domain
 * @frequency: The frequency in KHz, for consistency with CPUFreq
 * @power:     The power consumed at this level (by 1 CPU or by a registered
 *             device). It can be a total power: static and dynamic.
 * @cost:      The cost coefficient associated with this level, used during
 *             energy calculation. Equal to: power * max_frequency / frequency
 * @flags:     see "em_perf_state flags" description below.
 */
struct em_perf_state {
        unsigned long frequency;
        unsigned long power;
        unsigned long cost;
        unsigned long flags;
};

/*
 * em_perf_state flags:
 *
 * EM_PERF_STATE_INEFFICIENT: The performance state is inefficient. There is
 * another performance state in this em_perf_domain with a higher frequency
 * but a lower or equal power cost. Such inefficient states are ignored when
 * using the em_pd_get_efficient_*() functions.
 */
#define EM_PERF_STATE_INEFFICIENT BIT(0)

/**
 * struct em_perf_domain - Performance domain
 * @table:          List of performance states, in ascending order
 * @nr_perf_states: Number of performance states
 * @flags:          See "em_perf_domain flags"
 * @cpus:           Cpumask covering the CPUs of the domain. It's here
 *                  for performance reasons to avoid potential cache
 *                  misses during energy calculations in the scheduler
 *                  and simplifies allocating/freeing that memory region.
 *
 * In the case of a CPU device, a "performance domain" represents a group of
 * CPUs whose performance is scaled together. All CPUs of a performance domain
 * must have the same micro-architecture. Performance domains often have
 * a 1-to-1 mapping with CPUFreq policies. In the case of other devices the
 * @cpus field is unused.
 */
struct em_perf_domain {
        struct em_perf_state *table;
        int nr_perf_states;
        unsigned long flags;
        unsigned long cpus[];
};

/*
 * em_perf_domain flags:
 *
 * EM_PERF_DOMAIN_MICROWATTS: The power values are in micro-Watts or some
 * other scale.
 *
 * EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating
 * energy consumption.
 *
 * EM_PERF_DOMAIN_ARTIFICIAL: The power values are artificial and might be
 * created by a platform missing real power information.
 */
#define EM_PERF_DOMAIN_MICROWATTS BIT(0)
#define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1)
#define EM_PERF_DOMAIN_ARTIFICIAL BIT(2)

#define em_span_cpus(em) (to_cpumask((em)->cpus))
#define em_is_artificial(em) ((em)->flags & EM_PERF_DOMAIN_ARTIFICIAL)

#ifdef CONFIG_ENERGY_MODEL
/*
 * The max power value in micro-Watts. The limit of 64 Watts is set as
 * a safety net to not overflow multiplications on 32bit platforms. The
 * 32bit value limit for total Perf Domain power implies a limit of
 * maximum 64 CPUs in such a domain.
 */
#define EM_MAX_POWER (64000000) /* 64 Watts */

/*
 * To avoid possible energy estimation overflow on 32bit machines, add
 * a limit to the number of CPUs in the Perf. Domain.
 * We are safe on 64bit machines, thus some big number.
 */
#ifdef CONFIG_64BIT
#define EM_MAX_NUM_CPUS 4096
#else
#define EM_MAX_NUM_CPUS 16
#endif

/*
 * To avoid an overflow on 32bit machines while calculating the energy,
 * use a different order of operations. First divide by 'scale_cpu',
 * which reduces the big value stored in the 'cost' field, then multiply by
 * 'sum_util'. This allows handling existing platforms which have
 * e.g. power ~1.3 Watt at max freq, so the 'cost' value > 1mln micro-Watts.
 * In such a scenario, with 4 CPUs in the Perf. Domain, 'sum_util' could be
 * 4096, and the multiplication 'cost' * 'sum_util' would overflow.
 * This reordering of operations has some limitations: we lose a little
 * precision in the estimation (compared to a 64bit platform w/o reordering).
 *
 * We are safe on 64bit machines.
 */
#ifdef CONFIG_64BIT
#define em_estimate_energy(cost, sum_util, scale_cpu) \
        (((cost) * (sum_util)) / (scale_cpu))
#else
#define em_estimate_energy(cost, sum_util, scale_cpu) \
        (((cost) / (scale_cpu)) * (sum_util))
#endif
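
/*
 * Worked example of the reordering above (numbers are illustrative only,
 * mirroring the scenario described in the comment): take 'cost' = 1300000
 * (a CPU drawing ~1.3 W at its highest OPP), 'sum_util' = 4096 (4 fully
 * busy CPUs) and 'scale_cpu' = 1024:
 *
 *   64bit order: (1300000 * 4096) / 1024 = 5200000
 *   32bit order: (1300000 / 1024) * 4096 = 1269 * 4096 = 5197824
 *
 * The intermediate product 1300000 * 4096 = 5324800000 does not fit in a
 * 32bit unsigned long, which is why the division is done first there, at
 * the price of a small rounding error (~0.04% in this example).
 */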

struct em_data_callback {
        /**
         * active_power() - Provide power at the next performance state of
         *              a device
         * @dev   : Device for which we do this operation (can be a CPU)
         * @power : Active power at the performance state
         *              (modified)
         * @freq  : Frequency at the performance state in kHz
         *              (modified)
         *
         * active_power() must find the lowest performance state of 'dev' above
         * 'freq' and update 'power' and 'freq' to the matching active power
         * and frequency.
         *
         * In case of CPUs, the power is the one of a single CPU in the domain,
         * expressed in micro-Watts or an abstract scale. It is expected to
         * fit in the [0, EM_MAX_POWER] range.
         *
         * Return 0 on success.
         */
        int (*active_power)(struct device *dev, unsigned long *power,
                            unsigned long *freq);

        /**
         * get_cost() - Provide the cost at the given performance state of
         *              a device
         * @dev  : Device for which we do this operation (can be a CPU)
         * @freq : Frequency at the performance state in kHz
         * @cost : The cost value for the performance state
         *              (modified)
         *
         * In case of CPUs, the cost is the one of a single CPU in the domain.
         * It is expected to fit in the [0, EM_MAX_POWER] range due to internal
         * usage in EAS calculation.
         *
         * Return 0 on success, or appropriate error value in case of failure.
         */
        int (*get_cost)(struct device *dev, unsigned long freq,
                        unsigned long *cost);
};
#define EM_SET_ACTIVE_POWER_CB(em_cb, cb) ((em_cb).active_power = cb)
#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb)      \
        { .active_power = _active_power_cb,             \
          .get_cost = _cost_cb }
#define EM_DATA_CB(_active_power_cb)                    \
        EM_ADV_DATA_CB(_active_power_cb, NULL)

struct em_perf_domain *em_cpu_get(int cpu);
struct em_perf_domain *em_pd_get(struct device *dev);
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
                                struct em_data_callback *cb, cpumask_t *span,
                                bool microwatts);
void em_dev_unregister_perf_domain(struct device *dev);
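
/*
 * Illustrative sketch only (not part of this API): how a driver might wire
 * the callback and the registration above together. The function names
 * foo_active_power(), foo_round_up_to_opp() and foo_get_power_uw(), and the
 * idea that frequency/power pairs come from some firmware/OPP table, are
 * hypothetical placeholders; real users are typically cpufreq or devfreq
 * drivers.
 *
 *      static int foo_active_power(struct device *dev, unsigned long *power,
 *                                  unsigned long *freq)
 *      {
 *              unsigned long khz = *freq;
 *
 *              // Round 'khz' up to the lowest supported OPP >= *freq and
 *              // look up its per-CPU power (uW), e.g. from a driver table.
 *              *freq = foo_round_up_to_opp(dev, khz);
 *              *power = foo_get_power_uw(dev, *freq);
 *              return 0;
 *      }
 *
 *      static int foo_register_em(struct device *cpu_dev, unsigned int nr_opps,
 *                                 cpumask_t *cpus)
 *      {
 *              struct em_data_callback em_cb = EM_DATA_CB(foo_active_power);
 *
 *              // 'true': power values are in micro-Watts, not an abstract scale.
 *              return em_dev_register_perf_domain(cpu_dev, nr_opps, &em_cb,
 *                                                 cpus, true);
 *      }
 */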

/**
 * em_pd_get_efficient_state() - Get an efficient performance state from the EM
 * @pd   : Performance domain for which we want an efficient frequency
 * @freq : Frequency to map with the EM
 *
 * It is called from the scheduler code quite frequently and as a consequence
 * doesn't implement any check.
 *
 * Return: An efficient performance state, high enough to meet @freq
 * requirement.
 */
static inline
struct em_perf_state *em_pd_get_efficient_state(struct em_perf_domain *pd,
                                                unsigned long freq)
{
        struct em_perf_state *ps;
        int i;

        for (i = 0; i < pd->nr_perf_states; i++) {
                ps = &pd->table[i];
                if (ps->frequency >= freq) {
                        if (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES &&
                            ps->flags & EM_PERF_STATE_INEFFICIENT)
                                continue;
                        break;
                }
        }

        return ps;
}

/**
 * em_cpu_energy() - Estimates the energy consumed by the CPUs of a
 *              performance domain
 * @pd              : performance domain for which energy has to be estimated
 * @max_util        : highest utilization among CPUs of the domain
 * @sum_util        : sum of the utilization of all CPUs in the domain
 * @allowed_cpu_cap : maximum allowed CPU capacity for the @pd, which
 *                    might reflect reduced frequency (due to thermal)
 *
 * This function must be used only for CPU devices. There is no validation
 * that the EM is of CPU type and has a cpumask allocated. It is called from
 * the scheduler code quite frequently and that is why there are no checks.
 *
 * Return: the sum of the energy consumed by the CPUs of the domain assuming
 * a capacity state satisfying the max utilization of the domain.
 */
static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
                                unsigned long max_util, unsigned long sum_util,
                                unsigned long allowed_cpu_cap)
{
        unsigned long freq, scale_cpu;
        struct em_perf_state *ps;
        int cpu;

        if (!sum_util)
                return 0;

        /*
         * In order to predict the performance state, map the utilization of
         * the most utilized CPU of the performance domain to a requested
         * frequency, like schedutil. Also take into account that the real
         * frequency might be set lower (due to thermal capping). Thus, clamp
         * max utilization to the allowed CPU capacity before calculating
         * effective frequency.
         */
        cpu = cpumask_first(to_cpumask(pd->cpus));
        scale_cpu = arch_scale_cpu_capacity(cpu);
        ps = &pd->table[pd->nr_perf_states - 1];

        max_util = map_util_perf(max_util);
        max_util = min(max_util, allowed_cpu_cap);
        freq = map_util_freq(max_util, ps->frequency, scale_cpu);

        /*
         * Find the lowest performance state of the Energy Model above the
         * requested frequency.
         */
        ps = em_pd_get_efficient_state(pd, freq);

        /*
         * The capacity of a CPU in the domain at the performance state (ps)
         * can be computed as:
         *
         *                     ps->freq * scale_cpu
         *           ps->cap = --------------------                      (1)
         *                         cpu_max_freq
         *
         * So, ignoring the costs of idle states (which are not available in
         * the EM), the energy consumed by this CPU at that performance state
         * is estimated as:
         *
         *                     ps->power * cpu_util
         *           cpu_nrg = --------------------                      (2)
         *                           ps->cap
         *
         * since 'cpu_util / ps->cap' represents its percentage of busy time.
         *
         *   NOTE: Although the result of this computation actually is in
         *         units of power, it can be manipulated as an energy value
         *         over a scheduling period, since it is assumed to be
         *         constant during that interval.
         *
         * By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product
         * of two terms:
         *
         *                     ps->power * cpu_max_freq   cpu_util
         *           cpu_nrg = ------------------------ * ---------      (3)
         *                            ps->freq            scale_cpu
         *
         * The first term is static, and is stored in the em_perf_state struct
         * as 'ps->cost'.
         *
         * Since all CPUs of the domain have the same micro-architecture, they
         * share the same 'ps->cost', and the same CPU capacity. Hence, the
         * total energy of the domain (which is the simple sum of the energy of
         * all of its CPUs) can be factorized as:
         *
         *                     ps->cost * \Sum cpu_util
         *            pd_nrg = ------------------------                  (4)
         *                            scale_cpu
         */
        return em_estimate_energy(ps->cost, sum_util, scale_cpu);
}
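
/*
 * Worked example of (1)-(4) above, with illustrative numbers only: consider
 * a domain whose CPUs have cpu_max_freq = 2000000 kHz and scale_cpu = 1024,
 * evaluated at a performance state with ps->frequency = 1000000 kHz and
 * ps->power = 200000 uW, so ps->cost = 200000 * 2000000 / 1000000 = 400000.
 *
 *   (1): ps->cap = 1000000 * 1024 / 2000000 = 512
 *   (4): with sum_util = 400,
 *        pd_nrg = 400000 * 400 / 1024 = 156250
 *
 * which matches summing (2) per CPU: a single busy CPU with cpu_util = 400
 * gives 200000 * 400 / 512 = 156250.
 */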

/**
 * em_pd_nr_perf_states() - Get the number of performance states of a perf.
 *              domain
 * @pd : performance domain for which this must be done
 *
 * Return: the number of performance states in the performance domain table
 */
static inline int em_pd_nr_perf_states(struct em_perf_domain *pd)
{
        return pd->nr_perf_states;
}

#else
struct em_data_callback {};
#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) { }
#define EM_DATA_CB(_active_power_cb) { }
#define EM_SET_ACTIVE_POWER_CB(em_cb, cb) do { } while (0)

static inline
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
                                struct em_data_callback *cb, cpumask_t *span,
                                bool microwatts)
{
        return -EINVAL;
}
static inline void em_dev_unregister_perf_domain(struct device *dev)
{
}
static inline struct em_perf_domain *em_cpu_get(int cpu)
{
        return NULL;
}
static inline struct em_perf_domain *em_pd_get(struct device *dev)
{
        return NULL;
}
static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
                        unsigned long max_util, unsigned long sum_util,
                        unsigned long allowed_cpu_cap)
{
        return 0;
}
static inline int em_pd_nr_perf_states(struct em_perf_domain *pd)
{
        return 0;
}
#endif

#endif