// SPDX-License-Identifier: GPL-2.0
/*
 * Per Entity Load Tracking
 *
 *  Copyright (C) 2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *
 *  Interactivity improvements by Mike Galbraith
 *  (C) 2007 Mike Galbraith <efault@gmx.de>
 *
 *  Various enhancements by Dmitry Adamushko.
 *  (C) 2007 Dmitry Adamushko <dmitry.adamushko@gmail.com>
 *
 *  Group scheduling enhancements by Srivatsa Vaddagiri
 *  Copyright IBM Corporation, 2007
 *  Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
 *
 *  Scaled math optimizations by Thomas Gleixner
 *  Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
 *
 *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
 *
 *  Move PELT related code from fair.c into this pelt.c file
 *  Author: Vincent Guittot <vincent.guittot@linaro.org>
 */

/*
 * Approximate:
 *   val * y^n,    where y^32 ~= 0.5 (~1 scheduling period)
 */
static u64 decay_load(u64 val, u64 n)
{
        unsigned int local_n;

        if (unlikely(n > LOAD_AVG_PERIOD * 63))
                return 0;

        /* after bounds checking we can collapse to 32-bit */
        local_n = n;

        /*
         * As y^PERIOD = 1/2, we can combine
         *    y^n = 1/2^(n/PERIOD) * y^(n%PERIOD)
         * With a look-up table which covers y^n (n<PERIOD)
         *
         * To achieve constant time decay_load.
         */
        if (unlikely(local_n >= LOAD_AVG_PERIOD)) {
                val >>= local_n / LOAD_AVG_PERIOD;
                local_n %= LOAD_AVG_PERIOD;
        }

        val = mul_u64_u32_shr(val, runnable_avg_yN_inv[local_n], 32);
        return val;
}
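
/*
 * Worked example (illustrative, assuming LOAD_AVG_PERIOD == 32 and the usual
 * y with y^32 == 0.5): decay_load(1024, 48) first applies the shift for the
 * 48 / 32 == 1 full half-life, 1024 >> 1 == 512, and then scales by the table
 * entry for the remaining 48 % 32 == 16 periods, y^16 == sqrt(0.5) ~= 0.7071,
 * giving ~362. The early return for n > LOAD_AVG_PERIOD * 63 (roughly 2s of
 * history) is safe because that much decay leaves nothing of significance in
 * the sums tracked here.
 */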

static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
{
        u32 c1, c2, c3 = d3; /* y^0 == 1 */

        /*
         * c1 = d1 y^p
         */
        c1 = decay_load((u64)d1, periods);

        /*
         *            p-1
         * c2 = 1024 \Sum y^n
         *            n=1
         *
         *              inf        inf
         *    = 1024 ( \Sum y^n - \Sum y^n - y^0 )
         *              n=0        n=p
         */
        c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;

        return c1 + c2 + c3;
}
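
/*
 * Worked example (illustrative, using LOAD_AVG_MAX, the saturation value of
 * the series 1024 * \Sum y^n, which is 47742 with the standard constants):
 * for p == 2, d1 == 300 and d3 == 200,
 *
 *   c1 = 300 * y^2                        ~= 287
 *   c2 = 47742 - 47742*y^2 - 1024         ~= 1002  (i.e. 1024 * y^1, give or
 *                                                   take a unit of rounding)
 *   c3 = 200
 *
 * so the newly accumulated contribution is roughly 287 + 1002 + 200 ~= 1489.
 */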

/*
 * Accumulate the three separate parts of the sum; d1 the remainder
 * of the last (incomplete) period, d2 the span of full periods and d3
 * the remainder of the (incomplete) current period.
 *
 *           d1          d2           d3
 *           ^           ^            ^
 *           |           |            |
 *         |<->|<----------------->|<--->|
 * ... |---x---|------| ... |------|-----x (now)
 *
 *                           p-1
 * u' = (u + d1) y^p + 1024 \Sum y^n + d3 y^0
 *                           n=1
 *
 *    = u y^p +                                 (Step 1)
 *
 *                     p-1
 *      d1 y^p + 1024 \Sum y^n + d3 y^0         (Step 2)
 *                     n=1
 */
static __always_inline u32
accumulate_sum(u64 delta, struct sched_avg *sa,
               unsigned long load, unsigned long runnable, int running)
{
        u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
        u64 periods;

        delta += sa->period_contrib;
        periods = delta / 1024; /* A period is 1024us (~1ms) */

        /*
         * Step 1: decay old *_sum if we crossed period boundaries.
         */
        if (periods) {
                sa->load_sum = decay_load(sa->load_sum, periods);
                sa->runnable_sum =
                        decay_load(sa->runnable_sum, periods);
                sa->util_sum = decay_load((u64)(sa->util_sum), periods);

                /*
                 * Step 2
                 */
                delta %= 1024;
                if (load) {
                        /*
                         * This relies on the:
                         *
                         *   if (!load)
                         *           runnable = running = 0;
                         *
                         * clause from ___update_load_sum(); it makes the
                         * below usage of @contrib disappear entirely, so
                         * there is no point in calculating it.
                         */
                        contrib = __accumulate_pelt_segments(periods,
                                        1024 - sa->period_contrib, delta);
                }
        }
        sa->period_contrib = delta;

        if (load)
                sa->load_sum += load * contrib;
        if (runnable)
                sa->runnable_sum += runnable * contrib << SCHED_CAPACITY_SHIFT;
        if (running)
                sa->util_sum += contrib << SCHED_CAPACITY_SHIFT;

        return periods;
}

/*
 * We can represent the historical contribution to runnable average as the
 * coefficients of a geometric series.  To do this we sub-divide our runnable
 * history into segments of approximately 1ms (1024us); label the segment that
 * occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
 *
 * [<- 1024us ->|<- 1024us ->|<- 1024us ->| ...
 *       p0            p1           p2
 *      (now)       (~1ms ago)  (~2ms ago)
 *
 * Let u_i denote the fraction of p_i that the entity was runnable.
 *
 * We then designate the fractions u_i as our coefficients, yielding the
 * following representation of historical load:
 *   u_0 + u_1*y + u_2*y^2 + u_3*y^3 + ...
 *
 * We choose y based on the width of a reasonable scheduling period, fixing:
 *   y^32 = 0.5
 *
 * This means that the contribution to load ~32ms ago (u_32) will be weighted
 * approximately half as much as the contribution to load within the last ms
 * (u_0).
 *
 * When a period "rolls over" and we have new u_0`, multiplying the previous
 * sum again by y is sufficient to update:
 *   load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
 *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
 */
static __always_inline int
___update_load_sum(u64 now, struct sched_avg *sa,
                   unsigned long load, unsigned long runnable, int running)
{
        u64 delta;

        delta = now - sa->last_update_time;
        /*
         * This should only happen when time goes backwards, which it
         * unfortunately does during sched clock init when we swap over to TSC.
         */
        if ((s64)delta < 0) {
                sa->last_update_time = now;
                return 0;
        }

        /*
         * Use 1024ns as the unit of measurement since it's a reasonable
         * approximation of 1us and fast to compute.
         */
        delta >>= 10;
        if (!delta)
                return 0;

        sa->last_update_time += delta << 10;

        /*
         * running is a subset of runnable (weight) so running can't be set if
         * runnable is clear. But there are some corner cases where the current
         * se has already been dequeued but cfs_rq->curr still points to it.
         * This means that weight can be 0 while running is still set, for a
         * sched_entity but also for a cfs_rq if the latter becomes idle. As an
         * example, this happens during idle_balance() which calls
         * update_blocked_averages().
         *
         * Also see the comment in accumulate_sum().
         */
        if (!load)
                runnable = running = 0;

        /*
         * Now we know we crossed measurement unit boundaries. The *_avg
         * accrues by two steps:
         *
         * Step 1: accumulate *_sum since last_update_time. If we haven't
         * crossed period boundaries, finish.
         */
        if (!accumulate_sum(delta, sa, load, runnable, running))
                return 0;

        return 1;
}

/*
 * When syncing *_avg with *_sum, we must take into account the current
 * position in the PELT segment otherwise the remaining part of the segment
 * will be considered as idle time whereas it's not yet elapsed and this will
 * generate unwanted oscillation in the range [1002..1024[.
 *
 * The max value of *_sum varies with the position in the time segment and is
 * equal to:
 *
 *   LOAD_AVG_MAX*y + sa->period_contrib
 *
 * which can be simplified into:
 *
 *   LOAD_AVG_MAX - 1024 + sa->period_contrib
 *
 * because LOAD_AVG_MAX*y == LOAD_AVG_MAX-1024
 *
 * The same care must be taken when a sched entity is added, updated or
 * removed from a cfs_rq and we need to update sched_avg. Scheduler entities
 * and the cfs_rq to which they are attached have the same position in the
 * time segment because they use the same clock. This means that we can use
 * the period_contrib of cfs_rq when updating the sched_avg of a sched_entity
 * if it's more convenient.
 */
static __always_inline void
___update_load_avg(struct sched_avg *sa, unsigned long load)
{
        u32 divider = get_pelt_divider(sa);

        /*
         * Step 2: update *_avg.
         */
        sa->load_avg = div_u64(load * sa->load_sum, divider);
        sa->runnable_avg = div_u64(sa->runnable_sum, divider);
        WRITE_ONCE(sa->util_avg, sa->util_sum / divider);
}
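
/*
 * Worked example (illustrative): per the comment above, the divider is
 * LOAD_AVG_MAX - 1024 + sa->period_contrib, i.e. 46718 + sa->period_contrib
 * with the standard LOAD_AVG_MAX of 47742. For an entity that has been
 * running continuously (at full capacity and frequency), util_sum saturates
 * at divider << SCHED_CAPACITY_SHIFT, so util_avg converges to
 * SCHED_CAPACITY_SCALE (1024). Dividing by LOAD_AVG_MAX instead would make
 * the result wobble between ~1002 and 1024 across a segment, which is the
 * oscillation the comment above refers to.
 */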

/*
 * sched_entity:
 *
 *   task:
 *     se_weight()   = se->load.weight
 *     se_runnable() = !!on_rq
 *
 *   group: [ see update_cfs_group() ]
 *     se_weight()   = tg->weight * grq->load_avg / tg->load_avg
 *     se_runnable() = grq->h_nr_running
 *
 *   runnable_sum = se_runnable() * runnable = grq->runnable_sum
 *   runnable_avg = runnable_sum
 *
 *   load_sum := runnable
 *   load_avg = se_weight(se) * load_sum
 *
 * cfs_rq:
 *
 *   runnable_sum = \Sum se->avg.runnable_sum
 *   runnable_avg = \Sum se->avg.runnable_avg
 *
 *   load_sum = \Sum se_weight(se) * se->avg.load_sum
 *   load_avg = \Sum se->avg.load_avg
 */

int __update_load_avg_blocked_se(u64 now, struct sched_entity *se)
{
        if (___update_load_sum(now, &se->avg, 0, 0, 0)) {
                ___update_load_avg(&se->avg, se_weight(se));
                trace_pelt_se_tp(se);
                return 1;
        }

        return 0;
}

int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se)
{
        if (___update_load_sum(now, &se->avg, !!se->on_rq, se_runnable(se),
                               cfs_rq->curr == se)) {

                ___update_load_avg(&se->avg, se_weight(se));
                cfs_se_util_change(&se->avg);
                trace_pelt_se_tp(se);
                return 1;
        }

        return 0;
}

int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq)
{
        if (___update_load_sum(now, &cfs_rq->avg,
                               scale_load_down(cfs_rq->load.weight),
                               cfs_rq->h_nr_running,
                               cfs_rq->curr != NULL)) {

                ___update_load_avg(&cfs_rq->avg, 1);
                trace_pelt_cfs_tp(cfs_rq);
                return 1;
        }

        return 0;
}

/*
 * rt_rq:
 *
 *   util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
 *   util_sum = cpu_scale * load_sum
 *   runnable_sum = util_sum
 *
 *   load_avg and runnable_avg are not supported and meaningless.
 *
 */

int update_rt_rq_load_avg(u64 now, struct rq *rq, int running)
{
        if (___update_load_sum(now, &rq->avg_rt,
                               running,
                               running,
                               running)) {

                ___update_load_avg(&rq->avg_rt, 1);
                trace_pelt_rt_tp(rq);
                return 1;
        }

        return 0;
}

/*
 * dl_rq:
 *
 *   util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
 *   util_sum = cpu_scale * load_sum
 *   runnable_sum = util_sum
 *
 *   load_avg and runnable_avg are not supported and meaningless.
 *
 */

int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
{
        if (___update_load_sum(now, &rq->avg_dl,
                               running,
                               running,
                               running)) {

                ___update_load_avg(&rq->avg_dl, 1);
                trace_pelt_dl_tp(rq);
                return 1;
        }

        return 0;
}

#ifdef CONFIG_SCHED_THERMAL_PRESSURE
/*
 * thermal:
 *
 *   load_sum = \Sum se->avg.load_sum but se->avg.load_sum is not tracked
 *
 *   util_avg and runnable_load_avg are not supported and meaningless.
 *
 * Unlike rt/dl utilization tracking, which tracks the time spent by a cpu
 * running a rt/dl task through util_avg, the average thermal pressure is
 * tracked through load_avg. This is because the thermal pressure signal is
 * a time-weighted "delta" capacity, unlike util_avg which is binary.
 *
 *   "delta capacity" = actual capacity -
 *                      capped capacity of a cpu due to a thermal event.
 */

int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)
{
        if (___update_load_sum(now, &rq->avg_thermal,
                               capacity,
                               capacity,
                               capacity)) {
                ___update_load_avg(&rq->avg_thermal, 1);
                trace_pelt_thermal_tp(rq);
                return 1;
        }

        return 0;
}
#endif
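
/*
 * Worked example (illustrative): if a CPU with capacity 1024 is capped to
 * 768 by a thermal event, the "delta capacity" described above is 256.
 * Feeding that in as @capacity makes rq->avg_thermal.load_avg converge
 * toward 256 while the cap persists, and decay back toward 0 once it is
 * lifted, halving roughly every LOAD_AVG_PERIOD (~32ms): ~128 after 32ms,
 * ~64 after 64ms, and so on.
 */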

#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
/*
 * irq:
 *
 *   util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
 *   util_sum = cpu_scale * load_sum
 *   runnable_sum = util_sum
 *
 *   load_avg and runnable_avg are not supported and meaningless.
 *
 */

int update_irq_load_avg(struct rq *rq, u64 running)
{
        int ret = 0;

        /*
         * We can't use clock_pelt because irq time is not accounted in
         * clock_task. Instead we directly scale the running time to
         * reflect the real amount of computation.
         */
        running = cap_scale(running, arch_scale_freq_capacity(cpu_of(rq)));
        running = cap_scale(running, arch_scale_cpu_capacity(cpu_of(rq)));

        /*
         * We know the time that has been used by interrupts since the last
         * update but we don't know when. Let's be pessimistic and assume the
         * interrupt happened just before the update. This is not far from
         * reality because an interrupt will most probably wake up a task and
         * trigger an update of the rq clock, during which the metric is
         * updated.
         * We start to decay with normal context time and then we add the
         * interrupt context time.
         * We can safely remove running from rq->clock because
         * rq->clock += delta with delta >= running
         */
        ret = ___update_load_sum(rq->clock - running, &rq->avg_irq,
                                 0,
                                 0,
                                 0);
        ret += ___update_load_sum(rq->clock, &rq->avg_irq,
                                  1,
                                  1,
                                  1);

        if (ret) {
                ___update_load_avg(&rq->avg_irq, 1);
                trace_pelt_irq_tp(rq);
        }

        return ret;
}
#endif
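
/*
 * Worked example for update_irq_load_avg() (illustrative, hypothetical
 * numbers): if 1000us of rq clock time elapsed since the last update and
 * 200us of it (after the capacity and frequency scaling above) was spent in
 * interrupt context, the first ___update_load_sum() call advances rq->avg_irq
 * over the 800us of non-irq time with load/runnable/running all clear (pure
 * decay), and the second call accumulates the remaining 200us as busy time.
 * A CPU spending all of its time in interrupts would thus drive
 * rq->avg_irq.util_avg toward SCHED_CAPACITY_SCALE (1024).
 */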