// SPDX-License-Identifier: GPL-2.0
/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */

0007 int sched_rr_timeslice = RR_TIMESLICE;

/* More than 4 hours if BW_SHIFT equals 20. */
0009 static const u64 max_rt_runtime = MAX_BW;
0010
0011 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
0012
0013 struct rt_bandwidth def_rt_bandwidth;

/*
 * period over which we measure -rt task CPU usage in us.
 * default: 1s
 */
unsigned int sysctl_sched_rt_period = 1000000;

/*
 * part of the period that we allow rt tasks to run in us.
 * default: 0.95s
 */
int sysctl_sched_rt_runtime = 950000;
0026
0027 #ifdef CONFIG_SYSCTL
0028 static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
0029 static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
0030 size_t *lenp, loff_t *ppos);
0031 static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
0032 size_t *lenp, loff_t *ppos);
0033 static struct ctl_table sched_rt_sysctls[] = {
0034 {
0035 .procname = "sched_rt_period_us",
0036 .data = &sysctl_sched_rt_period,
0037 .maxlen = sizeof(unsigned int),
0038 .mode = 0644,
0039 .proc_handler = sched_rt_handler,
0040 },
0041 {
0042 .procname = "sched_rt_runtime_us",
0043 .data = &sysctl_sched_rt_runtime,
0044 .maxlen = sizeof(int),
0045 .mode = 0644,
0046 .proc_handler = sched_rt_handler,
0047 },
0048 {
0049 .procname = "sched_rr_timeslice_ms",
0050 .data = &sysctl_sched_rr_timeslice,
0051 .maxlen = sizeof(int),
0052 .mode = 0644,
0053 .proc_handler = sched_rr_handler,
0054 },
0055 {}
0056 };
0057
0058 static int __init sched_rt_sysctl_init(void)
0059 {
0060 register_sysctl_init("kernel", sched_rt_sysctls);
0061 return 0;
0062 }
0063 late_initcall(sched_rt_sysctl_init);
0064 #endif
0065
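/*
 * Replenishment timer: every rt_period the hrtimer is forwarded and
 * do_sched_rt_period_timer() refills the runqueues' RT runtime; the timer
 * stops rearming once there is nothing left to unthrottle.
 */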
0066 static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
0067 {
0068 struct rt_bandwidth *rt_b =
0069 container_of(timer, struct rt_bandwidth, rt_period_timer);
0070 int idle = 0;
0071 int overrun;
0072
0073 raw_spin_lock(&rt_b->rt_runtime_lock);
0074 for (;;) {
0075 overrun = hrtimer_forward_now(timer, rt_b->rt_period);
0076 if (!overrun)
0077 break;
0078
0079 raw_spin_unlock(&rt_b->rt_runtime_lock);
0080 idle = do_sched_rt_period_timer(rt_b, overrun);
0081 raw_spin_lock(&rt_b->rt_runtime_lock);
0082 }
0083 if (idle)
0084 rt_b->rt_period_active = 0;
0085 raw_spin_unlock(&rt_b->rt_runtime_lock);
0086
0087 return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
0088 }
0089
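/*
 * Initialize an rt_bandwidth control structure with the given period and
 * runtime (nanoseconds) and set up its replenishment hrtimer (not started).
 */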
0090 void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
0091 {
0092 rt_b->rt_period = ns_to_ktime(period);
0093 rt_b->rt_runtime = runtime;
0094
0095 raw_spin_lock_init(&rt_b->rt_runtime_lock);
0096
0097 hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
0098 HRTIMER_MODE_REL_HARD);
0099 rt_b->rt_period_timer.function = sched_rt_period_timer;
0100 }
0101
0102 static inline void do_start_rt_bandwidth(struct rt_bandwidth *rt_b)
0103 {
0104 raw_spin_lock(&rt_b->rt_runtime_lock);
0105 if (!rt_b->rt_period_active) {
0106 rt_b->rt_period_active = 1;
		/*
		 * SCHED_DEADLINE updates the bandwidth, as a run away
		 * RT task with a DL task could hog a CPU. But DL does
		 * not reset the period. If a deadline task was running
		 * without an RT task running, it can cause RT tasks to
		 * throttle when they start up. Kick the timer right away
		 * to update the period.
		 */
0115 hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
0116 hrtimer_start_expires(&rt_b->rt_period_timer,
0117 HRTIMER_MODE_ABS_PINNED_HARD);
0118 }
0119 raw_spin_unlock(&rt_b->rt_runtime_lock);
0120 }
0121
0122 static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
0123 {
0124 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
0125 return;
0126
0127 do_start_rt_bandwidth(rt_b);
0128 }
0129
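/*
 * Initialize an rt_rq: empty priority array, nothing queued or throttled,
 * and zero local runtime until bandwidth gets distributed to it.
 */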
0130 void init_rt_rq(struct rt_rq *rt_rq)
0131 {
0132 struct rt_prio_array *array;
0133 int i;
0134
0135 array = &rt_rq->active;
0136 for (i = 0; i < MAX_RT_PRIO; i++) {
0137 INIT_LIST_HEAD(array->queue + i);
0138 __clear_bit(i, array->bitmap);
0139 }
0140
0141 __set_bit(MAX_RT_PRIO, array->bitmap);
0142
0143 #if defined CONFIG_SMP
0144 rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
0145 rt_rq->highest_prio.next = MAX_RT_PRIO-1;
0146 rt_rq->rt_nr_migratory = 0;
0147 rt_rq->overloaded = 0;
0148 plist_head_init(&rt_rq->pushable_tasks);
0149 #endif
0150
0151 rt_rq->rt_queued = 0;
0152
0153 rt_rq->rt_time = 0;
0154 rt_rq->rt_throttled = 0;
0155 rt_rq->rt_runtime = 0;
0156 raw_spin_lock_init(&rt_rq->rt_runtime_lock);
0157 }
0158
0159 #ifdef CONFIG_RT_GROUP_SCHED
0160 static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
0161 {
0162 hrtimer_cancel(&rt_b->rt_period_timer);
0163 }
0164
0165 #define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
0166
0167 static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
0168 {
0169 #ifdef CONFIG_SCHED_DEBUG
0170 WARN_ON_ONCE(!rt_entity_is_task(rt_se));
0171 #endif
0172 return container_of(rt_se, struct task_struct, rt);
0173 }
0174
0175 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
0176 {
0177 return rt_rq->rq;
0178 }
0179
0180 static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
0181 {
0182 return rt_se->rt_rq;
0183 }
0184
0185 static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
0186 {
0187 struct rt_rq *rt_rq = rt_se->rt_rq;
0188
0189 return rt_rq->rq;
0190 }
0191
0192 void unregister_rt_sched_group(struct task_group *tg)
0193 {
0194 if (tg->rt_se)
0195 destroy_rt_bandwidth(&tg->rt_bandwidth);
0196
0197 }
0198
0199 void free_rt_sched_group(struct task_group *tg)
0200 {
0201 int i;
0202
0203 for_each_possible_cpu(i) {
0204 if (tg->rt_rq)
0205 kfree(tg->rt_rq[i]);
0206 if (tg->rt_se)
0207 kfree(tg->rt_se[i]);
0208 }
0209
0210 kfree(tg->rt_rq);
0211 kfree(tg->rt_se);
0212 }
0213
0214 void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
0215 struct sched_rt_entity *rt_se, int cpu,
0216 struct sched_rt_entity *parent)
0217 {
0218 struct rq *rq = cpu_rq(cpu);
0219
0220 rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
0221 rt_rq->rt_nr_boosted = 0;
0222 rt_rq->rq = rq;
0223 rt_rq->tg = tg;
0224
0225 tg->rt_rq[cpu] = rt_rq;
0226 tg->rt_se[cpu] = rt_se;
0227
0228 if (!rt_se)
0229 return;
0230
0231 if (!parent)
0232 rt_se->rt_rq = &rq->rt;
0233 else
0234 rt_se->rt_rq = parent->my_q;
0235
0236 rt_se->my_q = rt_rq;
0237 rt_se->parent = parent;
0238 INIT_LIST_HEAD(&rt_se->run_list);
0239 }
0240
0241 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
0242 {
0243 struct rt_rq *rt_rq;
0244 struct sched_rt_entity *rt_se;
0245 int i;
0246
0247 tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(rt_rq), GFP_KERNEL);
0248 if (!tg->rt_rq)
0249 goto err;
0250 tg->rt_se = kcalloc(nr_cpu_ids, sizeof(rt_se), GFP_KERNEL);
0251 if (!tg->rt_se)
0252 goto err;
0253
0254 init_rt_bandwidth(&tg->rt_bandwidth,
0255 ktime_to_ns(def_rt_bandwidth.rt_period), 0);
0256
0257 for_each_possible_cpu(i) {
0258 rt_rq = kzalloc_node(sizeof(struct rt_rq),
0259 GFP_KERNEL, cpu_to_node(i));
0260 if (!rt_rq)
0261 goto err;
0262
0263 rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
0264 GFP_KERNEL, cpu_to_node(i));
0265 if (!rt_se)
0266 goto err_free_rq;
0267
0268 init_rt_rq(rt_rq);
0269 rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
0270 init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
0271 }
0272
0273 return 1;
0274
0275 err_free_rq:
0276 kfree(rt_rq);
0277 err:
0278 return 0;
0279 }
0280
0281 #else
0282
0283 #define rt_entity_is_task(rt_se) (1)
0284
0285 static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
0286 {
0287 return container_of(rt_se, struct task_struct, rt);
0288 }
0289
0290 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
0291 {
0292 return container_of(rt_rq, struct rq, rt);
0293 }
0294
0295 static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
0296 {
0297 struct task_struct *p = rt_task_of(rt_se);
0298
0299 return task_rq(p);
0300 }
0301
0302 static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
0303 {
0304 struct rq *rq = rq_of_rt_se(rt_se);
0305
0306 return &rq->rt;
0307 }
0308
0309 void unregister_rt_sched_group(struct task_group *tg) { }
0310
0311 void free_rt_sched_group(struct task_group *tg) { }
0312
0313 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
0314 {
0315 return 1;
0316 }
0317 #endif
0318
0319 #ifdef CONFIG_SMP
0320
0321 static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
0322 {
	/* Try to pull RT tasks here if we lower this rq's prio */
0324 return rq->online && rq->rt.highest_prio.curr > prev->prio;
0325 }
0326
0327 static inline int rt_overloaded(struct rq *rq)
0328 {
0329 return atomic_read(&rq->rd->rto_count);
0330 }
0331
0332 static inline void rt_set_overload(struct rq *rq)
0333 {
0334 if (!rq->online)
0335 return;
0336
0337 cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);

	/*
	 * Make sure the overload mask is visible before we set
	 * the overload count. That is checked to determine
	 * if we should look at the mask. It would be a shame
	 * if we looked at the mask, but the mask was not
	 * updated yet.
	 *
	 * Matched by the barrier in pull_rt_task().
	 */
0347 smp_wmb();
0348 atomic_inc(&rq->rd->rto_count);
0349 }
0350
0351 static inline void rt_clear_overload(struct rq *rq)
0352 {
0353 if (!rq->online)
0354 return;
0355
0356
0357 atomic_dec(&rq->rd->rto_count);
0358 cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
0359 }
0360
0361 static void update_rt_migration(struct rt_rq *rt_rq)
0362 {
0363 if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
0364 if (!rt_rq->overloaded) {
0365 rt_set_overload(rq_of_rt_rq(rt_rq));
0366 rt_rq->overloaded = 1;
0367 }
0368 } else if (rt_rq->overloaded) {
0369 rt_clear_overload(rq_of_rt_rq(rt_rq));
0370 rt_rq->overloaded = 0;
0371 }
0372 }
0373
0374 static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
0375 {
0376 struct task_struct *p;
0377
0378 if (!rt_entity_is_task(rt_se))
0379 return;
0380
0381 p = rt_task_of(rt_se);
0382 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
0383
0384 rt_rq->rt_nr_total++;
0385 if (p->nr_cpus_allowed > 1)
0386 rt_rq->rt_nr_migratory++;
0387
0388 update_rt_migration(rt_rq);
0389 }
0390
0391 static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
0392 {
0393 struct task_struct *p;
0394
0395 if (!rt_entity_is_task(rt_se))
0396 return;
0397
0398 p = rt_task_of(rt_se);
0399 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
0400
0401 rt_rq->rt_nr_total--;
0402 if (p->nr_cpus_allowed > 1)
0403 rt_rq->rt_nr_migratory--;
0404
0405 update_rt_migration(rt_rq);
0406 }
0407
0408 static inline int has_pushable_tasks(struct rq *rq)
0409 {
0410 return !plist_head_empty(&rq->rt.pushable_tasks);
0411 }
0412
0413 static DEFINE_PER_CPU(struct callback_head, rt_push_head);
0414 static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
0415
0416 static void push_rt_tasks(struct rq *);
0417 static void pull_rt_task(struct rq *);
0418
0419 static inline void rt_queue_push_tasks(struct rq *rq)
0420 {
0421 if (!has_pushable_tasks(rq))
0422 return;
0423
0424 queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
0425 }
0426
0427 static inline void rt_queue_pull_task(struct rq *rq)
0428 {
0429 queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
0430 }
0431
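/*
 * (Re)insert @p into this rq's pushable_tasks plist, keeping the list
 * ordered by priority so the push/pull logic can quickly find the best
 * migratable task.
 */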
0432 static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
0433 {
0434 plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
0435 plist_node_init(&p->pushable_tasks, p->prio);
0436 plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
0437
0438
0439 if (p->prio < rq->rt.highest_prio.next)
0440 rq->rt.highest_prio.next = p->prio;
0441 }
0442
0443 static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
0444 {
0445 plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
0446
0447
0448 if (has_pushable_tasks(rq)) {
0449 p = plist_first_entry(&rq->rt.pushable_tasks,
0450 struct task_struct, pushable_tasks);
0451 rq->rt.highest_prio.next = p->prio;
0452 } else {
0453 rq->rt.highest_prio.next = MAX_RT_PRIO-1;
0454 }
0455 }
0456
0457 #else
0458
0459 static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
0460 {
0461 }
0462
0463 static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
0464 {
0465 }
0466
0467 static inline
0468 void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
0469 {
0470 }
0471
0472 static inline
0473 void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
0474 {
0475 }
0476
0477 static inline void rt_queue_push_tasks(struct rq *rq)
0478 {
0479 }
0480 #endif
0481
0482 static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
0483 static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);
0484
0485 static inline int on_rt_rq(struct sched_rt_entity *rt_se)
0486 {
0487 return rt_se->on_rq;
0488 }
0489
0490 #ifdef CONFIG_UCLAMP_TASK
/*
 * Verify the fitness of task @p to run on @cpu taking into account the
 * uclamp settings.
 *
 * This check is only important for heterogeneous systems where uclamp_min
 * value is higher than the capacity of a @cpu. For non-heterogeneous systems
 * this condition isn't met.
 *
 * Returns true if the task passed the fitness check,
 *         false otherwise.
 */
0505 static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
0506 {
0507 unsigned int min_cap;
0508 unsigned int max_cap;
0509 unsigned int cpu_cap;
0510
0511
0512 if (!static_branch_unlikely(&sched_asym_cpucapacity))
0513 return true;
0514
0515 min_cap = uclamp_eff_value(p, UCLAMP_MIN);
0516 max_cap = uclamp_eff_value(p, UCLAMP_MAX);
0517
0518 cpu_cap = capacity_orig_of(cpu);
0519
0520 return cpu_cap >= min(min_cap, max_cap);
0521 }
0522 #else
0523 static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
0524 {
0525 return true;
0526 }
0527 #endif
0528
0529 #ifdef CONFIG_RT_GROUP_SCHED
0530
0531 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
0532 {
0533 if (!rt_rq->tg)
0534 return RUNTIME_INF;
0535
0536 return rt_rq->rt_runtime;
0537 }
0538
0539 static inline u64 sched_rt_period(struct rt_rq *rt_rq)
0540 {
0541 return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
0542 }
0543
0544 typedef struct task_group *rt_rq_iter_t;
0545
0546 static inline struct task_group *next_task_group(struct task_group *tg)
0547 {
0548 do {
0549 tg = list_entry_rcu(tg->list.next,
0550 typeof(struct task_group), list);
0551 } while (&tg->list != &task_groups && task_group_is_autogroup(tg));
0552
0553 if (&tg->list == &task_groups)
0554 tg = NULL;
0555
0556 return tg;
0557 }
0558
0559 #define for_each_rt_rq(rt_rq, iter, rq) \
0560 for (iter = container_of(&task_groups, typeof(*iter), list); \
0561 (iter = next_task_group(iter)) && \
0562 (rt_rq = iter->rt_rq[cpu_of(rq)]);)
0563
0564 #define for_each_sched_rt_entity(rt_se) \
0565 for (; rt_se; rt_se = rt_se->parent)
0566
0567 static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
0568 {
0569 return rt_se->my_q;
0570 }
0571
0572 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
0573 static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
0574
0575 static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
0576 {
0577 struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
0578 struct rq *rq = rq_of_rt_rq(rt_rq);
0579 struct sched_rt_entity *rt_se;
0580
0581 int cpu = cpu_of(rq);
0582
0583 rt_se = rt_rq->tg->rt_se[cpu];
0584
0585 if (rt_rq->rt_nr_running) {
0586 if (!rt_se)
0587 enqueue_top_rt_rq(rt_rq);
0588 else if (!on_rt_rq(rt_se))
0589 enqueue_rt_entity(rt_se, 0);
0590
0591 if (rt_rq->highest_prio.curr < curr->prio)
0592 resched_curr(rq);
0593 }
0594 }
0595
0596 static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
0597 {
0598 struct sched_rt_entity *rt_se;
0599 int cpu = cpu_of(rq_of_rt_rq(rt_rq));
0600
0601 rt_se = rt_rq->tg->rt_se[cpu];
0602
0603 if (!rt_se) {
0604 dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
0605
0606 cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
0607 }
0608 else if (on_rt_rq(rt_se))
0609 dequeue_rt_entity(rt_se, 0);
0610 }
0611
0612 static inline int rt_rq_throttled(struct rt_rq *rt_rq)
0613 {
0614 return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
0615 }
0616
0617 static int rt_se_boosted(struct sched_rt_entity *rt_se)
0618 {
0619 struct rt_rq *rt_rq = group_rt_rq(rt_se);
0620 struct task_struct *p;
0621
0622 if (rt_rq)
0623 return !!rt_rq->rt_nr_boosted;
0624
0625 p = rt_task_of(rt_se);
0626 return p->prio != p->normal_prio;
0627 }
0628
0629 #ifdef CONFIG_SMP
0630 static inline const struct cpumask *sched_rt_period_mask(void)
0631 {
0632 return this_rq()->rd->span;
0633 }
0634 #else
0635 static inline const struct cpumask *sched_rt_period_mask(void)
0636 {
0637 return cpu_online_mask;
0638 }
0639 #endif
0640
0641 static inline
0642 struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
0643 {
0644 return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
0645 }
0646
0647 static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
0648 {
0649 return &rt_rq->tg->rt_bandwidth;
0650 }
0651
0652 #else
0653
0654 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
0655 {
0656 return rt_rq->rt_runtime;
0657 }
0658
0659 static inline u64 sched_rt_period(struct rt_rq *rt_rq)
0660 {
0661 return ktime_to_ns(def_rt_bandwidth.rt_period);
0662 }
0663
0664 typedef struct rt_rq *rt_rq_iter_t;
0665
0666 #define for_each_rt_rq(rt_rq, iter, rq) \
0667 for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
0668
0669 #define for_each_sched_rt_entity(rt_se) \
0670 for (; rt_se; rt_se = NULL)
0671
0672 static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
0673 {
0674 return NULL;
0675 }
0676
0677 static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
0678 {
0679 struct rq *rq = rq_of_rt_rq(rt_rq);
0680
0681 if (!rt_rq->rt_nr_running)
0682 return;
0683
0684 enqueue_top_rt_rq(rt_rq);
0685 resched_curr(rq);
0686 }
0687
0688 static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
0689 {
0690 dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
0691 }
0692
0693 static inline int rt_rq_throttled(struct rt_rq *rt_rq)
0694 {
0695 return rt_rq->rt_throttled;
0696 }
0697
0698 static inline const struct cpumask *sched_rt_period_mask(void)
0699 {
0700 return cpu_online_mask;
0701 }
0702
0703 static inline
0704 struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
0705 {
0706 return &cpu_rq(cpu)->rt;
0707 }
0708
0709 static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
0710 {
0711 return &def_rt_bandwidth;
0712 }
0713
0714 #endif
0715
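/*
 * RT runtime still needs to be accounted as long as the bandwidth timer is
 * active or this rt_rq has not yet used up its runtime for the period.
 */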
0716 bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
0717 {
0718 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
0719
0720 return (hrtimer_active(&rt_b->rt_period_timer) ||
0721 rt_rq->rt_time < rt_b->rt_runtime);
0722 }
0723
0724 #ifdef CONFIG_SMP
/*
 * We ran out of runtime, see if we can borrow some from our neighbours.
 */
0728 static void do_balance_runtime(struct rt_rq *rt_rq)
0729 {
0730 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
0731 struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
0732 int i, weight;
0733 u64 rt_period;
0734
0735 weight = cpumask_weight(rd->span);
0736
0737 raw_spin_lock(&rt_b->rt_runtime_lock);
0738 rt_period = ktime_to_ns(rt_b->rt_period);
0739 for_each_cpu(i, rd->span) {
0740 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
0741 s64 diff;
0742
0743 if (iter == rt_rq)
0744 continue;
0745
0746 raw_spin_lock(&iter->rt_runtime_lock);
		/*
		 * Either all rqs have inf runtime and there's nothing to steal
		 * or __disable_runtime() below sets a specific rq to inf to
		 * indicate it has been disabled and disallow stealing.
		 */
0752 if (iter->rt_runtime == RUNTIME_INF)
0753 goto next;
0754
		/*
		 * From runqueues with spare time, take 1/n part of their
		 * spare time, but no more than our period.
		 */
0759 diff = iter->rt_runtime - iter->rt_time;
0760 if (diff > 0) {
0761 diff = div_u64((u64)diff, weight);
0762 if (rt_rq->rt_runtime + diff > rt_period)
0763 diff = rt_period - rt_rq->rt_runtime;
0764 iter->rt_runtime -= diff;
0765 rt_rq->rt_runtime += diff;
0766 if (rt_rq->rt_runtime == rt_period) {
0767 raw_spin_unlock(&iter->rt_runtime_lock);
0768 break;
0769 }
0770 }
0771 next:
0772 raw_spin_unlock(&iter->rt_runtime_lock);
0773 }
0774 raw_spin_unlock(&rt_b->rt_runtime_lock);
0775 }
0776
/*
 * Ensure this RQ takes back all the runtime it lent to its neighbours.
 */
0780 static void __disable_runtime(struct rq *rq)
0781 {
0782 struct root_domain *rd = rq->rd;
0783 rt_rq_iter_t iter;
0784 struct rt_rq *rt_rq;
0785
0786 if (unlikely(!scheduler_running))
0787 return;
0788
0789 for_each_rt_rq(rt_rq, iter, rq) {
0790 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
0791 s64 want;
0792 int i;
0793
0794 raw_spin_lock(&rt_b->rt_runtime_lock);
0795 raw_spin_lock(&rt_rq->rt_runtime_lock);
0796
		/*
		 * Either we're all inf and nobody needs to borrow, or we're
		 * already disabled and thus have nothing to do, or we have
		 * exactly the right amount of runtime to take out.
		 */
0801 if (rt_rq->rt_runtime == RUNTIME_INF ||
0802 rt_rq->rt_runtime == rt_b->rt_runtime)
0803 goto balanced;
0804 raw_spin_unlock(&rt_rq->rt_runtime_lock);
0805
		/*
		 * Calculate the difference between what we started out with
		 * and what we currently have, that's the amount of runtime
		 * we lent and now have to reclaim.
		 */
0811 want = rt_b->rt_runtime - rt_rq->rt_runtime;
0812
0813
0814
0815
0816 for_each_cpu(i, rd->span) {
0817 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
0818 s64 diff;
0819
0820
0821
0822
0823 if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
0824 continue;
0825
0826 raw_spin_lock(&iter->rt_runtime_lock);
0827 if (want > 0) {
0828 diff = min_t(s64, iter->rt_runtime, want);
0829 iter->rt_runtime -= diff;
0830 want -= diff;
0831 } else {
0832 iter->rt_runtime -= want;
0833 want -= want;
0834 }
0835 raw_spin_unlock(&iter->rt_runtime_lock);
0836
0837 if (!want)
0838 break;
0839 }
0840
0841 raw_spin_lock(&rt_rq->rt_runtime_lock);
		/*
		 * We cannot be left wanting - that would mean some runtime
		 * leaked out of the system.
		 */
0846 BUG_ON(want);
0847 balanced:
		/*
		 * Disable all the borrow logic by pretending we have inf
		 * runtime - in which case borrowing doesn't make sense.
		 */
0852 rt_rq->rt_runtime = RUNTIME_INF;
0853 rt_rq->rt_throttled = 0;
0854 raw_spin_unlock(&rt_rq->rt_runtime_lock);
0855 raw_spin_unlock(&rt_b->rt_runtime_lock);
0856
0857
0858 sched_rt_rq_enqueue(rt_rq);
0859 }
0860 }
0861
0862 static void __enable_runtime(struct rq *rq)
0863 {
0864 rt_rq_iter_t iter;
0865 struct rt_rq *rt_rq;
0866
0867 if (unlikely(!scheduler_running))
0868 return;
0869
	/*
	 * Reset each runqueue's bandwidth settings.
	 */
0873 for_each_rt_rq(rt_rq, iter, rq) {
0874 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
0875
0876 raw_spin_lock(&rt_b->rt_runtime_lock);
0877 raw_spin_lock(&rt_rq->rt_runtime_lock);
0878 rt_rq->rt_runtime = rt_b->rt_runtime;
0879 rt_rq->rt_time = 0;
0880 rt_rq->rt_throttled = 0;
0881 raw_spin_unlock(&rt_rq->rt_runtime_lock);
0882 raw_spin_unlock(&rt_b->rt_runtime_lock);
0883 }
0884 }
0885
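/*
 * If this rt_rq has run over its local runtime, try to borrow spare runtime
 * from the other CPUs in the root domain (only with RT_RUNTIME_SHARE).
 */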
0886 static void balance_runtime(struct rt_rq *rt_rq)
0887 {
0888 if (!sched_feat(RT_RUNTIME_SHARE))
0889 return;
0890
0891 if (rt_rq->rt_time > rt_rq->rt_runtime) {
0892 raw_spin_unlock(&rt_rq->rt_runtime_lock);
0893 do_balance_runtime(rt_rq);
0894 raw_spin_lock(&rt_rq->rt_runtime_lock);
0895 }
0896 }
0897 #else
0898 static inline void balance_runtime(struct rt_rq *rt_rq) {}
0899 #endif
0900
0901 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
0902 {
0903 int i, idle = 1, throttled = 0;
0904 const struct cpumask *span;
0905
0906 span = sched_rt_period_mask();
0907 #ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * FIXME: isolated CPUs should really leave the root task group,
	 * whether they are isolcpus or were isolated via cpusets, lest
	 * the timer run on a CPU which does not service all runqueues,
	 * potentially leaving other CPUs indefinitely throttled.  If
	 * isolation is really required, the user will turn the throttle
	 * off to kill the perturbations it causes anyway.  Meanwhile,
	 * this maintains functionality for boot and/or troubleshooting.
	 */
0917 if (rt_b == &root_task_group.rt_bandwidth)
0918 span = cpu_online_mask;
0919 #endif
0920 for_each_cpu(i, span) {
0921 int enqueue = 0;
0922 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
0923 struct rq *rq = rq_of_rt_rq(rt_rq);
0924 struct rq_flags rf;
0925 int skip;
0926
		/*
		 * When span == cpu_online_mask, taking each rq->lock
		 * can be time-consuming. Try to avoid it when possible.
		 */
0931 raw_spin_lock(&rt_rq->rt_runtime_lock);
0932 if (!sched_feat(RT_RUNTIME_SHARE) && rt_rq->rt_runtime != RUNTIME_INF)
0933 rt_rq->rt_runtime = rt_b->rt_runtime;
0934 skip = !rt_rq->rt_time && !rt_rq->rt_nr_running;
0935 raw_spin_unlock(&rt_rq->rt_runtime_lock);
0936 if (skip)
0937 continue;
0938
0939 rq_lock(rq, &rf);
0940 update_rq_clock(rq);
0941
0942 if (rt_rq->rt_time) {
0943 u64 runtime;
0944
0945 raw_spin_lock(&rt_rq->rt_runtime_lock);
0946 if (rt_rq->rt_throttled)
0947 balance_runtime(rt_rq);
0948 runtime = rt_rq->rt_runtime;
0949 rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
0950 if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
0951 rt_rq->rt_throttled = 0;
0952 enqueue = 1;
0953
				/*
				 * When we're idle and a woken (rt) task is
				 * throttled, check_preempt_curr() will set
				 * skip_update and the time between the wakeup
				 * and this unthrottle will get accounted as
				 * 'runtime'.
				 */
0961 if (rt_rq->rt_nr_running && rq->curr == rq->idle)
0962 rq_clock_cancel_skipupdate(rq);
0963 }
0964 if (rt_rq->rt_time || rt_rq->rt_nr_running)
0965 idle = 0;
0966 raw_spin_unlock(&rt_rq->rt_runtime_lock);
0967 } else if (rt_rq->rt_nr_running) {
0968 idle = 0;
0969 if (!rt_rq_throttled(rt_rq))
0970 enqueue = 1;
0971 }
0972 if (rt_rq->rt_throttled)
0973 throttled = 1;
0974
0975 if (enqueue)
0976 sched_rt_rq_enqueue(rt_rq);
0977 rq_unlock(rq, &rf);
0978 }
0979
0980 if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
0981 return 1;
0982
0983 return idle;
0984 }
0985
0986 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
0987 {
0988 #ifdef CONFIG_RT_GROUP_SCHED
0989 struct rt_rq *rt_rq = group_rt_rq(rt_se);
0990
0991 if (rt_rq)
0992 return rt_rq->highest_prio.curr;
0993 #endif
0994
0995 return rt_task_of(rt_se)->prio;
0996 }
0997
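/*
 * Check whether this rt_rq has consumed its runtime for the current period;
 * if so, throttle it (dequeue it) until the period timer replenishes the
 * runtime. Returns nonzero when the rt_rq is throttled.
 */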
0998 static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
0999 {
1000 u64 runtime = sched_rt_runtime(rt_rq);
1001
1002 if (rt_rq->rt_throttled)
1003 return rt_rq_throttled(rt_rq);
1004
1005 if (runtime >= sched_rt_period(rt_rq))
1006 return 0;
1007
1008 balance_runtime(rt_rq);
1009 runtime = sched_rt_runtime(rt_rq);
1010 if (runtime == RUNTIME_INF)
1011 return 0;
1012
1013 if (rt_rq->rt_time > runtime) {
1014 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
1015
		/*
		 * Don't actually throttle groups that have no runtime assigned
		 * but accrue some time due to boosting.
		 */
1020 if (likely(rt_b->rt_runtime)) {
1021 rt_rq->rt_throttled = 1;
1022 printk_deferred_once("sched: RT throttling activated\n");
1023 } else {
			/*
			 * In case we did anyway, make it go away,
			 * replenishment is a joke, since it will replenish us
			 * with exactly 0 ns.
			 */
1029 rt_rq->rt_time = 0;
1030 }
1031
1032 if (rt_rq_throttled(rt_rq)) {
1033 sched_rt_rq_dequeue(rt_rq);
1034 return 1;
1035 }
1036 }
1037
1038 return 0;
1039 }
1040
/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 */
1045 static void update_curr_rt(struct rq *rq)
1046 {
1047 struct task_struct *curr = rq->curr;
1048 struct sched_rt_entity *rt_se = &curr->rt;
1049 u64 delta_exec;
1050 u64 now;
1051
1052 if (curr->sched_class != &rt_sched_class)
1053 return;
1054
1055 now = rq_clock_task(rq);
1056 delta_exec = now - curr->se.exec_start;
1057 if (unlikely((s64)delta_exec <= 0))
1058 return;
1059
1060 schedstat_set(curr->stats.exec_max,
1061 max(curr->stats.exec_max, delta_exec));
1062
1063 trace_sched_stat_runtime(curr, delta_exec, 0);
1064
1065 curr->se.sum_exec_runtime += delta_exec;
1066 account_group_exec_runtime(curr, delta_exec);
1067
1068 curr->se.exec_start = now;
1069 cgroup_account_cputime(curr, delta_exec);
1070
1071 if (!rt_bandwidth_enabled())
1072 return;
1073
1074 for_each_sched_rt_entity(rt_se) {
1075 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1076 int exceeded;
1077
1078 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
1079 raw_spin_lock(&rt_rq->rt_runtime_lock);
1080 rt_rq->rt_time += delta_exec;
1081 exceeded = sched_rt_runtime_exceeded(rt_rq);
1082 if (exceeded)
1083 resched_curr(rq);
1084 raw_spin_unlock(&rt_rq->rt_runtime_lock);
1085 if (exceeded)
1086 do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq));
1087 }
1088 }
1089 }
1090
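/*
 * Take the root rt_rq's tasks out of the rq-wide nr_running accounting,
 * e.g. when the whole RT class gets throttled; enqueue_top_rt_rq() below is
 * the inverse operation.
 */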
1091 static void
1092 dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count)
1093 {
1094 struct rq *rq = rq_of_rt_rq(rt_rq);
1095
1096 BUG_ON(&rq->rt != rt_rq);
1097
1098 if (!rt_rq->rt_queued)
1099 return;
1100
1101 BUG_ON(!rq->nr_running);
1102
1103 sub_nr_running(rq, count);
1104 rt_rq->rt_queued = 0;
1105
1106 }
1107
1108 static void
1109 enqueue_top_rt_rq(struct rt_rq *rt_rq)
1110 {
1111 struct rq *rq = rq_of_rt_rq(rt_rq);
1112
1113 BUG_ON(&rq->rt != rt_rq);
1114
1115 if (rt_rq->rt_queued)
1116 return;
1117
1118 if (rt_rq_throttled(rt_rq))
1119 return;
1120
1121 if (rt_rq->rt_nr_running) {
1122 add_nr_running(rq, rt_rq->rt_nr_running);
1123 rt_rq->rt_queued = 1;
1124 }
1125
1126
1127 cpufreq_update_util(rq, 0);
1128 }
1129
1130 #if defined CONFIG_SMP
1131
1132 static void
1133 inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
1134 {
1135 struct rq *rq = rq_of_rt_rq(rt_rq);
1136
1137 #ifdef CONFIG_RT_GROUP_SCHED
1138
1139
1140
1141 if (&rq->rt != rt_rq)
1142 return;
1143 #endif
1144 if (rq->online && prio < prev_prio)
1145 cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
1146 }
1147
1148 static void
1149 dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
1150 {
1151 struct rq *rq = rq_of_rt_rq(rt_rq);
1152
1153 #ifdef CONFIG_RT_GROUP_SCHED
1154
1155
1156
1157 if (&rq->rt != rt_rq)
1158 return;
1159 #endif
1160 if (rq->online && rt_rq->highest_prio.curr != prev_prio)
1161 cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
1162 }
1163
1164 #else
1165
1166 static inline
1167 void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
1168 static inline
1169 void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
1170
1171 #endif
1172
1173 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
1174 static void
1175 inc_rt_prio(struct rt_rq *rt_rq, int prio)
1176 {
1177 int prev_prio = rt_rq->highest_prio.curr;
1178
1179 if (prio < prev_prio)
1180 rt_rq->highest_prio.curr = prio;
1181
1182 inc_rt_prio_smp(rt_rq, prio, prev_prio);
1183 }
1184
1185 static void
1186 dec_rt_prio(struct rt_rq *rt_rq, int prio)
1187 {
1188 int prev_prio = rt_rq->highest_prio.curr;
1189
1190 if (rt_rq->rt_nr_running) {
1191
1192 WARN_ON(prio < prev_prio);
1193
1194
1195
1196
1197
1198 if (prio == prev_prio) {
1199 struct rt_prio_array *array = &rt_rq->active;
1200
1201 rt_rq->highest_prio.curr =
1202 sched_find_first_bit(array->bitmap);
1203 }
1204
1205 } else {
1206 rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
1207 }
1208
1209 dec_rt_prio_smp(rt_rq, prio, prev_prio);
1210 }
1211
1212 #else
1213
1214 static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
1215 static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
1216
1217 #endif
1218
1219 #ifdef CONFIG_RT_GROUP_SCHED
1220
1221 static void
1222 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1223 {
1224 if (rt_se_boosted(rt_se))
1225 rt_rq->rt_nr_boosted++;
1226
1227 if (rt_rq->tg)
1228 start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
1229 }
1230
1231 static void
1232 dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1233 {
1234 if (rt_se_boosted(rt_se))
1235 rt_rq->rt_nr_boosted--;
1236
1237 WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
1238 }
1239
1240 #else
1241
1242 static void
1243 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1244 {
1245 start_rt_bandwidth(&def_rt_bandwidth);
1246 }
1247
1248 static inline
1249 void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
1250
1251 #endif
1252
1253 static inline
1254 unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
1255 {
1256 struct rt_rq *group_rq = group_rt_rq(rt_se);
1257
1258 if (group_rq)
1259 return group_rq->rt_nr_running;
1260 else
1261 return 1;
1262 }
1263
1264 static inline
1265 unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
1266 {
1267 struct rt_rq *group_rq = group_rt_rq(rt_se);
1268 struct task_struct *tsk;
1269
1270 if (group_rq)
1271 return group_rq->rr_nr_running;
1272
1273 tsk = rt_task_of(rt_se);
1274
1275 return (tsk->policy == SCHED_RR) ? 1 : 0;
1276 }
1277
1278 static inline
1279 void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1280 {
1281 int prio = rt_se_prio(rt_se);
1282
1283 WARN_ON(!rt_prio(prio));
1284 rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
1285 rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
1286
1287 inc_rt_prio(rt_rq, prio);
1288 inc_rt_migration(rt_se, rt_rq);
1289 inc_rt_group(rt_se, rt_rq);
1290 }
1291
1292 static inline
1293 void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1294 {
1295 WARN_ON(!rt_prio(rt_se_prio(rt_se)));
1296 WARN_ON(!rt_rq->rt_nr_running);
1297 rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
1298 rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
1299
1300 dec_rt_prio(rt_rq, rt_se_prio(rt_se));
1301 dec_rt_migration(rt_se, rt_rq);
1302 dec_rt_group(rt_se, rt_rq);
1303 }
1304
/*
 * Change rt_se->run_list location unless SAVE && !MOVE.
 *
 * Assumes ENQUEUE/DEQUEUE flags match.
 */
1310 static inline bool move_entity(unsigned int flags)
1311 {
1312 if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
1313 return false;
1314
1315 return true;
1316 }
1317
1318 static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
1319 {
1320 list_del_init(&rt_se->run_list);
1321
1322 if (list_empty(array->queue + rt_se_prio(rt_se)))
1323 __clear_bit(rt_se_prio(rt_se), array->bitmap);
1324
1325 rt_se->on_list = 0;
1326 }
1327
1328 static inline struct sched_statistics *
1329 __schedstats_from_rt_se(struct sched_rt_entity *rt_se)
1330 {
1331 #ifdef CONFIG_RT_GROUP_SCHED
1332
1333 if (!rt_entity_is_task(rt_se))
1334 return NULL;
1335 #endif
1336
1337 return &rt_task_of(rt_se)->stats;
1338 }
1339
1340 static inline void
1341 update_stats_wait_start_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
1342 {
1343 struct sched_statistics *stats;
1344 struct task_struct *p = NULL;
1345
1346 if (!schedstat_enabled())
1347 return;
1348
1349 if (rt_entity_is_task(rt_se))
1350 p = rt_task_of(rt_se);
1351
1352 stats = __schedstats_from_rt_se(rt_se);
1353 if (!stats)
1354 return;
1355
1356 __update_stats_wait_start(rq_of_rt_rq(rt_rq), p, stats);
1357 }
1358
1359 static inline void
1360 update_stats_enqueue_sleeper_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
1361 {
1362 struct sched_statistics *stats;
1363 struct task_struct *p = NULL;
1364
1365 if (!schedstat_enabled())
1366 return;
1367
1368 if (rt_entity_is_task(rt_se))
1369 p = rt_task_of(rt_se);
1370
1371 stats = __schedstats_from_rt_se(rt_se);
1372 if (!stats)
1373 return;
1374
1375 __update_stats_enqueue_sleeper(rq_of_rt_rq(rt_rq), p, stats);
1376 }
1377
1378 static inline void
1379 update_stats_enqueue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
1380 int flags)
1381 {
1382 if (!schedstat_enabled())
1383 return;
1384
1385 if (flags & ENQUEUE_WAKEUP)
1386 update_stats_enqueue_sleeper_rt(rt_rq, rt_se);
1387 }
1388
1389 static inline void
1390 update_stats_wait_end_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
1391 {
1392 struct sched_statistics *stats;
1393 struct task_struct *p = NULL;
1394
1395 if (!schedstat_enabled())
1396 return;
1397
1398 if (rt_entity_is_task(rt_se))
1399 p = rt_task_of(rt_se);
1400
1401 stats = __schedstats_from_rt_se(rt_se);
1402 if (!stats)
1403 return;
1404
1405 __update_stats_wait_end(rq_of_rt_rq(rt_rq), p, stats);
1406 }
1407
1408 static inline void
1409 update_stats_dequeue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
1410 int flags)
1411 {
1412 struct task_struct *p = NULL;
1413
1414 if (!schedstat_enabled())
1415 return;
1416
1417 if (rt_entity_is_task(rt_se))
1418 p = rt_task_of(rt_se);
1419
1420 if ((flags & DEQUEUE_SLEEP) && p) {
1421 unsigned int state;
1422
1423 state = READ_ONCE(p->__state);
1424 if (state & TASK_INTERRUPTIBLE)
1425 __schedstat_set(p->stats.sleep_start,
1426 rq_clock(rq_of_rt_rq(rt_rq)));
1427
1428 if (state & TASK_UNINTERRUPTIBLE)
1429 __schedstat_set(p->stats.block_start,
1430 rq_clock(rq_of_rt_rq(rt_rq)));
1431 }
1432 }
1433
1434 static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
1435 {
1436 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1437 struct rt_prio_array *array = &rt_rq->active;
1438 struct rt_rq *group_rq = group_rt_rq(rt_se);
1439 struct list_head *queue = array->queue + rt_se_prio(rt_se);
1440
	/*
	 * Don't enqueue the group if it is throttled, or when empty.
	 * The latter is a consequence of the former when a child group
	 * gets throttled and the current group doesn't have any other
	 * active members.
	 */
1447 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
1448 if (rt_se->on_list)
1449 __delist_rt_entity(rt_se, array);
1450 return;
1451 }
1452
1453 if (move_entity(flags)) {
1454 WARN_ON_ONCE(rt_se->on_list);
1455 if (flags & ENQUEUE_HEAD)
1456 list_add(&rt_se->run_list, queue);
1457 else
1458 list_add_tail(&rt_se->run_list, queue);
1459
1460 __set_bit(rt_se_prio(rt_se), array->bitmap);
1461 rt_se->on_list = 1;
1462 }
1463 rt_se->on_rq = 1;
1464
1465 inc_rt_tasks(rt_se, rt_rq);
1466 }
1467
1468 static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
1469 {
1470 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1471 struct rt_prio_array *array = &rt_rq->active;
1472
1473 if (move_entity(flags)) {
1474 WARN_ON_ONCE(!rt_se->on_list);
1475 __delist_rt_entity(rt_se, array);
1476 }
1477 rt_se->on_rq = 0;
1478
1479 dec_rt_tasks(rt_se, rt_rq);
1480 }
1481
/*
 * Because the prio of an upper entry depends on the lower
 * entries, we must remove entries top - down.
 */
1486 static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
1487 {
1488 struct sched_rt_entity *back = NULL;
1489 unsigned int rt_nr_running;
1490
1491 for_each_sched_rt_entity(rt_se) {
1492 rt_se->back = back;
1493 back = rt_se;
1494 }
1495
1496 rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
1497
1498 for (rt_se = back; rt_se; rt_se = rt_se->back) {
1499 if (on_rt_rq(rt_se))
1500 __dequeue_rt_entity(rt_se, flags);
1501 }
1502
1503 dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
1504 }
1505
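/*
 * Enqueue rt_se and all of its ancestors: the whole hierarchy is dequeued
 * first (see dequeue_rt_stack()) and then re-enqueued bottom-up so the
 * per-level priority accounting stays consistent.
 */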
1506 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
1507 {
1508 struct rq *rq = rq_of_rt_se(rt_se);
1509
1510 update_stats_enqueue_rt(rt_rq_of_se(rt_se), rt_se, flags);
1511
1512 dequeue_rt_stack(rt_se, flags);
1513 for_each_sched_rt_entity(rt_se)
1514 __enqueue_rt_entity(rt_se, flags);
1515 enqueue_top_rt_rq(&rq->rt);
1516 }
1517
1518 static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
1519 {
1520 struct rq *rq = rq_of_rt_se(rt_se);
1521
1522 update_stats_dequeue_rt(rt_rq_of_se(rt_se), rt_se, flags);
1523
1524 dequeue_rt_stack(rt_se, flags);
1525
1526 for_each_sched_rt_entity(rt_se) {
1527 struct rt_rq *rt_rq = group_rt_rq(rt_se);
1528
1529 if (rt_rq && rt_rq->rt_nr_running)
1530 __enqueue_rt_entity(rt_se, flags);
1531 }
1532 enqueue_top_rt_rq(&rq->rt);
1533 }
1534
1535
1536
1537
1538 static void
1539 enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
1540 {
1541 struct sched_rt_entity *rt_se = &p->rt;
1542
1543 if (flags & ENQUEUE_WAKEUP)
1544 rt_se->timeout = 0;
1545
1546 check_schedstat_required();
1547 update_stats_wait_start_rt(rt_rq_of_se(rt_se), rt_se);
1548
1549 enqueue_rt_entity(rt_se, flags);
1550
1551 if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
1552 enqueue_pushable_task(rq, p);
1553 }
1554
1555 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
1556 {
1557 struct sched_rt_entity *rt_se = &p->rt;
1558
1559 update_curr_rt(rq);
1560 dequeue_rt_entity(rt_se, flags);
1561
1562 dequeue_pushable_task(rq, p);
1563 }
1564
/*
 * Put task to the head or the end of the run list without the overhead of
 * dequeue followed by enqueue.
 */
1569 static void
1570 requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
1571 {
1572 if (on_rt_rq(rt_se)) {
1573 struct rt_prio_array *array = &rt_rq->active;
1574 struct list_head *queue = array->queue + rt_se_prio(rt_se);
1575
1576 if (head)
1577 list_move(&rt_se->run_list, queue);
1578 else
1579 list_move_tail(&rt_se->run_list, queue);
1580 }
1581 }
1582
1583 static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
1584 {
1585 struct sched_rt_entity *rt_se = &p->rt;
1586 struct rt_rq *rt_rq;
1587
1588 for_each_sched_rt_entity(rt_se) {
1589 rt_rq = rt_rq_of_se(rt_se);
1590 requeue_rt_entity(rt_rq, rt_se, head);
1591 }
1592 }
1593
1594 static void yield_task_rt(struct rq *rq)
1595 {
1596 requeue_task_rt(rq, rq->curr, 0);
1597 }
1598
1599 #ifdef CONFIG_SMP
1600 static int find_lowest_rq(struct task_struct *task);
1601
1602 static int
1603 select_task_rq_rt(struct task_struct *p, int cpu, int flags)
1604 {
1605 struct task_struct *curr;
1606 struct rq *rq;
1607 bool test;
1608
1609
1610 if (!(flags & (WF_TTWU | WF_FORK)))
1611 goto out;
1612
1613 rq = cpu_rq(cpu);
1614
1615 rcu_read_lock();
1616 curr = READ_ONCE(rq->curr);
1617
	/*
	 * If the current task on @p's runqueue is an RT task, then
	 * try to see if we can wake this RT task up on another
	 * runqueue. Otherwise simply start this RT task
	 * on its current runqueue.
	 *
	 * We want to avoid overloading runqueues. If the woken
	 * task is a higher priority, then it will stay on this CPU
	 * and the lower prio task should be moved to another CPU.
	 * Even though this will probably make the lower prio task
	 * lose its cache, we do not want to bounce a higher task
	 * around just because it gave up its CPU.
	 *
	 * For equal prio tasks, we just let the scheduler sort it out.
	 *
	 * Otherwise, just let it ride on the affined RQ and the
	 * post-schedule router will push the preempted task away.
	 *
	 * This test is optimistic; if we get it wrong the load-balancer
	 * will have to sort it out.
	 *
	 * We also take into account the capacity of the CPU to ensure it
	 * fits the requirement of the task - which is only important on
	 * heterogeneous systems like big.LITTLE.
	 */
1644 test = curr &&
1645 unlikely(rt_task(curr)) &&
1646 (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);
1647
1648 if (test || !rt_task_fits_capacity(p, cpu)) {
1649 int target = find_lowest_rq(p);
1650
		/*
		 * Bail out if we were forcing a migration to find a better
		 * fitting CPU but our search failed.
		 */
1655 if (!test && target != -1 && !rt_task_fits_capacity(p, target))
1656 goto out_unlock;
1657
1658
1659
1660
1661
1662 if (target != -1 &&
1663 p->prio < cpu_rq(target)->rt.highest_prio.curr)
1664 cpu = target;
1665 }
1666
1667 out_unlock:
1668 rcu_read_unlock();
1669
1670 out:
1671 return cpu;
1672 }
1673
1674 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
1675 {
	/*
	 * Current can't be migrated, useless to reschedule,
	 * let's hope p can move out.
	 */
1680 if (rq->curr->nr_cpus_allowed == 1 ||
1681 !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
1682 return;
1683
1684
1685
1686
1687
1688 if (p->nr_cpus_allowed != 1 &&
1689 cpupri_find(&rq->rd->cpupri, p, NULL))
1690 return;
1691
	/*
	 * There appear to be other CPUs that can accept
	 * current but none can run 'p', so let's reschedule
	 * to try and push current away.
	 */
1697 requeue_task_rt(rq, p, 1);
1698 resched_curr(rq);
1699 }
1700
1701 static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
1702 {
1703 if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
		/*
		 * This is OK, because current is on_cpu, which avoids it being
		 * picked for load-balance and preemption/IRQs are still
		 * disabled avoiding further scheduler activity on it and we've
		 * not yet started the picking loop.
		 */
1710 rq_unpin_lock(rq, rf);
1711 pull_rt_task(rq);
1712 rq_repin_lock(rq, rf);
1713 }
1714
1715 return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
1716 }
1717 #endif
1718
/*
 * Preempt the current task with a newly woken task if needed:
 */
1722 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
1723 {
1724 if (p->prio < rq->curr->prio) {
1725 resched_curr(rq);
1726 return;
1727 }
1728
1729 #ifdef CONFIG_SMP
	/*
	 * If:
	 *
	 * - the newly woken task is of equal priority to the current task
	 * - the newly woken task is non-migratable while current is migratable
	 * - current will be preempted on the next reschedule
	 *
	 * we should check to see if current can readily move to a different
	 * CPU. If so, we will reschedule to allow the push logic to try
	 * to move current somewhere else, making room for our non-migratable
	 * task.
	 */
1742 if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
1743 check_preempt_equal_prio(rq, p);
1744 #endif
1745 }
1746
1747 static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
1748 {
1749 struct sched_rt_entity *rt_se = &p->rt;
1750 struct rt_rq *rt_rq = &rq->rt;
1751
1752 p->se.exec_start = rq_clock_task(rq);
1753 if (on_rt_rq(&p->rt))
1754 update_stats_wait_end_rt(rt_rq, rt_se);
1755
1756
1757 dequeue_pushable_task(rq, p);
1758
1759 if (!first)
1760 return;
1761
	/*
	 * If the previous task was an RT task, put_prev_task() has already
	 * updated the utilization. We only care about the case where we start
	 * to schedule an RT task.
	 */
1767 if (rq->curr->sched_class != &rt_sched_class)
1768 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
1769
1770 rt_queue_push_tasks(rq);
1771 }
1772
1773 static struct sched_rt_entity *pick_next_rt_entity(struct rt_rq *rt_rq)
1774 {
1775 struct rt_prio_array *array = &rt_rq->active;
1776 struct sched_rt_entity *next = NULL;
1777 struct list_head *queue;
1778 int idx;
1779
1780 idx = sched_find_first_bit(array->bitmap);
1781 BUG_ON(idx >= MAX_RT_PRIO);
1782
1783 queue = array->queue + idx;
1784 next = list_entry(queue->next, struct sched_rt_entity, run_list);
1785
1786 return next;
1787 }
1788
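/*
 * Walk down the group hierarchy, at each level picking the first entity on
 * the highest-priority non-empty queue, until an actual task is reached.
 */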
1789 static struct task_struct *_pick_next_task_rt(struct rq *rq)
1790 {
1791 struct sched_rt_entity *rt_se;
1792 struct rt_rq *rt_rq = &rq->rt;
1793
1794 do {
1795 rt_se = pick_next_rt_entity(rt_rq);
1796 BUG_ON(!rt_se);
1797 rt_rq = group_rt_rq(rt_se);
1798 } while (rt_rq);
1799
1800 return rt_task_of(rt_se);
1801 }
1802
1803 static struct task_struct *pick_task_rt(struct rq *rq)
1804 {
1805 struct task_struct *p;
1806
1807 if (!sched_rt_runnable(rq))
1808 return NULL;
1809
1810 p = _pick_next_task_rt(rq);
1811
1812 return p;
1813 }
1814
1815 static struct task_struct *pick_next_task_rt(struct rq *rq)
1816 {
1817 struct task_struct *p = pick_task_rt(rq);
1818
1819 if (p)
1820 set_next_task_rt(rq, p, true);
1821
1822 return p;
1823 }
1824
1825 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
1826 {
1827 struct sched_rt_entity *rt_se = &p->rt;
1828 struct rt_rq *rt_rq = &rq->rt;
1829
1830 if (on_rt_rq(&p->rt))
1831 update_stats_wait_start_rt(rt_rq, rt_se);
1832
1833 update_curr_rt(rq);
1834
1835 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
1836
	/*
	 * The previous task needs to be made eligible for pushing
	 * if it is still active.
	 */
1841 if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
1842 enqueue_pushable_task(rq, p);
1843 }
1844
1845 #ifdef CONFIG_SMP
1846
1847
1848 #define RT_MAX_TRIES 3
1849
1850 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
1851 {
1852 if (!task_running(rq, p) &&
1853 cpumask_test_cpu(cpu, &p->cpus_mask))
1854 return 1;
1855
1856 return 0;
1857 }
1858
/*
 * Return the highest pushable rq's task which is suitable to be executed
 * on @cpu, NULL otherwise.
 */
1863 static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
1864 {
1865 struct plist_head *head = &rq->rt.pushable_tasks;
1866 struct task_struct *p;
1867
1868 if (!has_pushable_tasks(rq))
1869 return NULL;
1870
1871 plist_for_each_entry(p, head, pushable_tasks) {
1872 if (pick_rt_task(rq, p, cpu))
1873 return p;
1874 }
1875
1876 return NULL;
1877 }
1878
1879 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
1880
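/*
 * Find a CPU whose runqueue runs at a lower RT priority than @task,
 * preferring the task's previous CPU and then CPUs that are close in the
 * scheduling-domain topology. Returns -1 if no suitable CPU is found.
 */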
1881 static int find_lowest_rq(struct task_struct *task)
1882 {
1883 struct sched_domain *sd;
1884 struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
1885 int this_cpu = smp_processor_id();
1886 int cpu = task_cpu(task);
1887 int ret;
1888
1889
1890 if (unlikely(!lowest_mask))
1891 return -1;
1892
1893 if (task->nr_cpus_allowed == 1)
1894 return -1;
1895
1896
1897
1898
1899
1900 if (static_branch_unlikely(&sched_asym_cpucapacity)) {
1901
1902 ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
1903 task, lowest_mask,
1904 rt_task_fits_capacity);
1905 } else {
1906
1907 ret = cpupri_find(&task_rq(task)->rd->cpupri,
1908 task, lowest_mask);
1909 }
1910
1911 if (!ret)
1912 return -1;
1913
	/*
	 * At this point we have built a mask of CPUs representing the
	 * lowest priority tasks in the system.  Now we want to elect
	 * the best one based on our affinity and topology.
	 *
	 * We prioritize the last CPU that the task executed on since
	 * it is most likely cache-hot in that location.
	 */
1922 if (cpumask_test_cpu(cpu, lowest_mask))
1923 return cpu;
1924
1925
1926
1927
1928
1929 if (!cpumask_test_cpu(this_cpu, lowest_mask))
1930 this_cpu = -1;
1931
1932 rcu_read_lock();
1933 for_each_domain(cpu, sd) {
1934 if (sd->flags & SD_WAKE_AFFINE) {
1935 int best_cpu;
1936
1937
1938
1939
1940
1941 if (this_cpu != -1 &&
1942 cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
1943 rcu_read_unlock();
1944 return this_cpu;
1945 }
1946
1947 best_cpu = cpumask_any_and_distribute(lowest_mask,
1948 sched_domain_span(sd));
1949 if (best_cpu < nr_cpu_ids) {
1950 rcu_read_unlock();
1951 return best_cpu;
1952 }
1953 }
1954 }
1955 rcu_read_unlock();
1956
	/*
	 * And finally, if there were no matches within the domains
	 * just give the caller *something* to work with from the compatible
	 * locations.
	 */
1962 if (this_cpu != -1)
1963 return this_cpu;
1964
1965 cpu = cpumask_any_distribute(lowest_mask);
1966 if (cpu < nr_cpu_ids)
1967 return cpu;
1968
1969 return -1;
1970 }
1971
1972
1973 static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1974 {
1975 struct rq *lowest_rq = NULL;
1976 int tries;
1977 int cpu;
1978
1979 for (tries = 0; tries < RT_MAX_TRIES; tries++) {
1980 cpu = find_lowest_rq(task);
1981
1982 if ((cpu == -1) || (cpu == rq->cpu))
1983 break;
1984
1985 lowest_rq = cpu_rq(cpu);
1986
1987 if (lowest_rq->rt.highest_prio.curr <= task->prio) {
			/*
			 * Target rq has tasks of equal or higher priority,
			 * retrying does not release any lock and is unlikely
			 * to yield a positive result.
			 */
1993 lowest_rq = NULL;
1994 break;
1995 }
1996
1997
1998 if (double_lock_balance(rq, lowest_rq)) {
			/*
			 * We had to unlock the run queue. In
			 * the mean time, task could have
			 * migrated already or had its affinity changed.
			 * Also make sure that it wasn't scheduled on its rq.
			 */
2005 if (unlikely(task_rq(task) != rq ||
2006 !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
2007 task_running(rq, task) ||
2008 !rt_task(task) ||
2009 !task_on_rq_queued(task))) {
2010
2011 double_unlock_balance(rq, lowest_rq);
2012 lowest_rq = NULL;
2013 break;
2014 }
2015 }
2016
2017
2018 if (lowest_rq->rt.highest_prio.curr > task->prio)
2019 break;
2020
2021
2022 double_unlock_balance(rq, lowest_rq);
2023 lowest_rq = NULL;
2024 }
2025
2026 return lowest_rq;
2027 }
2028
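/*
 * Return the highest-priority task on this rq's pushable list, or NULL if
 * there is nothing to push.
 */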
2029 static struct task_struct *pick_next_pushable_task(struct rq *rq)
2030 {
2031 struct task_struct *p;
2032
2033 if (!has_pushable_tasks(rq))
2034 return NULL;
2035
2036 p = plist_first_entry(&rq->rt.pushable_tasks,
2037 struct task_struct, pushable_tasks);
2038
2039 BUG_ON(rq->cpu != task_cpu(p));
2040 BUG_ON(task_current(rq, p));
2041 BUG_ON(p->nr_cpus_allowed <= 1);
2042
2043 BUG_ON(!task_on_rq_queued(p));
2044 BUG_ON(!rt_task(p));
2045
2046 return p;
2047 }
2048
/*
 * If the current CPU has more than one RT task, see if the non
 * running task can migrate over to a CPU that is running a task
 * of lesser priority.
 */
2054 static int push_rt_task(struct rq *rq, bool pull)
2055 {
2056 struct task_struct *next_task;
2057 struct rq *lowest_rq;
2058 int ret = 0;
2059
2060 if (!rq->rt.overloaded)
2061 return 0;
2062
2063 next_task = pick_next_pushable_task(rq);
2064 if (!next_task)
2065 return 0;
2066
2067 retry:
	/*
	 * It's possible that the next_task slipped in of
	 * higher priority than current. If that's the case
	 * just reschedule current.
	 */
2073 if (unlikely(next_task->prio < rq->curr->prio)) {
2074 resched_curr(rq);
2075 return 0;
2076 }
2077
2078 if (is_migration_disabled(next_task)) {
2079 struct task_struct *push_task = NULL;
2080 int cpu;
2081
2082 if (!pull || rq->push_busy)
2083 return 0;
2084
		/*
		 * Invoking find_lowest_rq() on anything but an RT task doesn't
		 * make sense. Per the above priority check, curr has to
		 * be of higher priority than next_task, so no need to
		 * reschedule when bailing out.
		 *
		 * Note that the stoppers are masqueraded as SCHED_FIFO
		 * (cf. sched_set_stop_task()), so we can't rely on rt_task().
		 */
2094 if (rq->curr->sched_class != &rt_sched_class)
2095 return 0;
2096
2097 cpu = find_lowest_rq(rq->curr);
2098 if (cpu == -1 || cpu == rq->cpu)
2099 return 0;
2100
		/*
		 * Given we found a CPU with lower priority than @next_task,
		 * therefore it should be running. However we cannot migrate it
		 * to this other CPU, instead attempt to push the current
		 * running task on this CPU away.
		 */
2107 push_task = get_push_task(rq);
2108 if (push_task) {
2109 raw_spin_rq_unlock(rq);
2110 stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
2111 push_task, &rq->push_work);
2112 raw_spin_rq_lock(rq);
2113 }
2114
2115 return 0;
2116 }
2117
2118 if (WARN_ON(next_task == rq->curr))
2119 return 0;
2120
2121
2122 get_task_struct(next_task);
2123
2124
2125 lowest_rq = find_lock_lowest_rq(next_task, rq);
2126 if (!lowest_rq) {
2127 struct task_struct *task;
		/*
		 * find_lock_lowest_rq releases rq->lock,
		 * so it is possible that next_task has migrated.
		 *
		 * We need to make sure that the task is still on the same
		 * run-queue and is also still the next task eligible for
		 * pushing.
		 */
2136 task = pick_next_pushable_task(rq);
2137 if (task == next_task) {
2138
2139
2140
2141
2142
2143
2144 goto out;
2145 }
2146
2147 if (!task)
2148
2149 goto out;
2150
2151
2152
2153
2154 put_task_struct(next_task);
2155 next_task = task;
2156 goto retry;
2157 }
2158
2159 deactivate_task(rq, next_task, 0);
2160 set_task_cpu(next_task, lowest_rq->cpu);
2161 activate_task(lowest_rq, next_task, 0);
2162 resched_curr(lowest_rq);
2163 ret = 1;
2164
2165 double_unlock_balance(rq, lowest_rq);
2166 out:
2167 put_task_struct(next_task);
2168
2169 return ret;
2170 }
2171
2172 static void push_rt_tasks(struct rq *rq)
2173 {
2174
2175 while (push_rt_task(rq, false))
2176 ;
2177 }
2178
2179 #ifdef HAVE_RT_PUSH_IPI

/*
 * When a high priority task schedules out from a CPU and a lower priority
 * task is scheduled in, a check is made to see if there are any RT tasks
 * on other CPUs that are waiting to run because a higher priority RT task
 * is currently running on their CPU. In this case, the CPU with multiple RT
 * tasks queued on it (overloaded) needs to be notified that a CPU has opened
 * up that may be able to run one of its non-running queued RT tasks.
 *
 * Taking each overloaded CPU's runqueue lock from here has been shown to
 * cause large latencies on machines with many CPUs, and blindly sending an
 * IPI to every overloaded CPU can cause an IPI storm. Instead, each root
 * domain has a single irq work iterator: the first CPU that lowers its
 * priority kicks it off (guarded by rto_loop_start), and the iterator then
 * hops from one rto_mask CPU to the next, asking each in turn to push its
 * waiting RT tasks.
 *
 * Every CPU that schedules a lower priority task also increments
 * rto_loop_next, which guarantees the iterator rescans all overloaded CPUs
 * even if it was in the middle of a pass when a new CPU opened up.
 */
2222 static int rto_next_cpu(struct root_domain *rd)
2223 {
2224 int next;
2225 int cpu;
2226
	/*
	 * When starting the IPI RT pushing, the rto_cpu is set to -1,
	 * rto_next_cpu() will simply return the first CPU found in
	 * the rto_mask.
	 *
	 * If rto_next_cpu() is called with rto_cpu set to a valid CPU, it
	 * will return the next CPU found in the rto_mask.
	 *
	 * If there are no more CPUs left in the rto_mask, then a check is made
	 * against rto_loop and rto_loop_next. rto_loop is only updated with
	 * the rto_lock held, but any CPU may increment the rto_loop_next
	 * without any locking.
	 */
2240 for (;;) {
2241
2242
2243 cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);
2244
2245 rd->rto_cpu = cpu;
2246
2247 if (cpu < nr_cpu_ids)
2248 return cpu;
2249
2250 rd->rto_cpu = -1;
2251
2252
2253
2254
2255
2256
2257
2258 next = atomic_read_acquire(&rd->rto_loop_next);
2259
2260 if (rd->rto_loop == next)
2261 break;
2262
2263 rd->rto_loop = next;
2264 }
2265
2266 return -1;
2267 }
2268
2269 static inline bool rto_start_trylock(atomic_t *v)
2270 {
2271 return !atomic_cmpxchg_acquire(v, 0, 1);
2272 }
2273
2274 static inline void rto_start_unlock(atomic_t *v)
2275 {
2276 atomic_set_release(v, 0);
2277 }
2278
2279 static void tell_cpu_to_push(struct rq *rq)
2280 {
2281 int cpu = -1;
2282
2283
2284 atomic_inc(&rq->rd->rto_loop_next);
2285
2286
2287 if (!rto_start_trylock(&rq->rd->rto_loop_start))
2288 return;
2289
2290 raw_spin_lock(&rq->rd->rto_lock);
2291
	/*
	 * The rto_cpu is updated under the lock, if it has a valid CPU
	 * then the IPI is still running and will continue due to the
	 * update to loop_next, and nothing needs to be done here.
	 * Otherwise it is finishing up and an IPI needs to be sent.
	 */
2298 if (rq->rd->rto_cpu < 0)
2299 cpu = rto_next_cpu(rq->rd);
2300
2301 raw_spin_unlock(&rq->rd->rto_lock);
2302
2303 rto_start_unlock(&rq->rd->rto_loop_start);
2304
2305 if (cpu >= 0) {
2306
2307 sched_get_rd(rq->rd);
2308 irq_work_queue_on(&rq->rd->rto_push_work, cpu);
2309 }
2310 }
2311
2312
2313 void rto_push_irq_work_func(struct irq_work *work)
2314 {
2315 struct root_domain *rd =
2316 container_of(work, struct root_domain, rto_push_work);
2317 struct rq *rq;
2318 int cpu;
2319
2320 rq = this_rq();
2321
2322
2323
2324
2325
2326 if (has_pushable_tasks(rq)) {
2327 raw_spin_rq_lock(rq);
2328 while (push_rt_task(rq, true))
2329 ;
2330 raw_spin_rq_unlock(rq);
2331 }
2332
2333 raw_spin_lock(&rd->rto_lock);
2334
2335
2336 cpu = rto_next_cpu(rd);
2337
2338 raw_spin_unlock(&rd->rto_lock);
2339
2340 if (cpu < 0) {
2341 sched_put_rd(rd);
2342 return;
2343 }
2344
2345
2346 irq_work_queue_on(&rd->rto_push_work, cpu);
2347 }
2348 #endif
2349
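/*
 * Pull a higher-priority RT task from an overloaded CPU onto this_rq, or
 * (with RT_PUSH_IPI) ask the overloaded CPUs to push one over instead.
 */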
2350 static void pull_rt_task(struct rq *this_rq)
2351 {
2352 int this_cpu = this_rq->cpu, cpu;
2353 bool resched = false;
2354 struct task_struct *p, *push_task;
2355 struct rq *src_rq;
2356 int rt_overload_count = rt_overloaded(this_rq);
2357
2358 if (likely(!rt_overload_count))
2359 return;
2360
2361
2362
2363
2364
2365 smp_rmb();
2366
2367
2368 if (rt_overload_count == 1 &&
2369 cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
2370 return;
2371
2372 #ifdef HAVE_RT_PUSH_IPI
2373 if (sched_feat(RT_PUSH_IPI)) {
2374 tell_cpu_to_push(this_rq);
2375 return;
2376 }
2377 #endif
2378
2379 for_each_cpu(cpu, this_rq->rd->rto_mask) {
2380 if (this_cpu == cpu)
2381 continue;
2382
2383 src_rq = cpu_rq(cpu);
2384
		/*
		 * Don't bother taking the src_rq->lock if the next highest
		 * task is known to be lower-priority than our current task.
		 * This may look racy, but if this value is about to go
		 * logically higher, the src_rq will push this task away.
		 * And if its going logically lower, we do not care.
		 */
2392 if (src_rq->rt.highest_prio.next >=
2393 this_rq->rt.highest_prio.curr)
2394 continue;
2395
2396
2397
2398
2399
2400
2401 push_task = NULL;
2402 double_lock_balance(this_rq, src_rq);
2403
2404
2405
2406
2407
2408 p = pick_highest_pushable_task(src_rq, this_cpu);
2409
2410
2411
2412
2413
2414 if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
2415 WARN_ON(p == src_rq->curr);
2416 WARN_ON(!task_on_rq_queued(p));
2417
			/*
			 * There's a chance that p is higher in priority
			 * than what's currently running on its CPU.
			 * This is just that p is waking up and hasn't
			 * had a chance to schedule yet. We only pull
			 * p if it is lower in priority than the
			 * current task on the run queue.
			 */
2426 if (p->prio < src_rq->curr->prio)
2427 goto skip;
2428
2429 if (is_migration_disabled(p)) {
2430 push_task = get_push_task(src_rq);
2431 } else {
2432 deactivate_task(src_rq, p, 0);
2433 set_task_cpu(p, this_cpu);
2434 activate_task(this_rq, p, 0);
2435 resched = true;
2436 }
2437
2438
2439
2440
2441
2442
2443 }
2444 skip:
2445 double_unlock_balance(this_rq, src_rq);
2446
2447 if (push_task) {
2448 raw_spin_rq_unlock(this_rq);
2449 stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
2450 push_task, &src_rq->push_work);
2451 raw_spin_rq_lock(this_rq);
2452 }
2453 }
2454
2455 if (resched)
2456 resched_curr(this_rq);
2457 }
2458
/*
 * If we are not running and we are not going to reschedule soon, we should
 * try to push tasks away now.
 */
2463 static void task_woken_rt(struct rq *rq, struct task_struct *p)
2464 {
2465 bool need_to_push = !task_running(rq, p) &&
2466 !test_tsk_need_resched(rq->curr) &&
2467 p->nr_cpus_allowed > 1 &&
2468 (dl_task(rq->curr) || rt_task(rq->curr)) &&
2469 (rq->curr->nr_cpus_allowed < 2 ||
2470 rq->curr->prio <= p->prio);
2471
2472 if (need_to_push)
2473 push_rt_tasks(rq);
2474 }
2475
2476
2477 static void rq_online_rt(struct rq *rq)
2478 {
2479 if (rq->rt.overloaded)
2480 rt_set_overload(rq);
2481
2482 __enable_runtime(rq);
2483
2484 cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
2485 }
2486
2487
2488 static void rq_offline_rt(struct rq *rq)
2489 {
2490 if (rq->rt.overloaded)
2491 rt_clear_overload(rq);
2492
2493 __disable_runtime(rq);
2494
2495 cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
2496 }
2497
/*
 * When switching a task out of the rt queue, we bring ourselves to a
 * position where we might want to pull RT tasks from other runqueues.
 */
2502 static void switched_from_rt(struct rq *rq, struct task_struct *p)
2503 {
	/*
	 * If there are other RT tasks then we will reschedule
	 * and the scheduling of the other RT tasks will handle
	 * the balancing. But if we are the last RT task
	 * we may need to handle the pulling of RT tasks
	 * now.
	 */
2511 if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
2512 return;
2513
2514 rt_queue_pull_task(rq);
2515 }
2516
2517 void __init init_sched_rt_class(void)
2518 {
2519 unsigned int i;
2520
2521 for_each_possible_cpu(i) {
2522 zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
2523 GFP_KERNEL, cpu_to_node(i));
2524 }
2525 }
2526 #endif
2527
/*
 * When switching a task to RT, we may overload the runqueue
 * with RT tasks. In this case we try to push them off to
 * other runqueues.
 */
2533 static void switched_to_rt(struct rq *rq, struct task_struct *p)
2534 {
2535
2536
2537
2538
2539 if (task_current(rq, p)) {
2540 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
2541 return;
2542 }
2543
	/*
	 * If we are not running we may need to preempt the current
	 * running task. If that current running task is also an RT task
	 * then see if we can move to another run queue.
	 */
2549 if (task_on_rq_queued(p)) {
2550 #ifdef CONFIG_SMP
2551 if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
2552 rt_queue_push_tasks(rq);
2553 #endif
2554 if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
2555 resched_curr(rq);
2556 }
2557 }
2558
/*
 * Priority of the task has changed. This may cause
 * us to initiate a push or pull.
 */
2563 static void
2564 prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
2565 {
2566 if (!task_on_rq_queued(p))
2567 return;
2568
2569 if (task_current(rq, p)) {
2570 #ifdef CONFIG_SMP
2571
2572
2573
2574
2575 if (oldprio < p->prio)
2576 rt_queue_pull_task(rq);
2577
2578
2579
2580
2581
2582 if (p->prio > rq->rt.highest_prio.curr)
2583 resched_curr(rq);
2584 #else
2585
2586 if (oldprio < p->prio)
2587 resched_curr(rq);
2588 #endif
2589 } else {
2590
2591
2592
2593
2594
2595 if (p->prio < rq->curr->prio)
2596 resched_curr(rq);
2597 }
2598 }
2599
2600 #ifdef CONFIG_POSIX_TIMERS
2601 static void watchdog(struct rq *rq, struct task_struct *p)
2602 {
2603 unsigned long soft, hard;
2604
2605
2606 soft = task_rlimit(p, RLIMIT_RTTIME);
2607 hard = task_rlimit_max(p, RLIMIT_RTTIME);
2608
2609 if (soft != RLIM_INFINITY) {
2610 unsigned long next;
2611
2612 if (p->rt.watchdog_stamp != jiffies) {
2613 p->rt.timeout++;
2614 p->rt.watchdog_stamp = jiffies;
2615 }
2616
2617 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
2618 if (p->rt.timeout > next) {
2619 posix_cputimers_rt_watchdog(&p->posix_cputimers,
2620 p->se.sum_exec_runtime);
2621 }
2622 }
2623 }
2624 #else
2625 static inline void watchdog(struct rq *rq, struct task_struct *p) { }
2626 #endif
2627
/*
 * scheduler tick hitting a task of our scheduling class.
 *
 * NOTE: This function can be called remotely by the tick offload that
 * goes along full dynticks. Therefore no local assumption can be made
 * and everything must be accessed through the @rq and @curr passed in
 * parameters.
 */
2636 static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
2637 {
2638 struct sched_rt_entity *rt_se = &p->rt;
2639
2640 update_curr_rt(rq);
2641 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
2642
2643 watchdog(rq, p);
2644
2645
2646
2647
2648
2649 if (p->policy != SCHED_RR)
2650 return;
2651
2652 if (--p->rt.time_slice)
2653 return;
2654
2655 p->rt.time_slice = sched_rr_timeslice;
2656
	/*
	 * Requeue to the end of the queue if we (and all of our ancestors)
	 * are not the only element on the queue.
	 */
2661 for_each_sched_rt_entity(rt_se) {
2662 if (rt_se->run_list.prev != rt_se->run_list.next) {
2663 requeue_task_rt(rq, p, 0);
2664 resched_curr(rq);
2665 return;
2666 }
2667 }
2668 }
2669
2670 static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
2671 {
2672
2673
2674
2675 if (task->policy == SCHED_RR)
2676 return sched_rr_timeslice;
2677 else
2678 return 0;
2679 }
2680
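/*
 * Method table hooking the RT policies (SCHED_FIFO/SCHED_RR) into the core
 * scheduler.
 */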
2681 DEFINE_SCHED_CLASS(rt) = {
2682
2683 .enqueue_task = enqueue_task_rt,
2684 .dequeue_task = dequeue_task_rt,
2685 .yield_task = yield_task_rt,
2686
2687 .check_preempt_curr = check_preempt_curr_rt,
2688
2689 .pick_next_task = pick_next_task_rt,
2690 .put_prev_task = put_prev_task_rt,
2691 .set_next_task = set_next_task_rt,
2692
2693 #ifdef CONFIG_SMP
2694 .balance = balance_rt,
2695 .pick_task = pick_task_rt,
2696 .select_task_rq = select_task_rq_rt,
2697 .set_cpus_allowed = set_cpus_allowed_common,
2698 .rq_online = rq_online_rt,
2699 .rq_offline = rq_offline_rt,
2700 .task_woken = task_woken_rt,
2701 .switched_from = switched_from_rt,
2702 .find_lock_rq = find_lock_lowest_rq,
2703 #endif
2704
2705 .task_tick = task_tick_rt,
2706
2707 .get_rr_interval = get_rr_interval_rt,
2708
2709 .prio_changed = prio_changed_rt,
2710 .switched_to = switched_to_rt,
2711
2712 .update_curr = update_curr_rt,
2713
2714 #ifdef CONFIG_UCLAMP_TASK
2715 .uclamp_enabled = 1,
2716 #endif
2717 };
2718
2719 #ifdef CONFIG_RT_GROUP_SCHED
2720 /*
2721  * Ensure that the real time constraints are schedulable.
2722  */
2723 static DEFINE_MUTEX(rt_constraints_mutex);
2724
2725 static inline int tg_has_rt_tasks(struct task_group *tg)
2726 {
2727 struct task_struct *task;
2728 struct css_task_iter it;
2729 int ret = 0;
2730
2731 /*
2732  * Autogroups do not have RT tasks; see autogroup_create().
2733  */
2734 if (task_group_is_autogroup(tg))
2735 return 0;
2736
2737 css_task_iter_start(&tg->css, 0, &it);
2738 while (!ret && (task = css_task_iter_next(&it)))
2739 ret |= rt_task(task);
2740 css_task_iter_end(&it);
2741
2742 return ret;
2743 }
2744
2745 struct rt_schedulable_data {
2746 struct task_group *tg;
2747 u64 rt_period;
2748 u64 rt_runtime;
2749 };
2750
2751 static int tg_rt_schedulable(struct task_group *tg, void *data)
2752 {
2753 struct rt_schedulable_data *d = data;
2754 struct task_group *child;
2755 unsigned long total, sum = 0;
2756 u64 period, runtime;
2757
2758 period = ktime_to_ns(tg->rt_bandwidth.rt_period);
2759 runtime = tg->rt_bandwidth.rt_runtime;
2760
2761 if (tg == d->tg) {
2762 period = d->rt_period;
2763 runtime = d->rt_runtime;
2764 }
2765
2766 /*
2767  * Cannot have more runtime than the period.
2768  */
2769 if (runtime > period && runtime != RUNTIME_INF)
2770 return -EINVAL;
2771
2772 /*
2773  * Don't let the runtime go to zero while the group still has RT tasks.
2774  */
2775 if (rt_bandwidth_enabled() && !runtime &&
2776 tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg))
2777 return -EBUSY;
2778
2779 total = to_ratio(period, runtime);
2780
2781 /*
2782  * Nobody can have more than the global setting allows.
2783  */
2784 if (total > to_ratio(global_rt_period(), global_rt_runtime()))
2785 return -EINVAL;
2786
2787 /*
2788  * The sum of our children's runtime should not exceed our own.
2789  */
2790 list_for_each_entry_rcu(child, &tg->children, siblings) {
2791 period = ktime_to_ns(child->rt_bandwidth.rt_period);
2792 runtime = child->rt_bandwidth.rt_runtime;
2793
2794 if (child == d->tg) {
2795 period = d->rt_period;
2796 runtime = d->rt_runtime;
2797 }
2798
2799 sum += to_ratio(period, runtime);
2800 }
2801
2802 if (sum > total)
2803 return -EINVAL;
2804
2805 return 0;
2806 }
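
/*
 * Worked example (assuming to_ratio() is the BW_SHIFT == 20 fixed-point
 * helper, i.e. roughly runtime * 2^20 / period):
 *
 *	global defaults, period 1s / runtime 0.95s:
 *		to_ratio(1000000000, 950000000) == 996147	(~0.95 * 2^20)
 *	a group asking for 0.5s per 1s period:
 *		to_ratio(1000000000, 500000000) == 524288	( 0.5 * 2^20)
 *
 * 524288 <= 996147, so the group fits under the global limit; its own
 * children must then sum to no more than 524288.
 */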
2807
2808 static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
2809 {
2810 int ret;
2811
2812 struct rt_schedulable_data data = {
2813 .tg = tg,
2814 .rt_period = period,
2815 .rt_runtime = runtime,
2816 };
2817
2818 rcu_read_lock();
2819 ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
2820 rcu_read_unlock();
2821
2822 return ret;
2823 }
2824
2825 static int tg_set_rt_bandwidth(struct task_group *tg,
2826 u64 rt_period, u64 rt_runtime)
2827 {
2828 int i, err = 0;
2829
2830 /*
2831  * Disallowing the root group RT runtime is BAD; it would prevent the
2832  * kernel from creating (and operating) RT threads.
2833  */
2834 if (tg == &root_task_group && rt_runtime == 0)
2835 return -EINVAL;
2836
2837 /* A zero period doesn't make any sense. */
2838 if (rt_period == 0)
2839 return -EINVAL;
2840
2841 /*
2842  * Bound the runtime to guard against overflow in the bandwidth shift.
2843  */
2844 if (rt_runtime != RUNTIME_INF && rt_runtime > max_rt_runtime)
2845 return -EINVAL;
2846
2847 mutex_lock(&rt_constraints_mutex);
2848 err = __rt_schedulable(tg, rt_period, rt_runtime);
2849 if (err)
2850 goto unlock;
2851
2852 raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
2853 tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
2854 tg->rt_bandwidth.rt_runtime = rt_runtime;
2855
2856 for_each_possible_cpu(i) {
2857 struct rt_rq *rt_rq = tg->rt_rq[i];
2858
2859 raw_spin_lock(&rt_rq->rt_runtime_lock);
2860 rt_rq->rt_runtime = rt_runtime;
2861 raw_spin_unlock(&rt_rq->rt_runtime_lock);
2862 }
2863 raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
2864 unlock:
2865 mutex_unlock(&rt_constraints_mutex);
2866
2867 return err;
2868 }
2869
2870 int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
2871 {
2872 u64 rt_runtime, rt_period;
2873
2874 rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
2875 rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
2876 if (rt_runtime_us < 0)
2877 rt_runtime = RUNTIME_INF;
2878 else if ((u64)rt_runtime_us > U64_MAX / NSEC_PER_USEC)
2879 return -EINVAL;
2880
2881 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
2882 }
2883
2884 long sched_group_rt_runtime(struct task_group *tg)
2885 {
2886 u64 rt_runtime_us;
2887
2888 if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
2889 return -1;
2890
2891 rt_runtime_us = tg->rt_bandwidth.rt_runtime;
2892 do_div(rt_runtime_us, NSEC_PER_USEC);
2893 return rt_runtime_us;
2894 }
2895
2896 int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
2897 {
2898 u64 rt_runtime, rt_period;
2899
2900 if (rt_period_us > U64_MAX / NSEC_PER_USEC)
2901 return -EINVAL;
2902
2903 rt_period = rt_period_us * NSEC_PER_USEC;
2904 rt_runtime = tg->rt_bandwidth.rt_runtime;
2905
2906 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
2907 }
2908
2909 long sched_group_rt_period(struct task_group *tg)
2910 {
2911 u64 rt_period_us;
2912
2913 rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
2914 do_div(rt_period_us, NSEC_PER_USEC);
2915 return rt_period_us;
2916 }
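
/*
 * Illustrative sketch (not kernel code), assuming the cgroup-v1 cpu
 * controller is mounted at /sys/fs/cgroup/cpu: the setters/getters above
 * back the per-group cpu.rt_period_us and cpu.rt_runtime_us files. Giving
 * a group 100ms of RT time per 1s period could look like:
 *
 *	# mkdir /sys/fs/cgroup/cpu/rtgroup
 *	# echo 1000000 > /sys/fs/cgroup/cpu/rtgroup/cpu.rt_period_us
 *	# echo 100000  > /sys/fs/cgroup/cpu/rtgroup/cpu.rt_runtime_us
 *
 * A write fails when __rt_schedulable() rejects the new setting, and a
 * group left with rt_runtime == 0 cannot have RT tasks attached to it
 * (see sched_rt_can_attach() below).
 */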
2917
2918 #ifdef CONFIG_SYSCTL
2919 static int sched_rt_global_constraints(void)
2920 {
2921 int ret = 0;
2922
2923 mutex_lock(&rt_constraints_mutex);
2924 ret = __rt_schedulable(NULL, 0, 0);
2925 mutex_unlock(&rt_constraints_mutex);
2926
2927 return ret;
2928 }
2929 #endif
2930
2931 int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
2932 {
2933 /* Don't accept real-time tasks when there is no way for them to run. */
2934 if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
2935 return 0;
2936
2937 return 1;
2938 }
2939
2940 #else
2941
2942 #ifdef CONFIG_SYSCTL
2943 static int sched_rt_global_constraints(void)
2944 {
2945 unsigned long flags;
2946 int i;
2947
2948 raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
2949 for_each_possible_cpu(i) {
2950 struct rt_rq *rt_rq = &cpu_rq(i)->rt;
2951
2952 raw_spin_lock(&rt_rq->rt_runtime_lock);
2953 rt_rq->rt_runtime = global_rt_runtime();
2954 raw_spin_unlock(&rt_rq->rt_runtime_lock);
2955 }
2956 raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
2957
2958 return 0;
2959 }
2960 #endif
2961 #endif
2962
2963 #ifdef CONFIG_SYSCTL
2964 static int sched_rt_global_validate(void)
2965 {
2966 if (sysctl_sched_rt_period <= 0)
2967 return -EINVAL;
2968
2969 if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
2970 ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
2971 ((u64)sysctl_sched_rt_runtime *
2972 NSEC_PER_USEC > max_rt_runtime)))
2973 return -EINVAL;
2974
2975 return 0;
2976 }
2977
2978 static void sched_rt_do_global(void)
2979 {
2980 unsigned long flags;
2981
2982 raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
2983 def_rt_bandwidth.rt_runtime = global_rt_runtime();
2984 def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
2985 raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
2986 }
2987
2988 static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
2989 size_t *lenp, loff_t *ppos)
2990 {
2991 int old_period, old_runtime;
2992 static DEFINE_MUTEX(mutex);
2993 int ret;
2994
2995 mutex_lock(&mutex);
2996 old_period = sysctl_sched_rt_period;
2997 old_runtime = sysctl_sched_rt_runtime;
2998
2999 ret = proc_dointvec(table, write, buffer, lenp, ppos);
3000
3001 if (!ret && write) {
3002 ret = sched_rt_global_validate();
3003 if (ret)
3004 goto undo;
3005
3006 ret = sched_dl_global_validate();
3007 if (ret)
3008 goto undo;
3009
3010 ret = sched_rt_global_constraints();
3011 if (ret)
3012 goto undo;
3013
3014 sched_rt_do_global();
3015 sched_dl_do_global();
3016 }
3017 if (0) {
3018 undo:
3019 sysctl_sched_rt_period = old_period;
3020 sysctl_sched_rt_runtime = old_runtime;
3021 }
3022 mutex_unlock(&mutex);
3023
3024 return ret;
3025 }
3026
3027 static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
3028 size_t *lenp, loff_t *ppos)
3029 {
3030 int ret;
3031 static DEFINE_MUTEX(mutex);
3032
3033 mutex_lock(&mutex);
3034 ret = proc_dointvec(table, write, buffer, lenp, ppos);
3035
3036 /*
3037  * Internally we keep jiffies; writing zero or less restores the default.
3038  */
3039 if (!ret && write) {
3040 sched_rr_timeslice =
3041 sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
3042 msecs_to_jiffies(sysctl_sched_rr_timeslice);
3043 }
3044 mutex_unlock(&mutex);
3045
3046 return ret;
3047 }
3048 #endif
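
/*
 * Illustrative sketch (not kernel code): the two handlers above back the
 * sched_rt_period_us, sched_rt_runtime_us and sched_rr_timeslice_ms
 * sysctls. Capping RT tasks at 80% of each period and using a 20ms RR
 * timeslice (with HZ == 250 that is 5 jiffies internally):
 *
 *	# echo 1000000 > /proc/sys/kernel/sched_rt_period_us
 *	# echo 800000  > /proc/sys/kernel/sched_rt_runtime_us
 *	# echo 20      > /proc/sys/kernel/sched_rr_timeslice_ms
 *
 * Writing -1 to sched_rt_runtime_us disables RT throttling (RUNTIME_INF);
 * writing zero or a negative value to sched_rr_timeslice_ms restores the
 * default RR_TIMESLICE.
 */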
3049
3050 #ifdef CONFIG_SCHED_DEBUG
3051 void print_rt_stats(struct seq_file *m, int cpu)
3052 {
3053 rt_rq_iter_t iter;
3054 struct rt_rq *rt_rq;
3055
3056 rcu_read_lock();
3057 for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
3058 print_rt_rq(m, cpu, rt_rq);
3059 rcu_read_unlock();
3060 }
3061 #endif