// SPDX-License-Identifier: GPL-2.0

/*
 * Auto-group scheduling implementation:
 */

unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
static struct autogroup autogroup_default;
static atomic_t autogroup_seq_nr;

#ifdef CONFIG_SYSCTL
static struct ctl_table sched_autogroup_sysctls[] = {
    {
        .procname       = "sched_autogroup_enabled",
        .data           = &sysctl_sched_autogroup_enabled,
        .maxlen         = sizeof(unsigned int),
        .mode           = 0644,
        .proc_handler   = proc_dointvec_minmax,
        .extra1         = SYSCTL_ZERO,
        .extra2         = SYSCTL_ONE,
    },
    {}
};

static void __init sched_autogroup_sysctl_init(void)
{
    register_sysctl_init("kernel", sched_autogroup_sysctls);
}
#else
#define sched_autogroup_sysctl_init() do { } while (0)
#endif
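
/*
 * Note: the table above is registered under "kernel", so the knob is visible
 * as /proc/sys/kernel/sched_autogroup_enabled (0 or 1, mode 0644) and can be
 * flipped at runtime, e.g. "sysctl kernel.sched_autogroup_enabled=0".
 * Autogrouping can also be disabled at boot with the "noautogroup" parameter
 * handled by setup_autogroup() further down.
 */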

void __init autogroup_init(struct task_struct *init_task)
{
    autogroup_default.tg = &root_task_group;
    kref_init(&autogroup_default.kref);
    init_rwsem(&autogroup_default.lock);
    init_task->signal->autogroup = &autogroup_default;
    sched_autogroup_sysctl_init();
}

void autogroup_free(struct task_group *tg)
{
    kfree(tg->autogroup);
}

static inline void autogroup_destroy(struct kref *kref)
{
    struct autogroup *ag = container_of(kref, struct autogroup, kref);

#ifdef CONFIG_RT_GROUP_SCHED
    /* We've redirected RT tasks to the root task group... */
    ag->tg->rt_se = NULL;
    ag->tg->rt_rq = NULL;
#endif
    sched_release_group(ag->tg);
    sched_destroy_group(ag->tg);
}

static inline void autogroup_kref_put(struct autogroup *ag)
{
    kref_put(&ag->kref, autogroup_destroy);
}

static inline struct autogroup *autogroup_kref_get(struct autogroup *ag)
{
    kref_get(&ag->kref);
    return ag;
}

static inline struct autogroup *autogroup_task_get(struct task_struct *p)
{
    struct autogroup *ag;
    unsigned long flags;

    if (!lock_task_sighand(p, &flags))
        return autogroup_kref_get(&autogroup_default);

    ag = autogroup_kref_get(p->signal->autogroup);
    unlock_task_sighand(p, &flags);

    return ag;
}

static inline struct autogroup *autogroup_create(void)
{
    struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
    struct task_group *tg;

    if (!ag)
        goto out_fail;

    tg = sched_create_group(&root_task_group);
    if (IS_ERR(tg))
        goto out_free;

    kref_init(&ag->kref);
    init_rwsem(&ag->lock);
    ag->id = atomic_inc_return(&autogroup_seq_nr);
    ag->tg = tg;
#ifdef CONFIG_RT_GROUP_SCHED
    /*
     * Autogroup RT tasks are redirected to the root task group
     * so we don't have to move tasks around upon policy change,
     * or flail around trying to allocate bandwidth on the fly.
     * A bandwidth exception in __sched_setscheduler() allows
     * the policy change to proceed.
     */
    free_rt_sched_group(tg);
    tg->rt_se = root_task_group.rt_se;
    tg->rt_rq = root_task_group.rt_rq;
#endif
    tg->autogroup = ag;

    sched_online_group(tg, &root_task_group);
    return ag;

out_free:
    kfree(ag);
out_fail:
    if (printk_ratelimit()) {
        printk(KERN_WARNING "autogroup_create: %s failure.\n",
            ag ? "sched_create_group()" : "kzalloc()");
    }

    return autogroup_kref_get(&autogroup_default);
}

bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
{
    if (tg != &root_task_group)
        return false;
    /*
     * If we race with autogroup_move_group() the caller can use the old
     * value of signal->autogroup but in this case sched_move_task() will
     * be called again before autogroup_kref_put().
     *
     * However, there is no way sched_autogroup_exit_task() could tell us
     * to avoid autogroup->tg, so we abuse PF_EXITING flag for this case.
     */
    if (p->flags & PF_EXITING)
        return false;

    return true;
}

void sched_autogroup_exit_task(struct task_struct *p)
{
    /*
     * We are going to call exit_notify() and autogroup_move_group() can't
     * see this thread after that: we can no longer use signal->autogroup.
     * See the PF_EXITING check in task_wants_autogroup().
     */
    sched_move_task(p);
}

static void
autogroup_move_group(struct task_struct *p, struct autogroup *ag)
{
    struct autogroup *prev;
    struct task_struct *t;
    unsigned long flags;

    BUG_ON(!lock_task_sighand(p, &flags));

    prev = p->signal->autogroup;
    if (prev == ag) {
        unlock_task_sighand(p, &flags);
        return;
    }

    p->signal->autogroup = autogroup_kref_get(ag);
    /*
     * We can't avoid sched_move_task() after we changed signal->autogroup,
     * this process can already run with task_group() == prev->tg or we can
     * race with cgroup code which can read autogroup = prev under rq->lock.
     * In the latter case for_each_thread() can not miss a migrating thread,
     * cpu_cgroup_attach() must not be possible after cgroup_exit() and it
     * can't be removed from thread list, we hold ->siglock.
     *
     * If an exiting thread was already removed from thread list we rely on
     * sched_autogroup_exit_task().
     */
    for_each_thread(p, t)
        sched_move_task(t);

    unlock_task_sighand(p, &flags);
    autogroup_kref_put(prev);
}

/* Allocates GFP_KERNEL, cannot be called under any spinlock: */
void sched_autogroup_create_attach(struct task_struct *p)
{
    struct autogroup *ag = autogroup_create();

    autogroup_move_group(p, ag);

    /* Drop extra reference added by autogroup_create(): */
    autogroup_kref_put(ag);
}
EXPORT_SYMBOL(sched_autogroup_create_attach);

/* Cannot be called under siglock. Currently has no users: */
void sched_autogroup_detach(struct task_struct *p)
{
    autogroup_move_group(p, &autogroup_default);
}
EXPORT_SYMBOL(sched_autogroup_detach);

void sched_autogroup_fork(struct signal_struct *sig)
{
    sig->autogroup = autogroup_task_get(current);
}

void sched_autogroup_exit(struct signal_struct *sig)
{
    autogroup_kref_put(sig->autogroup);
}

static int __init setup_autogroup(char *str)
{
    sysctl_sched_autogroup_enabled = 0;

    return 1;
}
__setup("noautogroup", setup_autogroup);

#ifdef CONFIG_PROC_FS

/*
 * Write side of /proc/<pid>/autogroup: map the requested nice value onto
 * CFS shares for the task's autogroup.
 */
int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
{
    static unsigned long next = INITIAL_JIFFIES;
    struct autogroup *ag;
    unsigned long shares;
    int err, idx;

    if (nice < MIN_NICE || nice > MAX_NICE)
        return -EINVAL;

    err = security_task_setnice(current, nice);
    if (err)
        return err;

    if (nice < 0 && !can_nice(current, nice))
        return -EPERM;

    /* This is a heavy operation, taking global locks.. */
    if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next))
        return -EAGAIN;

    next = HZ / 10 + jiffies;
    ag = autogroup_task_get(p);

    idx = array_index_nospec(nice + 20, 40);
    shares = scale_load(sched_prio_to_weight[idx]);

    down_write(&ag->lock);
    err = sched_group_set_shares(ag->tg, shares);
    if (!err)
        ag->nice = nice;
    up_write(&ag->lock);

    autogroup_kref_put(ag);

    return err;
}

/* Read side of /proc/<pid>/autogroup: report the group id and its nice value. */
void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
{
    struct autogroup *ag = autogroup_task_get(p);

    if (!task_group_is_autogroup(ag->tg))
        goto out;

    down_read(&ag->lock);
    seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
    up_read(&ag->lock);

out:
    autogroup_kref_put(ag);
}
#endif /* CONFIG_PROC_FS */

/*
 * Format the synthetic "/autogroup-<id>" name for an autogroup task_group,
 * used where a cgroup path would otherwise be printed.
 */
int autogroup_path(struct task_group *tg, char *buf, int buflen)
{
    if (!task_group_is_autogroup(tg))
        return 0;

    return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
}
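
The two proc handlers above implement the per-task /proc/<pid>/autogroup file: a read prints "/autogroup-<id> nice <n>", and a write with a nice value in the MIN_NICE..MAX_NICE range (-20..19) adjusts the group's weight. A minimal userspace sketch of that interaction (illustration only, not part of the kernel source; error handling kept to the essentials):

#include <stdio.h>

int main(void)
{
    const char *path = "/proc/self/autogroup";
    char line[64];
    FILE *f;

    /* Read: proc_sched_autogroup_show_task() formats "/autogroup-<id> nice <n>". */
    f = fopen(path, "r");
    if (!f) {
        perror("fopen");
        return 1;
    }
    if (fgets(line, sizeof(line), f))
        printf("before: %s", line);
    fclose(f);

    /* Write: proc_sched_autogroup_set_nice() accepts -20..19; negative values need CAP_SYS_NICE-style privilege. */
    f = fopen(path, "w");
    if (!f) {
        perror("fopen");
        return 1;
    }
    fprintf(f, "10\n");
    fclose(f);
    return 0;
}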