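/*
 *  kernel/cgroup/cpuset.c
 *
 *  Processor and memory placement constraints for sets of tasks
 *  (the "cpuset" cgroup controller).
 */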
0025 #include <linux/cpu.h>
0026 #include <linux/cpumask.h>
0027 #include <linux/cpuset.h>
0028 #include <linux/err.h>
0029 #include <linux/errno.h>
0030 #include <linux/file.h>
0031 #include <linux/fs.h>
0032 #include <linux/init.h>
0033 #include <linux/interrupt.h>
0034 #include <linux/kernel.h>
0035 #include <linux/kmod.h>
0036 #include <linux/list.h>
0037 #include <linux/mempolicy.h>
0038 #include <linux/mm.h>
0039 #include <linux/memory.h>
0040 #include <linux/export.h>
0041 #include <linux/mount.h>
0042 #include <linux/fs_context.h>
0043 #include <linux/namei.h>
0044 #include <linux/pagemap.h>
0045 #include <linux/proc_fs.h>
0046 #include <linux/rcupdate.h>
0047 #include <linux/sched.h>
0048 #include <linux/sched/deadline.h>
0049 #include <linux/sched/mm.h>
0050 #include <linux/sched/task.h>
0051 #include <linux/seq_file.h>
0052 #include <linux/security.h>
0053 #include <linux/slab.h>
0054 #include <linux/spinlock.h>
0055 #include <linux/stat.h>
0056 #include <linux/string.h>
0057 #include <linux/time.h>
0058 #include <linux/time64.h>
0059 #include <linux/backing-dev.h>
0060 #include <linux/sort.h>
0061 #include <linux/oom.h>
0062 #include <linux/sched/isolation.h>
0063 #include <linux/uaccess.h>
0064 #include <linux/atomic.h>
0065 #include <linux/mutex.h>
0066 #include <linux/cgroup.h>
0067 #include <linux/wait.h>
0068
0069 DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key);
0070 DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
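
/*
 * Flags configurations where a cpuset's memory nodes consist only of
 * movable-zone nodes; allocations constrained to such a cpuset can fail even
 * when plenty of memory is available (see check_insane_mems_config() below).
 */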
0077 DEFINE_STATIC_KEY_FALSE(cpusets_insane_config_key);
0078
0079
0080
0081 struct fmeter {
0082 int cnt;
0083 int val;
0084 time64_t time;
0085 spinlock_t lock;
0086 };
0087
struct cpuset {
	struct cgroup_subsys_state css;

	unsigned long flags;		/* "unsigned long" so bitops work */

	/*
	 * User-configured CPUs and memory nodes.  On legacy hierarchies
	 * these are also the effective masks; on the default hierarchy the
	 * effective masks below may be further restricted by ancestors.
	 */
	cpumask_var_t cpus_allowed;
	nodemask_t mems_allowed;

	/* Effective CPUs and memory nodes actually granted to tasks */
	cpumask_var_t effective_cpus;
	nodemask_t effective_mems;

	/*
	 * CPUs handed out to child partition roots (default hierarchy
	 * only).  Disjoint from effective_cpus; effective_cpus contains
	 * only online CPUs while subparts_cpus may include offline ones.
	 */
	cpumask_var_t subparts_cpus;

	/*
	 * Memory nodes the tasks in this cpuset previously took on; used
	 * by cpuset_migrate_mm() when memory_migrate is enabled.
	 */
	nodemask_t old_mems_allowed;

	struct fmeter fmeter;		/* memory_pressure filter */

	/*
	 * Tasks are being attached to this cpuset.  Used to prevent
	 * emptying cpus/mems_allowed between ->can_attach() and ->attach().
	 */
	int attach_in_progress;

	/* partition number for rebuild_sched_domains() */
	int pn;

	/* for custom sched domain */
	int relax_domain_level;

	/* number of CPUs in subparts_cpus */
	int nr_subparts_cpus;

	/* partition root state (see the PRS_* values below) */
	int partition_root_state;

	/*
	 * Default hierarchy only:
	 * use_parent_ecpus  - set if using parent's effective_cpus
	 * child_ecpus_count - # of children with use_parent_ecpus set
	 */
	int use_parent_ecpus;
	int child_ecpus_count;

	/* Handle for cpuset.cpus.partition */
	struct cgroup_file partition_file;
};
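
/*
 * Partition root states (partition_root_state):
 *
 *   PRS_DISABLED - not a partition root ("member")
 *   PRS_ENABLED  - a valid partition root ("root")
 *   PRS_ERROR    - an invalid partition root ("root invalid"); entered when
 *                  the requested partition cannot currently be satisfied
 */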
0189 #define PRS_DISABLED 0
0190 #define PRS_ENABLED 1
0191 #define PRS_ERROR -1
0192
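/*
 * Temporary cpumasks used while updating partitions; allocated once per
 * operation (see alloc_cpumasks()) to avoid repeated cpumask allocations.
 */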
0197 struct tmpmasks {
0198 cpumask_var_t addmask, delmask;
0199 cpumask_var_t new_cpus;
0200 };
0201
0202 static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
0203 {
0204 return css ? container_of(css, struct cpuset, css) : NULL;
0205 }
0206
0207
0208 static inline struct cpuset *task_cs(struct task_struct *task)
0209 {
0210 return css_cs(task_css(task, cpuset_cgrp_id));
0211 }
0212
0213 static inline struct cpuset *parent_cs(struct cpuset *cs)
0214 {
0215 return css_cs(cs->css.parent);
0216 }
0217
0218
0219 typedef enum {
0220 CS_ONLINE,
0221 CS_CPU_EXCLUSIVE,
0222 CS_MEM_EXCLUSIVE,
0223 CS_MEM_HARDWALL,
0224 CS_MEMORY_MIGRATE,
0225 CS_SCHED_LOAD_BALANCE,
0226 CS_SPREAD_PAGE,
0227 CS_SPREAD_SLAB,
0228 } cpuset_flagbits_t;
0229
0230
0231 static inline bool is_cpuset_online(struct cpuset *cs)
0232 {
0233 return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css);
0234 }
0235
0236 static inline int is_cpu_exclusive(const struct cpuset *cs)
0237 {
0238 return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
0239 }
0240
0241 static inline int is_mem_exclusive(const struct cpuset *cs)
0242 {
0243 return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
0244 }
0245
0246 static inline int is_mem_hardwall(const struct cpuset *cs)
0247 {
0248 return test_bit(CS_MEM_HARDWALL, &cs->flags);
0249 }
0250
0251 static inline int is_sched_load_balance(const struct cpuset *cs)
0252 {
0253 return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
0254 }
0255
0256 static inline int is_memory_migrate(const struct cpuset *cs)
0257 {
0258 return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
0259 }
0260
0261 static inline int is_spread_page(const struct cpuset *cs)
0262 {
0263 return test_bit(CS_SPREAD_PAGE, &cs->flags);
0264 }
0265
0266 static inline int is_spread_slab(const struct cpuset *cs)
0267 {
0268 return test_bit(CS_SPREAD_SLAB, &cs->flags);
0269 }
0270
0271 static inline int is_partition_root(const struct cpuset *cs)
0272 {
0273 return cs->partition_root_state > 0;
0274 }
0275
0276
0277
0278
0279 static inline void notify_partition_change(struct cpuset *cs,
0280 int old_prs, int new_prs)
0281 {
0282 if (old_prs != new_prs)
0283 cgroup_file_notify(&cs->partition_file);
0284 }
0285
0286 static struct cpuset top_cpuset = {
0287 .flags = ((1 << CS_ONLINE) | (1 << CS_CPU_EXCLUSIVE) |
0288 (1 << CS_MEM_EXCLUSIVE)),
0289 .partition_root_state = PRS_ENABLED,
0290 };
0291
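/*
 * cpuset_for_each_child - traverse the online children of a cpuset
 * @child_cs: loop cursor pointing to the current child
 * @pos_css: used internally for iteration
 * @parent_cs: the cpuset whose children are walked
 *
 * Walking is done under RCU; offline children are skipped.
 */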
0301 #define cpuset_for_each_child(child_cs, pos_css, parent_cs) \
0302 css_for_each_child((pos_css), &(parent_cs)->css) \
0303 if (is_cpuset_online(((child_cs) = css_cs((pos_css)))))
0304
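/*
 * cpuset_for_each_descendant_pre - pre-order walk of an online cpuset subtree
 * @des_cs: loop cursor pointing to the current descendant
 * @pos_css: used internally for iteration
 * @root_cs: the cpuset whose subtree is walked
 *
 * Callers may skip a whole subtree with css_rightmost_descendant(), as
 * several users below do.
 */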
0316 #define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs) \
0317 css_for_each_descendant_pre((pos_css), &(root_cs)->css) \
0318 if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
0319
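/*
 * Two global locks guard cpuset structures:
 *
 *  - cpuset_rwsem (a per-cpu rwsem): taken for write by paths that modify
 *    cpusets; readers such as cpuset_read_lock() take it for read.
 *  - callback_lock (a spinlock): taken, irq-safe, around the actual updates
 *    to the published masks and flags so that readers that cannot block can
 *    still get a consistent snapshot.
 *
 * A writer holds cpuset_rwsem for write plus callback_lock around the update
 * itself, so a reader may hold either lock to obtain a stable view.
 */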
0358 DEFINE_STATIC_PERCPU_RWSEM(cpuset_rwsem);
0359
0360 void cpuset_read_lock(void)
0361 {
0362 percpu_down_read(&cpuset_rwsem);
0363 }
0364
0365 void cpuset_read_unlock(void)
0366 {
0367 percpu_up_read(&cpuset_rwsem);
0368 }
0369
0370 static DEFINE_SPINLOCK(callback_lock);
0371
0372 static struct workqueue_struct *cpuset_migrate_mm_wq;
0373
0374
0375
0376
0377 static void cpuset_hotplug_workfn(struct work_struct *work);
0378 static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);
0379
0380 static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq);
0381
0382 static inline void check_insane_mems_config(nodemask_t *nodes)
0383 {
0384 if (!cpusets_insane_config() &&
0385 movable_only_nodes(nodes)) {
0386 static_branch_enable(&cpusets_insane_config_key);
0387 pr_info("Unsupported (movable nodes only) cpuset configuration detected (nmask=%*pbl)!\n"
0388 "Cpuset allocations might fail even with a lot of memory available.\n",
0389 nodemask_pr_args(nodes));
0390 }
0391 }
0392
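/*
 * Return true when the cgroup v2 behaviour applies: either this is the
 * default (v2) hierarchy, or a v1 hierarchy mounted with the
 * "cpuset_v2_mode" option.
 */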
0401 static inline bool is_in_v2_mode(void)
0402 {
0403 return cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
0404 (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE);
0405 }
0406
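/*
 * guarantee_online_cpus - return a usable mask of online CPUs for a task
 * @tsk: the task whose cpuset is consulted
 * @pmask: result cpumask
 *
 * Start from the task's possible mask intersected with cpu_online_mask, then
 * restrict it to the effective_cpus of the nearest ancestor cpuset that still
 * intersects it.
 */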
0418 static void guarantee_online_cpus(struct task_struct *tsk,
0419 struct cpumask *pmask)
0420 {
0421 const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
0422 struct cpuset *cs;
0423
0424 if (WARN_ON(!cpumask_and(pmask, possible_mask, cpu_online_mask)))
0425 cpumask_copy(pmask, cpu_online_mask);
0426
0427 rcu_read_lock();
0428 cs = task_cs(tsk);
0429
0430 while (!cpumask_intersects(cs->effective_cpus, pmask)) {
0431 cs = parent_cs(cs);
0432 if (unlikely(!cs)) {
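			/*
			 * Even the top cpuset's effective_cpus does not
			 * intersect pmask; this can only happen transiently
			 * (e.g. racing with CPU hotplug), so just leave
			 * pmask as possible_mask & cpu_online_mask.
			 */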
0440 goto out_unlock;
0441 }
0442 }
0443 cpumask_and(pmask, pmask, cs->effective_cpus);
0444
0445 out_unlock:
0446 rcu_read_unlock();
0447 }
0448
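/*
 * guarantee_online_mems - return a non-empty mask of online memory nodes
 * @cs: the cpuset to consult
 * @pmask: result nodemask
 *
 * Walk up the hierarchy until an ancestor whose effective_mems intersects
 * node_states[N_MEMORY] is found, and return that intersection.
 *
 * Call with callback_lock or cpuset_rwsem held.
 */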
0460 static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
0461 {
0462 while (!nodes_intersects(cs->effective_mems, node_states[N_MEMORY]))
0463 cs = parent_cs(cs);
0464 nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]);
0465 }
0466
0467
0468
0469
0470
0471
0472 static void cpuset_update_task_spread_flag(struct cpuset *cs,
0473 struct task_struct *tsk)
0474 {
0475 if (is_spread_page(cs))
0476 task_set_spread_page(tsk);
0477 else
0478 task_clear_spread_page(tsk);
0479
0480 if (is_spread_slab(cs))
0481 task_set_spread_slab(tsk);
0482 else
0483 task_clear_spread_slab(tsk);
0484 }
0485
0486
0487
0488
0489
0490
0491
0492
0493
0494 static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
0495 {
0496 return cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
0497 nodes_subset(p->mems_allowed, q->mems_allowed) &&
0498 is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
0499 is_mem_exclusive(p) <= is_mem_exclusive(q);
0500 }
0501
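/*
 * alloc_cpumasks - allocate three cpumasks for a cpuset or a tmpmasks
 * @cs: the cpuset whose masks are allocated, or NULL
 * @tmp: the tmpmasks structure to use when @cs is NULL
 *
 * Return: 0 on success, -ENOMEM when any allocation fails (in which case the
 * masks allocated so far are freed again).
 */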
0510 static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
0511 {
0512 cpumask_var_t *pmask1, *pmask2, *pmask3;
0513
0514 if (cs) {
0515 pmask1 = &cs->cpus_allowed;
0516 pmask2 = &cs->effective_cpus;
0517 pmask3 = &cs->subparts_cpus;
0518 } else {
0519 pmask1 = &tmp->new_cpus;
0520 pmask2 = &tmp->addmask;
0521 pmask3 = &tmp->delmask;
0522 }
0523
0524 if (!zalloc_cpumask_var(pmask1, GFP_KERNEL))
0525 return -ENOMEM;
0526
0527 if (!zalloc_cpumask_var(pmask2, GFP_KERNEL))
0528 goto free_one;
0529
0530 if (!zalloc_cpumask_var(pmask3, GFP_KERNEL))
0531 goto free_two;
0532
0533 return 0;
0534
0535 free_two:
0536 free_cpumask_var(*pmask2);
0537 free_one:
0538 free_cpumask_var(*pmask1);
0539 return -ENOMEM;
0540 }
0541
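/*
 * free_cpumasks - free the cpumasks of a cpuset and/or a tmpmasks structure
 * @cs: the cpuset whose masks are freed, may be NULL
 * @tmp: the tmpmasks structure, may be NULL
 */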
0547 static inline void free_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
0548 {
0549 if (cs) {
0550 free_cpumask_var(cs->cpus_allowed);
0551 free_cpumask_var(cs->effective_cpus);
0552 free_cpumask_var(cs->subparts_cpus);
0553 }
0554 if (tmp) {
0555 free_cpumask_var(tmp->new_cpus);
0556 free_cpumask_var(tmp->addmask);
0557 free_cpumask_var(tmp->delmask);
0558 }
0559 }
0560
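/*
 * alloc_trial_cpuset - duplicate a cpuset so a change can be validated first
 * @cs: the cpuset to copy
 *
 * Returns a copy with its own cpumasks, or NULL on allocation failure.
 * Free with free_cpuset().
 */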
0565 static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
0566 {
0567 struct cpuset *trial;
0568
0569 trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL);
0570 if (!trial)
0571 return NULL;
0572
0573 if (alloc_cpumasks(trial, NULL)) {
0574 kfree(trial);
0575 return NULL;
0576 }
0577
0578 cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
0579 cpumask_copy(trial->effective_cpus, cs->effective_cpus);
0580 return trial;
0581 }
0582
0583
0584
0585
0586
0587 static inline void free_cpuset(struct cpuset *cs)
0588 {
0589 free_cpumasks(cs, NULL);
0590 kfree(cs);
0591 }
0592
0593
0594
0595
0596
0597 static int validate_change_legacy(struct cpuset *cur, struct cpuset *trial)
0598 {
0599 struct cgroup_subsys_state *css;
0600 struct cpuset *c, *par;
0601 int ret;
0602
0603 WARN_ON_ONCE(!rcu_read_lock_held());
0604
0605
0606 ret = -EBUSY;
0607 cpuset_for_each_child(c, css, cur)
0608 if (!is_cpuset_subset(c, trial))
0609 goto out;
0610
0611
0612 ret = -EACCES;
0613 par = parent_cs(cur);
0614 if (par && !is_cpuset_subset(trial, par))
0615 goto out;
0616
0617 ret = 0;
0618 out:
0619 return ret;
0620 }
0621
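/*
 * validate_change - check that a proposed cpuset change is legal
 * @cur: the current (unchanged) cpuset
 * @trial: a copy of @cur with the proposed change applied
 *
 * The checks, in order:
 *  - on legacy hierarchies, every child must remain a subset of @trial and
 *    @trial must remain a subset of its parent (validate_change_legacy());
 *  - exclusive siblings must not overlap in CPUs or memory nodes;
 *  - a populated cpuset (or one with an attach in progress) must not be left
 *    with no CPUs or no memory nodes;
 *  - a CPU-exclusive cpuset must not shrink below what its tasks require
 *    (cpuset_cpumask_can_shrink()).
 *
 * Return: 0 if the change is allowed, -errno otherwise.
 */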
0642 static int validate_change(struct cpuset *cur, struct cpuset *trial)
0643 {
0644 struct cgroup_subsys_state *css;
0645 struct cpuset *c, *par;
0646 int ret = 0;
0647
0648 rcu_read_lock();
0649
0650 if (!is_in_v2_mode())
0651 ret = validate_change_legacy(cur, trial);
0652 if (ret)
0653 goto out;
0654
0655
0656 if (cur == &top_cpuset)
0657 goto out;
0658
0659 par = parent_cs(cur);
0660
0661
0662
0663
0664
0665 ret = -EINVAL;
0666 cpuset_for_each_child(c, css, par) {
0667 if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
0668 c != cur &&
0669 cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
0670 goto out;
0671 if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
0672 c != cur &&
0673 nodes_intersects(trial->mems_allowed, c->mems_allowed))
0674 goto out;
0675 }
0676
0677
0678
0679
0680
0681 ret = -ENOSPC;
0682 if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) {
0683 if (!cpumask_empty(cur->cpus_allowed) &&
0684 cpumask_empty(trial->cpus_allowed))
0685 goto out;
0686 if (!nodes_empty(cur->mems_allowed) &&
0687 nodes_empty(trial->mems_allowed))
0688 goto out;
0689 }
0690
0691
0692
0693
0694
0695 ret = -EBUSY;
0696 if (is_cpu_exclusive(cur) &&
0697 !cpuset_cpumask_can_shrink(cur->cpus_allowed,
0698 trial->cpus_allowed))
0699 goto out;
0700
0701 ret = 0;
0702 out:
0703 rcu_read_unlock();
0704 return ret;
0705 }
0706
0707 #ifdef CONFIG_SMP
0708
0709
0710
0711
0712 static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
0713 {
0714 return cpumask_intersects(a->effective_cpus, b->effective_cpus);
0715 }
0716
0717 static void
0718 update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
0719 {
0720 if (dattr->relax_domain_level < c->relax_domain_level)
0721 dattr->relax_domain_level = c->relax_domain_level;
0722 return;
0723 }
0724
0725 static void update_domain_attr_tree(struct sched_domain_attr *dattr,
0726 struct cpuset *root_cs)
0727 {
0728 struct cpuset *cp;
0729 struct cgroup_subsys_state *pos_css;
0730
0731 rcu_read_lock();
0732 cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
0733
0734 if (cpumask_empty(cp->cpus_allowed)) {
0735 pos_css = css_rightmost_descendant(pos_css);
0736 continue;
0737 }
0738
0739 if (is_sched_load_balance(cp))
0740 update_domain_attr(dattr, cp);
0741 }
0742 rcu_read_unlock();
0743 }
0744
0745
0746 static inline int nr_cpusets(void)
0747 {
0748
0749 return static_key_count(&cpusets_enabled_key.key) + 1;
0750 }
0751
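/*
 * generate_sched_domains - build the set of sched domain partitions
 * @domains: out parameter, array of cpumasks (one per domain)
 * @attributes: out parameter, matching array of sched_domain_attr
 *
 * Partition the CPUs into non-overlapping sched domains according to the
 * current cpuset configuration:
 *
 *  - If the root cpuset is load-balancing and has no sub-partitions, produce
 *    a single domain covering its effective CPUs, restricted to the
 *    housekeeping (non-isolated) CPUs.
 *  - Otherwise, collect the relevant load-balancing cpusets and partition
 *    roots into csa[], then repeatedly merge any two entries whose effective
 *    CPUs overlap (collapsing their ->pn partition numbers) until the
 *    remaining partition numbers identify disjoint domains.
 *  - Finally fill one cpumask, and one sched_domain_attr if allocated, per
 *    domain.
 *
 * The caller takes ownership of the returned arrays.  Called with
 * cpuset_rwsem held.
 *
 * Return: the number of domains generated (at least 1).
 */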
0805 static int generate_sched_domains(cpumask_var_t **domains,
0806 struct sched_domain_attr **attributes)
0807 {
0808 struct cpuset *cp;
0809 struct cpuset **csa;
0810 int csn;
0811 int i, j, k;
0812 cpumask_var_t *doms;
0813 struct sched_domain_attr *dattr;
0814 int ndoms = 0;
0815 int nslot;
0816 struct cgroup_subsys_state *pos_css;
0817 bool root_load_balance = is_sched_load_balance(&top_cpuset);
0818
0819 doms = NULL;
0820 dattr = NULL;
0821 csa = NULL;
0822
0823
0824 if (root_load_balance && !top_cpuset.nr_subparts_cpus) {
0825 ndoms = 1;
0826 doms = alloc_sched_domains(ndoms);
0827 if (!doms)
0828 goto done;
0829
0830 dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
0831 if (dattr) {
0832 *dattr = SD_ATTR_INIT;
0833 update_domain_attr_tree(dattr, &top_cpuset);
0834 }
0835 cpumask_and(doms[0], top_cpuset.effective_cpus,
0836 housekeeping_cpumask(HK_TYPE_DOMAIN));
0837
0838 goto done;
0839 }
0840
0841 csa = kmalloc_array(nr_cpusets(), sizeof(cp), GFP_KERNEL);
0842 if (!csa)
0843 goto done;
0844 csn = 0;
0845
0846 rcu_read_lock();
0847 if (root_load_balance)
0848 csa[csn++] = &top_cpuset;
0849 cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) {
0850 if (cp == &top_cpuset)
0851 continue;
0852
0853
0854
0855
0856
0857
0858
0859
0860
0861
0862
0863 if (!cpumask_empty(cp->cpus_allowed) &&
0864 !(is_sched_load_balance(cp) &&
0865 cpumask_intersects(cp->cpus_allowed,
0866 housekeeping_cpumask(HK_TYPE_DOMAIN))))
0867 continue;
0868
0869 if (root_load_balance &&
0870 cpumask_subset(cp->cpus_allowed, top_cpuset.effective_cpus))
0871 continue;
0872
0873 if (is_sched_load_balance(cp) &&
0874 !cpumask_empty(cp->effective_cpus))
0875 csa[csn++] = cp;
0876
0877
0878 if (!is_partition_root(cp))
0879 pos_css = css_rightmost_descendant(pos_css);
0880 }
0881 rcu_read_unlock();
0882
0883 for (i = 0; i < csn; i++)
0884 csa[i]->pn = i;
0885 ndoms = csn;
0886
0887 restart:
0888
0889 for (i = 0; i < csn; i++) {
0890 struct cpuset *a = csa[i];
0891 int apn = a->pn;
0892
0893 for (j = 0; j < csn; j++) {
0894 struct cpuset *b = csa[j];
0895 int bpn = b->pn;
0896
0897 if (apn != bpn && cpusets_overlap(a, b)) {
0898 for (k = 0; k < csn; k++) {
0899 struct cpuset *c = csa[k];
0900
0901 if (c->pn == bpn)
0902 c->pn = apn;
0903 }
0904 ndoms--;
0905 goto restart;
0906 }
0907 }
0908 }
0909
0910
0911
0912
0913
0914 doms = alloc_sched_domains(ndoms);
0915 if (!doms)
0916 goto done;
0917
0918
0919
0920
0921
0922 dattr = kmalloc_array(ndoms, sizeof(struct sched_domain_attr),
0923 GFP_KERNEL);
0924
0925 for (nslot = 0, i = 0; i < csn; i++) {
0926 struct cpuset *a = csa[i];
0927 struct cpumask *dp;
0928 int apn = a->pn;
0929
0930 if (apn < 0) {
0931
0932 continue;
0933 }
0934
0935 dp = doms[nslot];
0936
0937 if (nslot == ndoms) {
0938 static int warnings = 10;
0939 if (warnings) {
0940 pr_warn("rebuild_sched_domains confused: nslot %d, ndoms %d, csn %d, i %d, apn %d\n",
0941 nslot, ndoms, csn, i, apn);
0942 warnings--;
0943 }
0944 continue;
0945 }
0946
0947 cpumask_clear(dp);
0948 if (dattr)
0949 *(dattr + nslot) = SD_ATTR_INIT;
0950 for (j = i; j < csn; j++) {
0951 struct cpuset *b = csa[j];
0952
0953 if (apn == b->pn) {
0954 cpumask_or(dp, dp, b->effective_cpus);
0955 cpumask_and(dp, dp, housekeeping_cpumask(HK_TYPE_DOMAIN));
0956 if (dattr)
0957 update_domain_attr_tree(dattr + nslot, b);
0958
0959
0960 b->pn = -1;
0961 }
0962 }
0963 nslot++;
0964 }
0965 BUG_ON(nslot != ndoms);
0966
0967 done:
0968 kfree(csa);
0969
0970
0971
0972
0973
0974 if (doms == NULL)
0975 ndoms = 1;
0976
0977 *domains = doms;
0978 *attributes = dattr;
0979 return ndoms;
0980 }
0981
0982 static void update_tasks_root_domain(struct cpuset *cs)
0983 {
0984 struct css_task_iter it;
0985 struct task_struct *task;
0986
0987 css_task_iter_start(&cs->css, 0, &it);
0988
0989 while ((task = css_task_iter_next(&it)))
0990 dl_add_task_root_domain(task);
0991
0992 css_task_iter_end(&it);
0993 }
0994
0995 static void rebuild_root_domains(void)
0996 {
0997 struct cpuset *cs = NULL;
0998 struct cgroup_subsys_state *pos_css;
0999
1000 percpu_rwsem_assert_held(&cpuset_rwsem);
1001 lockdep_assert_cpus_held();
1002 lockdep_assert_held(&sched_domains_mutex);
1003
1004 rcu_read_lock();
1005
1006
1007
1008
1009
1010 dl_clear_root_domain(&def_root_domain);
1011
1012 cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
1013
1014 if (cpumask_empty(cs->effective_cpus)) {
1015 pos_css = css_rightmost_descendant(pos_css);
1016 continue;
1017 }
1018
1019 css_get(&cs->css);
1020
1021 rcu_read_unlock();
1022
1023 update_tasks_root_domain(cs);
1024
1025 rcu_read_lock();
1026 css_put(&cs->css);
1027 }
1028 rcu_read_unlock();
1029 }
1030
1031 static void
1032 partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
1033 struct sched_domain_attr *dattr_new)
1034 {
1035 mutex_lock(&sched_domains_mutex);
1036 partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
1037 rebuild_root_domains();
1038 mutex_unlock(&sched_domains_mutex);
1039 }
1040
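/*
 * rebuild_sched_domains_locked - rebuild sched domains from the cpuset tree
 *
 * Regenerate the sched domain partition with generate_sched_domains() and
 * push it to the scheduler.  Bail out early if a CPU hotplug operation has
 * left the effective CPU masks out of sync with cpu_active_mask; the hotplug
 * path will rebuild the domains once it has caught up.
 *
 * Call with cpuset_rwsem held while holding cpus_read_lock().
 */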
1052 static void rebuild_sched_domains_locked(void)
1053 {
1054 struct cgroup_subsys_state *pos_css;
1055 struct sched_domain_attr *attr;
1056 cpumask_var_t *doms;
1057 struct cpuset *cs;
1058 int ndoms;
1059
1060 lockdep_assert_cpus_held();
1061 percpu_rwsem_assert_held(&cpuset_rwsem);
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072 if (!top_cpuset.nr_subparts_cpus &&
1073 !cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
1074 return;
1075
1076
1077
1078
1079
1080
1081 if (top_cpuset.nr_subparts_cpus) {
1082 rcu_read_lock();
1083 cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
1084 if (!is_partition_root(cs)) {
1085 pos_css = css_rightmost_descendant(pos_css);
1086 continue;
1087 }
1088 if (!cpumask_subset(cs->effective_cpus,
1089 cpu_active_mask)) {
1090 rcu_read_unlock();
1091 return;
1092 }
1093 }
1094 rcu_read_unlock();
1095 }
1096
1097
1098 ndoms = generate_sched_domains(&doms, &attr);
1099
1100
1101 partition_and_rebuild_sched_domains(ndoms, doms, attr);
1102 }
1103 #else
1104 static void rebuild_sched_domains_locked(void)
1105 {
1106 }
1107 #endif
1108
1109 void rebuild_sched_domains(void)
1110 {
1111 cpus_read_lock();
1112 percpu_down_write(&cpuset_rwsem);
1113 rebuild_sched_domains_locked();
1114 percpu_up_write(&cpuset_rwsem);
1115 cpus_read_unlock();
1116 }
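
/*
 * update_tasks_cpumask - apply the cpuset's effective_cpus to all its tasks
 * @cs: the cpuset whose tasks' cpus_allowed are updated
 */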
1126 static void update_tasks_cpumask(struct cpuset *cs)
1127 {
1128 struct css_task_iter it;
1129 struct task_struct *task;
1130
1131 css_task_iter_start(&cs->css, 0, &it);
1132 while ((task = css_task_iter_next(&it)))
1133 set_cpus_allowed_ptr(task, cs->effective_cpus);
1134 css_task_iter_end(&it);
1135 }
1136
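/*
 * compute_effective_cpumask - compute a cpuset's new effective_cpus
 * @new_cpus: result cpumask
 * @cs: the cpuset being updated
 * @parent: its parent cpuset
 *
 * Normally this is cpus_allowed & parent->effective_cpus.  When the parent
 * has sub-partitions, the CPUs granted to those sub-partitions are added
 * back before the intersection so a child partition root can claim them,
 * and the result is limited to cpu_active_mask.
 */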
1148 static void compute_effective_cpumask(struct cpumask *new_cpus,
1149 struct cpuset *cs, struct cpuset *parent)
1150 {
1151 if (parent->nr_subparts_cpus) {
1152 cpumask_or(new_cpus, parent->effective_cpus,
1153 parent->subparts_cpus);
1154 cpumask_and(new_cpus, new_cpus, cs->cpus_allowed);
1155 cpumask_and(new_cpus, new_cpus, cpu_active_mask);
1156 } else {
1157 cpumask_and(new_cpus, cs->cpus_allowed, parent->effective_cpus);
1158 }
1159 }
1160
1161
1162
1163
1164 enum subparts_cmd {
1165 partcmd_enable,
1166 partcmd_disable,
1167 partcmd_update,
1168 };
1169
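/*
 * update_parent_subparts_cpumask - update the parent's subparts_cpus mask
 * @cpuset: the cpuset that is (or wants to become) a partition root
 * @cmd: partcmd_enable, partcmd_disable or partcmd_update
 * @newmask: for partcmd_update, the proposed new cpus_allowed (may be NULL)
 * @tmp: temporary cpumasks
 *
 * CPUs given to a partition root are moved from the parent's effective_cpus
 * into the parent's subparts_cpus:
 *
 *   partcmd_enable  - move all of @cpuset's cpus_allowed into subparts_cpus;
 *                     fails unless cpus_allowed is a non-empty proper subset
 *                     of the parent's effective_cpus.
 *   partcmd_disable - return @cpuset's CPUs to the parent's effective_cpus.
 *   partcmd_update  - adjust subparts_cpus after a cpus_allowed change; if
 *                     the parent would be left with no effective CPUs, the
 *                     partition drops into the PRS_ERROR state instead.
 *
 * Return: 0 if no further action is needed, 1 if the parent's effective_cpus
 * changed and the caller should update the parent's tasks, or a negative
 * errno on an invalid request.  Called with cpuset_rwsem held.
 */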
1210 static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
1211 struct cpumask *newmask,
1212 struct tmpmasks *tmp)
1213 {
1214 struct cpuset *parent = parent_cs(cpuset);
1215 int adding;
1216 int deleting;
1217 int old_prs, new_prs;
1218 bool part_error = false;
1219
1220 percpu_rwsem_assert_held(&cpuset_rwsem);
1221
1222
1223
1224
1225
1226
1227 if (!is_partition_root(parent) ||
1228 (newmask && cpumask_empty(newmask)) ||
1229 (!newmask && cpumask_empty(cpuset->cpus_allowed)))
1230 return -EINVAL;
1231
1232
1233
1234
1235
1236 if ((cmd != partcmd_update) && css_has_online_children(&cpuset->css))
1237 return -EBUSY;
1238
1239
1240
1241
1242
1243
1244 if ((cmd == partcmd_enable) &&
1245 (!cpumask_subset(cpuset->cpus_allowed, parent->effective_cpus) ||
1246 cpumask_equal(cpuset->cpus_allowed, parent->effective_cpus)))
1247 return -EINVAL;
1248
1249
1250
1251
1252 adding = deleting = false;
1253 old_prs = new_prs = cpuset->partition_root_state;
1254 if (cmd == partcmd_enable) {
1255 cpumask_copy(tmp->addmask, cpuset->cpus_allowed);
1256 adding = true;
1257 } else if (cmd == partcmd_disable) {
1258 deleting = cpumask_and(tmp->delmask, cpuset->cpus_allowed,
1259 parent->subparts_cpus);
1260 } else if (newmask) {
1261
1262
1263
1264
1265
1266
1267
1268 cpumask_andnot(tmp->delmask, cpuset->cpus_allowed, newmask);
1269 deleting = cpumask_and(tmp->delmask, tmp->delmask,
1270 parent->subparts_cpus);
1271
1272 cpumask_and(tmp->addmask, newmask, parent->effective_cpus);
1273 adding = cpumask_andnot(tmp->addmask, tmp->addmask,
1274 parent->subparts_cpus);
1275
1276
1277
1278 if (adding &&
1279 cpumask_equal(parent->effective_cpus, tmp->addmask)) {
1280 if (!deleting)
1281 return -EINVAL;
1282
1283
1284
1285
1286
1287 if (!cpumask_and(tmp->addmask, tmp->delmask,
1288 cpu_active_mask))
1289 return -EINVAL;
1290 cpumask_copy(tmp->addmask, parent->effective_cpus);
1291 }
1292 } else {
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302 adding = cpumask_and(tmp->addmask, cpuset->cpus_allowed,
1303 parent->effective_cpus);
1304 part_error = cpumask_equal(tmp->addmask,
1305 parent->effective_cpus);
1306 }
1307
1308 if (cmd == partcmd_update) {
1309 int prev_prs = cpuset->partition_root_state;
1310
1311
1312
1313
1314
1315 switch (cpuset->partition_root_state) {
1316 case PRS_ENABLED:
1317 if (part_error)
1318 new_prs = PRS_ERROR;
1319 break;
1320 case PRS_ERROR:
1321 if (!part_error)
1322 new_prs = PRS_ENABLED;
1323 break;
1324 }
1325
1326
1327
1328 part_error = (prev_prs == PRS_ERROR);
1329 }
1330
1331 if (!part_error && (new_prs == PRS_ERROR))
1332 return 0;
1333
1334 if (new_prs == PRS_ERROR) {
1335
1336
1337
1338 adding = false;
1339 deleting = cpumask_and(tmp->delmask, cpuset->cpus_allowed,
1340 parent->subparts_cpus);
1341 }
1342
1343 if (!adding && !deleting && (new_prs == old_prs))
1344 return 0;
1345
1346
1347
1348
1349
1350
1351 spin_lock_irq(&callback_lock);
1352 if (adding) {
1353 cpumask_or(parent->subparts_cpus,
1354 parent->subparts_cpus, tmp->addmask);
1355 cpumask_andnot(parent->effective_cpus,
1356 parent->effective_cpus, tmp->addmask);
1357 }
1358 if (deleting) {
1359 cpumask_andnot(parent->subparts_cpus,
1360 parent->subparts_cpus, tmp->delmask);
1361
1362
1363
1364 cpumask_and(tmp->delmask, tmp->delmask, cpu_active_mask);
1365 cpumask_or(parent->effective_cpus,
1366 parent->effective_cpus, tmp->delmask);
1367 }
1368
1369 parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus);
1370
1371 if (old_prs != new_prs)
1372 cpuset->partition_root_state = new_prs;
1373
1374 spin_unlock_irq(&callback_lock);
1375 notify_partition_change(cpuset, old_prs, new_prs);
1376
1377 return cmd == partcmd_update;
1378 }
1379
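/*
 * update_cpumasks_hier - propagate a CPU change down a cpuset subtree
 * @cs: root of the subtree whose cpus_allowed changed
 * @tmp: temporary cpumasks
 *
 * Walk the subtree, recompute each descendant's effective_cpus (and its
 * partition state where necessary), update the affected tasks, and rebuild
 * the sched domains at the end if any load-balanced cpuset changed.
 *
 * Called with cpuset_rwsem held; RCU is dropped around the per-cpuset work.
 */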
1392 static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
1393 {
1394 struct cpuset *cp;
1395 struct cgroup_subsys_state *pos_css;
1396 bool need_rebuild_sched_domains = false;
1397 int old_prs, new_prs;
1398
1399 rcu_read_lock();
1400 cpuset_for_each_descendant_pre(cp, pos_css, cs) {
1401 struct cpuset *parent = parent_cs(cp);
1402
1403 compute_effective_cpumask(tmp->new_cpus, cp, parent);
1404
1405
1406
1407
1408
1409 if (is_in_v2_mode() && cpumask_empty(tmp->new_cpus)) {
1410 cpumask_copy(tmp->new_cpus, parent->effective_cpus);
1411 if (!cp->use_parent_ecpus) {
1412 cp->use_parent_ecpus = true;
1413 parent->child_ecpus_count++;
1414 }
1415 } else if (cp->use_parent_ecpus) {
1416 cp->use_parent_ecpus = false;
1417 WARN_ON_ONCE(!parent->child_ecpus_count);
1418 parent->child_ecpus_count--;
1419 }
1420
1421
1422
1423
1424
1425 if (!cp->partition_root_state &&
1426 cpumask_equal(tmp->new_cpus, cp->effective_cpus)) {
1427 pos_css = css_rightmost_descendant(pos_css);
1428 continue;
1429 }
1430
1431
1432
1433
1434
1435
1436
1437 old_prs = new_prs = cp->partition_root_state;
1438 if ((cp != cs) && old_prs) {
1439 switch (parent->partition_root_state) {
1440 case PRS_DISABLED:
1441
1442
1443
1444
1445
1446 WARN_ON_ONCE(cp->partition_root_state
1447 != PRS_ERROR);
1448 new_prs = PRS_DISABLED;
1449
1450
1451
1452
1453
1454
1455
1456
1457 clear_bit(CS_CPU_EXCLUSIVE, &cp->flags);
1458 break;
1459
1460 case PRS_ENABLED:
1461 if (update_parent_subparts_cpumask(cp, partcmd_update, NULL, tmp))
1462 update_tasks_cpumask(parent);
1463 break;
1464
1465 case PRS_ERROR:
1466
1467
1468
1469 new_prs = PRS_ERROR;
1470 break;
1471 }
1472 }
1473
1474 if (!css_tryget_online(&cp->css))
1475 continue;
1476 rcu_read_unlock();
1477
1478 spin_lock_irq(&callback_lock);
1479
1480 cpumask_copy(cp->effective_cpus, tmp->new_cpus);
1481 if (cp->nr_subparts_cpus && (new_prs != PRS_ENABLED)) {
1482 cp->nr_subparts_cpus = 0;
1483 cpumask_clear(cp->subparts_cpus);
1484 } else if (cp->nr_subparts_cpus) {
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494 cpumask_andnot(cp->effective_cpus, cp->effective_cpus,
1495 cp->subparts_cpus);
1496 if (cpumask_empty(cp->effective_cpus)) {
1497 cpumask_copy(cp->effective_cpus, tmp->new_cpus);
1498 cpumask_clear(cp->subparts_cpus);
1499 cp->nr_subparts_cpus = 0;
1500 } else if (!cpumask_subset(cp->subparts_cpus,
1501 tmp->new_cpus)) {
1502 cpumask_andnot(cp->subparts_cpus,
1503 cp->subparts_cpus, tmp->new_cpus);
1504 cp->nr_subparts_cpus
1505 = cpumask_weight(cp->subparts_cpus);
1506 }
1507 }
1508
1509 if (new_prs != old_prs)
1510 cp->partition_root_state = new_prs;
1511
1512 spin_unlock_irq(&callback_lock);
1513 notify_partition_change(cp, old_prs, new_prs);
1514
1515 WARN_ON(!is_in_v2_mode() &&
1516 !cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
1517
1518 update_tasks_cpumask(cp);
1519
1520
1521
1522
1523
1524
1525
1526 if (!cpumask_empty(cp->cpus_allowed) &&
1527 is_sched_load_balance(cp) &&
1528 (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
1529 is_partition_root(cp)))
1530 need_rebuild_sched_domains = true;
1531
1532 rcu_read_lock();
1533 css_put(&cp->css);
1534 }
1535 rcu_read_unlock();
1536
1537 if (need_rebuild_sched_domains)
1538 rebuild_sched_domains_locked();
1539 }
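
/*
 * update_sibling_cpumasks - refresh siblings that use the parent's
 *			     effective_cpus
 * @parent: parent cpuset
 * @cs: the sibling that triggered the change (skipped)
 * @tmp: temporary cpumasks
 *
 * Siblings with use_parent_ecpus set inherit the parent's effective_cpus, so
 * they must be recomputed whenever that mask changes.
 */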
1547 static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
1548 struct tmpmasks *tmp)
1549 {
1550 struct cpuset *sibling;
1551 struct cgroup_subsys_state *pos_css;
1552
1553 percpu_rwsem_assert_held(&cpuset_rwsem);
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563 rcu_read_lock();
1564 cpuset_for_each_child(sibling, pos_css, parent) {
1565 if (sibling == cs)
1566 continue;
1567 if (!sibling->use_parent_ecpus)
1568 continue;
1569 if (!css_tryget_online(&sibling->css))
1570 continue;
1571
1572 rcu_read_unlock();
1573 update_cpumasks_hier(sibling, tmp);
1574 rcu_read_lock();
1575 css_put(&sibling->css);
1576 }
1577 rcu_read_unlock();
1578 }
1579
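/*
 * update_cpumask - handle a write to the "cpus" file
 * @cs: the cpuset being changed
 * @trialcs: trial cpuset holding the parsed new mask
 * @buf: the user-supplied CPU list ("" clears the mask)
 *
 * Validates the new mask, updates cpus_allowed (and the parent's
 * subparts_cpus for a partition root), then propagates the change with
 * update_cpumasks_hier().
 */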
1586 static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
1587 const char *buf)
1588 {
1589 int retval;
1590 struct tmpmasks tmp;
1591
1592
1593 if (cs == &top_cpuset)
1594 return -EACCES;
1595
1596
1597
1598
1599
1600
1601
1602 if (!*buf) {
1603 cpumask_clear(trialcs->cpus_allowed);
1604 } else {
1605 retval = cpulist_parse(buf, trialcs->cpus_allowed);
1606 if (retval < 0)
1607 return retval;
1608
1609 if (!cpumask_subset(trialcs->cpus_allowed,
1610 top_cpuset.cpus_allowed))
1611 return -EINVAL;
1612 }
1613
1614
1615 if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
1616 return 0;
1617
1618 retval = validate_change(cs, trialcs);
1619 if (retval < 0)
1620 return retval;
1621
1622 #ifdef CONFIG_CPUMASK_OFFSTACK
1623
1624
1625
1626
1627 tmp.addmask = trialcs->subparts_cpus;
1628 tmp.delmask = trialcs->effective_cpus;
1629 tmp.new_cpus = trialcs->cpus_allowed;
1630 #endif
1631
1632 if (cs->partition_root_state) {
1633
1634 if (cpumask_empty(trialcs->cpus_allowed))
1635 return -EINVAL;
1636 if (update_parent_subparts_cpumask(cs, partcmd_update,
1637 trialcs->cpus_allowed, &tmp) < 0)
1638 return -EINVAL;
1639 }
1640
1641 spin_lock_irq(&callback_lock);
1642 cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
1643
1644
1645
1646
1647 if (cs->nr_subparts_cpus) {
1648 cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed);
1649 cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus);
1650 }
1651 spin_unlock_irq(&callback_lock);
1652
1653 update_cpumasks_hier(cs, &tmp);
1654
1655 if (cs->partition_root_state) {
1656 struct cpuset *parent = parent_cs(cs);
1657
1658
1659
1660
1661
1662 if (parent->child_ecpus_count)
1663 update_sibling_cpumasks(parent, cs, &tmp);
1664 }
1665 return 0;
1666 }
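
/*
 * Memory migration of a task's mm to a new set of nodes is done
 * asynchronously: cpuset_migrate_mm() queues a cpuset_migrate_mm_work item
 * on cpuset_migrate_mm_wq, and cpuset_post_attach() flushes the queue.  The
 * mm reference is dropped once the migration (or the no-op path) completes.
 */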
1676 struct cpuset_migrate_mm_work {
1677 struct work_struct work;
1678 struct mm_struct *mm;
1679 nodemask_t from;
1680 nodemask_t to;
1681 };
1682
1683 static void cpuset_migrate_mm_workfn(struct work_struct *work)
1684 {
1685 struct cpuset_migrate_mm_work *mwork =
1686 container_of(work, struct cpuset_migrate_mm_work, work);
1687
1688
1689 do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL);
1690 mmput(mwork->mm);
1691 kfree(mwork);
1692 }
1693
1694 static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
1695 const nodemask_t *to)
1696 {
1697 struct cpuset_migrate_mm_work *mwork;
1698
1699 if (nodes_equal(*from, *to)) {
1700 mmput(mm);
1701 return;
1702 }
1703
1704 mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
1705 if (mwork) {
1706 mwork->mm = mm;
1707 mwork->from = *from;
1708 mwork->to = *to;
1709 INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn);
1710 queue_work(cpuset_migrate_mm_wq, &mwork->work);
1711 } else {
1712 mmput(mm);
1713 }
1714 }
1715
1716 static void cpuset_post_attach(void)
1717 {
1718 flush_workqueue(cpuset_migrate_mm_wq);
1719 }
1720
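/*
 * cpuset_change_task_nodemask - update a task's mems_allowed and mempolicy
 * @tsk: the task to modify
 * @newmems: the new memory node mask
 *
 * The update is done under task_lock() and the mems_allowed_seq seqcount so
 * the page allocator can detect a concurrent change.  The old and new masks
 * are temporarily merged (nodes_or) before the mempolicy rebind so the task
 * always sees a usable nodemask.
 */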
1731 static void cpuset_change_task_nodemask(struct task_struct *tsk,
1732 nodemask_t *newmems)
1733 {
1734 task_lock(tsk);
1735
1736 local_irq_disable();
1737 write_seqcount_begin(&tsk->mems_allowed_seq);
1738
1739 nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
1740 mpol_rebind_task(tsk, newmems);
1741 tsk->mems_allowed = *newmems;
1742
1743 write_seqcount_end(&tsk->mems_allowed_seq);
1744 local_irq_enable();
1745
1746 task_unlock(tsk);
1747 }
1748
1749 static void *cpuset_being_rebound;
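
/*
 * update_tasks_nodemask - apply the cpuset's memory placement to its tasks
 * @cs: the cpuset whose tasks are updated
 *
 * Updates each task's mems_allowed and mempolicy, rebinds mempolicies in the
 * task's mm and, when memory_migrate is set, migrates the mm's pages from
 * cs->old_mems_allowed to the new effective mems.
 */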
1759 static void update_tasks_nodemask(struct cpuset *cs)
1760 {
1761 static nodemask_t newmems;
1762 struct css_task_iter it;
1763 struct task_struct *task;
1764
1765 cpuset_being_rebound = cs;
1766
1767 guarantee_online_mems(cs, &newmems);
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779 css_task_iter_start(&cs->css, 0, &it);
1780 while ((task = css_task_iter_next(&it))) {
1781 struct mm_struct *mm;
1782 bool migrate;
1783
1784 cpuset_change_task_nodemask(task, &newmems);
1785
1786 mm = get_task_mm(task);
1787 if (!mm)
1788 continue;
1789
1790 migrate = is_memory_migrate(cs);
1791
1792 mpol_rebind_mm(mm, &cs->mems_allowed);
1793 if (migrate)
1794 cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems);
1795 else
1796 mmput(mm);
1797 }
1798 css_task_iter_end(&it);
1799
1800
1801
1802
1803
1804 cs->old_mems_allowed = newmems;
1805
1806
1807 cpuset_being_rebound = NULL;
1808 }
1809
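/*
 * update_nodemasks_hier - propagate a memory-node change down a subtree
 * @cs: root of the subtree whose mems_allowed changed
 * @new_mems: scratch nodemask
 *
 * Recomputes each descendant's effective_mems (mems_allowed & the parent's
 * effective_mems, falling back to the parent's mask on v2 when empty) and
 * updates the affected tasks.
 */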
1822 static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
1823 {
1824 struct cpuset *cp;
1825 struct cgroup_subsys_state *pos_css;
1826
1827 rcu_read_lock();
1828 cpuset_for_each_descendant_pre(cp, pos_css, cs) {
1829 struct cpuset *parent = parent_cs(cp);
1830
1831 nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems);
1832
1833
1834
1835
1836
1837 if (is_in_v2_mode() && nodes_empty(*new_mems))
1838 *new_mems = parent->effective_mems;
1839
1840
1841 if (nodes_equal(*new_mems, cp->effective_mems)) {
1842 pos_css = css_rightmost_descendant(pos_css);
1843 continue;
1844 }
1845
1846 if (!css_tryget_online(&cp->css))
1847 continue;
1848 rcu_read_unlock();
1849
1850 spin_lock_irq(&callback_lock);
1851 cp->effective_mems = *new_mems;
1852 spin_unlock_irq(&callback_lock);
1853
1854 WARN_ON(!is_in_v2_mode() &&
1855 !nodes_equal(cp->mems_allowed, cp->effective_mems));
1856
1857 update_tasks_nodemask(cp);
1858
1859 rcu_read_lock();
1860 css_put(&cp->css);
1861 }
1862 rcu_read_unlock();
1863 }
1864
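/*
 * update_nodemask - handle a write to the "mems" file
 * @cs: the cpuset being changed
 * @trialcs: trial cpuset holding the parsed new nodemask
 * @buf: the user-supplied node list ("" clears the mask)
 *
 * Validates the new nodemask, flags movable-only configurations, stores the
 * mask and propagates the change with update_nodemasks_hier().
 */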
1878 static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
1879 const char *buf)
1880 {
1881 int retval;
1882
1883
1884
1885
1886
1887 if (cs == &top_cpuset) {
1888 retval = -EACCES;
1889 goto done;
1890 }
1891
1892
1893
1894
1895
1896
1897
1898 if (!*buf) {
1899 nodes_clear(trialcs->mems_allowed);
1900 } else {
1901 retval = nodelist_parse(buf, trialcs->mems_allowed);
1902 if (retval < 0)
1903 goto done;
1904
1905 if (!nodes_subset(trialcs->mems_allowed,
1906 top_cpuset.mems_allowed)) {
1907 retval = -EINVAL;
1908 goto done;
1909 }
1910 }
1911
1912 if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) {
1913 retval = 0;
1914 goto done;
1915 }
1916 retval = validate_change(cs, trialcs);
1917 if (retval < 0)
1918 goto done;
1919
1920 check_insane_mems_config(&trialcs->mems_allowed);
1921
1922 spin_lock_irq(&callback_lock);
1923 cs->mems_allowed = trialcs->mems_allowed;
1924 spin_unlock_irq(&callback_lock);
1925
1926
1927 update_nodemasks_hier(cs, &trialcs->mems_allowed);
1928 done:
1929 return retval;
1930 }
1931
1932 bool current_cpuset_is_being_rebound(void)
1933 {
1934 bool ret;
1935
1936 rcu_read_lock();
1937 ret = task_cs(current) == cpuset_being_rebound;
1938 rcu_read_unlock();
1939
1940 return ret;
1941 }
1942
1943 static int update_relax_domain_level(struct cpuset *cs, s64 val)
1944 {
1945 #ifdef CONFIG_SMP
1946 if (val < -1 || val >= sched_domain_level_max)
1947 return -EINVAL;
1948 #endif
1949
1950 if (val != cs->relax_domain_level) {
1951 cs->relax_domain_level = val;
1952 if (!cpumask_empty(cs->cpus_allowed) &&
1953 is_sched_load_balance(cs))
1954 rebuild_sched_domains_locked();
1955 }
1956
1957 return 0;
1958 }
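
/*
 * update_tasks_flags - re-apply the cpuset's spread flags to all its tasks
 * @cs: the cpuset whose tasks are updated
 */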
1968 static void update_tasks_flags(struct cpuset *cs)
1969 {
1970 struct css_task_iter it;
1971 struct task_struct *task;
1972
1973 css_task_iter_start(&cs->css, 0, &it);
1974 while ((task = css_task_iter_next(&it)))
1975 cpuset_update_task_spread_flag(cs, task);
1976 css_task_iter_end(&it);
1977 }
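
/*
 * update_flag - set or clear one cpuset flag bit
 * @bit: the flag to change
 * @cs: the cpuset being changed
 * @turning_on: non-zero to set the flag, zero to clear it
 *
 * Validates the change on a trial cpuset, then rebuilds the sched domains if
 * sched_load_balance changed and refreshes task spread flags if a spread
 * flag changed.  Called with cpuset_rwsem held.
 */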
1988 static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
1989 int turning_on)
1990 {
1991 struct cpuset *trialcs;
1992 int balance_flag_changed;
1993 int spread_flag_changed;
1994 int err;
1995
1996 trialcs = alloc_trial_cpuset(cs);
1997 if (!trialcs)
1998 return -ENOMEM;
1999
2000 if (turning_on)
2001 set_bit(bit, &trialcs->flags);
2002 else
2003 clear_bit(bit, &trialcs->flags);
2004
2005 err = validate_change(cs, trialcs);
2006 if (err < 0)
2007 goto out;
2008
2009 balance_flag_changed = (is_sched_load_balance(cs) !=
2010 is_sched_load_balance(trialcs));
2011
2012 spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
2013 || (is_spread_page(cs) != is_spread_page(trialcs)));
2014
2015 spin_lock_irq(&callback_lock);
2016 cs->flags = trialcs->flags;
2017 spin_unlock_irq(&callback_lock);
2018
2019 if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
2020 rebuild_sched_domains_locked();
2021
2022 if (spread_flag_changed)
2023 update_tasks_flags(cs);
2024 out:
2025 free_cpuset(trialcs);
2026 return err;
2027 }
2028
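/*
 * update_prstate - change the partition_root_state of a cpuset
 * @cs: the cpuset to update
 * @new_prs: PRS_ENABLED to make the cpuset a partition root,
 *	     PRS_DISABLED to turn it back into a regular member
 *
 * Enabling also sets CS_CPU_EXCLUSIVE and moves the cpuset's CPUs into the
 * parent's subparts_cpus; disabling undoes both.  Sched domains are rebuilt
 * on success.  Called with cpuset_rwsem held.
 */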
2036 static int update_prstate(struct cpuset *cs, int new_prs)
2037 {
2038 int err, old_prs = cs->partition_root_state;
2039 struct cpuset *parent = parent_cs(cs);
2040 struct tmpmasks tmpmask;
2041
2042 if (old_prs == new_prs)
2043 return 0;
2044
2045
2046
2047
2048
2049 if (new_prs && (old_prs == PRS_ERROR))
2050 return -EINVAL;
2051
2052 if (alloc_cpumasks(NULL, &tmpmask))
2053 return -ENOMEM;
2054
2055 err = -EINVAL;
2056 if (!old_prs) {
2057
2058
2059
2060
2061
2062 if (cpumask_empty(cs->cpus_allowed))
2063 goto out;
2064
2065 err = update_flag(CS_CPU_EXCLUSIVE, cs, 1);
2066 if (err)
2067 goto out;
2068
2069 err = update_parent_subparts_cpumask(cs, partcmd_enable,
2070 NULL, &tmpmask);
2071 if (err) {
2072 update_flag(CS_CPU_EXCLUSIVE, cs, 0);
2073 goto out;
2074 }
2075 } else {
2076
2077
2078
2079
2080 if (old_prs == PRS_ERROR) {
2081 update_flag(CS_CPU_EXCLUSIVE, cs, 0);
2082 err = 0;
2083 goto out;
2084 }
2085
2086 err = update_parent_subparts_cpumask(cs, partcmd_disable,
2087 NULL, &tmpmask);
2088 if (err)
2089 goto out;
2090
2091
2092 update_flag(CS_CPU_EXCLUSIVE, cs, 0);
2093 }
2094
2095
2096
2097
2098
2099 if (parent != &top_cpuset)
2100 update_tasks_cpumask(parent);
2101
2102 if (parent->child_ecpus_count)
2103 update_sibling_cpumasks(parent, cs, &tmpmask);
2104
2105 rebuild_sched_domains_locked();
2106 out:
2107 if (!err) {
2108 spin_lock_irq(&callback_lock);
2109 cs->partition_root_state = new_prs;
2110 spin_unlock_irq(&callback_lock);
2111 notify_partition_change(cs, old_prs, new_prs);
2112 }
2113
2114 free_cpumasks(NULL, &tmpmask);
2115 return err;
2116 }
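
/*
 * Frequency meter - how often are memory_pressure events occurring.
 *
 * fmeter_markevent() counts an event; fmeter_getrate() reports a filtered
 * events-per-second rate.  fmeter_update() applies a simple exponential
 * decay once per elapsed second: val = (FM_COEF * val) / FM_SCALE, i.e. the
 * rate is multiplied by 0.933 per second, giving a half-life of roughly ten
 * seconds (0.933^10 ~= 0.5).  At most FM_MAXTICKS seconds of decay are
 * applied in one update, and the pending event count is capped at FM_MAXCNT.
 */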
2163 #define FM_COEF 933
2164 #define FM_MAXTICKS ((u32)99)
2165 #define FM_MAXCNT 1000000
2166 #define FM_SCALE 1000
2167
2168
2169 static void fmeter_init(struct fmeter *fmp)
2170 {
2171 fmp->cnt = 0;
2172 fmp->val = 0;
2173 fmp->time = 0;
2174 spin_lock_init(&fmp->lock);
2175 }
2176
2177
2178 static void fmeter_update(struct fmeter *fmp)
2179 {
2180 time64_t now;
2181 u32 ticks;
2182
2183 now = ktime_get_seconds();
2184 ticks = now - fmp->time;
2185
2186 if (ticks == 0)
2187 return;
2188
2189 ticks = min(FM_MAXTICKS, ticks);
2190 while (ticks-- > 0)
2191 fmp->val = (FM_COEF * fmp->val) / FM_SCALE;
2192 fmp->time = now;
2193
2194 fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE;
2195 fmp->cnt = 0;
2196 }
2197
2198
2199 static void fmeter_markevent(struct fmeter *fmp)
2200 {
2201 spin_lock(&fmp->lock);
2202 fmeter_update(fmp);
2203 fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE);
2204 spin_unlock(&fmp->lock);
2205 }
2206
2207
2208 static int fmeter_getrate(struct fmeter *fmp)
2209 {
2210 int val;
2211
2212 spin_lock(&fmp->lock);
2213 fmeter_update(fmp);
2214 val = fmp->val;
2215 spin_unlock(&fmp->lock);
2216 return val;
2217 }
2218
2219 static struct cpuset *cpuset_attach_old_cs;
2220
2221
2222 static int cpuset_can_attach(struct cgroup_taskset *tset)
2223 {
2224 struct cgroup_subsys_state *css;
2225 struct cpuset *cs;
2226 struct task_struct *task;
2227 int ret;
2228
2229
2230 cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
2231 cs = css_cs(css);
2232
2233 percpu_down_write(&cpuset_rwsem);
2234
2235
2236 ret = -ENOSPC;
2237 if (!is_in_v2_mode() &&
2238 (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
2239 goto out_unlock;
2240
2241 cgroup_taskset_for_each(task, css, tset) {
2242 ret = task_can_attach(task, cs->effective_cpus);
2243 if (ret)
2244 goto out_unlock;
2245 ret = security_task_setscheduler(task);
2246 if (ret)
2247 goto out_unlock;
2248 }
2249
2250
2251
2252
2253
2254 cs->attach_in_progress++;
2255 ret = 0;
2256 out_unlock:
2257 percpu_up_write(&cpuset_rwsem);
2258 return ret;
2259 }
2260
2261 static void cpuset_cancel_attach(struct cgroup_taskset *tset)
2262 {
2263 struct cgroup_subsys_state *css;
2264
2265 cgroup_taskset_first(tset, &css);
2266
2267 percpu_down_write(&cpuset_rwsem);
2268 css_cs(css)->attach_in_progress--;
2269 percpu_up_write(&cpuset_rwsem);
2270 }
2271
2272
2273
2274
2275
2276
2277 static cpumask_var_t cpus_attach;
2278
2279 static void cpuset_attach(struct cgroup_taskset *tset)
2280 {
2281
2282 static nodemask_t cpuset_attach_nodemask_to;
2283 struct task_struct *task;
2284 struct task_struct *leader;
2285 struct cgroup_subsys_state *css;
2286 struct cpuset *cs;
2287 struct cpuset *oldcs = cpuset_attach_old_cs;
2288
2289 cgroup_taskset_first(tset, &css);
2290 cs = css_cs(css);
2291
2292 lockdep_assert_cpus_held();
2293 percpu_down_write(&cpuset_rwsem);
2294
2295 guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
2296
2297 cgroup_taskset_for_each(task, css, tset) {
2298 if (cs != &top_cpuset)
2299 guarantee_online_cpus(task, cpus_attach);
2300 else
2301 cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
2302
2303
2304
2305
2306 WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
2307
2308 cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
2309 cpuset_update_task_spread_flag(cs, task);
2310 }
2311
2312
2313
2314
2315
2316 cpuset_attach_nodemask_to = cs->effective_mems;
2317 cgroup_taskset_for_each_leader(leader, css, tset) {
2318 struct mm_struct *mm = get_task_mm(leader);
2319
2320 if (mm) {
2321 mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331 if (is_memory_migrate(cs))
2332 cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
2333 &cpuset_attach_nodemask_to);
2334 else
2335 mmput(mm);
2336 }
2337 }
2338
2339 cs->old_mems_allowed = cpuset_attach_nodemask_to;
2340
2341 cs->attach_in_progress--;
2342 if (!cs->attach_in_progress)
2343 wake_up(&cpuset_attach_wq);
2344
2345 percpu_up_write(&cpuset_rwsem);
2346 }
2347
2348
2349
2350 typedef enum {
2351 FILE_MEMORY_MIGRATE,
2352 FILE_CPULIST,
2353 FILE_MEMLIST,
2354 FILE_EFFECTIVE_CPULIST,
2355 FILE_EFFECTIVE_MEMLIST,
2356 FILE_SUBPARTS_CPULIST,
2357 FILE_CPU_EXCLUSIVE,
2358 FILE_MEM_EXCLUSIVE,
2359 FILE_MEM_HARDWALL,
2360 FILE_SCHED_LOAD_BALANCE,
2361 FILE_PARTITION_ROOT,
2362 FILE_SCHED_RELAX_DOMAIN_LEVEL,
2363 FILE_MEMORY_PRESSURE_ENABLED,
2364 FILE_MEMORY_PRESSURE,
2365 FILE_SPREAD_PAGE,
2366 FILE_SPREAD_SLAB,
2367 } cpuset_filetype_t;
2368
2369 static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
2370 u64 val)
2371 {
2372 struct cpuset *cs = css_cs(css);
2373 cpuset_filetype_t type = cft->private;
2374 int retval = 0;
2375
2376 cpus_read_lock();
2377 percpu_down_write(&cpuset_rwsem);
2378 if (!is_cpuset_online(cs)) {
2379 retval = -ENODEV;
2380 goto out_unlock;
2381 }
2382
2383 switch (type) {
2384 case FILE_CPU_EXCLUSIVE:
2385 retval = update_flag(CS_CPU_EXCLUSIVE, cs, val);
2386 break;
2387 case FILE_MEM_EXCLUSIVE:
2388 retval = update_flag(CS_MEM_EXCLUSIVE, cs, val);
2389 break;
2390 case FILE_MEM_HARDWALL:
2391 retval = update_flag(CS_MEM_HARDWALL, cs, val);
2392 break;
2393 case FILE_SCHED_LOAD_BALANCE:
2394 retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
2395 break;
2396 case FILE_MEMORY_MIGRATE:
2397 retval = update_flag(CS_MEMORY_MIGRATE, cs, val);
2398 break;
2399 case FILE_MEMORY_PRESSURE_ENABLED:
2400 cpuset_memory_pressure_enabled = !!val;
2401 break;
2402 case FILE_SPREAD_PAGE:
2403 retval = update_flag(CS_SPREAD_PAGE, cs, val);
2404 break;
2405 case FILE_SPREAD_SLAB:
2406 retval = update_flag(CS_SPREAD_SLAB, cs, val);
2407 break;
2408 default:
2409 retval = -EINVAL;
2410 break;
2411 }
2412 out_unlock:
2413 percpu_up_write(&cpuset_rwsem);
2414 cpus_read_unlock();
2415 return retval;
2416 }
2417
2418 static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
2419 s64 val)
2420 {
2421 struct cpuset *cs = css_cs(css);
2422 cpuset_filetype_t type = cft->private;
2423 int retval = -ENODEV;
2424
2425 cpus_read_lock();
2426 percpu_down_write(&cpuset_rwsem);
2427 if (!is_cpuset_online(cs))
2428 goto out_unlock;
2429
2430 switch (type) {
2431 case FILE_SCHED_RELAX_DOMAIN_LEVEL:
2432 retval = update_relax_domain_level(cs, val);
2433 break;
2434 default:
2435 retval = -EINVAL;
2436 break;
2437 }
2438 out_unlock:
2439 percpu_up_write(&cpuset_rwsem);
2440 cpus_read_unlock();
2441 return retval;
2442 }
2443
2444
2445
2446
2447 static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
2448 char *buf, size_t nbytes, loff_t off)
2449 {
2450 struct cpuset *cs = css_cs(of_css(of));
2451 struct cpuset *trialcs;
2452 int retval = -ENODEV;
2453
2454 buf = strstrip(buf);
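
	/*
	 * A racing CPU/memory hot-unplug may have emptied this cpuset, in
	 * which case cpuset_hotplug_work asynchronously fixes up the
	 * configuration and moves its tasks away.  Writes to "cpus"/"mems"
	 * can restore the cpuset's resources, so flush that work first.
	 *
	 * Because the hotplug work calls back into cgroup core, waiting for
	 * it while holding the kernfs active protection could deadlock;
	 * break the protection here.  That is safe because we re-check
	 * is_cpuset_online() after taking cpuset_rwsem.
	 */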
2475 css_get(&cs->css);
2476 kernfs_break_active_protection(of->kn);
2477 flush_work(&cpuset_hotplug_work);
2478
2479 cpus_read_lock();
2480 percpu_down_write(&cpuset_rwsem);
2481 if (!is_cpuset_online(cs))
2482 goto out_unlock;
2483
2484 trialcs = alloc_trial_cpuset(cs);
2485 if (!trialcs) {
2486 retval = -ENOMEM;
2487 goto out_unlock;
2488 }
2489
2490 switch (of_cft(of)->private) {
2491 case FILE_CPULIST:
2492 retval = update_cpumask(cs, trialcs, buf);
2493 break;
2494 case FILE_MEMLIST:
2495 retval = update_nodemask(cs, trialcs, buf);
2496 break;
2497 default:
2498 retval = -EINVAL;
2499 break;
2500 }
2501
2502 free_cpuset(trialcs);
2503 out_unlock:
2504 percpu_up_write(&cpuset_rwsem);
2505 cpus_read_unlock();
2506 kernfs_unbreak_active_protection(of->kn);
2507 css_put(&cs->css);
2508 flush_workqueue(cpuset_migrate_mm_wq);
2509 return retval ?: nbytes;
2510 }
2520 static int cpuset_common_seq_show(struct seq_file *sf, void *v)
2521 {
2522 struct cpuset *cs = css_cs(seq_css(sf));
2523 cpuset_filetype_t type = seq_cft(sf)->private;
2524 int ret = 0;
2525
2526 spin_lock_irq(&callback_lock);
2527
2528 switch (type) {
2529 case FILE_CPULIST:
2530 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed));
2531 break;
2532 case FILE_MEMLIST:
2533 seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed));
2534 break;
2535 case FILE_EFFECTIVE_CPULIST:
2536 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_cpus));
2537 break;
2538 case FILE_EFFECTIVE_MEMLIST:
2539 seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems));
2540 break;
2541 case FILE_SUBPARTS_CPULIST:
2542 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->subparts_cpus));
2543 break;
2544 default:
2545 ret = -EINVAL;
2546 }
2547
2548 spin_unlock_irq(&callback_lock);
2549 return ret;
2550 }
2551
2552 static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
2553 {
2554 struct cpuset *cs = css_cs(css);
2555 cpuset_filetype_t type = cft->private;
2556 switch (type) {
2557 case FILE_CPU_EXCLUSIVE:
2558 return is_cpu_exclusive(cs);
2559 case FILE_MEM_EXCLUSIVE:
2560 return is_mem_exclusive(cs);
2561 case FILE_MEM_HARDWALL:
2562 return is_mem_hardwall(cs);
2563 case FILE_SCHED_LOAD_BALANCE:
2564 return is_sched_load_balance(cs);
2565 case FILE_MEMORY_MIGRATE:
2566 return is_memory_migrate(cs);
2567 case FILE_MEMORY_PRESSURE_ENABLED:
2568 return cpuset_memory_pressure_enabled;
2569 case FILE_MEMORY_PRESSURE:
2570 return fmeter_getrate(&cs->fmeter);
2571 case FILE_SPREAD_PAGE:
2572 return is_spread_page(cs);
2573 case FILE_SPREAD_SLAB:
2574 return is_spread_slab(cs);
2575 default:
2576 BUG();
2577 }
2578
2579
2580 return 0;
2581 }
2582
2583 static s64 cpuset_read_s64(struct cgroup_subsys_state *css, struct cftype *cft)
2584 {
2585 struct cpuset *cs = css_cs(css);
2586 cpuset_filetype_t type = cft->private;
2587 switch (type) {
2588 case FILE_SCHED_RELAX_DOMAIN_LEVEL:
2589 return cs->relax_domain_level;
2590 default:
2591 BUG();
2592 }
2593
2594
2595 return 0;
2596 }
2597
2598 static int sched_partition_show(struct seq_file *seq, void *v)
2599 {
2600 struct cpuset *cs = css_cs(seq_css(seq));
2601
2602 switch (cs->partition_root_state) {
2603 case PRS_ENABLED:
2604 seq_puts(seq, "root\n");
2605 break;
2606 case PRS_DISABLED:
2607 seq_puts(seq, "member\n");
2608 break;
2609 case PRS_ERROR:
2610 seq_puts(seq, "root invalid\n");
2611 break;
2612 }
2613 return 0;
2614 }
2615
2616 static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
2617 size_t nbytes, loff_t off)
2618 {
2619 struct cpuset *cs = css_cs(of_css(of));
2620 int val;
2621 int retval = -ENODEV;
2622
2623 buf = strstrip(buf);
2624
2625
2626
2627
2628 if (!strcmp(buf, "root"))
2629 val = PRS_ENABLED;
2630 else if (!strcmp(buf, "member"))
2631 val = PRS_DISABLED;
2632 else
2633 return -EINVAL;
2634
2635 css_get(&cs->css);
2636 cpus_read_lock();
2637 percpu_down_write(&cpuset_rwsem);
2638 if (!is_cpuset_online(cs))
2639 goto out_unlock;
2640
2641 retval = update_prstate(cs, val);
2642 out_unlock:
2643 percpu_up_write(&cpuset_rwsem);
2644 cpus_read_unlock();
2645 css_put(&cs->css);
2646 return retval ?: nbytes;
2647 }
2648
2649
2650
2651
2652
2653 static struct cftype legacy_files[] = {
2654 {
2655 .name = "cpus",
2656 .seq_show = cpuset_common_seq_show,
2657 .write = cpuset_write_resmask,
2658 .max_write_len = (100U + 6 * NR_CPUS),
2659 .private = FILE_CPULIST,
2660 },
2661
2662 {
2663 .name = "mems",
2664 .seq_show = cpuset_common_seq_show,
2665 .write = cpuset_write_resmask,
2666 .max_write_len = (100U + 6 * MAX_NUMNODES),
2667 .private = FILE_MEMLIST,
2668 },
2669
2670 {
2671 .name = "effective_cpus",
2672 .seq_show = cpuset_common_seq_show,
2673 .private = FILE_EFFECTIVE_CPULIST,
2674 },
2675
2676 {
2677 .name = "effective_mems",
2678 .seq_show = cpuset_common_seq_show,
2679 .private = FILE_EFFECTIVE_MEMLIST,
2680 },
2681
2682 {
2683 .name = "cpu_exclusive",
2684 .read_u64 = cpuset_read_u64,
2685 .write_u64 = cpuset_write_u64,
2686 .private = FILE_CPU_EXCLUSIVE,
2687 },
2688
2689 {
2690 .name = "mem_exclusive",
2691 .read_u64 = cpuset_read_u64,
2692 .write_u64 = cpuset_write_u64,
2693 .private = FILE_MEM_EXCLUSIVE,
2694 },
2695
2696 {
2697 .name = "mem_hardwall",
2698 .read_u64 = cpuset_read_u64,
2699 .write_u64 = cpuset_write_u64,
2700 .private = FILE_MEM_HARDWALL,
2701 },
2702
2703 {
2704 .name = "sched_load_balance",
2705 .read_u64 = cpuset_read_u64,
2706 .write_u64 = cpuset_write_u64,
2707 .private = FILE_SCHED_LOAD_BALANCE,
2708 },
2709
2710 {
2711 .name = "sched_relax_domain_level",
2712 .read_s64 = cpuset_read_s64,
2713 .write_s64 = cpuset_write_s64,
2714 .private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
2715 },
2716
2717 {
2718 .name = "memory_migrate",
2719 .read_u64 = cpuset_read_u64,
2720 .write_u64 = cpuset_write_u64,
2721 .private = FILE_MEMORY_MIGRATE,
2722 },
2723
2724 {
2725 .name = "memory_pressure",
2726 .read_u64 = cpuset_read_u64,
2727 .private = FILE_MEMORY_PRESSURE,
2728 },
2729
2730 {
2731 .name = "memory_spread_page",
2732 .read_u64 = cpuset_read_u64,
2733 .write_u64 = cpuset_write_u64,
2734 .private = FILE_SPREAD_PAGE,
2735 },
2736
2737 {
2738 .name = "memory_spread_slab",
2739 .read_u64 = cpuset_read_u64,
2740 .write_u64 = cpuset_write_u64,
2741 .private = FILE_SPREAD_SLAB,
2742 },
2743
2744 {
2745 .name = "memory_pressure_enabled",
2746 .flags = CFTYPE_ONLY_ON_ROOT,
2747 .read_u64 = cpuset_read_u64,
2748 .write_u64 = cpuset_write_u64,
2749 .private = FILE_MEMORY_PRESSURE_ENABLED,
2750 },
2751
2752 { }
2753 };
2754
2755
2756
2757
2758
2759 static struct cftype dfl_files[] = {
2760 {
2761 .name = "cpus",
2762 .seq_show = cpuset_common_seq_show,
2763 .write = cpuset_write_resmask,
2764 .max_write_len = (100U + 6 * NR_CPUS),
2765 .private = FILE_CPULIST,
2766 .flags = CFTYPE_NOT_ON_ROOT,
2767 },
2768
2769 {
2770 .name = "mems",
2771 .seq_show = cpuset_common_seq_show,
2772 .write = cpuset_write_resmask,
2773 .max_write_len = (100U + 6 * MAX_NUMNODES),
2774 .private = FILE_MEMLIST,
2775 .flags = CFTYPE_NOT_ON_ROOT,
2776 },
2777
2778 {
2779 .name = "cpus.effective",
2780 .seq_show = cpuset_common_seq_show,
2781 .private = FILE_EFFECTIVE_CPULIST,
2782 },
2783
2784 {
2785 .name = "mems.effective",
2786 .seq_show = cpuset_common_seq_show,
2787 .private = FILE_EFFECTIVE_MEMLIST,
2788 },
2789
2790 {
2791 .name = "cpus.partition",
2792 .seq_show = sched_partition_show,
2793 .write = sched_partition_write,
2794 .private = FILE_PARTITION_ROOT,
2795 .flags = CFTYPE_NOT_ON_ROOT,
2796 .file_offset = offsetof(struct cpuset, partition_file),
2797 },
2798
2799 {
2800 .name = "cpus.subpartitions",
2801 .seq_show = cpuset_common_seq_show,
2802 .private = FILE_SUBPARTS_CPULIST,
2803 .flags = CFTYPE_DEBUG,
2804 },
2805
2806 { }
2807 };
2808
2809
2810
2811
2812
2813
2814
2815 static struct cgroup_subsys_state *
2816 cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
2817 {
2818 struct cpuset *cs;
2819
2820 if (!parent_css)
2821 return &top_cpuset.css;
2822
2823 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
2824 if (!cs)
2825 return ERR_PTR(-ENOMEM);
2826
2827 if (alloc_cpumasks(cs, NULL)) {
2828 kfree(cs);
2829 return ERR_PTR(-ENOMEM);
2830 }
2831
2832 __set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
2833 nodes_clear(cs->mems_allowed);
2834 nodes_clear(cs->effective_mems);
2835 fmeter_init(&cs->fmeter);
2836 cs->relax_domain_level = -1;
2837
2838
2839 if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
2840 __set_bit(CS_MEMORY_MIGRATE, &cs->flags);
2841
2842 return &cs->css;
2843 }
2844
2845 static int cpuset_css_online(struct cgroup_subsys_state *css)
2846 {
2847 struct cpuset *cs = css_cs(css);
2848 struct cpuset *parent = parent_cs(cs);
2849 struct cpuset *tmp_cs;
2850 struct cgroup_subsys_state *pos_css;
2851
2852 if (!parent)
2853 return 0;
2854
2855 cpus_read_lock();
2856 percpu_down_write(&cpuset_rwsem);
2857
2858 set_bit(CS_ONLINE, &cs->flags);
2859 if (is_spread_page(parent))
2860 set_bit(CS_SPREAD_PAGE, &cs->flags);
2861 if (is_spread_slab(parent))
2862 set_bit(CS_SPREAD_SLAB, &cs->flags);
2863
2864 cpuset_inc();
2865
2866 spin_lock_irq(&callback_lock);
2867 if (is_in_v2_mode()) {
2868 cpumask_copy(cs->effective_cpus, parent->effective_cpus);
2869 cs->effective_mems = parent->effective_mems;
2870 cs->use_parent_ecpus = true;
2871 parent->child_ecpus_count++;
2872 }
2873 spin_unlock_irq(&callback_lock);
2874
2875 if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
2876 goto out_unlock;
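
	/*
	 * CGRP_CPUSET_CLONE_CHILDREN is set: clone the parent's
	 * configuration into the new cpuset, but refuse to do so (leaving
	 * the new cpuset empty) if any sibling is cpu- or mem-exclusive,
	 * since cloning could then violate the exclusivity rules.
	 */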
2891 rcu_read_lock();
2892 cpuset_for_each_child(tmp_cs, pos_css, parent) {
2893 if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) {
2894 rcu_read_unlock();
2895 goto out_unlock;
2896 }
2897 }
2898 rcu_read_unlock();
2899
2900 spin_lock_irq(&callback_lock);
2901 cs->mems_allowed = parent->mems_allowed;
2902 cs->effective_mems = parent->mems_allowed;
2903 cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
2904 cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
2905 spin_unlock_irq(&callback_lock);
2906 out_unlock:
2907 percpu_up_write(&cpuset_rwsem);
2908 cpus_read_unlock();
2909 return 0;
2910 }
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
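/*
 * If the cpuset being removed is a partition root, disable the partition
 * first.  On the legacy hierarchy, also turn off sched_load_balance so
 * that the scheduler domains are rebuilt without this cpuset.
 */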
2923 static void cpuset_css_offline(struct cgroup_subsys_state *css)
2924 {
2925 struct cpuset *cs = css_cs(css);
2926
2927 cpus_read_lock();
2928 percpu_down_write(&cpuset_rwsem);
2929
2930 if (is_partition_root(cs))
2931 update_prstate(cs, 0);
2932
2933 if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
2934 is_sched_load_balance(cs))
2935 update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
2936
2937 if (cs->use_parent_ecpus) {
2938 struct cpuset *parent = parent_cs(cs);
2939
2940 cs->use_parent_ecpus = false;
2941 parent->child_ecpus_count--;
2942 }
2943
2944 cpuset_dec();
2945 clear_bit(CS_ONLINE, &cs->flags);
2946
2947 percpu_up_write(&cpuset_rwsem);
2948 cpus_read_unlock();
2949 }
2950
2951 static void cpuset_css_free(struct cgroup_subsys_state *css)
2952 {
2953 struct cpuset *cs = css_cs(css);
2954
2955 free_cpuset(cs);
2956 }
2957
2958 static void cpuset_bind(struct cgroup_subsys_state *root_css)
2959 {
2960 percpu_down_write(&cpuset_rwsem);
2961 spin_lock_irq(&callback_lock);
2962
2963 if (is_in_v2_mode()) {
2964 cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
2965 top_cpuset.mems_allowed = node_possible_map;
2966 } else {
2967 cpumask_copy(top_cpuset.cpus_allowed,
2968 top_cpuset.effective_cpus);
2969 top_cpuset.mems_allowed = top_cpuset.effective_mems;
2970 }
2971
2972 spin_unlock_irq(&callback_lock);
2973 percpu_up_write(&cpuset_rwsem);
2974 }
2975
2976
2977
2978
2979
2980
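/*
 * Make a newly forked task conform to its parent's current cpuset
 * placement; tasks forked into the root cpuset are left alone.
 */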
2981 static void cpuset_fork(struct task_struct *task)
2982 {
2983 if (task_css_is_root(task, cpuset_cgrp_id))
2984 return;
2985
2986 set_cpus_allowed_ptr(task, current->cpus_ptr);
2987 task->mems_allowed = current->mems_allowed;
2988 }
2989
2990 struct cgroup_subsys cpuset_cgrp_subsys = {
2991 .css_alloc = cpuset_css_alloc,
2992 .css_online = cpuset_css_online,
2993 .css_offline = cpuset_css_offline,
2994 .css_free = cpuset_css_free,
2995 .can_attach = cpuset_can_attach,
2996 .cancel_attach = cpuset_cancel_attach,
2997 .attach = cpuset_attach,
2998 .post_attach = cpuset_post_attach,
2999 .bind = cpuset_bind,
3000 .fork = cpuset_fork,
3001 .legacy_cftypes = legacy_files,
3002 .dfl_cftypes = dfl_files,
3003 .early_init = true,
3004 .threaded = true,
3005 };
3006
3007
3008
3009
3010
3011
3012
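/*
 * cpuset_init - initialize cpusets at system boot.
 * Sets up top_cpuset and the scratch cpumask used when attaching tasks.
 */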
3013 int __init cpuset_init(void)
3014 {
3015 BUG_ON(percpu_init_rwsem(&cpuset_rwsem));
3016
3017 BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
3018 BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
3019 BUG_ON(!zalloc_cpumask_var(&top_cpuset.subparts_cpus, GFP_KERNEL));
3020
3021 cpumask_setall(top_cpuset.cpus_allowed);
3022 nodes_setall(top_cpuset.mems_allowed);
3023 cpumask_setall(top_cpuset.effective_cpus);
3024 nodes_setall(top_cpuset.effective_mems);
3025
3026 fmeter_init(&top_cpuset.fmeter);
3027 set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
3028 top_cpuset.relax_domain_level = -1;
3029
3030 BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL));
3031
3032 return 0;
3033 }
3034
3035
3036
3037
3038
3039
3040
3041
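/*
 * If CPU or memory hotplug leaves a cpuset with no CPUs or no memory
 * nodes (legacy hierarchy only), move its tasks to the nearest ancestor
 * that still has both.
 */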
3042 static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
3043 {
3044 struct cpuset *parent;
3045
3046
3047
3048
3049
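/*
 * Find the nearest ancestor that still has CPUs and memory;
 * the top cpuset always qualifies.
 */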
3050 parent = parent_cs(cs);
3051 while (cpumask_empty(parent->cpus_allowed) ||
3052 nodes_empty(parent->mems_allowed))
3053 parent = parent_cs(parent);
3054
3055 if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
3056 pr_err("cpuset: failed to transfer tasks out of empty cpuset ");
3057 pr_cont_cgroup_name(cs->css.cgroup);
3058 pr_cont("\n");
3059 }
3060 }
3061
3062 static void
3063 hotplug_update_tasks_legacy(struct cpuset *cs,
3064 struct cpumask *new_cpus, nodemask_t *new_mems,
3065 bool cpus_updated, bool mems_updated)
3066 {
3067 bool is_empty;
3068
3069 spin_lock_irq(&callback_lock);
3070 cpumask_copy(cs->cpus_allowed, new_cpus);
3071 cpumask_copy(cs->effective_cpus, new_cpus);
3072 cs->mems_allowed = *new_mems;
3073 cs->effective_mems = *new_mems;
3074 spin_unlock_irq(&callback_lock);
3075
3076
3077
3078
3079
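/*
 * Don't update the tasks if the cpuset has become empty; they are
 * about to be moved to an ancestor instead.
 */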
3080 if (cpus_updated && !cpumask_empty(cs->cpus_allowed))
3081 update_tasks_cpumask(cs);
3082 if (mems_updated && !nodes_empty(cs->mems_allowed))
3083 update_tasks_nodemask(cs);
3084
3085 is_empty = cpumask_empty(cs->cpus_allowed) ||
3086 nodes_empty(cs->mems_allowed);
3087
3088 percpu_up_write(&cpuset_rwsem);
3089
3090
3091
3092
3093
3094
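/*
 * Moving tasks to an ancestor is a full cgroup operation and must run
 * without cpuset_rwsem held, hence the unlock/relock around it.
 */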
3095 if (is_empty)
3096 remove_tasks_in_empty_cpuset(cs);
3097
3098 percpu_down_write(&cpuset_rwsem);
3099 }
3100
3101 static void
3102 hotplug_update_tasks(struct cpuset *cs,
3103 struct cpumask *new_cpus, nodemask_t *new_mems,
3104 bool cpus_updated, bool mems_updated)
3105 {
3106 if (cpumask_empty(new_cpus))
3107 cpumask_copy(new_cpus, parent_cs(cs)->effective_cpus);
3108 if (nodes_empty(*new_mems))
3109 *new_mems = parent_cs(cs)->effective_mems;
3110
3111 spin_lock_irq(&callback_lock);
3112 cpumask_copy(cs->effective_cpus, new_cpus);
3113 cs->effective_mems = *new_mems;
3114 spin_unlock_irq(&callback_lock);
3115
3116 if (cpus_updated)
3117 update_tasks_cpumask(cs);
3118 if (mems_updated)
3119 update_tasks_nodemask(cs);
3120 }
3121
3122 static bool force_rebuild;
3123
3124 void cpuset_force_rebuild(void)
3125 {
3126 force_rebuild = true;
3127 }
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
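/*
 * cpuset_hotplug_update_tasks - update a cpuset after CPU/memory hotplug.
 * Recompute @cs's effective CPUs and memory nodes, invalidate its
 * partition state if a partition root is left without usable CPUs, and
 * update the affinity/nodemask of its tasks.  @tmp supplies scratch
 * cpumasks and is NULL on the legacy hierarchy.
 */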
3138 static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
3139 {
3140 static cpumask_t new_cpus;
3141 static nodemask_t new_mems;
3142 bool cpus_updated;
3143 bool mems_updated;
3144 struct cpuset *parent;
3145 retry:
3146 wait_event(cpuset_attach_wq, cs->attach_in_progress == 0);
3147
3148 percpu_down_write(&cpuset_rwsem);
3149
3150
3151
3152
3153
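/*
 * An attach may have started after the wait above but before we took
 * cpuset_rwsem; if so, drop the lock and retry.
 */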
3154 if (cs->attach_in_progress) {
3155 percpu_up_write(&cpuset_rwsem);
3156 goto retry;
3157 }
3158
3159 parent = parent_cs(cs);
3160 compute_effective_cpumask(&new_cpus, cs, parent);
3161 nodes_and(new_mems, cs->mems_allowed, parent->effective_mems);
3162
3163 if (cs->nr_subparts_cpus)
3164
3165
3166
3167
3168 cpumask_andnot(&new_cpus, &new_cpus, cs->subparts_cpus);
3169
3170 if (!tmp || !cs->partition_root_state)
3171 goto update_tasks;
3172
3173
3174
3175
3176
3177
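/*
 * A partition root that has lost all its effective CPUs, or whose
 * parent has entered the error state, can no longer remain a valid
 * partition.
 */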
3178 if (is_partition_root(cs) && (cpumask_empty(&new_cpus) ||
3179 (parent->partition_root_state == PRS_ERROR))) {
3180 if (cs->nr_subparts_cpus) {
3181 spin_lock_irq(&callback_lock);
3182 cs->nr_subparts_cpus = 0;
3183 cpumask_clear(cs->subparts_cpus);
3184 spin_unlock_irq(&callback_lock);
3185 compute_effective_cpumask(&new_cpus, cs, parent);
3186 }
3187
3188
3189
3190
3191
3192
3193
3194 if ((parent->partition_root_state == PRS_ERROR) ||
3195 cpumask_empty(&new_cpus)) {
3196 int old_prs;
3197
3198 update_parent_subparts_cpumask(cs, partcmd_disable,
3199 NULL, tmp);
3200 old_prs = cs->partition_root_state;
3201 if (old_prs != PRS_ERROR) {
3202 spin_lock_irq(&callback_lock);
3203 cs->partition_root_state = PRS_ERROR;
3204 spin_unlock_irq(&callback_lock);
3205 notify_partition_change(cs, old_prs, PRS_ERROR);
3206 }
3207 }
3208 cpuset_force_rebuild();
3209 }
3210
3211
3212
3213
3214
3215
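/*
 * If the parent is a partition root, let it re-evaluate the CPUs it has
 * delegated to subpartitions; force a sched domain rebuild if anything
 * changed.
 */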
3216 if (is_partition_root(parent) &&
3217 ((cs->partition_root_state == PRS_ERROR) ||
3218 !cpumask_intersects(&new_cpus, parent->subparts_cpus)) &&
3219 update_parent_subparts_cpumask(cs, partcmd_update, NULL, tmp))
3220 cpuset_force_rebuild();
3221
3222 update_tasks:
3223 cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
3224 mems_updated = !nodes_equal(new_mems, cs->effective_mems);
3225
3226 if (mems_updated)
3227 check_insane_mems_config(&new_mems);
3228
3229 if (is_in_v2_mode())
3230 hotplug_update_tasks(cs, &new_cpus, &new_mems,
3231 cpus_updated, mems_updated);
3232 else
3233 hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems,
3234 cpus_updated, mems_updated);
3235
3236 percpu_up_write(&cpuset_rwsem);
3237 }
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
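/*
 * cpuset_hotplug_workfn - handle CPU/memory hotplug for cpusets.
 *
 * Resynchronize top_cpuset with cpu_active_mask and node_states[N_MEMORY],
 * propagate any change to all descendant cpusets, and rebuild the
 * scheduler domains when the CPU configuration has changed.
 */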
3255 static void cpuset_hotplug_workfn(struct work_struct *work)
3256 {
3257 static cpumask_t new_cpus;
3258 static nodemask_t new_mems;
3259 bool cpus_updated, mems_updated;
3260 bool on_dfl = is_in_v2_mode();
3261 struct tmpmasks tmp, *ptmp = NULL;
3262
3263 if (on_dfl && !alloc_cpumasks(NULL, &tmp))
3264 ptmp = &tmp;
3265
3266 percpu_down_write(&cpuset_rwsem);
3267
3268
3269 cpumask_copy(&new_cpus, cpu_active_mask);
3270 new_mems = node_states[N_MEMORY];
3271
3272
3273
3274
3275
3276
3277 cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus);
3278 mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems);
3279
3280
3281
3282
3283
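/*
 * Be conservative: if CPUs have been delegated to subpartitions, treat
 * the CPU configuration as updated.
 */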
3284 if (!cpus_updated && top_cpuset.nr_subparts_cpus)
3285 cpus_updated = true;
3286
3287
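/* Synchronize cpus_allowed (legacy) and effective_cpus to cpu_active_mask */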
3288 if (cpus_updated) {
3289 spin_lock_irq(&callback_lock);
3290 if (!on_dfl)
3291 cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
3292
3293
3294
3295
3296
3297
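/*
 * CPUs delegated to subpartitions must not appear in top_cpuset's
 * effective_cpus.  If every remaining active CPU sits in a
 * subpartition, dissolve the subpartitions instead.
 */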
3298 if (top_cpuset.nr_subparts_cpus) {
3299 if (cpumask_subset(&new_cpus,
3300 top_cpuset.subparts_cpus)) {
3301 top_cpuset.nr_subparts_cpus = 0;
3302 cpumask_clear(top_cpuset.subparts_cpus);
3303 } else {
3304 cpumask_andnot(&new_cpus, &new_cpus,
3305 top_cpuset.subparts_cpus);
3306 }
3307 }
3308 cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
3309 spin_unlock_irq(&callback_lock);
3310
3311 }
3312
3313
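/* Synchronize mems_allowed (legacy) and effective_mems to N_MEMORY */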
3314 if (mems_updated) {
3315 spin_lock_irq(&callback_lock);
3316 if (!on_dfl)
3317 top_cpuset.mems_allowed = new_mems;
3318 top_cpuset.effective_mems = new_mems;
3319 spin_unlock_irq(&callback_lock);
3320 update_tasks_nodemask(&top_cpuset);
3321 }
3322
3323 percpu_up_write(&cpuset_rwsem);
3324
3325
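/* Propagate the changes to every descendant cpuset */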
3326 if (cpus_updated || mems_updated) {
3327 struct cpuset *cs;
3328 struct cgroup_subsys_state *pos_css;
3329
3330 rcu_read_lock();
3331 cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
3332 if (cs == &top_cpuset || !css_tryget_online(&cs->css))
3333 continue;
3334 rcu_read_unlock();
3335
3336 cpuset_hotplug_update_tasks(cs, ptmp);
3337
3338 rcu_read_lock();
3339 css_put(&cs->css);
3340 }
3341 rcu_read_unlock();
3342 }
3343
3344
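/*
 * Rebuild scheduler domains if the CPU configuration changed or a
 * rebuild was explicitly requested.
 */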
3345 if (cpus_updated || force_rebuild) {
3346 force_rebuild = false;
3347 rebuild_sched_domains();
3348 }
3349
3350 free_cpumasks(NULL, ptmp);
3351 }
3352
3353 void cpuset_update_active_cpus(void)
3354 {
3355
3356
3357
3358
3359
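/*
 * Defer the actual update to a workqueue; the caller runs in the CPU
 * hotplug path while the update itself takes cpuset_rwsem.
 */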
3360 schedule_work(&cpuset_hotplug_work);
3361 }
3362
3363 void cpuset_wait_for_hotplug(void)
3364 {
3365 flush_work(&cpuset_hotplug_work);
3366 }
3367
3368
3369
3370
3371
3372
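/*
 * Memory hotplug notifier: keep top_cpuset.effective_mems tracking
 * node_states[N_MEMORY] by reusing the same hotplug work item.
 */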
3373 static int cpuset_track_online_nodes(struct notifier_block *self,
3374 unsigned long action, void *arg)
3375 {
3376 schedule_work(&cpuset_hotplug_work);
3377 return NOTIFY_OK;
3378 }
3379
3380 static struct notifier_block cpuset_track_online_nodes_nb = {
3381 .notifier_call = cpuset_track_online_nodes,
3382 .priority = 10,
3383 };
3384
3385
3386
3387
3388
3389
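/*
 * cpuset_init_smp - finish initializing top_cpuset once the active CPU
 * and memory node maps are known, register the memory hotplug notifier
 * and create the workqueue used for memory migration.
 */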
3390 void __init cpuset_init_smp(void)
3391 {
3392
3393
3394
3395
3396
3397 top_cpuset.old_mems_allowed = top_cpuset.mems_allowed;
3398
3399 cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask);
3400 top_cpuset.effective_mems = node_states[N_MEMORY];
3401
3402 register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
3403
3404 cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
3405 BUG_ON(!cpuset_migrate_mm_wq);
3406 }
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
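/*
 * cpuset_cpus_allowed - fill @pmask with the online CPUs @tsk may use
 * according to its cpuset.
 */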
3419 void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
3420 {
3421 unsigned long flags;
3422
3423 spin_lock_irqsave(&callback_lock, flags);
3424 guarantee_online_cpus(tsk, pmask);
3425 spin_unlock_irqrestore(&callback_lock, flags);
3426 }
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
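/*
 * cpuset_cpus_allowed_fallback - last-resort affinity fixup for @tsk.
 * In cgroup v2 mode, fall back to the task's cpuset cpus_allowed when it
 * lies within task_cpu_possible_mask().  Returns true if the affinity
 * was changed.
 */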
3442 bool cpuset_cpus_allowed_fallback(struct task_struct *tsk)
3443 {
3444 const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
3445 const struct cpumask *cs_mask;
3446 bool changed = false;
3447
3448 rcu_read_lock();
3449 cs_mask = task_cs(tsk)->cpus_allowed;
3450 if (is_in_v2_mode() && cpumask_subset(cs_mask, possible_mask)) {
3451 do_set_cpus_allowed(tsk, cs_mask);
3452 changed = true;
3453 }
3454 rcu_read_unlock();
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473 return changed;
3474 }
3475
3476 void __init cpuset_init_current_mems_allowed(void)
3477 {
3478 nodes_setall(current->mems_allowed);
3479 }
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
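/*
 * cpuset_mems_allowed - return the memory nodes @tsk may allocate from,
 * derived from its cpuset.
 */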
3491 nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
3492 {
3493 nodemask_t mask;
3494 unsigned long flags;
3495
3496 spin_lock_irqsave(&callback_lock, flags);
3497 rcu_read_lock();
3498 guarantee_online_mems(task_cs(tsk), &mask);
3499 rcu_read_unlock();
3500 spin_unlock_irqrestore(&callback_lock, flags);
3501
3502 return mask;
3503 }
3504
3505
3506
3507
3508
3509
3510
3511 int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
3512 {
3513 return nodes_intersects(*nodemask, current->mems_allowed);
3514 }
3515
3516
3517
3518
3519
3520
3521
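/*
 * nearest_hardwall_ancestor - return the nearest mem_exclusive or
 * mem_hardwall ancestor of @cs (possibly @cs itself), falling back to
 * the root cpuset.  Called with callback_lock held.
 */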
3522 static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
3523 {
3524 while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && parent_cs(cs))
3525 cs = parent_cs(cs);
3526 return cs;
3527 }
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
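/*
 * __cpuset_node_allowed - can current allocate memory on @node?
 *
 * Allocations from nodes in current->mems_allowed are always allowed, as
 * are allocations in interrupt context and by OOM victims.  Otherwise
 * __GFP_HARDWALL allocations are refused; non-hardwall allocations by
 * exiting tasks are allowed, and the rest are allowed if @node belongs to
 * the nearest mem_exclusive/mem_hardwall ancestor of current's cpuset.
 */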
3569 bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
3570 {
3571 struct cpuset *cs;
3572 bool allowed;
3573 unsigned long flags;
3574
3575 if (in_interrupt())
3576 return true;
3577 if (node_isset(node, current->mems_allowed))
3578 return true;
3579
3580
3581
3582
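/* OOM victims may allocate from any node */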
3583 if (unlikely(tsk_is_oom_victim(current)))
3584 return true;
3585 if (gfp_mask & __GFP_HARDWALL)
3586 return false;
3587
3588 if (current->flags & PF_EXITING)
3589 return true;
3590
3591
3592 spin_lock_irqsave(&callback_lock, flags);
3593
3594 rcu_read_lock();
3595 cs = nearest_hardwall_ancestor(task_cs(current));
3596 allowed = node_isset(node, cs->mems_allowed);
3597 rcu_read_unlock();
3598
3599 spin_unlock_irqrestore(&callback_lock, flags);
3600 return allowed;
3601 }
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
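/*
 * cpuset_mem_spread_node()/cpuset_slab_spread_node() - pick the node to
 * start a page cache or slab allocation on for tasks whose cpuset has
 * memory spreading enabled.  A per-task rotor walks round-robin over
 * current->mems_allowed, starting from a random node.
 */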
3630 static int cpuset_spread_node(int *rotor)
3631 {
3632 return *rotor = next_node_in(*rotor, current->mems_allowed);
3633 }
3634
3635 int cpuset_mem_spread_node(void)
3636 {
3637 if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE)
3638 current->cpuset_mem_spread_rotor =
3639 node_random(&current->mems_allowed);
3640
3641 return cpuset_spread_node(&current->cpuset_mem_spread_rotor);
3642 }
3643
3644 int cpuset_slab_spread_node(void)
3645 {
3646 if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE)
3647 current->cpuset_slab_spread_rotor =
3648 node_random(&current->mems_allowed);
3649
3650 return cpuset_spread_node(&current->cpuset_slab_spread_rotor);
3651 }
3652
3653 EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
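/*
 * cpuset_mems_allowed_intersects - does @tsk1's mems_allowed intersect
 * @tsk2's?
 */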
3666 int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
3667 const struct task_struct *tsk2)
3668 {
3669 return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
3670 }
3671
3672
3673
3674
3675
3676
3677
3678 void cpuset_print_current_mems_allowed(void)
3679 {
3680 struct cgroup *cgrp;
3681
3682 rcu_read_lock();
3683
3684 cgrp = task_cs(current)->css.cgroup;
3685 pr_cont(",cpuset=");
3686 pr_cont_cgroup_name(cgrp);
3687 pr_cont(",mems_allowed=%*pbl",
3688 nodemask_pr_args(&current->mems_allowed));
3689
3690 rcu_read_unlock();
3691 }
3692
3693
3694
3695
3696
3697
3698
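/*
 * When this flag is enabled, __cpuset_memory_pressure_bump() below
 * records memory-pressure events on the calling task's cpuset so that a
 * per-cpuset pressure rate can be reported.
 */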
3699 int cpuset_memory_pressure_enabled __read_mostly;
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
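/*
 * __cpuset_memory_pressure_bump - record a memory-pressure event on the
 * current task's cpuset frequency meter.
 */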
3719 void __cpuset_memory_pressure_bump(void)
3720 {
3721 rcu_read_lock();
3722 fmeter_markevent(&task_cs(current)->fmeter);
3723 rcu_read_unlock();
3724 }
3725
3726 #ifdef CONFIG_PROC_PID_CPUSET
3727
3728
3729
3730
3731
3732
3733
3734
3735
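/*
 * proc_cpuset_show - print the task's cpuset cgroup path, relative to
 * the caller's cgroup namespace, for /proc/<pid>/cpuset.
 */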
3736 int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
3737 struct pid *pid, struct task_struct *tsk)
3738 {
3739 char *buf;
3740 struct cgroup_subsys_state *css;
3741 int retval;
3742
3743 retval = -ENOMEM;
3744 buf = kmalloc(PATH_MAX, GFP_KERNEL);
3745 if (!buf)
3746 goto out;
3747
3748 css = task_get_css(tsk, cpuset_cgrp_id);
3749 retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
3750 current->nsproxy->cgroup_ns);
3751 css_put(css);
3752 if (retval >= PATH_MAX)
3753 retval = -ENAMETOOLONG;
3754 if (retval < 0)
3755 goto out_free;
3756 seq_puts(m, buf);
3757 seq_putc(m, '\n');
3758 retval = 0;
3759 out_free:
3760 kfree(buf);
3761 out:
3762 return retval;
3763 }
3764 #endif
3765
3766
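/* Show this task's Mems_allowed in /proc/<pid>/status */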
3767 void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
3768 {
3769 seq_printf(m, "Mems_allowed:\t%*pb\n",
3770 nodemask_pr_args(&task->mems_allowed));
3771 seq_printf(m, "Mems_allowed_list:\t%*pbl\n",
3772 nodemask_pr_args(&task->mems_allowed));
3773 }