0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0030
0031 #include "cgroup-internal.h"
0032
0033 #include <linux/bpf-cgroup.h>
0034 #include <linux/cred.h>
0035 #include <linux/errno.h>
0036 #include <linux/init_task.h>
0037 #include <linux/kernel.h>
0038 #include <linux/magic.h>
0039 #include <linux/mutex.h>
0040 #include <linux/mount.h>
0041 #include <linux/pagemap.h>
0042 #include <linux/proc_fs.h>
0043 #include <linux/rcupdate.h>
0044 #include <linux/sched.h>
0045 #include <linux/sched/task.h>
0046 #include <linux/slab.h>
0047 #include <linux/spinlock.h>
0048 #include <linux/percpu-rwsem.h>
0049 #include <linux/string.h>
0050 #include <linux/hashtable.h>
0051 #include <linux/idr.h>
0052 #include <linux/kthread.h>
0053 #include <linux/atomic.h>
0054 #include <linux/cpuset.h>
0055 #include <linux/proc_ns.h>
0056 #include <linux/nsproxy.h>
0057 #include <linux/file.h>
0058 #include <linux/fs_parser.h>
0059 #include <linux/sched/cputime.h>
0060 #include <linux/psi.h>
0061 #include <net/sock.h>
0062
0063 #define CREATE_TRACE_POINTS
0064 #include <trace/events/cgroup.h>
0065
0066 #define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN + \
0067 MAX_CFTYPE_NAME + 2)
0068
0069 #define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100)
0070
0071
0072
0073
0074
0075
0076
0077 #define CGROUP_HAS_SUBSYS_CONFIG (CGROUP_SUBSYS_COUNT > 0)
0078
0079
0080
0081
0082
0083
0084
0085
0086
0087
0088
0089 DEFINE_MUTEX(cgroup_mutex);
0090 DEFINE_SPINLOCK(css_set_lock);
0091
0092 #ifdef CONFIG_PROVE_RCU
0093 EXPORT_SYMBOL_GPL(cgroup_mutex);
0094 EXPORT_SYMBOL_GPL(css_set_lock);
0095 #endif
0096
0097 DEFINE_SPINLOCK(trace_cgroup_path_lock);
0098 char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
0099 static bool cgroup_debug __read_mostly;
0100
0101
0102
0103
0104
0105 static DEFINE_SPINLOCK(cgroup_idr_lock);
0106
0107
0108
0109
0110
0111 static DEFINE_SPINLOCK(cgroup_file_kn_lock);
0112
0113 DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);
0114
0115 #define cgroup_assert_mutex_or_rcu_locked() \
0116 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
0117 !lockdep_is_held(&cgroup_mutex), \
0118 "cgroup_mutex or RCU read lock required");
0119
0120
0121
0122
0123
0124
0125
0126 static struct workqueue_struct *cgroup_destroy_wq;
0127
0128
0129 #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
0130 struct cgroup_subsys *cgroup_subsys[] = {
0131 #include <linux/cgroup_subsys.h>
0132 };
0133 #undef SUBSYS
0134
0135
0136 #define SUBSYS(_x) [_x ## _cgrp_id] = #_x,
0137 static const char *cgroup_subsys_name[] = {
0138 #include <linux/cgroup_subsys.h>
0139 };
0140 #undef SUBSYS
0141
0142
0143 #define SUBSYS(_x) \
0144 DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_enabled_key); \
0145 DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_on_dfl_key); \
0146 EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_enabled_key); \
0147 EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_on_dfl_key);
0148 #include <linux/cgroup_subsys.h>
0149 #undef SUBSYS
0150
0151 #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_enabled_key,
0152 static struct static_key_true *cgroup_subsys_enabled_key[] = {
0153 #include <linux/cgroup_subsys.h>
0154 };
0155 #undef SUBSYS
0156
0157 #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_on_dfl_key,
0158 static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
0159 #include <linux/cgroup_subsys.h>
0160 };
0161 #undef SUBSYS
0162
0163 static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu);
0164
0165
0166 struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
0167 EXPORT_SYMBOL_GPL(cgrp_dfl_root);
0168
0169
0170
0171
0172
0173 static bool cgrp_dfl_visible;
0174
0175
0176 static u16 cgrp_dfl_inhibit_ss_mask;
0177
0178
0179 static u16 cgrp_dfl_implicit_ss_mask;
0180
0181
0182 static u16 cgrp_dfl_threaded_ss_mask;
0183
0184
0185 LIST_HEAD(cgroup_roots);
0186 static int cgroup_root_count;
0187
0188
0189 static DEFINE_IDR(cgroup_hierarchy_idr);
0190
0191
0192
0193
0194
0195
0196
0197
0198 static u64 css_serial_nr_next = 1;
0199
0200
0201
0202
0203
0204 static u16 have_fork_callback __read_mostly;
0205 static u16 have_exit_callback __read_mostly;
0206 static u16 have_release_callback __read_mostly;
0207 static u16 have_canfork_callback __read_mostly;
0208
0209
0210 struct cgroup_namespace init_cgroup_ns = {
0211 .ns.count = REFCOUNT_INIT(2),
0212 .user_ns = &init_user_ns,
0213 .ns.ops = &cgroupns_operations,
0214 .ns.inum = PROC_CGROUP_INIT_INO,
0215 .root_cset = &init_css_set,
0216 };
0217
0218 static struct file_system_type cgroup2_fs_type;
0219 static struct cftype cgroup_base_files[];
0220
0221
0222 enum cgroup_opt_features {
0223 #ifdef CONFIG_PSI
0224 OPT_FEATURE_PRESSURE,
0225 #endif
0226 OPT_FEATURE_COUNT
0227 };
0228
0229 static const char *cgroup_opt_feature_names[OPT_FEATURE_COUNT] = {
0230 #ifdef CONFIG_PSI
0231 "pressure",
0232 #endif
0233 };
0234
0235 static u16 cgroup_feature_disable_mask __read_mostly;
0236
0237 static int cgroup_apply_control(struct cgroup *cgrp);
0238 static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
0239 static void css_task_iter_skip(struct css_task_iter *it,
0240 struct task_struct *task);
0241 static int cgroup_destroy_locked(struct cgroup *cgrp);
0242 static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
0243 struct cgroup_subsys *ss);
0244 static void css_release(struct percpu_ref *ref);
0245 static void kill_css(struct cgroup_subsys_state *css);
0246 static int cgroup_addrm_files(struct cgroup_subsys_state *css,
0247 struct cgroup *cgrp, struct cftype cfts[],
0248 bool is_add);
0249
0250
0251
0252
0253
0254
0255
0256
0257
0258 bool cgroup_ssid_enabled(int ssid)
0259 {
0260 if (!CGROUP_HAS_SUBSYS_CONFIG)
0261 return false;
0262
0263 return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
0264 }
0265
0266
0267
0268
0269
0270
0271
0272
0273
0274
0275
0276
0277
0278
0279
0280
0281
0282
0283
0284
0285
0286
0287
0288
0289
0290
0291
0292
0293
0294
0295
0296
0297
0298
0299
0300
0301
0302
0303
0304
0305
0306
0307
0308
0309
0310
0311 bool cgroup_on_dfl(const struct cgroup *cgrp)
0312 {
0313 return cgrp->root == &cgrp_dfl_root;
0314 }
0315
0316
0317 static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
0318 gfp_t gfp_mask)
0319 {
0320 int ret;
0321
0322 idr_preload(gfp_mask);
0323 spin_lock_bh(&cgroup_idr_lock);
0324 ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
0325 spin_unlock_bh(&cgroup_idr_lock);
0326 idr_preload_end();
0327 return ret;
0328 }
0329
0330 static void *cgroup_idr_replace(struct idr *idr, void *ptr, int id)
0331 {
0332 void *ret;
0333
0334 spin_lock_bh(&cgroup_idr_lock);
0335 ret = idr_replace(idr, ptr, id);
0336 spin_unlock_bh(&cgroup_idr_lock);
0337 return ret;
0338 }
0339
0340 static void cgroup_idr_remove(struct idr *idr, int id)
0341 {
0342 spin_lock_bh(&cgroup_idr_lock);
0343 idr_remove(idr, id);
0344 spin_unlock_bh(&cgroup_idr_lock);
0345 }
0346
0347 static bool cgroup_has_tasks(struct cgroup *cgrp)
0348 {
0349 return cgrp->nr_populated_csets;
0350 }
0351
0352 bool cgroup_is_threaded(struct cgroup *cgrp)
0353 {
0354 return cgrp->dom_cgrp != cgrp;
0355 }
0356
0357
0358 static bool cgroup_is_mixable(struct cgroup *cgrp)
0359 {
0360
0361
0362
0363
0364
0365 return !cgroup_parent(cgrp);
0366 }
0367
0368
0369 static bool cgroup_can_be_thread_root(struct cgroup *cgrp)
0370 {
0371
0372 if (cgroup_is_mixable(cgrp))
0373 return true;
0374
0375
0376 if (cgroup_is_threaded(cgrp))
0377 return false;
0378
0379
0380 if (cgrp->nr_populated_domain_children)
0381 return false;
0382
0383
0384 if (cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
0385 return false;
0386
0387 return true;
0388 }
0389
0390
0391 bool cgroup_is_thread_root(struct cgroup *cgrp)
0392 {
0393
0394 if (cgroup_is_threaded(cgrp))
0395 return false;
0396
0397
0398 if (cgrp->nr_threaded_children)
0399 return true;
0400
0401
0402
0403
0404
0405 if (cgroup_has_tasks(cgrp) &&
0406 (cgrp->subtree_control & cgrp_dfl_threaded_ss_mask))
0407 return true;
0408
0409 return false;
0410 }
0411
0412
0413 static bool cgroup_is_valid_domain(struct cgroup *cgrp)
0414 {
0415
0416 if (cgroup_is_threaded(cgrp))
0417 return false;
0418
0419
0420 while ((cgrp = cgroup_parent(cgrp))) {
0421 if (!cgroup_is_mixable(cgrp) && cgroup_is_thread_root(cgrp))
0422 return false;
0423 if (cgroup_is_threaded(cgrp))
0424 return false;
0425 }
0426
0427 return true;
0428 }
0429
0430
0431 static u16 cgroup_control(struct cgroup *cgrp)
0432 {
0433 struct cgroup *parent = cgroup_parent(cgrp);
0434 u16 root_ss_mask = cgrp->root->subsys_mask;
0435
0436 if (parent) {
0437 u16 ss_mask = parent->subtree_control;
0438
0439
0440 if (cgroup_is_threaded(cgrp))
0441 ss_mask &= cgrp_dfl_threaded_ss_mask;
0442 return ss_mask;
0443 }
0444
0445 if (cgroup_on_dfl(cgrp))
0446 root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask |
0447 cgrp_dfl_implicit_ss_mask);
0448 return root_ss_mask;
0449 }
0450
0451
0452 static u16 cgroup_ss_mask(struct cgroup *cgrp)
0453 {
0454 struct cgroup *parent = cgroup_parent(cgrp);
0455
0456 if (parent) {
0457 u16 ss_mask = parent->subtree_ss_mask;
0458
0459
0460 if (cgroup_is_threaded(cgrp))
0461 ss_mask &= cgrp_dfl_threaded_ss_mask;
0462 return ss_mask;
0463 }
0464
0465 return cgrp->root->subsys_mask;
0466 }
0467
0468
0469
0470
0471
0472
0473
0474
0475
0476
0477
0478
0479 static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
0480 struct cgroup_subsys *ss)
0481 {
0482 if (CGROUP_HAS_SUBSYS_CONFIG && ss)
0483 return rcu_dereference_check(cgrp->subsys[ss->id],
0484 lockdep_is_held(&cgroup_mutex));
0485 else
0486 return &cgrp->self;
0487 }
0488
0489
0490
0491
0492
0493
0494
0495
0496
0497 static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
0498 struct cgroup_subsys *ss)
0499 {
0500 struct cgroup_subsys_state *css;
0501
0502 rcu_read_lock();
0503 css = cgroup_css(cgrp, ss);
0504 if (css && !css_tryget_online(css))
0505 css = NULL;
0506 rcu_read_unlock();
0507
0508 return css;
0509 }
0510
0511
0512
0513
0514
0515
0516
0517
0518
0519
0520
0521 static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
0522 struct cgroup_subsys *ss)
0523 {
0524 lockdep_assert_held(&cgroup_mutex);
0525
0526 if (!ss)
0527 return &cgrp->self;
0528
0529
0530
0531
0532
0533 while (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) {
0534 cgrp = cgroup_parent(cgrp);
0535 if (!cgrp)
0536 return NULL;
0537 }
0538
0539 return cgroup_css(cgrp, ss);
0540 }
0541
0542
0543
0544
0545
0546
0547
0548
0549
0550
0551
0552
0553
0554
0555 struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
0556 struct cgroup_subsys *ss)
0557 {
0558 struct cgroup_subsys_state *css;
0559
0560 if (!CGROUP_HAS_SUBSYS_CONFIG)
0561 return NULL;
0562
0563 do {
0564 css = cgroup_css(cgrp, ss);
0565
0566 if (css)
0567 return css;
0568 cgrp = cgroup_parent(cgrp);
0569 } while (cgrp);
0570
0571 return init_css_set.subsys[ss->id];
0572 }
0573
0574
0575
0576
0577
0578
0579
0580
0581
0582
0583
0584
0585 struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
0586 struct cgroup_subsys *ss)
0587 {
0588 struct cgroup_subsys_state *css;
0589
0590 if (!CGROUP_HAS_SUBSYS_CONFIG)
0591 return NULL;
0592
0593 rcu_read_lock();
0594
0595 do {
0596 css = cgroup_css(cgrp, ss);
0597
0598 if (css && css_tryget_online(css))
0599 goto out_unlock;
0600 cgrp = cgroup_parent(cgrp);
0601 } while (cgrp);
0602
0603 css = init_css_set.subsys[ss->id];
0604 css_get(css);
0605 out_unlock:
0606 rcu_read_unlock();
0607 return css;
0608 }
0609 EXPORT_SYMBOL_GPL(cgroup_get_e_css);
0610
0611 static void cgroup_get_live(struct cgroup *cgrp)
0612 {
0613 WARN_ON_ONCE(cgroup_is_dead(cgrp));
0614 css_get(&cgrp->self);
0615 }
0616
0617
0618
0619
0620
0621
0622 int __cgroup_task_count(const struct cgroup *cgrp)
0623 {
0624 int count = 0;
0625 struct cgrp_cset_link *link;
0626
0627 lockdep_assert_held(&css_set_lock);
0628
0629 list_for_each_entry(link, &cgrp->cset_links, cset_link)
0630 count += link->cset->nr_tasks;
0631
0632 return count;
0633 }
0634
0635
0636
0637
0638
0639 int cgroup_task_count(const struct cgroup *cgrp)
0640 {
0641 int count;
0642
0643 spin_lock_irq(&css_set_lock);
0644 count = __cgroup_task_count(cgrp);
0645 spin_unlock_irq(&css_set_lock);
0646
0647 return count;
0648 }
0649
0650 struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
0651 {
0652 struct cgroup *cgrp = of->kn->parent->priv;
0653 struct cftype *cft = of_cft(of);
0654
0655
0656
0657
0658
0659
0660
0661
0662
0663 if (CGROUP_HAS_SUBSYS_CONFIG && cft->ss)
0664 return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
0665 else
0666 return &cgrp->self;
0667 }
0668 EXPORT_SYMBOL_GPL(of_css);
0669
0670
0671
0672
0673
0674
0675
0676
0677
0678 #define for_each_css(css, ssid, cgrp) \
0679 for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
0680 if (!((css) = rcu_dereference_check( \
0681 (cgrp)->subsys[(ssid)], \
0682 lockdep_is_held(&cgroup_mutex)))) { } \
0683 else
0684
0685
0686
0687
0688
0689
0690
0691
0692
0693 #define for_each_e_css(css, ssid, cgrp) \
0694 for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
0695 if (!((css) = cgroup_e_css_by_mask(cgrp, \
0696 cgroup_subsys[(ssid)]))) \
0697 ; \
0698 else
0699
0700
0701
0702
0703
0704
0705
0706
0707
0708
0709 #define do_each_subsys_mask(ss, ssid, ss_mask) do { \
0710 unsigned long __ss_mask = (ss_mask); \
0711 if (!CGROUP_HAS_SUBSYS_CONFIG) { \
0712 (ssid) = 0; \
0713 break; \
0714 } \
0715 for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) { \
0716 (ss) = cgroup_subsys[ssid]; \
0717 {
0718
0719 #define while_each_subsys_mask() \
0720 } \
0721 } \
0722 } while (false)
0723
0724
0725 #define cgroup_for_each_live_child(child, cgrp) \
0726 list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
0727 if (({ lockdep_assert_held(&cgroup_mutex); \
0728 cgroup_is_dead(child); })) \
0729 ; \
0730 else
0731
0732
0733 #define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) \
0734 css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL)) \
0735 if (({ lockdep_assert_held(&cgroup_mutex); \
0736 (dsct) = (d_css)->cgroup; \
0737 cgroup_is_dead(dsct); })) \
0738 ; \
0739 else
0740
0741
0742 #define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) \
0743 css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \
0744 if (({ lockdep_assert_held(&cgroup_mutex); \
0745 (dsct) = (d_css)->cgroup; \
0746 cgroup_is_dead(dsct); })) \
0747 ; \
0748 else
0749
0750
0751
0752
0753
0754
0755
0756
0757 struct css_set init_css_set = {
0758 .refcount = REFCOUNT_INIT(1),
0759 .dom_cset = &init_css_set,
0760 .tasks = LIST_HEAD_INIT(init_css_set.tasks),
0761 .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks),
0762 .dying_tasks = LIST_HEAD_INIT(init_css_set.dying_tasks),
0763 .task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
0764 .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets),
0765 .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links),
0766 .mg_src_preload_node = LIST_HEAD_INIT(init_css_set.mg_src_preload_node),
0767 .mg_dst_preload_node = LIST_HEAD_INIT(init_css_set.mg_dst_preload_node),
0768 .mg_node = LIST_HEAD_INIT(init_css_set.mg_node),
0769
0770
0771
0772
0773
0774
0775
0776 .dfl_cgrp = &cgrp_dfl_root.cgrp,
0777 };
0778
0779 static int css_set_count = 1;
0780
0781 static bool css_set_threaded(struct css_set *cset)
0782 {
0783 return cset->dom_cset != cset;
0784 }
0785
0786
0787
0788
0789
0790
0791
0792
0793
0794
0795 static bool css_set_populated(struct css_set *cset)
0796 {
0797 lockdep_assert_held(&css_set_lock);
0798
0799 return !list_empty(&cset->tasks) || !list_empty(&cset->mg_tasks);
0800 }
0801
0802
0803
0804
0805
0806
0807
0808
0809
0810
0811
0812
0813
0814
0815
0816
0817
0818
0819 static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
0820 {
0821 struct cgroup *child = NULL;
0822 int adj = populated ? 1 : -1;
0823
0824 lockdep_assert_held(&css_set_lock);
0825
0826 do {
0827 bool was_populated = cgroup_is_populated(cgrp);
0828
0829 if (!child) {
0830 cgrp->nr_populated_csets += adj;
0831 } else {
0832 if (cgroup_is_threaded(child))
0833 cgrp->nr_populated_threaded_children += adj;
0834 else
0835 cgrp->nr_populated_domain_children += adj;
0836 }
0837
0838 if (was_populated == cgroup_is_populated(cgrp))
0839 break;
0840
0841 cgroup1_check_for_release(cgrp);
0842 TRACE_CGROUP_PATH(notify_populated, cgrp,
0843 cgroup_is_populated(cgrp));
0844 cgroup_file_notify(&cgrp->events_file);
0845
0846 child = cgrp;
0847 cgrp = cgroup_parent(cgrp);
0848 } while (cgrp);
0849 }
0850
0851
0852
0853
0854
0855
0856
0857
0858
0859 static void css_set_update_populated(struct css_set *cset, bool populated)
0860 {
0861 struct cgrp_cset_link *link;
0862
0863 lockdep_assert_held(&css_set_lock);
0864
0865 list_for_each_entry(link, &cset->cgrp_links, cgrp_link)
0866 cgroup_update_populated(link->cgrp, populated);
0867 }
0868
0869
0870
0871
0872
0873
0874
0875 static void css_set_skip_task_iters(struct css_set *cset,
0876 struct task_struct *task)
0877 {
0878 struct css_task_iter *it, *pos;
0879
0880 list_for_each_entry_safe(it, pos, &cset->task_iters, iters_node)
0881 css_task_iter_skip(it, task);
0882 }
0883
0884
0885
0886
0887
0888
0889
0890
0891
0892
0893
0894
0895
0896
0897
0898
0899 static void css_set_move_task(struct task_struct *task,
0900 struct css_set *from_cset, struct css_set *to_cset,
0901 bool use_mg_tasks)
0902 {
0903 lockdep_assert_held(&css_set_lock);
0904
0905 if (to_cset && !css_set_populated(to_cset))
0906 css_set_update_populated(to_cset, true);
0907
0908 if (from_cset) {
0909 WARN_ON_ONCE(list_empty(&task->cg_list));
0910
0911 css_set_skip_task_iters(from_cset, task);
0912 list_del_init(&task->cg_list);
0913 if (!css_set_populated(from_cset))
0914 css_set_update_populated(from_cset, false);
0915 } else {
0916 WARN_ON_ONCE(!list_empty(&task->cg_list));
0917 }
0918
0919 if (to_cset) {
0920
0921
0922
0923
0924
0925 WARN_ON_ONCE(task->flags & PF_EXITING);
0926
0927 cgroup_move_task(task, to_cset);
0928 list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks :
0929 &to_cset->tasks);
0930 }
0931 }
0932
0933
0934
0935
0936
0937
0938 #define CSS_SET_HASH_BITS 7
0939 static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);
0940
0941 static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
0942 {
0943 unsigned long key = 0UL;
0944 struct cgroup_subsys *ss;
0945 int i;
0946
0947 for_each_subsys(ss, i)
0948 key += (unsigned long)css[i];
0949 key = (key >> 16) ^ key;
0950
0951 return key;
0952 }
0953
0954 void put_css_set_locked(struct css_set *cset)
0955 {
0956 struct cgrp_cset_link *link, *tmp_link;
0957 struct cgroup_subsys *ss;
0958 int ssid;
0959
0960 lockdep_assert_held(&css_set_lock);
0961
0962 if (!refcount_dec_and_test(&cset->refcount))
0963 return;
0964
0965 WARN_ON_ONCE(!list_empty(&cset->threaded_csets));
0966
0967
0968 for_each_subsys(ss, ssid) {
0969 list_del(&cset->e_cset_node[ssid]);
0970 css_put(cset->subsys[ssid]);
0971 }
0972 hash_del(&cset->hlist);
0973 css_set_count--;
0974
0975 list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) {
0976 list_del(&link->cset_link);
0977 list_del(&link->cgrp_link);
0978 if (cgroup_parent(link->cgrp))
0979 cgroup_put(link->cgrp);
0980 kfree(link);
0981 }
0982
0983 if (css_set_threaded(cset)) {
0984 list_del(&cset->threaded_csets_node);
0985 put_css_set_locked(cset->dom_cset);
0986 }
0987
0988 kfree_rcu(cset, rcu_head);
0989 }
0990
0991
0992
0993
0994
0995
0996
0997
0998
0999
1000
1001 static bool compare_css_sets(struct css_set *cset,
1002 struct css_set *old_cset,
1003 struct cgroup *new_cgrp,
1004 struct cgroup_subsys_state *template[])
1005 {
1006 struct cgroup *new_dfl_cgrp;
1007 struct list_head *l1, *l2;
1008
1009
1010
1011
1012
1013
1014 if (memcmp(template, cset->subsys, sizeof(cset->subsys)))
1015 return false;
1016
1017
1018
1019 if (cgroup_on_dfl(new_cgrp))
1020 new_dfl_cgrp = new_cgrp;
1021 else
1022 new_dfl_cgrp = old_cset->dfl_cgrp;
1023
1024 if (new_dfl_cgrp->dom_cgrp != cset->dom_cset->dfl_cgrp)
1025 return false;
1026
1027
1028
1029
1030
1031
1032
1033 l1 = &cset->cgrp_links;
1034 l2 = &old_cset->cgrp_links;
1035 while (1) {
1036 struct cgrp_cset_link *link1, *link2;
1037 struct cgroup *cgrp1, *cgrp2;
1038
1039 l1 = l1->next;
1040 l2 = l2->next;
1041
1042 if (l1 == &cset->cgrp_links) {
1043 BUG_ON(l2 != &old_cset->cgrp_links);
1044 break;
1045 } else {
1046 BUG_ON(l2 == &old_cset->cgrp_links);
1047 }
1048
1049 link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link);
1050 link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link);
1051 cgrp1 = link1->cgrp;
1052 cgrp2 = link2->cgrp;
1053
1054 BUG_ON(cgrp1->root != cgrp2->root);
1055
1056
1057
1058
1059
1060
1061
1062
1063 if (cgrp1->root == new_cgrp->root) {
1064 if (cgrp1 != new_cgrp)
1065 return false;
1066 } else {
1067 if (cgrp1 != cgrp2)
1068 return false;
1069 }
1070 }
1071 return true;
1072 }
1073
1074
1075
1076
1077
1078
1079
1080 static struct css_set *find_existing_css_set(struct css_set *old_cset,
1081 struct cgroup *cgrp,
1082 struct cgroup_subsys_state *template[])
1083 {
1084 struct cgroup_root *root = cgrp->root;
1085 struct cgroup_subsys *ss;
1086 struct css_set *cset;
1087 unsigned long key;
1088 int i;
1089
1090
1091
1092
1093
1094
1095 for_each_subsys(ss, i) {
1096 if (root->subsys_mask & (1UL << i)) {
1097
1098
1099
1100
1101 template[i] = cgroup_e_css_by_mask(cgrp, ss);
1102 } else {
1103
1104
1105
1106
1107 template[i] = old_cset->subsys[i];
1108 }
1109 }
1110
1111 key = css_set_hash(template);
1112 hash_for_each_possible(css_set_table, cset, hlist, key) {
1113 if (!compare_css_sets(cset, old_cset, cgrp, template))
1114 continue;
1115
1116
1117 return cset;
1118 }
1119
1120
1121 return NULL;
1122 }
1123
1124 static void free_cgrp_cset_links(struct list_head *links_to_free)
1125 {
1126 struct cgrp_cset_link *link, *tmp_link;
1127
1128 list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) {
1129 list_del(&link->cset_link);
1130 kfree(link);
1131 }
1132 }
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142 static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
1143 {
1144 struct cgrp_cset_link *link;
1145 int i;
1146
1147 INIT_LIST_HEAD(tmp_links);
1148
1149 for (i = 0; i < count; i++) {
1150 link = kzalloc(sizeof(*link), GFP_KERNEL);
1151 if (!link) {
1152 free_cgrp_cset_links(tmp_links);
1153 return -ENOMEM;
1154 }
1155 list_add(&link->cset_link, tmp_links);
1156 }
1157 return 0;
1158 }
1159
1160
1161
1162
1163
1164
1165
1166 static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
1167 struct cgroup *cgrp)
1168 {
1169 struct cgrp_cset_link *link;
1170
1171 BUG_ON(list_empty(tmp_links));
1172
1173 if (cgroup_on_dfl(cgrp))
1174 cset->dfl_cgrp = cgrp;
1175
1176 link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
1177 link->cset = cset;
1178 link->cgrp = cgrp;
1179
1180
1181
1182
1183
1184 list_move_tail(&link->cset_link, &cgrp->cset_links);
1185 list_add_tail(&link->cgrp_link, &cset->cgrp_links);
1186
1187 if (cgroup_parent(cgrp))
1188 cgroup_get_live(cgrp);
1189 }
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199 static struct css_set *find_css_set(struct css_set *old_cset,
1200 struct cgroup *cgrp)
1201 {
1202 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { };
1203 struct css_set *cset;
1204 struct list_head tmp_links;
1205 struct cgrp_cset_link *link;
1206 struct cgroup_subsys *ss;
1207 unsigned long key;
1208 int ssid;
1209
1210 lockdep_assert_held(&cgroup_mutex);
1211
1212
1213
1214 spin_lock_irq(&css_set_lock);
1215 cset = find_existing_css_set(old_cset, cgrp, template);
1216 if (cset)
1217 get_css_set(cset);
1218 spin_unlock_irq(&css_set_lock);
1219
1220 if (cset)
1221 return cset;
1222
1223 cset = kzalloc(sizeof(*cset), GFP_KERNEL);
1224 if (!cset)
1225 return NULL;
1226
1227
1228 if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) {
1229 kfree(cset);
1230 return NULL;
1231 }
1232
1233 refcount_set(&cset->refcount, 1);
1234 cset->dom_cset = cset;
1235 INIT_LIST_HEAD(&cset->tasks);
1236 INIT_LIST_HEAD(&cset->mg_tasks);
1237 INIT_LIST_HEAD(&cset->dying_tasks);
1238 INIT_LIST_HEAD(&cset->task_iters);
1239 INIT_LIST_HEAD(&cset->threaded_csets);
1240 INIT_HLIST_NODE(&cset->hlist);
1241 INIT_LIST_HEAD(&cset->cgrp_links);
1242 INIT_LIST_HEAD(&cset->mg_src_preload_node);
1243 INIT_LIST_HEAD(&cset->mg_dst_preload_node);
1244 INIT_LIST_HEAD(&cset->mg_node);
1245
1246
1247
1248 memcpy(cset->subsys, template, sizeof(cset->subsys));
1249
1250 spin_lock_irq(&css_set_lock);
1251
1252 list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
1253 struct cgroup *c = link->cgrp;
1254
1255 if (c->root == cgrp->root)
1256 c = cgrp;
1257 link_css_set(&tmp_links, cset, c);
1258 }
1259
1260 BUG_ON(!list_empty(&tmp_links));
1261
1262 css_set_count++;
1263
1264
1265 key = css_set_hash(cset->subsys);
1266 hash_add(css_set_table, &cset->hlist, key);
1267
1268 for_each_subsys(ss, ssid) {
1269 struct cgroup_subsys_state *css = cset->subsys[ssid];
1270
1271 list_add_tail(&cset->e_cset_node[ssid],
1272 &css->cgroup->e_csets[ssid]);
1273 css_get(css);
1274 }
1275
1276 spin_unlock_irq(&css_set_lock);
1277
1278
1279
1280
1281
1282
1283
1284 if (cgroup_is_threaded(cset->dfl_cgrp)) {
1285 struct css_set *dcset;
1286
1287 dcset = find_css_set(cset, cset->dfl_cgrp->dom_cgrp);
1288 if (!dcset) {
1289 put_css_set(cset);
1290 return NULL;
1291 }
1292
1293 spin_lock_irq(&css_set_lock);
1294 cset->dom_cset = dcset;
1295 list_add_tail(&cset->threaded_csets_node,
1296 &dcset->threaded_csets);
1297 spin_unlock_irq(&css_set_lock);
1298 }
1299
1300 return cset;
1301 }
1302
1303 struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
1304 {
1305 struct cgroup *root_cgrp = kernfs_root_to_node(kf_root)->priv;
1306
1307 return root_cgrp->root;
1308 }
1309
1310 void cgroup_favor_dynmods(struct cgroup_root *root, bool favor)
1311 {
1312 bool favoring = root->flags & CGRP_ROOT_FAVOR_DYNMODS;
1313
1314
1315 if (favor && !favoring) {
1316 rcu_sync_enter(&cgroup_threadgroup_rwsem.rss);
1317 root->flags |= CGRP_ROOT_FAVOR_DYNMODS;
1318 } else if (!favor && favoring) {
1319 rcu_sync_exit(&cgroup_threadgroup_rwsem.rss);
1320 root->flags &= ~CGRP_ROOT_FAVOR_DYNMODS;
1321 }
1322 }
1323
1324 static int cgroup_init_root_id(struct cgroup_root *root)
1325 {
1326 int id;
1327
1328 lockdep_assert_held(&cgroup_mutex);
1329
1330 id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 0, 0, GFP_KERNEL);
1331 if (id < 0)
1332 return id;
1333
1334 root->hierarchy_id = id;
1335 return 0;
1336 }
1337
1338 static void cgroup_exit_root_id(struct cgroup_root *root)
1339 {
1340 lockdep_assert_held(&cgroup_mutex);
1341
1342 idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
1343 }
1344
1345 void cgroup_free_root(struct cgroup_root *root)
1346 {
1347 kfree(root);
1348 }
1349
1350 static void cgroup_destroy_root(struct cgroup_root *root)
1351 {
1352 struct cgroup *cgrp = &root->cgrp;
1353 struct cgrp_cset_link *link, *tmp_link;
1354
1355 trace_cgroup_destroy_root(root);
1356
1357 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
1358
1359 BUG_ON(atomic_read(&root->nr_cgrps));
1360 BUG_ON(!list_empty(&cgrp->self.children));
1361
1362
1363 WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask));
1364
1365
1366
1367
1368
1369 spin_lock_irq(&css_set_lock);
1370
1371 list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
1372 list_del(&link->cset_link);
1373 list_del(&link->cgrp_link);
1374 kfree(link);
1375 }
1376
1377 spin_unlock_irq(&css_set_lock);
1378
1379 if (!list_empty(&root->root_list)) {
1380 list_del(&root->root_list);
1381 cgroup_root_count--;
1382 }
1383
1384 cgroup_favor_dynmods(root, false);
1385 cgroup_exit_root_id(root);
1386
1387 mutex_unlock(&cgroup_mutex);
1388
1389 cgroup_rstat_exit(cgrp);
1390 kernfs_destroy_root(root->kf_root);
1391 cgroup_free_root(root);
1392 }
1393
1394 static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
1395 struct cgroup_root *root)
1396 {
1397 struct cgroup *res_cgroup = NULL;
1398
1399 if (cset == &init_css_set) {
1400 res_cgroup = &root->cgrp;
1401 } else if (root == &cgrp_dfl_root) {
1402 res_cgroup = cset->dfl_cgrp;
1403 } else {
1404 struct cgrp_cset_link *link;
1405
1406 list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
1407 struct cgroup *c = link->cgrp;
1408
1409 if (c->root == root) {
1410 res_cgroup = c;
1411 break;
1412 }
1413 }
1414 }
1415
1416 return res_cgroup;
1417 }
1418
1419
1420
1421
1422
1423 static struct cgroup *
1424 current_cgns_cgroup_from_root(struct cgroup_root *root)
1425 {
1426 struct cgroup *res = NULL;
1427 struct css_set *cset;
1428
1429 lockdep_assert_held(&css_set_lock);
1430
1431 rcu_read_lock();
1432
1433 cset = current->nsproxy->cgroup_ns->root_cset;
1434 res = __cset_cgroup_from_root(cset, root);
1435
1436 rcu_read_unlock();
1437
1438 BUG_ON(!res);
1439 return res;
1440 }
1441
1442
1443 static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
1444 struct cgroup_root *root)
1445 {
1446 struct cgroup *res = NULL;
1447
1448 lockdep_assert_held(&cgroup_mutex);
1449 lockdep_assert_held(&css_set_lock);
1450
1451 res = __cset_cgroup_from_root(cset, root);
1452
1453 BUG_ON(!res);
1454 return res;
1455 }
1456
1457
1458
1459
1460
1461 struct cgroup *task_cgroup_from_root(struct task_struct *task,
1462 struct cgroup_root *root)
1463 {
1464
1465
1466
1467
1468 return cset_cgroup_from_root(task_css_set(task), root);
1469 }
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497 static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
1498
1499 static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
1500 char *buf)
1501 {
1502 struct cgroup_subsys *ss = cft->ss;
1503
1504 if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
1505 !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) {
1506 const char *dbg = (cft->flags & CFTYPE_DEBUG) ? ".__DEBUG__." : "";
1507
1508 snprintf(buf, CGROUP_FILE_NAME_MAX, "%s%s.%s",
1509 dbg, cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
1510 cft->name);
1511 } else {
1512 strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
1513 }
1514 return buf;
1515 }
1516
1517
1518
1519
1520
1521
1522
1523 static umode_t cgroup_file_mode(const struct cftype *cft)
1524 {
1525 umode_t mode = 0;
1526
1527 if (cft->read_u64 || cft->read_s64 || cft->seq_show)
1528 mode |= S_IRUGO;
1529
1530 if (cft->write_u64 || cft->write_s64 || cft->write) {
1531 if (cft->flags & CFTYPE_WORLD_WRITABLE)
1532 mode |= S_IWUGO;
1533 else
1534 mode |= S_IWUSR;
1535 }
1536
1537 return mode;
1538 }
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552 static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask)
1553 {
1554 u16 cur_ss_mask = subtree_control;
1555 struct cgroup_subsys *ss;
1556 int ssid;
1557
1558 lockdep_assert_held(&cgroup_mutex);
1559
1560 cur_ss_mask |= cgrp_dfl_implicit_ss_mask;
1561
1562 while (true) {
1563 u16 new_ss_mask = cur_ss_mask;
1564
1565 do_each_subsys_mask(ss, ssid, cur_ss_mask) {
1566 new_ss_mask |= ss->depends_on;
1567 } while_each_subsys_mask();
1568
1569
1570
1571
1572
1573
1574 new_ss_mask &= this_ss_mask;
1575
1576 if (new_ss_mask == cur_ss_mask)
1577 break;
1578 cur_ss_mask = new_ss_mask;
1579 }
1580
1581 return cur_ss_mask;
1582 }
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594 void cgroup_kn_unlock(struct kernfs_node *kn)
1595 {
1596 struct cgroup *cgrp;
1597
1598 if (kernfs_type(kn) == KERNFS_DIR)
1599 cgrp = kn->priv;
1600 else
1601 cgrp = kn->parent->priv;
1602
1603 mutex_unlock(&cgroup_mutex);
1604
1605 kernfs_unbreak_active_protection(kn);
1606 cgroup_put(cgrp);
1607 }
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626 struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline)
1627 {
1628 struct cgroup *cgrp;
1629
1630 if (kernfs_type(kn) == KERNFS_DIR)
1631 cgrp = kn->priv;
1632 else
1633 cgrp = kn->parent->priv;
1634
1635
1636
1637
1638
1639
1640
1641 if (!cgroup_tryget(cgrp))
1642 return NULL;
1643 kernfs_break_active_protection(kn);
1644
1645 if (drain_offline)
1646 cgroup_lock_and_drain_offline(cgrp);
1647 else
1648 mutex_lock(&cgroup_mutex);
1649
1650 if (!cgroup_is_dead(cgrp))
1651 return cgrp;
1652
1653 cgroup_kn_unlock(kn);
1654 return NULL;
1655 }
1656
1657 static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
1658 {
1659 char name[CGROUP_FILE_NAME_MAX];
1660
1661 lockdep_assert_held(&cgroup_mutex);
1662
1663 if (cft->file_offset) {
1664 struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
1665 struct cgroup_file *cfile = (void *)css + cft->file_offset;
1666
1667 spin_lock_irq(&cgroup_file_kn_lock);
1668 cfile->kn = NULL;
1669 spin_unlock_irq(&cgroup_file_kn_lock);
1670
1671 del_timer_sync(&cfile->notify_timer);
1672 }
1673
1674 kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
1675 }
1676
1677
1678
1679
1680
1681 static void css_clear_dir(struct cgroup_subsys_state *css)
1682 {
1683 struct cgroup *cgrp = css->cgroup;
1684 struct cftype *cfts;
1685
1686 if (!(css->flags & CSS_VISIBLE))
1687 return;
1688
1689 css->flags &= ~CSS_VISIBLE;
1690
1691 if (!css->ss) {
1692 if (cgroup_on_dfl(cgrp))
1693 cfts = cgroup_base_files;
1694 else
1695 cfts = cgroup1_base_files;
1696
1697 cgroup_addrm_files(css, cgrp, cfts, false);
1698 } else {
1699 list_for_each_entry(cfts, &css->ss->cfts, node)
1700 cgroup_addrm_files(css, cgrp, cfts, false);
1701 }
1702 }
1703
1704
1705
1706
1707
1708
1709
1710 static int css_populate_dir(struct cgroup_subsys_state *css)
1711 {
1712 struct cgroup *cgrp = css->cgroup;
1713 struct cftype *cfts, *failed_cfts;
1714 int ret;
1715
1716 if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
1717 return 0;
1718
1719 if (!css->ss) {
1720 if (cgroup_on_dfl(cgrp))
1721 cfts = cgroup_base_files;
1722 else
1723 cfts = cgroup1_base_files;
1724
1725 ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
1726 if (ret < 0)
1727 return ret;
1728 } else {
1729 list_for_each_entry(cfts, &css->ss->cfts, node) {
1730 ret = cgroup_addrm_files(css, cgrp, cfts, true);
1731 if (ret < 0) {
1732 failed_cfts = cfts;
1733 goto err;
1734 }
1735 }
1736 }
1737
1738 css->flags |= CSS_VISIBLE;
1739
1740 return 0;
1741 err:
1742 list_for_each_entry(cfts, &css->ss->cfts, node) {
1743 if (cfts == failed_cfts)
1744 break;
1745 cgroup_addrm_files(css, cgrp, cfts, false);
1746 }
1747 return ret;
1748 }
1749
1750 int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
1751 {
1752 struct cgroup *dcgrp = &dst_root->cgrp;
1753 struct cgroup_subsys *ss;
1754 int ssid, i, ret;
1755 u16 dfl_disable_ss_mask = 0;
1756
1757 lockdep_assert_held(&cgroup_mutex);
1758
1759 do_each_subsys_mask(ss, ssid, ss_mask) {
1760
1761
1762
1763
1764
1765 if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) &&
1766 !ss->implicit_on_dfl)
1767 return -EBUSY;
1768
1769
1770 if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
1771 return -EBUSY;
1772
1773
1774
1775
1776
1777 if (ss->root == &cgrp_dfl_root)
1778 dfl_disable_ss_mask |= 1 << ssid;
1779
1780 } while_each_subsys_mask();
1781
1782 if (dfl_disable_ss_mask) {
1783 struct cgroup *scgrp = &cgrp_dfl_root.cgrp;
1784
1785
1786
1787
1788
1789 cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask;
1790 WARN_ON(cgroup_apply_control(scgrp));
1791 cgroup_finalize_control(scgrp, 0);
1792 }
1793
1794 do_each_subsys_mask(ss, ssid, ss_mask) {
1795 struct cgroup_root *src_root = ss->root;
1796 struct cgroup *scgrp = &src_root->cgrp;
1797 struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
1798 struct css_set *cset;
1799
1800 WARN_ON(!css || cgroup_css(dcgrp, ss));
1801
1802 if (src_root != &cgrp_dfl_root) {
1803
1804 src_root->subsys_mask &= ~(1 << ssid);
1805 WARN_ON(cgroup_apply_control(scgrp));
1806 cgroup_finalize_control(scgrp, 0);
1807 }
1808
1809
1810 RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
1811 rcu_assign_pointer(dcgrp->subsys[ssid], css);
1812 ss->root = dst_root;
1813 css->cgroup = dcgrp;
1814
1815 spin_lock_irq(&css_set_lock);
1816 hash_for_each(css_set_table, i, cset, hlist)
1817 list_move_tail(&cset->e_cset_node[ss->id],
1818 &dcgrp->e_csets[ss->id]);
1819 spin_unlock_irq(&css_set_lock);
1820
1821 if (ss->css_rstat_flush) {
1822 list_del_rcu(&css->rstat_css_node);
1823 synchronize_rcu();
1824 list_add_rcu(&css->rstat_css_node,
1825 &dcgrp->rstat_css_list);
1826 }
1827
1828
1829 dst_root->subsys_mask |= 1 << ssid;
1830 if (dst_root == &cgrp_dfl_root) {
1831 static_branch_enable(cgroup_subsys_on_dfl_key[ssid]);
1832 } else {
1833 dcgrp->subtree_control |= 1 << ssid;
1834 static_branch_disable(cgroup_subsys_on_dfl_key[ssid]);
1835 }
1836
1837 ret = cgroup_apply_control(dcgrp);
1838 if (ret)
1839 pr_warn("partial failure to rebind %s controller (err=%d)\n",
1840 ss->name, ret);
1841
1842 if (ss->bind)
1843 ss->bind(css);
1844 } while_each_subsys_mask();
1845
1846 kernfs_activate(dcgrp->kn);
1847 return 0;
1848 }
1849
1850 int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
1851 struct kernfs_root *kf_root)
1852 {
1853 int len = 0;
1854 char *buf = NULL;
1855 struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
1856 struct cgroup *ns_cgroup;
1857
1858 buf = kmalloc(PATH_MAX, GFP_KERNEL);
1859 if (!buf)
1860 return -ENOMEM;
1861
1862 spin_lock_irq(&css_set_lock);
1863 ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
1864 len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
1865 spin_unlock_irq(&css_set_lock);
1866
1867 if (len >= PATH_MAX)
1868 len = -ERANGE;
1869 else if (len > 0) {
1870 seq_escape(sf, buf, " \t\n\\");
1871 len = 0;
1872 }
1873 kfree(buf);
1874 return len;
1875 }
1876
1877 enum cgroup2_param {
1878 Opt_nsdelegate,
1879 Opt_favordynmods,
1880 Opt_memory_localevents,
1881 Opt_memory_recursiveprot,
1882 nr__cgroup2_params
1883 };
1884
1885 static const struct fs_parameter_spec cgroup2_fs_parameters[] = {
1886 fsparam_flag("nsdelegate", Opt_nsdelegate),
1887 fsparam_flag("favordynmods", Opt_favordynmods),
1888 fsparam_flag("memory_localevents", Opt_memory_localevents),
1889 fsparam_flag("memory_recursiveprot", Opt_memory_recursiveprot),
1890 {}
1891 };
1892
1893 static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param)
1894 {
1895 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1896 struct fs_parse_result result;
1897 int opt;
1898
1899 opt = fs_parse(fc, cgroup2_fs_parameters, param, &result);
1900 if (opt < 0)
1901 return opt;
1902
1903 switch (opt) {
1904 case Opt_nsdelegate:
1905 ctx->flags |= CGRP_ROOT_NS_DELEGATE;
1906 return 0;
1907 case Opt_favordynmods:
1908 ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS;
1909 return 0;
1910 case Opt_memory_localevents:
1911 ctx->flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
1912 return 0;
1913 case Opt_memory_recursiveprot:
1914 ctx->flags |= CGRP_ROOT_MEMORY_RECURSIVE_PROT;
1915 return 0;
1916 }
1917 return -EINVAL;
1918 }
1919
1920 static void apply_cgroup_root_flags(unsigned int root_flags)
1921 {
1922 if (current->nsproxy->cgroup_ns == &init_cgroup_ns) {
1923 if (root_flags & CGRP_ROOT_NS_DELEGATE)
1924 cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE;
1925 else
1926 cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;
1927
1928 cgroup_favor_dynmods(&cgrp_dfl_root,
1929 root_flags & CGRP_ROOT_FAVOR_DYNMODS);
1930
1931 if (root_flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
1932 cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
1933 else
1934 cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_LOCAL_EVENTS;
1935
1936 if (root_flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT)
1937 cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_RECURSIVE_PROT;
1938 else
1939 cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_RECURSIVE_PROT;
1940 }
1941 }
1942
1943 static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
1944 {
1945 if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
1946 seq_puts(seq, ",nsdelegate");
1947 if (cgrp_dfl_root.flags & CGRP_ROOT_FAVOR_DYNMODS)
1948 seq_puts(seq, ",favordynmods");
1949 if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
1950 seq_puts(seq, ",memory_localevents");
1951 if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT)
1952 seq_puts(seq, ",memory_recursiveprot");
1953 return 0;
1954 }
1955
1956 static int cgroup_reconfigure(struct fs_context *fc)
1957 {
1958 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1959
1960 apply_cgroup_root_flags(ctx->flags);
1961 return 0;
1962 }
1963
1964 static void init_cgroup_housekeeping(struct cgroup *cgrp)
1965 {
1966 struct cgroup_subsys *ss;
1967 int ssid;
1968
1969 INIT_LIST_HEAD(&cgrp->self.sibling);
1970 INIT_LIST_HEAD(&cgrp->self.children);
1971 INIT_LIST_HEAD(&cgrp->cset_links);
1972 INIT_LIST_HEAD(&cgrp->pidlists);
1973 mutex_init(&cgrp->pidlist_mutex);
1974 cgrp->self.cgroup = cgrp;
1975 cgrp->self.flags |= CSS_ONLINE;
1976 cgrp->dom_cgrp = cgrp;
1977 cgrp->max_descendants = INT_MAX;
1978 cgrp->max_depth = INT_MAX;
1979 INIT_LIST_HEAD(&cgrp->rstat_css_list);
1980 prev_cputime_init(&cgrp->prev_cputime);
1981
1982 for_each_subsys(ss, ssid)
1983 INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
1984
1985 init_waitqueue_head(&cgrp->offline_waitq);
1986 INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent);
1987 }
1988
1989 void init_cgroup_root(struct cgroup_fs_context *ctx)
1990 {
1991 struct cgroup_root *root = ctx->root;
1992 struct cgroup *cgrp = &root->cgrp;
1993
1994 INIT_LIST_HEAD(&root->root_list);
1995 atomic_set(&root->nr_cgrps, 1);
1996 cgrp->root = root;
1997 init_cgroup_housekeeping(cgrp);
1998
1999
2000 root->flags = ctx->flags & ~CGRP_ROOT_FAVOR_DYNMODS;
2001 if (ctx->release_agent)
2002 strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
2003 if (ctx->name)
2004 strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN);
2005 if (ctx->cpuset_clone_children)
2006 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
2007 }
2008
2009 int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
2010 {
2011 LIST_HEAD(tmp_links);
2012 struct cgroup *root_cgrp = &root->cgrp;
2013 struct kernfs_syscall_ops *kf_sops;
2014 struct css_set *cset;
2015 int i, ret;
2016
2017 lockdep_assert_held(&cgroup_mutex);
2018
2019 ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
2020 0, GFP_KERNEL);
2021 if (ret)
2022 goto out;
2023
2024
2025
2026
2027
2028
2029
2030
2031 ret = allocate_cgrp_cset_links(2 * css_set_count, &tmp_links);
2032 if (ret)
2033 goto cancel_ref;
2034
2035 ret = cgroup_init_root_id(root);
2036 if (ret)
2037 goto cancel_ref;
2038
2039 kf_sops = root == &cgrp_dfl_root ?
2040 &cgroup_kf_syscall_ops : &cgroup1_kf_syscall_ops;
2041
2042 root->kf_root = kernfs_create_root(kf_sops,
2043 KERNFS_ROOT_CREATE_DEACTIVATED |
2044 KERNFS_ROOT_SUPPORT_EXPORTOP |
2045 KERNFS_ROOT_SUPPORT_USER_XATTR,
2046 root_cgrp);
2047 if (IS_ERR(root->kf_root)) {
2048 ret = PTR_ERR(root->kf_root);
2049 goto exit_root_id;
2050 }
2051 root_cgrp->kn = kernfs_root_to_node(root->kf_root);
2052 WARN_ON_ONCE(cgroup_ino(root_cgrp) != 1);
2053 root_cgrp->ancestor_ids[0] = cgroup_id(root_cgrp);
2054
2055 ret = css_populate_dir(&root_cgrp->self);
2056 if (ret)
2057 goto destroy_root;
2058
2059 ret = cgroup_rstat_init(root_cgrp);
2060 if (ret)
2061 goto destroy_root;
2062
2063 ret = rebind_subsystems(root, ss_mask);
2064 if (ret)
2065 goto exit_stats;
2066
2067 ret = cgroup_bpf_inherit(root_cgrp);
2068 WARN_ON_ONCE(ret);
2069
2070 trace_cgroup_setup_root(root);
2071
2072
2073
2074
2075
2076
2077 list_add(&root->root_list, &cgroup_roots);
2078 cgroup_root_count++;
2079
2080
2081
2082
2083
2084 spin_lock_irq(&css_set_lock);
2085 hash_for_each(css_set_table, i, cset, hlist) {
2086 link_css_set(&tmp_links, cset, root_cgrp);
2087 if (css_set_populated(cset))
2088 cgroup_update_populated(root_cgrp, true);
2089 }
2090 spin_unlock_irq(&css_set_lock);
2091
2092 BUG_ON(!list_empty(&root_cgrp->self.children));
2093 BUG_ON(atomic_read(&root->nr_cgrps) != 1);
2094
2095 ret = 0;
2096 goto out;
2097
2098 exit_stats:
2099 cgroup_rstat_exit(root_cgrp);
2100 destroy_root:
2101 kernfs_destroy_root(root->kf_root);
2102 root->kf_root = NULL;
2103 exit_root_id:
2104 cgroup_exit_root_id(root);
2105 cancel_ref:
2106 percpu_ref_exit(&root_cgrp->self.refcnt);
2107 out:
2108 free_cgrp_cset_links(&tmp_links);
2109 return ret;
2110 }
2111
2112 int cgroup_do_get_tree(struct fs_context *fc)
2113 {
2114 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
2115 int ret;
2116
2117 ctx->kfc.root = ctx->root->kf_root;
2118 if (fc->fs_type == &cgroup2_fs_type)
2119 ctx->kfc.magic = CGROUP2_SUPER_MAGIC;
2120 else
2121 ctx->kfc.magic = CGROUP_SUPER_MAGIC;
2122 ret = kernfs_get_tree(fc);
2123
2124
2125
2126
2127
2128 if (!ret && ctx->ns != &init_cgroup_ns) {
2129 struct dentry *nsdentry;
2130 struct super_block *sb = fc->root->d_sb;
2131 struct cgroup *cgrp;
2132
2133 mutex_lock(&cgroup_mutex);
2134 spin_lock_irq(&css_set_lock);
2135
2136 cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root);
2137
2138 spin_unlock_irq(&css_set_lock);
2139 mutex_unlock(&cgroup_mutex);
2140
2141 nsdentry = kernfs_node_dentry(cgrp->kn, sb);
2142 dput(fc->root);
2143 if (IS_ERR(nsdentry)) {
2144 deactivate_locked_super(sb);
2145 ret = PTR_ERR(nsdentry);
2146 nsdentry = NULL;
2147 }
2148 fc->root = nsdentry;
2149 }
2150
2151 if (!ctx->kfc.new_sb_created)
2152 cgroup_put(&ctx->root->cgrp);
2153
2154 return ret;
2155 }
2156
2157
2158
2159
2160 static void cgroup_fs_context_free(struct fs_context *fc)
2161 {
2162 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
2163
2164 kfree(ctx->name);
2165 kfree(ctx->release_agent);
2166 put_cgroup_ns(ctx->ns);
2167 kernfs_free_fs_context(fc);
2168 kfree(ctx);
2169 }
2170
2171 static int cgroup_get_tree(struct fs_context *fc)
2172 {
2173 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
2174 int ret;
2175
2176 cgrp_dfl_visible = true;
2177 cgroup_get_live(&cgrp_dfl_root.cgrp);
2178 ctx->root = &cgrp_dfl_root;
2179
2180 ret = cgroup_do_get_tree(fc);
2181 if (!ret)
2182 apply_cgroup_root_flags(ctx->flags);
2183 return ret;
2184 }
2185
2186 static const struct fs_context_operations cgroup_fs_context_ops = {
2187 .free = cgroup_fs_context_free,
2188 .parse_param = cgroup2_parse_param,
2189 .get_tree = cgroup_get_tree,
2190 .reconfigure = cgroup_reconfigure,
2191 };
2192
2193 static const struct fs_context_operations cgroup1_fs_context_ops = {
2194 .free = cgroup_fs_context_free,
2195 .parse_param = cgroup1_parse_param,
2196 .get_tree = cgroup1_get_tree,
2197 .reconfigure = cgroup1_reconfigure,
2198 };
2199
2200
2201
2202
2203
2204 static int cgroup_init_fs_context(struct fs_context *fc)
2205 {
2206 struct cgroup_fs_context *ctx;
2207
2208 ctx = kzalloc(sizeof(struct cgroup_fs_context), GFP_KERNEL);
2209 if (!ctx)
2210 return -ENOMEM;
2211
2212 ctx->ns = current->nsproxy->cgroup_ns;
2213 get_cgroup_ns(ctx->ns);
2214 fc->fs_private = &ctx->kfc;
2215 if (fc->fs_type == &cgroup2_fs_type)
2216 fc->ops = &cgroup_fs_context_ops;
2217 else
2218 fc->ops = &cgroup1_fs_context_ops;
2219 put_user_ns(fc->user_ns);
2220 fc->user_ns = get_user_ns(ctx->ns->user_ns);
2221 fc->global = true;
2222
2223 #ifdef CONFIG_CGROUP_FAVOR_DYNMODS
2224 ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS;
2225 #endif
2226 return 0;
2227 }
2228
2229 static void cgroup_kill_sb(struct super_block *sb)
2230 {
2231 struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
2232 struct cgroup_root *root = cgroup_root_from_kf(kf_root);
2233
2234
2235
2236
2237
2238
2239
2240 if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
2241 !percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
2242 cgroup_bpf_offline(&root->cgrp);
2243 percpu_ref_kill(&root->cgrp.self.refcnt);
2244 }
2245 cgroup_put(&root->cgrp);
2246 kernfs_kill_sb(sb);
2247 }
2248
2249 struct file_system_type cgroup_fs_type = {
2250 .name = "cgroup",
2251 .init_fs_context = cgroup_init_fs_context,
2252 .parameters = cgroup1_fs_parameters,
2253 .kill_sb = cgroup_kill_sb,
2254 .fs_flags = FS_USERNS_MOUNT,
2255 };
2256
2257 static struct file_system_type cgroup2_fs_type = {
2258 .name = "cgroup2",
2259 .init_fs_context = cgroup_init_fs_context,
2260 .parameters = cgroup2_fs_parameters,
2261 .kill_sb = cgroup_kill_sb,
2262 .fs_flags = FS_USERNS_MOUNT,
2263 };
2264
2265 #ifdef CONFIG_CPUSETS
2266 static const struct fs_context_operations cpuset_fs_context_ops = {
2267 .get_tree = cgroup1_get_tree,
2268 .free = cgroup_fs_context_free,
2269 };
2270
2271
2272
2273
2274
2275
2276 static int cpuset_init_fs_context(struct fs_context *fc)
2277 {
2278 char *agent = kstrdup("/sbin/cpuset_release_agent", GFP_USER);
2279 struct cgroup_fs_context *ctx;
2280 int err;
2281
2282 err = cgroup_init_fs_context(fc);
2283 if (err) {
2284 kfree(agent);
2285 return err;
2286 }
2287
2288 fc->ops = &cpuset_fs_context_ops;
2289
2290 ctx = cgroup_fc2context(fc);
2291 ctx->subsys_mask = 1 << cpuset_cgrp_id;
2292 ctx->flags |= CGRP_ROOT_NOPREFIX;
2293 ctx->release_agent = agent;
2294
2295 get_filesystem(&cgroup_fs_type);
2296 put_filesystem(fc->fs_type);
2297 fc->fs_type = &cgroup_fs_type;
2298
2299 return 0;
2300 }
2301
2302 static struct file_system_type cpuset_fs_type = {
2303 .name = "cpuset",
2304 .init_fs_context = cpuset_init_fs_context,
2305 .fs_flags = FS_USERNS_MOUNT,
2306 };
2307 #endif
2308
2309 int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
2310 struct cgroup_namespace *ns)
2311 {
2312 struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
2313
2314 return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
2315 }
2316
2317 int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
2318 struct cgroup_namespace *ns)
2319 {
2320 int ret;
2321
2322 mutex_lock(&cgroup_mutex);
2323 spin_lock_irq(&css_set_lock);
2324
2325 ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);
2326
2327 spin_unlock_irq(&css_set_lock);
2328 mutex_unlock(&cgroup_mutex);
2329
2330 return ret;
2331 }
2332 EXPORT_SYMBOL_GPL(cgroup_path_ns);
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347 int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
2348 {
2349 struct cgroup_root *root;
2350 struct cgroup *cgrp;
2351 int hierarchy_id = 1;
2352 int ret;
2353
2354 mutex_lock(&cgroup_mutex);
2355 spin_lock_irq(&css_set_lock);
2356
2357 root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
2358
2359 if (root) {
2360 cgrp = task_cgroup_from_root(task, root);
2361 ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
2362 } else {
2363
2364 ret = strlcpy(buf, "/", buflen);
2365 }
2366
2367 spin_unlock_irq(&css_set_lock);
2368 mutex_unlock(&cgroup_mutex);
2369 return ret;
2370 }
2371 EXPORT_SYMBOL_GPL(task_cgroup_path);
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396 static void cgroup_attach_lock(bool lock_threadgroup)
2397 {
2398 cpus_read_lock();
2399 if (lock_threadgroup)
2400 percpu_down_write(&cgroup_threadgroup_rwsem);
2401 }
2402
2403
2404
2405
2406
2407 static void cgroup_attach_unlock(bool lock_threadgroup)
2408 {
2409 if (lock_threadgroup)
2410 percpu_up_write(&cgroup_threadgroup_rwsem);
2411 cpus_read_unlock();
2412 }
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424 static void cgroup_migrate_add_task(struct task_struct *task,
2425 struct cgroup_mgctx *mgctx)
2426 {
2427 struct css_set *cset;
2428
2429 lockdep_assert_held(&css_set_lock);
2430
2431
2432 if (task->flags & PF_EXITING)
2433 return;
2434
2435
2436 WARN_ON_ONCE(list_empty(&task->cg_list));
2437
2438 cset = task_css_set(task);
2439 if (!cset->mg_src_cgrp)
2440 return;
2441
2442 mgctx->tset.nr_tasks++;
2443
2444 list_move_tail(&task->cg_list, &cset->mg_tasks);
2445 if (list_empty(&cset->mg_node))
2446 list_add_tail(&cset->mg_node,
2447 &mgctx->tset.src_csets);
2448 if (list_empty(&cset->mg_dst_cset->mg_node))
2449 list_add_tail(&cset->mg_dst_cset->mg_node,
2450 &mgctx->tset.dst_csets);
2451 }
2452
2453
2454
2455
2456
2457
2458
2459
2460 struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
2461 struct cgroup_subsys_state **dst_cssp)
2462 {
2463 tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
2464 tset->cur_task = NULL;
2465
2466 return cgroup_taskset_next(tset, dst_cssp);
2467 }
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477 struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
2478 struct cgroup_subsys_state **dst_cssp)
2479 {
2480 struct css_set *cset = tset->cur_cset;
2481 struct task_struct *task = tset->cur_task;
2482
2483 while (CGROUP_HAS_SUBSYS_CONFIG && &cset->mg_node != tset->csets) {
2484 if (!task)
2485 task = list_first_entry(&cset->mg_tasks,
2486 struct task_struct, cg_list);
2487 else
2488 task = list_next_entry(task, cg_list);
2489
2490 if (&task->cg_list != &cset->mg_tasks) {
2491 tset->cur_cset = cset;
2492 tset->cur_task = task;
2493
2494
2495
2496
2497
2498
2499
2500 if (cset->mg_dst_cset)
2501 *dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
2502 else
2503 *dst_cssp = cset->subsys[tset->ssid];
2504
2505 return task;
2506 }
2507
2508 cset = list_next_entry(cset, mg_node);
2509 task = NULL;
2510 }
2511
2512 return NULL;
2513 }
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524 static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
2525 {
2526 struct cgroup_taskset *tset = &mgctx->tset;
2527 struct cgroup_subsys *ss;
2528 struct task_struct *task, *tmp_task;
2529 struct css_set *cset, *tmp_cset;
2530 int ssid, failed_ssid, ret;
2531
2532
2533 if (tset->nr_tasks) {
2534 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
2535 if (ss->can_attach) {
2536 tset->ssid = ssid;
2537 ret = ss->can_attach(tset);
2538 if (ret) {
2539 failed_ssid = ssid;
2540 goto out_cancel_attach;
2541 }
2542 }
2543 } while_each_subsys_mask();
2544 }
2545
2546
2547
2548
2549
2550
2551 spin_lock_irq(&css_set_lock);
2552 list_for_each_entry(cset, &tset->src_csets, mg_node) {
2553 list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
2554 struct css_set *from_cset = task_css_set(task);
2555 struct css_set *to_cset = cset->mg_dst_cset;
2556
2557 get_css_set(to_cset);
2558 to_cset->nr_tasks++;
2559 css_set_move_task(task, from_cset, to_cset, true);
2560 from_cset->nr_tasks--;
2561
2562
2563
2564
2565 cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
2566 to_cset->dfl_cgrp);
2567 put_css_set_locked(from_cset);
2568
2569 }
2570 }
2571 spin_unlock_irq(&css_set_lock);
2572
2573
2574
2575
2576
2577
2578 tset->csets = &tset->dst_csets;
2579
2580 if (tset->nr_tasks) {
2581 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
2582 if (ss->attach) {
2583 tset->ssid = ssid;
2584 ss->attach(tset);
2585 }
2586 } while_each_subsys_mask();
2587 }
2588
2589 ret = 0;
2590 goto out_release_tset;
2591
2592 out_cancel_attach:
2593 if (tset->nr_tasks) {
2594 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
2595 if (ssid == failed_ssid)
2596 break;
2597 if (ss->cancel_attach) {
2598 tset->ssid = ssid;
2599 ss->cancel_attach(tset);
2600 }
2601 } while_each_subsys_mask();
2602 }
2603 out_release_tset:
2604 spin_lock_irq(&css_set_lock);
2605 list_splice_init(&tset->dst_csets, &tset->src_csets);
2606 list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
2607 list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
2608 list_del_init(&cset->mg_node);
2609 }
2610 spin_unlock_irq(&css_set_lock);
2611
2612
2613
2614
2615
2616
2617 tset->nr_tasks = 0;
2618 tset->csets = &tset->src_csets;
2619 return ret;
2620 }
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631 int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp)
2632 {
2633
2634 if (!cgroup_on_dfl(dst_cgrp))
2635 return 0;
2636
2637
2638 if (!cgroup_is_valid_domain(dst_cgrp->dom_cgrp))
2639 return -EOPNOTSUPP;
2640
2641
2642
2643
2644
2645 if (cgroup_can_be_thread_root(dst_cgrp) || cgroup_is_threaded(dst_cgrp))
2646 return 0;
2647
2648
2649 if (dst_cgrp->subtree_control)
2650 return -EBUSY;
2651
2652 return 0;
2653 }
2654
2655
2656
2657
2658
2659
2660
2661
2662 void cgroup_migrate_finish(struct cgroup_mgctx *mgctx)
2663 {
2664 struct css_set *cset, *tmp_cset;
2665
2666 lockdep_assert_held(&cgroup_mutex);
2667
2668 spin_lock_irq(&css_set_lock);
2669
2670 list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_src_csets,
2671 mg_src_preload_node) {
2672 cset->mg_src_cgrp = NULL;
2673 cset->mg_dst_cgrp = NULL;
2674 cset->mg_dst_cset = NULL;
2675 list_del_init(&cset->mg_src_preload_node);
2676 put_css_set_locked(cset);
2677 }
2678
2679 list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_dst_csets,
2680 mg_dst_preload_node) {
2681 cset->mg_src_cgrp = NULL;
2682 cset->mg_dst_cgrp = NULL;
2683 cset->mg_dst_cset = NULL;
2684 list_del_init(&cset->mg_dst_preload_node);
2685 put_css_set_locked(cset);
2686 }
2687
2688 spin_unlock_irq(&css_set_lock);
2689 }
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707 void cgroup_migrate_add_src(struct css_set *src_cset,
2708 struct cgroup *dst_cgrp,
2709 struct cgroup_mgctx *mgctx)
2710 {
2711 struct cgroup *src_cgrp;
2712
2713 lockdep_assert_held(&cgroup_mutex);
2714 lockdep_assert_held(&css_set_lock);
2715
2716
2717
2718
2719
2720
2721 if (src_cset->dead)
2722 return;
2723
2724 if (!list_empty(&src_cset->mg_src_preload_node))
2725 return;
2726
2727 src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
2728
2729 WARN_ON(src_cset->mg_src_cgrp);
2730 WARN_ON(src_cset->mg_dst_cgrp);
2731 WARN_ON(!list_empty(&src_cset->mg_tasks));
2732 WARN_ON(!list_empty(&src_cset->mg_node));
2733
2734 src_cset->mg_src_cgrp = src_cgrp;
2735 src_cset->mg_dst_cgrp = dst_cgrp;
2736 get_css_set(src_cset);
2737 list_add_tail(&src_cset->mg_src_preload_node, &mgctx->preloaded_src_csets);
2738 }
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754 int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
2755 {
2756 struct css_set *src_cset, *tmp_cset;
2757
2758 lockdep_assert_held(&cgroup_mutex);
2759
2760
2761 list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets,
2762 mg_src_preload_node) {
2763 struct css_set *dst_cset;
2764 struct cgroup_subsys *ss;
2765 int ssid;
2766
2767 dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
2768 if (!dst_cset)
2769 return -ENOMEM;
2770
2771 WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
2772
2773
2774
2775
2776
2777
2778 if (src_cset == dst_cset) {
2779 src_cset->mg_src_cgrp = NULL;
2780 src_cset->mg_dst_cgrp = NULL;
2781 list_del_init(&src_cset->mg_src_preload_node);
2782 put_css_set(src_cset);
2783 put_css_set(dst_cset);
2784 continue;
2785 }
2786
2787 src_cset->mg_dst_cset = dst_cset;
2788
2789 if (list_empty(&dst_cset->mg_dst_preload_node))
2790 list_add_tail(&dst_cset->mg_dst_preload_node,
2791 &mgctx->preloaded_dst_csets);
2792 else
2793 put_css_set(dst_cset);
2794
2795 for_each_subsys(ss, ssid)
2796 if (src_cset->subsys[ssid] != dst_cset->subsys[ssid])
2797 mgctx->ss_mask |= 1 << ssid;
2798 }
2799
2800 return 0;
2801 }
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821 int cgroup_migrate(struct task_struct *leader, bool threadgroup,
2822 struct cgroup_mgctx *mgctx)
2823 {
2824 struct task_struct *task;
2825
2826
2827
2828
2829
2830
2831 spin_lock_irq(&css_set_lock);
2832 rcu_read_lock();
2833 task = leader;
2834 do {
2835 cgroup_migrate_add_task(task, mgctx);
2836 if (!threadgroup)
2837 break;
2838 } while_each_thread(leader, task);
2839 rcu_read_unlock();
2840 spin_unlock_irq(&css_set_lock);
2841
2842 return cgroup_migrate_execute(mgctx);
2843 }
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853 int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
2854 bool threadgroup)
2855 {
2856 DEFINE_CGROUP_MGCTX(mgctx);
2857 struct task_struct *task;
2858 int ret = 0;
2859
2860
2861 spin_lock_irq(&css_set_lock);
2862 rcu_read_lock();
2863 task = leader;
2864 do {
2865 cgroup_migrate_add_src(task_css_set(task), dst_cgrp, &mgctx);
2866 if (!threadgroup)
2867 break;
2868 } while_each_thread(leader, task);
2869 rcu_read_unlock();
2870 spin_unlock_irq(&css_set_lock);
2871
2872
2873 ret = cgroup_migrate_prepare_dst(&mgctx);
2874 if (!ret)
2875 ret = cgroup_migrate(leader, threadgroup, &mgctx);
2876
2877 cgroup_migrate_finish(&mgctx);
2878
2879 if (!ret)
2880 TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup);
2881
2882 return ret;
2883 }
2884
2885 struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
2886 bool *threadgroup_locked)
2887 {
2888 struct task_struct *tsk;
2889 pid_t pid;
2890
2891 if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
2892 return ERR_PTR(-EINVAL);
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902 lockdep_assert_held(&cgroup_mutex);
2903 *threadgroup_locked = pid || threadgroup;
2904 cgroup_attach_lock(*threadgroup_locked);
2905
2906 rcu_read_lock();
2907 if (pid) {
2908 tsk = find_task_by_vpid(pid);
2909 if (!tsk) {
2910 tsk = ERR_PTR(-ESRCH);
2911 goto out_unlock_threadgroup;
2912 }
2913 } else {
2914 tsk = current;
2915 }
2916
2917 if (threadgroup)
2918 tsk = tsk->group_leader;
2919
2920
2921
2922
2923
2924
2925
2926 if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
2927 tsk = ERR_PTR(-EINVAL);
2928 goto out_unlock_threadgroup;
2929 }
2930
2931 get_task_struct(tsk);
2932 goto out_unlock_rcu;
2933
2934 out_unlock_threadgroup:
2935 cgroup_attach_unlock(*threadgroup_locked);
2936 *threadgroup_locked = false;
2937 out_unlock_rcu:
2938 rcu_read_unlock();
2939 return tsk;
2940 }
2941
2942 void cgroup_procs_write_finish(struct task_struct *task, bool threadgroup_locked)
2943 {
2944 struct cgroup_subsys *ss;
2945 int ssid;
2946
2947
2948 put_task_struct(task);
2949
2950 cgroup_attach_unlock(threadgroup_locked);
2951
2952 for_each_subsys(ss, ssid)
2953 if (ss->post_attach)
2954 ss->post_attach();
2955 }
2956
2957 static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
2958 {
2959 struct cgroup_subsys *ss;
2960 bool printed = false;
2961 int ssid;
2962
2963 do_each_subsys_mask(ss, ssid, ss_mask) {
2964 if (printed)
2965 seq_putc(seq, ' ');
2966 seq_puts(seq, ss->name);
2967 printed = true;
2968 } while_each_subsys_mask();
2969 if (printed)
2970 seq_putc(seq, '\n');
2971 }
2972
2973
2974 static int cgroup_controllers_show(struct seq_file *seq, void *v)
2975 {
2976 struct cgroup *cgrp = seq_css(seq)->cgroup;
2977
2978 cgroup_print_ss_mask(seq, cgroup_control(cgrp));
2979 return 0;
2980 }
2981
2982
2983 static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
2984 {
2985 struct cgroup *cgrp = seq_css(seq)->cgroup;
2986
2987 cgroup_print_ss_mask(seq, cgrp->subtree_control);
2988 return 0;
2989 }
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000 static int cgroup_update_dfl_csses(struct cgroup *cgrp)
3001 {
3002 DEFINE_CGROUP_MGCTX(mgctx);
3003 struct cgroup_subsys_state *d_css;
3004 struct cgroup *dsct;
3005 struct css_set *src_cset;
3006 bool has_tasks;
3007 int ret;
3008
3009 lockdep_assert_held(&cgroup_mutex);
3010
3011
3012 spin_lock_irq(&css_set_lock);
3013 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3014 struct cgrp_cset_link *link;
3015
3016
3017
3018
3019
3020
3021
3022 if (dsct == cgrp)
3023 continue;
3024
3025 list_for_each_entry(link, &dsct->cset_links, cset_link)
3026 cgroup_migrate_add_src(link->cset, dsct, &mgctx);
3027 }
3028 spin_unlock_irq(&css_set_lock);
3029
3030
3031
3032
3033
3034
3035
3036 has_tasks = !list_empty(&mgctx.preloaded_src_csets);
3037 cgroup_attach_lock(has_tasks);
3038
3039
3040 ret = cgroup_migrate_prepare_dst(&mgctx);
3041 if (ret)
3042 goto out_finish;
3043
3044 spin_lock_irq(&css_set_lock);
3045 list_for_each_entry(src_cset, &mgctx.preloaded_src_csets,
3046 mg_src_preload_node) {
3047 struct task_struct *task, *ntask;
3048
3049
3050 list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
3051 cgroup_migrate_add_task(task, &mgctx);
3052 }
3053 spin_unlock_irq(&css_set_lock);
3054
3055 ret = cgroup_migrate_execute(&mgctx);
3056 out_finish:
3057 cgroup_migrate_finish(&mgctx);
3058 cgroup_attach_unlock(has_tasks);
3059 return ret;
3060 }
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070 void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
3071 __acquires(&cgroup_mutex)
3072 {
3073 struct cgroup *dsct;
3074 struct cgroup_subsys_state *d_css;
3075 struct cgroup_subsys *ss;
3076 int ssid;
3077
3078 restart:
3079 mutex_lock(&cgroup_mutex);
3080
3081 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
3082 for_each_subsys(ss, ssid) {
3083 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
3084 DEFINE_WAIT(wait);
3085
3086 if (!css || !percpu_ref_is_dying(&css->refcnt))
3087 continue;
3088
3089 cgroup_get_live(dsct);
3090 prepare_to_wait(&dsct->offline_waitq, &wait,
3091 TASK_UNINTERRUPTIBLE);
3092
3093 mutex_unlock(&cgroup_mutex);
3094 schedule();
3095 finish_wait(&dsct->offline_waitq, &wait);
3096
3097 cgroup_put(dsct);
3098 goto restart;
3099 }
3100 }
3101 }
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111 static void cgroup_save_control(struct cgroup *cgrp)
3112 {
3113 struct cgroup *dsct;
3114 struct cgroup_subsys_state *d_css;
3115
3116 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3117 dsct->old_subtree_control = dsct->subtree_control;
3118 dsct->old_subtree_ss_mask = dsct->subtree_ss_mask;
3119 dsct->old_dom_cgrp = dsct->dom_cgrp;
3120 }
3121 }
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131 static void cgroup_propagate_control(struct cgroup *cgrp)
3132 {
3133 struct cgroup *dsct;
3134 struct cgroup_subsys_state *d_css;
3135
3136 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3137 dsct->subtree_control &= cgroup_control(dsct);
3138 dsct->subtree_ss_mask =
3139 cgroup_calc_subtree_ss_mask(dsct->subtree_control,
3140 cgroup_ss_mask(dsct));
3141 }
3142 }
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152 static void cgroup_restore_control(struct cgroup *cgrp)
3153 {
3154 struct cgroup *dsct;
3155 struct cgroup_subsys_state *d_css;
3156
3157 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
3158 dsct->subtree_control = dsct->old_subtree_control;
3159 dsct->subtree_ss_mask = dsct->old_subtree_ss_mask;
3160 dsct->dom_cgrp = dsct->old_dom_cgrp;
3161 }
3162 }
3163
3164 static bool css_visible(struct cgroup_subsys_state *css)
3165 {
3166 struct cgroup_subsys *ss = css->ss;
3167 struct cgroup *cgrp = css->cgroup;
3168
3169 if (cgroup_control(cgrp) & (1 << ss->id))
3170 return true;
3171 if (!(cgroup_ss_mask(cgrp) & (1 << ss->id)))
3172 return false;
3173 return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl;
3174 }
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189 static int cgroup_apply_control_enable(struct cgroup *cgrp)
3190 {
3191 struct cgroup *dsct;
3192 struct cgroup_subsys_state *d_css;
3193 struct cgroup_subsys *ss;
3194 int ssid, ret;
3195
3196 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3197 for_each_subsys(ss, ssid) {
3198 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
3199
3200 if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
3201 continue;
3202
3203 if (!css) {
3204 css = css_create(dsct, ss);
3205 if (IS_ERR(css))
3206 return PTR_ERR(css);
3207 }
3208
3209 WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt));
3210
3211 if (css_visible(css)) {
3212 ret = css_populate_dir(css);
3213 if (ret)
3214 return ret;
3215 }
3216 }
3217 }
3218
3219 return 0;
3220 }
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235 static void cgroup_apply_control_disable(struct cgroup *cgrp)
3236 {
3237 struct cgroup *dsct;
3238 struct cgroup_subsys_state *d_css;
3239 struct cgroup_subsys *ss;
3240 int ssid;
3241
3242 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
3243 for_each_subsys(ss, ssid) {
3244 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
3245
3246 if (!css)
3247 continue;
3248
3249 WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt));
3250
3251 if (css->parent &&
3252 !(cgroup_ss_mask(dsct) & (1 << ss->id))) {
3253 kill_css(css);
3254 } else if (!css_visible(css)) {
3255 css_clear_dir(css);
3256 if (ss->css_reset)
3257 ss->css_reset(css);
3258 }
3259 }
3260 }
3261 }
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280 static int cgroup_apply_control(struct cgroup *cgrp)
3281 {
3282 int ret;
3283
3284 cgroup_propagate_control(cgrp);
3285
3286 ret = cgroup_apply_control_enable(cgrp);
3287 if (ret)
3288 return ret;
3289
3290
3291
3292
3293
3294
3295 ret = cgroup_update_dfl_csses(cgrp);
3296 if (ret)
3297 return ret;
3298
3299 return 0;
3300 }
3301
3302
3303
3304
3305
3306
3307
3308
3309 static void cgroup_finalize_control(struct cgroup *cgrp, int ret)
3310 {
3311 if (ret) {
3312 cgroup_restore_control(cgrp);
3313 cgroup_propagate_control(cgrp);
3314 }
3315
3316 cgroup_apply_control_disable(cgrp);
3317 }
3318
3319 static int cgroup_vet_subtree_control_enable(struct cgroup *cgrp, u16 enable)
3320 {
3321 u16 domain_enable = enable & ~cgrp_dfl_threaded_ss_mask;
3322
3323
3324 if (!enable)
3325 return 0;
3326
3327
3328 if (!cgroup_is_valid_domain(cgrp->dom_cgrp))
3329 return -EOPNOTSUPP;
3330
3331
3332 if (cgroup_is_mixable(cgrp))
3333 return 0;
3334
3335 if (domain_enable) {
3336
3337 if (cgroup_is_thread_root(cgrp) || cgroup_is_threaded(cgrp))
3338 return -EOPNOTSUPP;
3339 } else {
3340
3341
3342
3343
3344
3345 if (cgroup_can_be_thread_root(cgrp) || cgroup_is_threaded(cgrp))
3346 return 0;
3347 }
3348
3349
3350
3351
3352
3353 if (cgroup_has_tasks(cgrp))
3354 return -EBUSY;
3355
3356 return 0;
3357 }
3358
3359
3360 static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
3361 char *buf, size_t nbytes,
3362 loff_t off)
3363 {
3364 u16 enable = 0, disable = 0;
3365 struct cgroup *cgrp, *child;
3366 struct cgroup_subsys *ss;
3367 char *tok;
3368 int ssid, ret;
3369
3370
3371
3372
3373
3374 buf = strstrip(buf);
3375 while ((tok = strsep(&buf, " "))) {
3376 if (tok[0] == '\0')
3377 continue;
3378 do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) {
3379 if (!cgroup_ssid_enabled(ssid) ||
3380 strcmp(tok + 1, ss->name))
3381 continue;
3382
3383 if (*tok == '+') {
3384 enable |= 1 << ssid;
3385 disable &= ~(1 << ssid);
3386 } else if (*tok == '-') {
3387 disable |= 1 << ssid;
3388 enable &= ~(1 << ssid);
3389 } else {
3390 return -EINVAL;
3391 }
3392 break;
3393 } while_each_subsys_mask();
3394 if (ssid == CGROUP_SUBSYS_COUNT)
3395 return -EINVAL;
3396 }
3397
3398 cgrp = cgroup_kn_lock_live(of->kn, true);
3399 if (!cgrp)
3400 return -ENODEV;
3401
3402 for_each_subsys(ss, ssid) {
3403 if (enable & (1 << ssid)) {
3404 if (cgrp->subtree_control & (1 << ssid)) {
3405 enable &= ~(1 << ssid);
3406 continue;
3407 }
3408
3409 if (!(cgroup_control(cgrp) & (1 << ssid))) {
3410 ret = -ENOENT;
3411 goto out_unlock;
3412 }
3413 } else if (disable & (1 << ssid)) {
3414 if (!(cgrp->subtree_control & (1 << ssid))) {
3415 disable &= ~(1 << ssid);
3416 continue;
3417 }
3418
3419
3420 cgroup_for_each_live_child(child, cgrp) {
3421 if (child->subtree_control & (1 << ssid)) {
3422 ret = -EBUSY;
3423 goto out_unlock;
3424 }
3425 }
3426 }
3427 }
3428
3429 if (!enable && !disable) {
3430 ret = 0;
3431 goto out_unlock;
3432 }
3433
3434 ret = cgroup_vet_subtree_control_enable(cgrp, enable);
3435 if (ret)
3436 goto out_unlock;
3437
3438
3439 cgroup_save_control(cgrp);
3440
3441 cgrp->subtree_control |= enable;
3442 cgrp->subtree_control &= ~disable;
3443
3444 ret = cgroup_apply_control(cgrp);
3445 cgroup_finalize_control(cgrp, ret);
3446 if (ret)
3447 goto out_unlock;
3448
3449 kernfs_activate(cgrp->kn);
3450 out_unlock:
3451 cgroup_kn_unlock(of->kn);
3452 return ret ?: nbytes;
3453 }
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464 static int cgroup_enable_threaded(struct cgroup *cgrp)
3465 {
3466 struct cgroup *parent = cgroup_parent(cgrp);
3467 struct cgroup *dom_cgrp = parent->dom_cgrp;
3468 struct cgroup *dsct;
3469 struct cgroup_subsys_state *d_css;
3470 int ret;
3471
3472 lockdep_assert_held(&cgroup_mutex);
3473
3474
3475 if (cgroup_is_threaded(cgrp))
3476 return 0;
3477
3478
3479
3480
3481
3482
3483
3484 if (cgroup_is_populated(cgrp) ||
3485 cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
3486 return -EOPNOTSUPP;
3487
3488
3489 if (!cgroup_is_valid_domain(dom_cgrp) ||
3490 !cgroup_can_be_thread_root(dom_cgrp))
3491 return -EOPNOTSUPP;
3492
3493
3494
3495
3496
3497 cgroup_save_control(cgrp);
3498
3499 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)
3500 if (dsct == cgrp || cgroup_is_threaded(dsct))
3501 dsct->dom_cgrp = dom_cgrp;
3502
3503 ret = cgroup_apply_control(cgrp);
3504 if (!ret)
3505 parent->nr_threaded_children++;
3506
3507 cgroup_finalize_control(cgrp, ret);
3508 return ret;
3509 }
3510
3511 static int cgroup_type_show(struct seq_file *seq, void *v)
3512 {
3513 struct cgroup *cgrp = seq_css(seq)->cgroup;
3514
3515 if (cgroup_is_threaded(cgrp))
3516 seq_puts(seq, "threaded\n");
3517 else if (!cgroup_is_valid_domain(cgrp))
3518 seq_puts(seq, "domain invalid\n");
3519 else if (cgroup_is_thread_root(cgrp))
3520 seq_puts(seq, "domain threaded\n");
3521 else
3522 seq_puts(seq, "domain\n");
3523
3524 return 0;
3525 }
3526
3527 static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf,
3528 size_t nbytes, loff_t off)
3529 {
3530 struct cgroup *cgrp;
3531 int ret;
3532
3533
3534 if (strcmp(strstrip(buf), "threaded"))
3535 return -EINVAL;
3536
3537
3538 cgrp = cgroup_kn_lock_live(of->kn, true);
3539 if (!cgrp)
3540 return -ENOENT;
3541
3542
3543 ret = cgroup_enable_threaded(cgrp);
3544
3545 cgroup_kn_unlock(of->kn);
3546 return ret ?: nbytes;
3547 }
3548
3549 static int cgroup_max_descendants_show(struct seq_file *seq, void *v)
3550 {
3551 struct cgroup *cgrp = seq_css(seq)->cgroup;
3552 int descendants = READ_ONCE(cgrp->max_descendants);
3553
3554 if (descendants == INT_MAX)
3555 seq_puts(seq, "max\n");
3556 else
3557 seq_printf(seq, "%d\n", descendants);
3558
3559 return 0;
3560 }
3561
3562 static ssize_t cgroup_max_descendants_write(struct kernfs_open_file *of,
3563 char *buf, size_t nbytes, loff_t off)
3564 {
3565 struct cgroup *cgrp;
3566 int descendants;
3567 ssize_t ret;
3568
3569 buf = strstrip(buf);
3570 if (!strcmp(buf, "max")) {
3571 descendants = INT_MAX;
3572 } else {
3573 ret = kstrtoint(buf, 0, &descendants);
3574 if (ret)
3575 return ret;
3576 }
3577
3578 if (descendants < 0)
3579 return -ERANGE;
3580
3581 cgrp = cgroup_kn_lock_live(of->kn, false);
3582 if (!cgrp)
3583 return -ENOENT;
3584
3585 cgrp->max_descendants = descendants;
3586
3587 cgroup_kn_unlock(of->kn);
3588
3589 return nbytes;
3590 }
3591
3592 static int cgroup_max_depth_show(struct seq_file *seq, void *v)
3593 {
3594 struct cgroup *cgrp = seq_css(seq)->cgroup;
3595 int depth = READ_ONCE(cgrp->max_depth);
3596
3597 if (depth == INT_MAX)
3598 seq_puts(seq, "max\n");
3599 else
3600 seq_printf(seq, "%d\n", depth);
3601
3602 return 0;
3603 }
3604
3605 static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
3606 char *buf, size_t nbytes, loff_t off)
3607 {
3608 struct cgroup *cgrp;
3609 ssize_t ret;
3610 int depth;
3611
3612 buf = strstrip(buf);
3613 if (!strcmp(buf, "max")) {
3614 depth = INT_MAX;
3615 } else {
3616 ret = kstrtoint(buf, 0, &depth);
3617 if (ret)
3618 return ret;
3619 }
3620
3621 if (depth < 0)
3622 return -ERANGE;
3623
3624 cgrp = cgroup_kn_lock_live(of->kn, false);
3625 if (!cgrp)
3626 return -ENOENT;
3627
3628 cgrp->max_depth = depth;
3629
3630 cgroup_kn_unlock(of->kn);
3631
3632 return nbytes;
3633 }
3634
3635 static int cgroup_events_show(struct seq_file *seq, void *v)
3636 {
3637 struct cgroup *cgrp = seq_css(seq)->cgroup;
3638
3639 seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
3640 seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));
3641
3642 return 0;
3643 }
3644
3645 static int cgroup_stat_show(struct seq_file *seq, void *v)
3646 {
3647 struct cgroup *cgroup = seq_css(seq)->cgroup;
3648
3649 seq_printf(seq, "nr_descendants %d\n",
3650 cgroup->nr_descendants);
3651 seq_printf(seq, "nr_dying_descendants %d\n",
3652 cgroup->nr_dying_descendants);
3653
3654 return 0;
3655 }
3656
3657 static int __maybe_unused cgroup_extra_stat_show(struct seq_file *seq,
3658 struct cgroup *cgrp, int ssid)
3659 {
3660 struct cgroup_subsys *ss = cgroup_subsys[ssid];
3661 struct cgroup_subsys_state *css;
3662 int ret;
3663
3664 if (!ss->css_extra_stat_show)
3665 return 0;
3666
3667 css = cgroup_tryget_css(cgrp, ss);
3668 if (!css)
3669 return 0;
3670
3671 ret = ss->css_extra_stat_show(seq, css);
3672 css_put(css);
3673 return ret;
3674 }
3675
3676 static int cpu_stat_show(struct seq_file *seq, void *v)
3677 {
3678 struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
3679 int ret = 0;
3680
3681 cgroup_base_stat_cputime_show(seq);
3682 #ifdef CONFIG_CGROUP_SCHED
3683 ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
3684 #endif
3685 return ret;
3686 }
3687
3688 #ifdef CONFIG_PSI
3689 static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
3690 {
3691 struct cgroup *cgrp = seq_css(seq)->cgroup;
3692 struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
3693
3694 return psi_show(seq, psi, PSI_IO);
3695 }
3696 static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
3697 {
3698 struct cgroup *cgrp = seq_css(seq)->cgroup;
3699 struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
3700
3701 return psi_show(seq, psi, PSI_MEM);
3702 }
3703 static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
3704 {
3705 struct cgroup *cgrp = seq_css(seq)->cgroup;
3706 struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
3707
3708 return psi_show(seq, psi, PSI_CPU);
3709 }
3710
3711 static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
3712 size_t nbytes, enum psi_res res)
3713 {
3714 struct cgroup_file_ctx *ctx = of->priv;
3715 struct psi_trigger *new;
3716 struct cgroup *cgrp;
3717 struct psi_group *psi;
3718
3719 cgrp = cgroup_kn_lock_live(of->kn, false);
3720 if (!cgrp)
3721 return -ENODEV;
3722
3723 cgroup_get(cgrp);
3724 cgroup_kn_unlock(of->kn);
3725
3726
3727 if (ctx->psi.trigger) {
3728 cgroup_put(cgrp);
3729 return -EBUSY;
3730 }
3731
3732 psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
3733 new = psi_trigger_create(psi, buf, res);
3734 if (IS_ERR(new)) {
3735 cgroup_put(cgrp);
3736 return PTR_ERR(new);
3737 }
3738
3739 smp_store_release(&ctx->psi.trigger, new);
3740 cgroup_put(cgrp);
3741
3742 return nbytes;
3743 }
3744
3745 static ssize_t cgroup_io_pressure_write(struct kernfs_open_file *of,
3746 char *buf, size_t nbytes,
3747 loff_t off)
3748 {
3749 return cgroup_pressure_write(of, buf, nbytes, PSI_IO);
3750 }
3751
3752 static ssize_t cgroup_memory_pressure_write(struct kernfs_open_file *of,
3753 char *buf, size_t nbytes,
3754 loff_t off)
3755 {
3756 return cgroup_pressure_write(of, buf, nbytes, PSI_MEM);
3757 }
3758
3759 static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
3760 char *buf, size_t nbytes,
3761 loff_t off)
3762 {
3763 return cgroup_pressure_write(of, buf, nbytes, PSI_CPU);
3764 }
3765
3766 static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
3767 poll_table *pt)
3768 {
3769 struct cgroup_file_ctx *ctx = of->priv;
3770
3771 return psi_trigger_poll(&ctx->psi.trigger, of->file, pt);
3772 }
3773
3774 static void cgroup_pressure_release(struct kernfs_open_file *of)
3775 {
3776 struct cgroup_file_ctx *ctx = of->priv;
3777
3778 psi_trigger_destroy(ctx->psi.trigger);
3779 }
3780
3781 bool cgroup_psi_enabled(void)
3782 {
3783 return (cgroup_feature_disable_mask & (1 << OPT_FEATURE_PRESSURE)) == 0;
3784 }
3785
3786 #else
3787 bool cgroup_psi_enabled(void)
3788 {
3789 return false;
3790 }
3791
3792 #endif
3793
3794 static int cgroup_freeze_show(struct seq_file *seq, void *v)
3795 {
3796 struct cgroup *cgrp = seq_css(seq)->cgroup;
3797
3798 seq_printf(seq, "%d\n", cgrp->freezer.freeze);
3799
3800 return 0;
3801 }
3802
3803 static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
3804 char *buf, size_t nbytes, loff_t off)
3805 {
3806 struct cgroup *cgrp;
3807 ssize_t ret;
3808 int freeze;
3809
3810 ret = kstrtoint(strstrip(buf), 0, &freeze);
3811 if (ret)
3812 return ret;
3813
3814 if (freeze < 0 || freeze > 1)
3815 return -ERANGE;
3816
3817 cgrp = cgroup_kn_lock_live(of->kn, false);
3818 if (!cgrp)
3819 return -ENOENT;
3820
3821 cgroup_freeze(cgrp, freeze);
3822
3823 cgroup_kn_unlock(of->kn);
3824
3825 return nbytes;
3826 }
3827
3828 static void __cgroup_kill(struct cgroup *cgrp)
3829 {
3830 struct css_task_iter it;
3831 struct task_struct *task;
3832
3833 lockdep_assert_held(&cgroup_mutex);
3834
3835 spin_lock_irq(&css_set_lock);
3836 set_bit(CGRP_KILL, &cgrp->flags);
3837 spin_unlock_irq(&css_set_lock);
3838
3839 css_task_iter_start(&cgrp->self, CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED, &it);
3840 while ((task = css_task_iter_next(&it))) {
3841
3842 if (task->flags & PF_KTHREAD)
3843 continue;
3844
3845
3846 if (__fatal_signal_pending(task))
3847 continue;
3848
3849 send_sig(SIGKILL, task, 0);
3850 }
3851 css_task_iter_end(&it);
3852
3853 spin_lock_irq(&css_set_lock);
3854 clear_bit(CGRP_KILL, &cgrp->flags);
3855 spin_unlock_irq(&css_set_lock);
3856 }
3857
3858 static void cgroup_kill(struct cgroup *cgrp)
3859 {
3860 struct cgroup_subsys_state *css;
3861 struct cgroup *dsct;
3862
3863 lockdep_assert_held(&cgroup_mutex);
3864
3865 cgroup_for_each_live_descendant_pre(dsct, css, cgrp)
3866 __cgroup_kill(dsct);
3867 }
3868
3869 static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf,
3870 size_t nbytes, loff_t off)
3871 {
3872 ssize_t ret = 0;
3873 int kill;
3874 struct cgroup *cgrp;
3875
3876 ret = kstrtoint(strstrip(buf), 0, &kill);
3877 if (ret)
3878 return ret;
3879
3880 if (kill != 1)
3881 return -ERANGE;
3882
3883 cgrp = cgroup_kn_lock_live(of->kn, false);
3884 if (!cgrp)
3885 return -ENOENT;
3886
3887
3888
3889
3890
3891
3892 if (cgroup_is_threaded(cgrp))
3893 ret = -EOPNOTSUPP;
3894 else
3895 cgroup_kill(cgrp);
3896
3897 cgroup_kn_unlock(of->kn);
3898
3899 return ret ?: nbytes;
3900 }
3901
3902 static int cgroup_file_open(struct kernfs_open_file *of)
3903 {
3904 struct cftype *cft = of_cft(of);
3905 struct cgroup_file_ctx *ctx;
3906 int ret;
3907
3908 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
3909 if (!ctx)
3910 return -ENOMEM;
3911
3912 ctx->ns = current->nsproxy->cgroup_ns;
3913 get_cgroup_ns(ctx->ns);
3914 of->priv = ctx;
3915
3916 if (!cft->open)
3917 return 0;
3918
3919 ret = cft->open(of);
3920 if (ret) {
3921 put_cgroup_ns(ctx->ns);
3922 kfree(ctx);
3923 }
3924 return ret;
3925 }
3926
3927 static void cgroup_file_release(struct kernfs_open_file *of)
3928 {
3929 struct cftype *cft = of_cft(of);
3930 struct cgroup_file_ctx *ctx = of->priv;
3931
3932 if (cft->release)
3933 cft->release(of);
3934 put_cgroup_ns(ctx->ns);
3935 kfree(ctx);
3936 }
3937
3938 static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
3939 size_t nbytes, loff_t off)
3940 {
3941 struct cgroup_file_ctx *ctx = of->priv;
3942 struct cgroup *cgrp = of->kn->parent->priv;
3943 struct cftype *cft = of_cft(of);
3944 struct cgroup_subsys_state *css;
3945 int ret;
3946
3947 if (!nbytes)
3948 return 0;
3949
3950
3951
3952
3953
3954
3955
3956 if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
3957 !(cft->flags & CFTYPE_NS_DELEGATABLE) &&
3958 ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp)
3959 return -EPERM;
3960
3961 if (cft->write)
3962 return cft->write(of, buf, nbytes, off);
3963
3964
3965
3966
3967
3968
3969
3970 rcu_read_lock();
3971 css = cgroup_css(cgrp, cft->ss);
3972 rcu_read_unlock();
3973
3974 if (cft->write_u64) {
3975 unsigned long long v;
3976 ret = kstrtoull(buf, 0, &v);
3977 if (!ret)
3978 ret = cft->write_u64(css, cft, v);
3979 } else if (cft->write_s64) {
3980 long long v;
3981 ret = kstrtoll(buf, 0, &v);
3982 if (!ret)
3983 ret = cft->write_s64(css, cft, v);
3984 } else {
3985 ret = -EINVAL;
3986 }
3987
3988 return ret ?: nbytes;
3989 }
3990
3991 static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt)
3992 {
3993 struct cftype *cft = of_cft(of);
3994
3995 if (cft->poll)
3996 return cft->poll(of, pt);
3997
3998 return kernfs_generic_poll(of, pt);
3999 }
4000
4001 static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
4002 {
4003 return seq_cft(seq)->seq_start(seq, ppos);
4004 }
4005
4006 static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
4007 {
4008 return seq_cft(seq)->seq_next(seq, v, ppos);
4009 }
4010
4011 static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
4012 {
4013 if (seq_cft(seq)->seq_stop)
4014 seq_cft(seq)->seq_stop(seq, v);
4015 }
4016
4017 static int cgroup_seqfile_show(struct seq_file *m, void *arg)
4018 {
4019 struct cftype *cft = seq_cft(m);
4020 struct cgroup_subsys_state *css = seq_css(m);
4021
4022 if (cft->seq_show)
4023 return cft->seq_show(m, arg);
4024
4025 if (cft->read_u64)
4026 seq_printf(m, "%llu\n", cft->read_u64(css, cft));
4027 else if (cft->read_s64)
4028 seq_printf(m, "%lld\n", cft->read_s64(css, cft));
4029 else
4030 return -EINVAL;
4031 return 0;
4032 }
4033
4034 static struct kernfs_ops cgroup_kf_single_ops = {
4035 .atomic_write_len = PAGE_SIZE,
4036 .open = cgroup_file_open,
4037 .release = cgroup_file_release,
4038 .write = cgroup_file_write,
4039 .poll = cgroup_file_poll,
4040 .seq_show = cgroup_seqfile_show,
4041 };
4042
4043 static struct kernfs_ops cgroup_kf_ops = {
4044 .atomic_write_len = PAGE_SIZE,
4045 .open = cgroup_file_open,
4046 .release = cgroup_file_release,
4047 .write = cgroup_file_write,
4048 .poll = cgroup_file_poll,
4049 .seq_start = cgroup_seqfile_start,
4050 .seq_next = cgroup_seqfile_next,
4051 .seq_stop = cgroup_seqfile_stop,
4052 .seq_show = cgroup_seqfile_show,
4053 };
4054
4055
4056 static int cgroup_kn_set_ugid(struct kernfs_node *kn)
4057 {
4058 struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
4059 .ia_uid = current_fsuid(),
4060 .ia_gid = current_fsgid(), };
4061
4062 if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
4063 gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
4064 return 0;
4065
4066 return kernfs_setattr(kn, &iattr);
4067 }
4068
4069 static void cgroup_file_notify_timer(struct timer_list *timer)
4070 {
4071 cgroup_file_notify(container_of(timer, struct cgroup_file,
4072 notify_timer));
4073 }
4074
4075 static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
4076 struct cftype *cft)
4077 {
4078 char name[CGROUP_FILE_NAME_MAX];
4079 struct kernfs_node *kn;
4080 struct lock_class_key *key = NULL;
4081 int ret;
4082
4083 #ifdef CONFIG_DEBUG_LOCK_ALLOC
4084 key = &cft->lockdep_key;
4085 #endif
4086 kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
4087 cgroup_file_mode(cft),
4088 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
4089 0, cft->kf_ops, cft,
4090 NULL, key);
4091 if (IS_ERR(kn))
4092 return PTR_ERR(kn);
4093
4094 ret = cgroup_kn_set_ugid(kn);
4095 if (ret) {
4096 kernfs_remove(kn);
4097 return ret;
4098 }
4099
4100 if (cft->file_offset) {
4101 struct cgroup_file *cfile = (void *)css + cft->file_offset;
4102
4103 timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0);
4104
4105 spin_lock_irq(&cgroup_file_kn_lock);
4106 cfile->kn = kn;
4107 spin_unlock_irq(&cgroup_file_kn_lock);
4108 }
4109
4110 return 0;
4111 }
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123 static int cgroup_addrm_files(struct cgroup_subsys_state *css,
4124 struct cgroup *cgrp, struct cftype cfts[],
4125 bool is_add)
4126 {
4127 struct cftype *cft, *cft_end = NULL;
4128 int ret = 0;
4129
4130 lockdep_assert_held(&cgroup_mutex);
4131
4132 restart:
4133 for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
4134
4135 if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
4136 continue;
4137 if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
4138 continue;
4139 if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
4140 continue;
4141 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
4142 continue;
4143 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
4144 continue;
4145 if ((cft->flags & CFTYPE_DEBUG) && !cgroup_debug)
4146 continue;
4147 if (is_add) {
4148 ret = cgroup_add_file(css, cgrp, cft);
4149 if (ret) {
4150 pr_warn("%s: failed to add %s, err=%d\n",
4151 __func__, cft->name, ret);
4152 cft_end = cft;
4153 is_add = false;
4154 goto restart;
4155 }
4156 } else {
4157 cgroup_rm_file(cgrp, cft);
4158 }
4159 }
4160 return ret;
4161 }
4162
4163 static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
4164 {
4165 struct cgroup_subsys *ss = cfts[0].ss;
4166 struct cgroup *root = &ss->root->cgrp;
4167 struct cgroup_subsys_state *css;
4168 int ret = 0;
4169
4170 lockdep_assert_held(&cgroup_mutex);
4171
4172
4173 css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
4174 struct cgroup *cgrp = css->cgroup;
4175
4176 if (!(css->flags & CSS_VISIBLE))
4177 continue;
4178
4179 ret = cgroup_addrm_files(css, cgrp, cfts, is_add);
4180 if (ret)
4181 break;
4182 }
4183
4184 if (is_add && !ret)
4185 kernfs_activate(root->kn);
4186 return ret;
4187 }
4188
4189 static void cgroup_exit_cftypes(struct cftype *cfts)
4190 {
4191 struct cftype *cft;
4192
4193 for (cft = cfts; cft->name[0] != '\0'; cft++) {
4194
4195 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE)
4196 kfree(cft->kf_ops);
4197 cft->kf_ops = NULL;
4198 cft->ss = NULL;
4199
4200
4201 cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
4202 }
4203 }
4204
4205 static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
4206 {
4207 struct cftype *cft;
4208
4209 for (cft = cfts; cft->name[0] != '\0'; cft++) {
4210 struct kernfs_ops *kf_ops;
4211
4212 WARN_ON(cft->ss || cft->kf_ops);
4213
4214 if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
4215 continue;
4216
4217 if (cft->seq_start)
4218 kf_ops = &cgroup_kf_ops;
4219 else
4220 kf_ops = &cgroup_kf_single_ops;
4221
4222
4223
4224
4225
4226 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
4227 kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
4228 if (!kf_ops) {
4229 cgroup_exit_cftypes(cfts);
4230 return -ENOMEM;
4231 }
4232 kf_ops->atomic_write_len = cft->max_write_len;
4233 }
4234
4235 cft->kf_ops = kf_ops;
4236 cft->ss = ss;
4237 }
4238
4239 return 0;
4240 }
4241
4242 static int cgroup_rm_cftypes_locked(struct cftype *cfts)
4243 {
4244 lockdep_assert_held(&cgroup_mutex);
4245
4246 if (!cfts || !cfts[0].ss)
4247 return -ENOENT;
4248
4249 list_del(&cfts->node);
4250 cgroup_apply_cftypes(cfts, false);
4251 cgroup_exit_cftypes(cfts);
4252 return 0;
4253 }
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266 int cgroup_rm_cftypes(struct cftype *cfts)
4267 {
4268 int ret;
4269
4270 mutex_lock(&cgroup_mutex);
4271 ret = cgroup_rm_cftypes_locked(cfts);
4272 mutex_unlock(&cgroup_mutex);
4273 return ret;
4274 }
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290 static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
4291 {
4292 int ret;
4293
4294 if (!cgroup_ssid_enabled(ss->id))
4295 return 0;
4296
4297 if (!cfts || cfts[0].name[0] == '\0')
4298 return 0;
4299
4300 ret = cgroup_init_cftypes(ss, cfts);
4301 if (ret)
4302 return ret;
4303
4304 mutex_lock(&cgroup_mutex);
4305
4306 list_add_tail(&cfts->node, &ss->cfts);
4307 ret = cgroup_apply_cftypes(cfts, true);
4308 if (ret)
4309 cgroup_rm_cftypes_locked(cfts);
4310
4311 mutex_unlock(&cgroup_mutex);
4312 return ret;
4313 }
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323 int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
4324 {
4325 struct cftype *cft;
4326
4327 for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
4328 cft->flags |= __CFTYPE_ONLY_ON_DFL;
4329 return cgroup_add_cftypes(ss, cfts);
4330 }
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340 int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
4341 {
4342 struct cftype *cft;
4343
4344 for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
4345 cft->flags |= __CFTYPE_NOT_ON_DFL;
4346 return cgroup_add_cftypes(ss, cfts);
4347 }
4348
4349
4350
4351
4352
4353
4354
4355 void cgroup_file_notify(struct cgroup_file *cfile)
4356 {
4357 unsigned long flags;
4358
4359 spin_lock_irqsave(&cgroup_file_kn_lock, flags);
4360 if (cfile->kn) {
4361 unsigned long last = cfile->notified_at;
4362 unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV;
4363
4364 if (time_in_range(jiffies, last, next)) {
4365 timer_reduce(&cfile->notify_timer, next);
4366 } else {
4367 kernfs_notify(cfile->kn);
4368 cfile->notified_at = jiffies;
4369 }
4370 }
4371 spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
4372 }
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391 struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
4392 struct cgroup_subsys_state *parent)
4393 {
4394 struct cgroup_subsys_state *next;
4395
4396 cgroup_assert_mutex_or_rcu_locked();
4397
4398
4399
4400
4401
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413
4414
4415
4416
4417
4418 if (!pos) {
4419 next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
4420 } else if (likely(!(pos->flags & CSS_RELEASED))) {
4421 next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
4422 } else {
4423 list_for_each_entry_rcu(next, &parent->children, sibling,
4424 lockdep_is_held(&cgroup_mutex))
4425 if (next->serial_nr > pos->serial_nr)
4426 break;
4427 }
4428
4429
4430
4431
4432
4433 if (&next->sibling != &parent->children)
4434 return next;
4435 return NULL;
4436 }
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
4452
4453
4454
4455
4456
4457
4458
4459 struct cgroup_subsys_state *
4460 css_next_descendant_pre(struct cgroup_subsys_state *pos,
4461 struct cgroup_subsys_state *root)
4462 {
4463 struct cgroup_subsys_state *next;
4464
4465 cgroup_assert_mutex_or_rcu_locked();
4466
4467
4468 if (!pos)
4469 return root;
4470
4471
4472 next = css_next_child(NULL, pos);
4473 if (next)
4474 return next;
4475
4476
4477 while (pos != root) {
4478 next = css_next_child(pos, pos->parent);
4479 if (next)
4480 return next;
4481 pos = pos->parent;
4482 }
4483
4484 return NULL;
4485 }
4486 EXPORT_SYMBOL_GPL(css_next_descendant_pre);
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501 struct cgroup_subsys_state *
4502 css_rightmost_descendant(struct cgroup_subsys_state *pos)
4503 {
4504 struct cgroup_subsys_state *last, *tmp;
4505
4506 cgroup_assert_mutex_or_rcu_locked();
4507
4508 do {
4509 last = pos;
4510
4511 pos = NULL;
4512 css_for_each_child(tmp, last)
4513 pos = tmp;
4514 } while (pos);
4515
4516 return last;
4517 }
4518
4519 static struct cgroup_subsys_state *
4520 css_leftmost_descendant(struct cgroup_subsys_state *pos)
4521 {
4522 struct cgroup_subsys_state *last;
4523
4524 do {
4525 last = pos;
4526 pos = css_next_child(NULL, pos);
4527 } while (pos);
4528
4529 return last;
4530 }
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554 struct cgroup_subsys_state *
4555 css_next_descendant_post(struct cgroup_subsys_state *pos,
4556 struct cgroup_subsys_state *root)
4557 {
4558 struct cgroup_subsys_state *next;
4559
4560 cgroup_assert_mutex_or_rcu_locked();
4561
4562
4563 if (!pos)
4564 return css_leftmost_descendant(root);
4565
4566
4567 if (pos == root)
4568 return NULL;
4569
4570
4571 next = css_next_child(pos, pos->parent);
4572 if (next)
4573 return css_leftmost_descendant(next);
4574
4575
4576 return pos->parent;
4577 }
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587 bool css_has_online_children(struct cgroup_subsys_state *css)
4588 {
4589 struct cgroup_subsys_state *child;
4590 bool ret = false;
4591
4592 rcu_read_lock();
4593 css_for_each_child(child, css) {
4594 if (child->flags & CSS_ONLINE) {
4595 ret = true;
4596 break;
4597 }
4598 }
4599 rcu_read_unlock();
4600 return ret;
4601 }
4602
4603 static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it)
4604 {
4605 struct list_head *l;
4606 struct cgrp_cset_link *link;
4607 struct css_set *cset;
4608
4609 lockdep_assert_held(&css_set_lock);
4610
4611
4612 if (it->tcset_pos) {
4613 l = it->tcset_pos->next;
4614
4615 if (l != it->tcset_head) {
4616 it->tcset_pos = l;
4617 return container_of(l, struct css_set,
4618 threaded_csets_node);
4619 }
4620
4621 it->tcset_pos = NULL;
4622 }
4623
4624
4625 l = it->cset_pos;
4626 l = l->next;
4627 if (l == it->cset_head) {
4628 it->cset_pos = NULL;
4629 return NULL;
4630 }
4631
4632 if (it->ss) {
4633 cset = container_of(l, struct css_set, e_cset_node[it->ss->id]);
4634 } else {
4635 link = list_entry(l, struct cgrp_cset_link, cset_link);
4636 cset = link->cset;
4637 }
4638
4639 it->cset_pos = l;
4640
4641
4642 if (it->flags & CSS_TASK_ITER_THREADED) {
4643 if (it->cur_dcset)
4644 put_css_set_locked(it->cur_dcset);
4645 it->cur_dcset = cset;
4646 get_css_set(cset);
4647
4648 it->tcset_head = &cset->threaded_csets;
4649 it->tcset_pos = &cset->threaded_csets;
4650 }
4651
4652 return cset;
4653 }
4654
4655
4656
4657
4658
4659
4660
4661 static void css_task_iter_advance_css_set(struct css_task_iter *it)
4662 {
4663 struct css_set *cset;
4664
4665 lockdep_assert_held(&css_set_lock);
4666
4667
4668 while ((cset = css_task_iter_next_css_set(it))) {
4669 if (!list_empty(&cset->tasks)) {
4670 it->cur_tasks_head = &cset->tasks;
4671 break;
4672 } else if (!list_empty(&cset->mg_tasks)) {
4673 it->cur_tasks_head = &cset->mg_tasks;
4674 break;
4675 } else if (!list_empty(&cset->dying_tasks)) {
4676 it->cur_tasks_head = &cset->dying_tasks;
4677 break;
4678 }
4679 }
4680 if (!cset) {
4681 it->task_pos = NULL;
4682 return;
4683 }
4684 it->task_pos = it->cur_tasks_head->next;
4685
4686
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701 if (it->cur_cset) {
4702 list_del(&it->iters_node);
4703 put_css_set_locked(it->cur_cset);
4704 }
4705 get_css_set(cset);
4706 it->cur_cset = cset;
4707 list_add(&it->iters_node, &cset->task_iters);
4708 }
4709
4710 static void css_task_iter_skip(struct css_task_iter *it,
4711 struct task_struct *task)
4712 {
4713 lockdep_assert_held(&css_set_lock);
4714
4715 if (it->task_pos == &task->cg_list) {
4716 it->task_pos = it->task_pos->next;
4717 it->flags |= CSS_TASK_ITER_SKIPPED;
4718 }
4719 }
4720
4721 static void css_task_iter_advance(struct css_task_iter *it)
4722 {
4723 struct task_struct *task;
4724
4725 lockdep_assert_held(&css_set_lock);
4726 repeat:
4727 if (it->task_pos) {
4728
4729
4730
4731
4732
4733 if (it->flags & CSS_TASK_ITER_SKIPPED)
4734 it->flags &= ~CSS_TASK_ITER_SKIPPED;
4735 else
4736 it->task_pos = it->task_pos->next;
4737
4738 if (it->task_pos == &it->cur_cset->tasks) {
4739 it->cur_tasks_head = &it->cur_cset->mg_tasks;
4740 it->task_pos = it->cur_tasks_head->next;
4741 }
4742 if (it->task_pos == &it->cur_cset->mg_tasks) {
4743 it->cur_tasks_head = &it->cur_cset->dying_tasks;
4744 it->task_pos = it->cur_tasks_head->next;
4745 }
4746 if (it->task_pos == &it->cur_cset->dying_tasks)
4747 css_task_iter_advance_css_set(it);
4748 } else {
4749
4750 css_task_iter_advance_css_set(it);
4751 }
4752
4753 if (!it->task_pos)
4754 return;
4755
4756 task = list_entry(it->task_pos, struct task_struct, cg_list);
4757
4758 if (it->flags & CSS_TASK_ITER_PROCS) {
4759
4760 if (!thread_group_leader(task))
4761 goto repeat;
4762
4763
4764 if (it->cur_tasks_head == &it->cur_cset->dying_tasks &&
4765 !atomic_read(&task->signal->live))
4766 goto repeat;
4767 } else {
4768
4769 if (it->cur_tasks_head == &it->cur_cset->dying_tasks)
4770 goto repeat;
4771 }
4772 }
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
4784
4785 void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
4786 struct css_task_iter *it)
4787 {
4788 memset(it, 0, sizeof(*it));
4789
4790 spin_lock_irq(&css_set_lock);
4791
4792 it->ss = css->ss;
4793 it->flags = flags;
4794
4795 if (CGROUP_HAS_SUBSYS_CONFIG && it->ss)
4796 it->cset_pos = &css->cgroup->e_csets[css->ss->id];
4797 else
4798 it->cset_pos = &css->cgroup->cset_links;
4799
4800 it->cset_head = it->cset_pos;
4801
4802 css_task_iter_advance(it);
4803
4804 spin_unlock_irq(&css_set_lock);
4805 }
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815 struct task_struct *css_task_iter_next(struct css_task_iter *it)
4816 {
4817 if (it->cur_task) {
4818 put_task_struct(it->cur_task);
4819 it->cur_task = NULL;
4820 }
4821
4822 spin_lock_irq(&css_set_lock);
4823
4824
4825 if (it->flags & CSS_TASK_ITER_SKIPPED)
4826 css_task_iter_advance(it);
4827
4828 if (it->task_pos) {
4829 it->cur_task = list_entry(it->task_pos, struct task_struct,
4830 cg_list);
4831 get_task_struct(it->cur_task);
4832 css_task_iter_advance(it);
4833 }
4834
4835 spin_unlock_irq(&css_set_lock);
4836
4837 return it->cur_task;
4838 }
4839
4840
4841
4842
4843
4844
4845
4846 void css_task_iter_end(struct css_task_iter *it)
4847 {
4848 if (it->cur_cset) {
4849 spin_lock_irq(&css_set_lock);
4850 list_del(&it->iters_node);
4851 put_css_set_locked(it->cur_cset);
4852 spin_unlock_irq(&css_set_lock);
4853 }
4854
4855 if (it->cur_dcset)
4856 put_css_set(it->cur_dcset);
4857
4858 if (it->cur_task)
4859 put_task_struct(it->cur_task);
4860 }
4861
4862 static void cgroup_procs_release(struct kernfs_open_file *of)
4863 {
4864 struct cgroup_file_ctx *ctx = of->priv;
4865
4866 if (ctx->procs.started)
4867 css_task_iter_end(&ctx->procs.iter);
4868 }
4869
4870 static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
4871 {
4872 struct kernfs_open_file *of = s->private;
4873 struct cgroup_file_ctx *ctx = of->priv;
4874
4875 if (pos)
4876 (*pos)++;
4877
4878 return css_task_iter_next(&ctx->procs.iter);
4879 }
4880
4881 static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
4882 unsigned int iter_flags)
4883 {
4884 struct kernfs_open_file *of = s->private;
4885 struct cgroup *cgrp = seq_css(s)->cgroup;
4886 struct cgroup_file_ctx *ctx = of->priv;
4887 struct css_task_iter *it = &ctx->procs.iter;
4888
4889
4890
4891
4892
4893 if (!ctx->procs.started) {
4894 if (WARN_ON_ONCE((*pos)))
4895 return ERR_PTR(-EINVAL);
4896 css_task_iter_start(&cgrp->self, iter_flags, it);
4897 ctx->procs.started = true;
4898 } else if (!(*pos)) {
4899 css_task_iter_end(it);
4900 css_task_iter_start(&cgrp->self, iter_flags, it);
4901 } else
4902 return it->cur_task;
4903
4904 return cgroup_procs_next(s, NULL, NULL);
4905 }
4906
4907 static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
4908 {
4909 struct cgroup *cgrp = seq_css(s)->cgroup;
4910
4911
4912
4913
4914
4915
4916
4917 if (cgroup_is_threaded(cgrp))
4918 return ERR_PTR(-EOPNOTSUPP);
4919
4920 return __cgroup_procs_start(s, pos, CSS_TASK_ITER_PROCS |
4921 CSS_TASK_ITER_THREADED);
4922 }
4923
4924 static int cgroup_procs_show(struct seq_file *s, void *v)
4925 {
4926 seq_printf(s, "%d\n", task_pid_vnr(v));
4927 return 0;
4928 }
4929
4930 static int cgroup_may_write(const struct cgroup *cgrp, struct super_block *sb)
4931 {
4932 int ret;
4933 struct inode *inode;
4934
4935 lockdep_assert_held(&cgroup_mutex);
4936
4937 inode = kernfs_get_inode(sb, cgrp->procs_file.kn);
4938 if (!inode)
4939 return -ENOMEM;
4940
4941 ret = inode_permission(&init_user_ns, inode, MAY_WRITE);
4942 iput(inode);
4943 return ret;
4944 }
4945
4946 static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
4947 struct cgroup *dst_cgrp,
4948 struct super_block *sb,
4949 struct cgroup_namespace *ns)
4950 {
4951 struct cgroup *com_cgrp = src_cgrp;
4952 int ret;
4953
4954 lockdep_assert_held(&cgroup_mutex);
4955
4956
4957 while (!cgroup_is_descendant(dst_cgrp, com_cgrp))
4958 com_cgrp = cgroup_parent(com_cgrp);
4959
4960
4961 ret = cgroup_may_write(com_cgrp, sb);
4962 if (ret)
4963 return ret;
4964
4965
4966
4967
4968
4969 if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) &&
4970 (!cgroup_is_descendant(src_cgrp, ns->root_cset->dfl_cgrp) ||
4971 !cgroup_is_descendant(dst_cgrp, ns->root_cset->dfl_cgrp)))
4972 return -ENOENT;
4973
4974 return 0;
4975 }
4976
4977 static int cgroup_attach_permissions(struct cgroup *src_cgrp,
4978 struct cgroup *dst_cgrp,
4979 struct super_block *sb, bool threadgroup,
4980 struct cgroup_namespace *ns)
4981 {
4982 int ret = 0;
4983
4984 ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns);
4985 if (ret)
4986 return ret;
4987
4988 ret = cgroup_migrate_vet_dst(dst_cgrp);
4989 if (ret)
4990 return ret;
4991
4992 if (!threadgroup && (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp))
4993 ret = -EOPNOTSUPP;
4994
4995 return ret;
4996 }
4997
4998 static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
4999 bool threadgroup)
5000 {
5001 struct cgroup_file_ctx *ctx = of->priv;
5002 struct cgroup *src_cgrp, *dst_cgrp;
5003 struct task_struct *task;
5004 const struct cred *saved_cred;
5005 ssize_t ret;
5006 bool threadgroup_locked;
5007
5008 dst_cgrp = cgroup_kn_lock_live(of->kn, false);
5009 if (!dst_cgrp)
5010 return -ENODEV;
5011
5012 task = cgroup_procs_write_start(buf, threadgroup, &threadgroup_locked);
5013 ret = PTR_ERR_OR_ZERO(task);
5014 if (ret)
5015 goto out_unlock;
5016
5017
5018 spin_lock_irq(&css_set_lock);
5019 src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
5020 spin_unlock_irq(&css_set_lock);
5021
5022
5023
5024
5025
5026
5027 saved_cred = override_creds(of->file->f_cred);
5028 ret = cgroup_attach_permissions(src_cgrp, dst_cgrp,
5029 of->file->f_path.dentry->d_sb,
5030 threadgroup, ctx->ns);
5031 revert_creds(saved_cred);
5032 if (ret)
5033 goto out_finish;
5034
5035 ret = cgroup_attach_task(dst_cgrp, task, threadgroup);
5036
5037 out_finish:
5038 cgroup_procs_write_finish(task, threadgroup_locked);
5039 out_unlock:
5040 cgroup_kn_unlock(of->kn);
5041
5042 return ret;
5043 }
5044
5045 static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
5046 char *buf, size_t nbytes, loff_t off)
5047 {
5048 return __cgroup_procs_write(of, buf, true) ?: nbytes;
5049 }
5050
5051 static void *cgroup_threads_start(struct seq_file *s, loff_t *pos)
5052 {
5053 return __cgroup_procs_start(s, pos, 0);
5054 }
5055
5056 static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
5057 char *buf, size_t nbytes, loff_t off)
5058 {
5059 return __cgroup_procs_write(of, buf, false) ?: nbytes;
5060 }
5061
5062
5063 static struct cftype cgroup_base_files[] = {
5064 {
5065 .name = "cgroup.type",
5066 .flags = CFTYPE_NOT_ON_ROOT,
5067 .seq_show = cgroup_type_show,
5068 .write = cgroup_type_write,
5069 },
5070 {
5071 .name = "cgroup.procs",
5072 .flags = CFTYPE_NS_DELEGATABLE,
5073 .file_offset = offsetof(struct cgroup, procs_file),
5074 .release = cgroup_procs_release,
5075 .seq_start = cgroup_procs_start,
5076 .seq_next = cgroup_procs_next,
5077 .seq_show = cgroup_procs_show,
5078 .write = cgroup_procs_write,
5079 },
5080 {
5081 .name = "cgroup.threads",
5082 .flags = CFTYPE_NS_DELEGATABLE,
5083 .release = cgroup_procs_release,
5084 .seq_start = cgroup_threads_start,
5085 .seq_next = cgroup_procs_next,
5086 .seq_show = cgroup_procs_show,
5087 .write = cgroup_threads_write,
5088 },
5089 {
5090 .name = "cgroup.controllers",
5091 .seq_show = cgroup_controllers_show,
5092 },
5093 {
5094 .name = "cgroup.subtree_control",
5095 .flags = CFTYPE_NS_DELEGATABLE,
5096 .seq_show = cgroup_subtree_control_show,
5097 .write = cgroup_subtree_control_write,
5098 },
5099 {
5100 .name = "cgroup.events",
5101 .flags = CFTYPE_NOT_ON_ROOT,
5102 .file_offset = offsetof(struct cgroup, events_file),
5103 .seq_show = cgroup_events_show,
5104 },
5105 {
5106 .name = "cgroup.max.descendants",
5107 .seq_show = cgroup_max_descendants_show,
5108 .write = cgroup_max_descendants_write,
5109 },
5110 {
5111 .name = "cgroup.max.depth",
5112 .seq_show = cgroup_max_depth_show,
5113 .write = cgroup_max_depth_write,
5114 },
5115 {
5116 .name = "cgroup.stat",
5117 .seq_show = cgroup_stat_show,
5118 },
5119 {
5120 .name = "cgroup.freeze",
5121 .flags = CFTYPE_NOT_ON_ROOT,
5122 .seq_show = cgroup_freeze_show,
5123 .write = cgroup_freeze_write,
5124 },
5125 {
5126 .name = "cgroup.kill",
5127 .flags = CFTYPE_NOT_ON_ROOT,
5128 .write = cgroup_kill_write,
5129 },
5130 {
5131 .name = "cpu.stat",
5132 .seq_show = cpu_stat_show,
5133 },
5134 #ifdef CONFIG_PSI
5135 {
5136 .name = "io.pressure",
5137 .flags = CFTYPE_PRESSURE,
5138 .seq_show = cgroup_io_pressure_show,
5139 .write = cgroup_io_pressure_write,
5140 .poll = cgroup_pressure_poll,
5141 .release = cgroup_pressure_release,
5142 },
5143 {
5144 .name = "memory.pressure",
5145 .flags = CFTYPE_PRESSURE,
5146 .seq_show = cgroup_memory_pressure_show,
5147 .write = cgroup_memory_pressure_write,
5148 .poll = cgroup_pressure_poll,
5149 .release = cgroup_pressure_release,
5150 },
5151 {
5152 .name = "cpu.pressure",
5153 .flags = CFTYPE_PRESSURE,
5154 .seq_show = cgroup_cpu_pressure_show,
5155 .write = cgroup_cpu_pressure_write,
5156 .poll = cgroup_pressure_poll,
5157 .release = cgroup_pressure_release,
5158 },
5159 #endif
5160 { }
5161 };
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
5183
5184
5185 static void css_free_rwork_fn(struct work_struct *work)
5186 {
5187 struct cgroup_subsys_state *css = container_of(to_rcu_work(work),
5188 struct cgroup_subsys_state, destroy_rwork);
5189 struct cgroup_subsys *ss = css->ss;
5190 struct cgroup *cgrp = css->cgroup;
5191
5192 percpu_ref_exit(&css->refcnt);
5193
5194 if (ss) {
5195
5196 struct cgroup_subsys_state *parent = css->parent;
5197 int id = css->id;
5198
5199 ss->css_free(css);
5200 cgroup_idr_remove(&ss->css_idr, id);
5201 cgroup_put(cgrp);
5202
5203 if (parent)
5204 css_put(parent);
5205 } else {
5206
5207 atomic_dec(&cgrp->root->nr_cgrps);
5208 cgroup1_pidlist_destroy_all(cgrp);
5209 cancel_work_sync(&cgrp->release_agent_work);
5210
5211 if (cgroup_parent(cgrp)) {
5212
5213
5214
5215
5216
5217
5218 cgroup_put(cgroup_parent(cgrp));
5219 kernfs_put(cgrp->kn);
5220 psi_cgroup_free(cgrp);
5221 cgroup_rstat_exit(cgrp);
5222 kfree(cgrp);
5223 } else {
5224
5225
5226
5227
5228
5229 cgroup_destroy_root(cgrp->root);
5230 }
5231 }
5232 }
5233
5234 static void css_release_work_fn(struct work_struct *work)
5235 {
5236 struct cgroup_subsys_state *css =
5237 container_of(work, struct cgroup_subsys_state, destroy_work);
5238 struct cgroup_subsys *ss = css->ss;
5239 struct cgroup *cgrp = css->cgroup;
5240
5241 mutex_lock(&cgroup_mutex);
5242
5243 css->flags |= CSS_RELEASED;
5244 list_del_rcu(&css->sibling);
5245
5246 if (ss) {
5247
5248 if (!list_empty(&css->rstat_css_node)) {
5249 cgroup_rstat_flush(cgrp);
5250 list_del_rcu(&css->rstat_css_node);
5251 }
5252
5253 cgroup_idr_replace(&ss->css_idr, NULL, css->id);
5254 if (ss->css_released)
5255 ss->css_released(css);
5256 } else {
5257 struct cgroup *tcgrp;
5258
5259
5260 TRACE_CGROUP_PATH(release, cgrp);
5261
5262 cgroup_rstat_flush(cgrp);
5263
5264 spin_lock_irq(&css_set_lock);
5265 for (tcgrp = cgroup_parent(cgrp); tcgrp;
5266 tcgrp = cgroup_parent(tcgrp))
5267 tcgrp->nr_dying_descendants--;
5268 spin_unlock_irq(&css_set_lock);
5269
5270
5271
5272
5273
5274
5275
5276
5277 if (cgrp->kn)
5278 RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
5279 NULL);
5280 }
5281
5282 mutex_unlock(&cgroup_mutex);
5283
5284 INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
5285 queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
5286 }
5287
5288 static void css_release(struct percpu_ref *ref)
5289 {
5290 struct cgroup_subsys_state *css =
5291 container_of(ref, struct cgroup_subsys_state, refcnt);
5292
5293 INIT_WORK(&css->destroy_work, css_release_work_fn);
5294 queue_work(cgroup_destroy_wq, &css->destroy_work);
5295 }
5296
5297 static void init_and_link_css(struct cgroup_subsys_state *css,
5298 struct cgroup_subsys *ss, struct cgroup *cgrp)
5299 {
5300 lockdep_assert_held(&cgroup_mutex);
5301
5302 cgroup_get_live(cgrp);
5303
5304 memset(css, 0, sizeof(*css));
5305 css->cgroup = cgrp;
5306 css->ss = ss;
5307 css->id = -1;
5308 INIT_LIST_HEAD(&css->sibling);
5309 INIT_LIST_HEAD(&css->children);
5310 INIT_LIST_HEAD(&css->rstat_css_node);
5311 css->serial_nr = css_serial_nr_next++;
5312 atomic_set(&css->online_cnt, 0);
5313
5314 if (cgroup_parent(cgrp)) {
5315 css->parent = cgroup_css(cgroup_parent(cgrp), ss);
5316 css_get(css->parent);
5317 }
5318
5319 if (ss->css_rstat_flush)
5320 list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list);
5321
5322 BUG_ON(cgroup_css(cgrp, ss));
5323 }
5324
5325
5326 static int online_css(struct cgroup_subsys_state *css)
5327 {
5328 struct cgroup_subsys *ss = css->ss;
5329 int ret = 0;
5330
5331 lockdep_assert_held(&cgroup_mutex);
5332
5333 if (ss->css_online)
5334 ret = ss->css_online(css);
5335 if (!ret) {
5336 css->flags |= CSS_ONLINE;
5337 rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
5338
5339 atomic_inc(&css->online_cnt);
5340 if (css->parent)
5341 atomic_inc(&css->parent->online_cnt);
5342 }
5343 return ret;
5344 }
5345
5346
5347 static void offline_css(struct cgroup_subsys_state *css)
5348 {
5349 struct cgroup_subsys *ss = css->ss;
5350
5351 lockdep_assert_held(&cgroup_mutex);
5352
5353 if (!(css->flags & CSS_ONLINE))
5354 return;
5355
5356 if (ss->css_offline)
5357 ss->css_offline(css);
5358
5359 css->flags &= ~CSS_ONLINE;
5360 RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
5361
5362 wake_up_all(&css->cgroup->offline_waitq);
5363 }
5364
5365
5366
5367
5368
5369
5370
5371
5372
5373
5374 static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
5375 struct cgroup_subsys *ss)
5376 {
5377 struct cgroup *parent = cgroup_parent(cgrp);
5378 struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss);
5379 struct cgroup_subsys_state *css;
5380 int err;
5381
5382 lockdep_assert_held(&cgroup_mutex);
5383
5384 css = ss->css_alloc(parent_css);
5385 if (!css)
5386 css = ERR_PTR(-ENOMEM);
5387 if (IS_ERR(css))
5388 return css;
5389
5390 init_and_link_css(css, ss, cgrp);
5391
5392 err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
5393 if (err)
5394 goto err_free_css;
5395
5396 err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
5397 if (err < 0)
5398 goto err_free_css;
5399 css->id = err;
5400
5401
5402 list_add_tail_rcu(&css->sibling, &parent_css->children);
5403 cgroup_idr_replace(&ss->css_idr, css, css->id);
5404
5405 err = online_css(css);
5406 if (err)
5407 goto err_list_del;
5408
5409 return css;
5410
5411 err_list_del:
5412 list_del_rcu(&css->sibling);
5413 err_free_css:
5414 list_del_rcu(&css->rstat_css_node);
5415 INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
5416 queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
5417 return ERR_PTR(err);
5418 }
5419
5420
5421
5422
5423
5424
5425 static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
5426 umode_t mode)
5427 {
5428 struct cgroup_root *root = parent->root;
5429 struct cgroup *cgrp, *tcgrp;
5430 struct kernfs_node *kn;
5431 int level = parent->level + 1;
5432 int ret;
5433
5434
5435 cgrp = kzalloc(struct_size(cgrp, ancestor_ids, (level + 1)),
5436 GFP_KERNEL);
5437 if (!cgrp)
5438 return ERR_PTR(-ENOMEM);
5439
5440 ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
5441 if (ret)
5442 goto out_free_cgrp;
5443
5444 ret = cgroup_rstat_init(cgrp);
5445 if (ret)
5446 goto out_cancel_ref;
5447
5448
5449 kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
5450 if (IS_ERR(kn)) {
5451 ret = PTR_ERR(kn);
5452 goto out_stat_exit;
5453 }
5454 cgrp->kn = kn;
5455
5456 init_cgroup_housekeeping(cgrp);
5457
5458 cgrp->self.parent = &parent->self;
5459 cgrp->root = root;
5460 cgrp->level = level;
5461
5462 ret = psi_cgroup_alloc(cgrp);
5463 if (ret)
5464 goto out_kernfs_remove;
5465
5466 ret = cgroup_bpf_inherit(cgrp);
5467 if (ret)
5468 goto out_psi_free;
5469
5470
5471
5472
5473
5474 cgrp->freezer.e_freeze = parent->freezer.e_freeze;
5475 if (cgrp->freezer.e_freeze) {
5476
5477
5478
5479
5480
5481
5482 set_bit(CGRP_FREEZE, &cgrp->flags);
5483 set_bit(CGRP_FROZEN, &cgrp->flags);
5484 }
5485
5486 spin_lock_irq(&css_set_lock);
5487 for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
5488 cgrp->ancestor_ids[tcgrp->level] = cgroup_id(tcgrp);
5489
5490 if (tcgrp != cgrp) {
5491 tcgrp->nr_descendants++;
5492
5493
5494
5495
5496
5497
5498 if (cgrp->freezer.e_freeze)
5499 tcgrp->freezer.nr_frozen_descendants++;
5500 }
5501 }
5502 spin_unlock_irq(&css_set_lock);
5503
5504 if (notify_on_release(parent))
5505 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
5506
5507 if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
5508 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
5509
5510 cgrp->self.serial_nr = css_serial_nr_next++;
5511
5512
5513 list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
5514 atomic_inc(&root->nr_cgrps);
5515 cgroup_get_live(parent);
5516
5517
5518
5519
5520
5521 if (!cgroup_on_dfl(cgrp))
5522 cgrp->subtree_control = cgroup_control(cgrp);
5523
5524 cgroup_propagate_control(cgrp);
5525
5526 return cgrp;
5527
5528 out_psi_free:
5529 psi_cgroup_free(cgrp);
5530 out_kernfs_remove:
5531 kernfs_remove(cgrp->kn);
5532 out_stat_exit:
5533 cgroup_rstat_exit(cgrp);
5534 out_cancel_ref:
5535 percpu_ref_exit(&cgrp->self.refcnt);
5536 out_free_cgrp:
5537 kfree(cgrp);
5538 return ERR_PTR(ret);
5539 }
5540
5541 static bool cgroup_check_hierarchy_limits(struct cgroup *parent)
5542 {
5543 struct cgroup *cgroup;
5544 int ret = false;
5545 int level = 1;
5546
5547 lockdep_assert_held(&cgroup_mutex);
5548
5549 for (cgroup = parent; cgroup; cgroup = cgroup_parent(cgroup)) {
5550 if (cgroup->nr_descendants >= cgroup->max_descendants)
5551 goto fail;
5552
5553 if (level > cgroup->max_depth)
5554 goto fail;
5555
5556 level++;
5557 }
5558
5559 ret = true;
5560 fail:
5561 return ret;
5562 }
5563
5564 int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
5565 {
5566 struct cgroup *parent, *cgrp;
5567 int ret;
5568
5569
5570 if (strchr(name, '\n'))
5571 return -EINVAL;
5572
5573 parent = cgroup_kn_lock_live(parent_kn, false);
5574 if (!parent)
5575 return -ENODEV;
5576
5577 if (!cgroup_check_hierarchy_limits(parent)) {
5578 ret = -EAGAIN;
5579 goto out_unlock;
5580 }
5581
5582 cgrp = cgroup_create(parent, name, mode);
5583 if (IS_ERR(cgrp)) {
5584 ret = PTR_ERR(cgrp);
5585 goto out_unlock;
5586 }
5587
5588
5589
5590
5591
5592 kernfs_get(cgrp->kn);
5593
5594 ret = cgroup_kn_set_ugid(cgrp->kn);
5595 if (ret)
5596 goto out_destroy;
5597
5598 ret = css_populate_dir(&cgrp->self);
5599 if (ret)
5600 goto out_destroy;
5601
5602 ret = cgroup_apply_control_enable(cgrp);
5603 if (ret)
5604 goto out_destroy;
5605
5606 TRACE_CGROUP_PATH(mkdir, cgrp);
5607
5608
5609 kernfs_activate(cgrp->kn);
5610
5611 ret = 0;
5612 goto out_unlock;
5613
5614 out_destroy:
5615 cgroup_destroy_locked(cgrp);
5616 out_unlock:
5617 cgroup_kn_unlock(parent_kn);
5618 return ret;
5619 }
5620
5621
5622
5623
5624
5625
5626 static void css_killed_work_fn(struct work_struct *work)
5627 {
5628 struct cgroup_subsys_state *css =
5629 container_of(work, struct cgroup_subsys_state, destroy_work);
5630
5631 mutex_lock(&cgroup_mutex);
5632
5633 do {
5634 offline_css(css);
5635 css_put(css);
5636
5637 css = css->parent;
5638 } while (css && atomic_dec_and_test(&css->online_cnt));
5639
5640 mutex_unlock(&cgroup_mutex);
5641 }
5642
5643
5644 static void css_killed_ref_fn(struct percpu_ref *ref)
5645 {
5646 struct cgroup_subsys_state *css =
5647 container_of(ref, struct cgroup_subsys_state, refcnt);
5648
5649 if (atomic_dec_and_test(&css->online_cnt)) {
5650 INIT_WORK(&css->destroy_work, css_killed_work_fn);
5651 queue_work(cgroup_destroy_wq, &css->destroy_work);
5652 }
5653 }
5654
5655
5656
5657
5658
5659
5660
5661
5662
5663
5664 static void kill_css(struct cgroup_subsys_state *css)
5665 {
5666 lockdep_assert_held(&cgroup_mutex);
5667
5668 if (css->flags & CSS_DYING)
5669 return;
5670
5671 css->flags |= CSS_DYING;
5672
5673
5674
5675
5676
5677 css_clear_dir(css);
5678
5679
5680
5681
5682
5683 css_get(css);
5684
5685
5686
5687
5688
5689
5690
5691
5692
5693
5694
5695 percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
5696 }
5697
5698
5699
5700
5701
5702
5703
5704
5705
5706
5707
5708
5709
5710
5711
5712
5713
5714
5715
5716
5717
5718
5719
5720
5721
5722 static int cgroup_destroy_locked(struct cgroup *cgrp)
5723 __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
5724 {
5725 struct cgroup *tcgrp, *parent = cgroup_parent(cgrp);
5726 struct cgroup_subsys_state *css;
5727 struct cgrp_cset_link *link;
5728 int ssid;
5729
5730 lockdep_assert_held(&cgroup_mutex);
5731
5732
5733
5734
5735
5736 if (cgroup_is_populated(cgrp))
5737 return -EBUSY;
5738
5739
5740
5741
5742
5743
5744 if (css_has_online_children(&cgrp->self))
5745 return -EBUSY;
5746
5747
5748
5749
5750
5751
5752
5753 cgrp->self.flags &= ~CSS_ONLINE;
5754
5755 spin_lock_irq(&css_set_lock);
5756 list_for_each_entry(link, &cgrp->cset_links, cset_link)
5757 link->cset->dead = true;
5758 spin_unlock_irq(&css_set_lock);
5759
5760
5761 for_each_css(css, ssid, cgrp)
5762 kill_css(css);
5763
5764
5765 css_clear_dir(&cgrp->self);
5766 kernfs_remove(cgrp->kn);
5767
5768 if (cgroup_is_threaded(cgrp))
5769 parent->nr_threaded_children--;
5770
5771 spin_lock_irq(&css_set_lock);
5772 for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
5773 tcgrp->nr_descendants--;
5774 tcgrp->nr_dying_descendants++;
5775
5776
5777
5778
5779 if (test_bit(CGRP_FROZEN, &cgrp->flags))
5780 tcgrp->freezer.nr_frozen_descendants--;
5781 }
5782 spin_unlock_irq(&css_set_lock);
5783
5784 cgroup1_check_for_release(parent);
5785
5786 cgroup_bpf_offline(cgrp);
5787
5788
5789 percpu_ref_kill(&cgrp->self.refcnt);
5790
5791 return 0;
5792 };
5793
5794 int cgroup_rmdir(struct kernfs_node *kn)
5795 {
5796 struct cgroup *cgrp;
5797 int ret = 0;
5798
5799 cgrp = cgroup_kn_lock_live(kn, false);
5800 if (!cgrp)
5801 return 0;
5802
5803 ret = cgroup_destroy_locked(cgrp);
5804 if (!ret)
5805 TRACE_CGROUP_PATH(rmdir, cgrp);
5806
5807 cgroup_kn_unlock(kn);
5808 return ret;
5809 }
5810
5811 static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
5812 .show_options = cgroup_show_options,
5813 .mkdir = cgroup_mkdir,
5814 .rmdir = cgroup_rmdir,
5815 .show_path = cgroup_show_path,
5816 };
5817
5818 static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
5819 {
5820 struct cgroup_subsys_state *css;
5821
5822 pr_debug("Initializing cgroup subsys %s\n", ss->name);
5823
5824 mutex_lock(&cgroup_mutex);
5825
5826 idr_init(&ss->css_idr);
5827 INIT_LIST_HEAD(&ss->cfts);
5828
5829
5830 ss->root = &cgrp_dfl_root;
5831 css = ss->css_alloc(NULL);
5832
5833 BUG_ON(IS_ERR(css));
5834 init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
5835
5836
5837
5838
5839
5840 css->flags |= CSS_NO_REF;
5841
5842 if (early) {
5843
5844 css->id = 1;
5845 } else {
5846 css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
5847 BUG_ON(css->id < 0);
5848 }
5849
5850
5851
5852
5853
5854 init_css_set.subsys[ss->id] = css;
5855
5856 have_fork_callback |= (bool)ss->fork << ss->id;
5857 have_exit_callback |= (bool)ss->exit << ss->id;
5858 have_release_callback |= (bool)ss->release << ss->id;
5859 have_canfork_callback |= (bool)ss->can_fork << ss->id;
5860
5861
5862
5863
5864 BUG_ON(!list_empty(&init_task.tasks));
5865
5866 BUG_ON(online_css(css));
5867
5868 mutex_unlock(&cgroup_mutex);
5869 }
5870
5871
5872
5873
5874
5875
5876
5877 int __init cgroup_init_early(void)
5878 {
5879 static struct cgroup_fs_context __initdata ctx;
5880 struct cgroup_subsys *ss;
5881 int i;
5882
5883 ctx.root = &cgrp_dfl_root;
5884 init_cgroup_root(&ctx);
5885 cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
5886
5887 RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
5888
5889 for_each_subsys(ss, i) {
5890 WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id,
5891 "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n",
5892 i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free,
5893 ss->id, ss->name);
5894 WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN,
5895 "cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]);
5896
5897 ss->id = i;
5898 ss->name = cgroup_subsys_name[i];
5899 if (!ss->legacy_name)
5900 ss->legacy_name = cgroup_subsys_name[i];
5901
5902 if (ss->early_init)
5903 cgroup_init_subsys(ss, true);
5904 }
5905 return 0;
5906 }
5907
5908
5909
5910
5911
5912
5913
5914 int __init cgroup_init(void)
5915 {
5916 struct cgroup_subsys *ss;
5917 int ssid;
5918
5919 BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
5920 BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
5921 BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
5922
5923 cgroup_rstat_boot();
5924
5925 get_user_ns(init_cgroup_ns.user_ns);
5926
5927 mutex_lock(&cgroup_mutex);
5928
5929
5930
5931
5932
5933 hash_add(css_set_table, &init_css_set.hlist,
5934 css_set_hash(init_css_set.subsys));
5935
5936 BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
5937
5938 mutex_unlock(&cgroup_mutex);
5939
5940 for_each_subsys(ss, ssid) {
5941 if (ss->early_init) {
5942 struct cgroup_subsys_state *css =
5943 init_css_set.subsys[ss->id];
5944
5945 css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
5946 GFP_KERNEL);
5947 BUG_ON(css->id < 0);
5948 } else {
5949 cgroup_init_subsys(ss, false);
5950 }
5951
5952 list_add_tail(&init_css_set.e_cset_node[ssid],
5953 &cgrp_dfl_root.cgrp.e_csets[ssid]);
5954
5955
5956
5957
5958
5959
5960 if (!cgroup_ssid_enabled(ssid))
5961 continue;
5962
5963 if (cgroup1_ssid_disabled(ssid))
5964 printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
5965 ss->name);
5966
5967 cgrp_dfl_root.subsys_mask |= 1 << ss->id;
5968
5969
5970 WARN_ON(ss->implicit_on_dfl && !ss->threaded);
5971
5972 if (ss->implicit_on_dfl)
5973 cgrp_dfl_implicit_ss_mask |= 1 << ss->id;
5974 else if (!ss->dfl_cftypes)
5975 cgrp_dfl_inhibit_ss_mask |= 1 << ss->id;
5976
5977 if (ss->threaded)
5978 cgrp_dfl_threaded_ss_mask |= 1 << ss->id;
5979
5980 if (ss->dfl_cftypes == ss->legacy_cftypes) {
5981 WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes));
5982 } else {
5983 WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes));
5984 WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes));
5985 }
5986
5987 if (ss->bind)
5988 ss->bind(init_css_set.subsys[ssid]);
5989
5990 mutex_lock(&cgroup_mutex);
5991 css_populate_dir(init_css_set.subsys[ssid]);
5992 mutex_unlock(&cgroup_mutex);
5993 }
5994
5995
5996 hash_del(&init_css_set.hlist);
5997 hash_add(css_set_table, &init_css_set.hlist,
5998 css_set_hash(init_css_set.subsys));
5999
6000 WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
6001 WARN_ON(register_filesystem(&cgroup_fs_type));
6002 WARN_ON(register_filesystem(&cgroup2_fs_type));
6003 WARN_ON(!proc_create_single("cgroups", 0, NULL, proc_cgroupstats_show));
6004 #ifdef CONFIG_CPUSETS
6005 WARN_ON(register_filesystem(&cpuset_fs_type));
6006 #endif
6007
6008 return 0;
6009 }
6010
6011 static int __init cgroup_wq_init(void)
6012 {
6013
6014
6015
6016
6017
6018
6019
6020
6021 cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
6022 BUG_ON(!cgroup_destroy_wq);
6023 return 0;
6024 }
6025 core_initcall(cgroup_wq_init);
6026
6027 void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
6028 {
6029 struct kernfs_node *kn;
6030
6031 kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id);
6032 if (!kn)
6033 return;
6034 kernfs_path(kn, buf, buflen);
6035 kernfs_put(kn);
6036 }
6037
6038
6039
6040
6041
6042
6043 struct cgroup *cgroup_get_from_id(u64 id)
6044 {
6045 struct kernfs_node *kn;
6046 struct cgroup *cgrp = NULL;
6047
6048 kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id);
6049 if (!kn)
6050 goto out;
6051
6052 if (kernfs_type(kn) != KERNFS_DIR)
6053 goto put;
6054
6055 rcu_read_lock();
6056
6057 cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
6058 if (cgrp && !cgroup_tryget(cgrp))
6059 cgrp = NULL;
6060
6061 rcu_read_unlock();
6062 put:
6063 kernfs_put(kn);
6064 out:
6065 return cgrp;
6066 }
6067 EXPORT_SYMBOL_GPL(cgroup_get_from_id);
6068
6069
6070
6071
6072
6073
6074 int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
6075 struct pid *pid, struct task_struct *tsk)
6076 {
6077 char *buf;
6078 int retval;
6079 struct cgroup_root *root;
6080
6081 retval = -ENOMEM;
6082 buf = kmalloc(PATH_MAX, GFP_KERNEL);
6083 if (!buf)
6084 goto out;
6085
6086 mutex_lock(&cgroup_mutex);
6087 spin_lock_irq(&css_set_lock);
6088
6089 for_each_root(root) {
6090 struct cgroup_subsys *ss;
6091 struct cgroup *cgrp;
6092 int ssid, count = 0;
6093
6094 if (root == &cgrp_dfl_root && !cgrp_dfl_visible)
6095 continue;
6096
6097 seq_printf(m, "%d:", root->hierarchy_id);
6098 if (root != &cgrp_dfl_root)
6099 for_each_subsys(ss, ssid)
6100 if (root->subsys_mask & (1 << ssid))
6101 seq_printf(m, "%s%s", count++ ? "," : "",
6102 ss->legacy_name);
6103 if (strlen(root->name))
6104 seq_printf(m, "%sname=%s", count ? "," : "",
6105 root->name);
6106 seq_putc(m, ':');
6107
6108 cgrp = task_cgroup_from_root(tsk, root);
6109
6110
6111
6112
6113
6114
6115
6116
6117
6118
6119 if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
6120 retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
6121 current->nsproxy->cgroup_ns);
6122 if (retval >= PATH_MAX)
6123 retval = -ENAMETOOLONG;
6124 if (retval < 0)
6125 goto out_unlock;
6126
6127 seq_puts(m, buf);
6128 } else {
6129 seq_puts(m, "/");
6130 }
6131
6132 if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
6133 seq_puts(m, " (deleted)\n");
6134 else
6135 seq_putc(m, '\n');
6136 }
6137
6138 retval = 0;
6139 out_unlock:
6140 spin_unlock_irq(&css_set_lock);
6141 mutex_unlock(&cgroup_mutex);
6142 kfree(buf);
6143 out:
6144 return retval;
6145 }
6146
6147
6148
6149
6150
6151
6152
6153
6154 void cgroup_fork(struct task_struct *child)
6155 {
6156 RCU_INIT_POINTER(child->cgroups, &init_css_set);
6157 INIT_LIST_HEAD(&child->cg_list);
6158 }
6159
6160 static struct cgroup *cgroup_get_from_file(struct file *f)
6161 {
6162 struct cgroup_subsys_state *css;
6163 struct cgroup *cgrp;
6164
6165 css = css_tryget_online_from_dir(f->f_path.dentry, NULL);
6166 if (IS_ERR(css))
6167 return ERR_CAST(css);
6168
6169 cgrp = css->cgroup;
6170 if (!cgroup_on_dfl(cgrp)) {
6171 cgroup_put(cgrp);
6172 return ERR_PTR(-EBADF);
6173 }
6174
6175 return cgrp;
6176 }
6177
6178
6179
6180
6181
6182
6183
6184
6185
6186
6187
6188
6189
6190
6191
6192
6193
6194 static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
6195 __acquires(&cgroup_mutex) __acquires(&cgroup_threadgroup_rwsem)
6196 {
6197 int ret;
6198 struct cgroup *dst_cgrp = NULL;
6199 struct css_set *cset;
6200 struct super_block *sb;
6201 struct file *f;
6202
6203 if (kargs->flags & CLONE_INTO_CGROUP)
6204 mutex_lock(&cgroup_mutex);
6205
6206 cgroup_threadgroup_change_begin(current);
6207
6208 spin_lock_irq(&css_set_lock);
6209 cset = task_css_set(current);
6210 get_css_set(cset);
6211 spin_unlock_irq(&css_set_lock);
6212
6213 if (!(kargs->flags & CLONE_INTO_CGROUP)) {
6214 kargs->cset = cset;
6215 return 0;
6216 }
6217
6218 f = fget_raw(kargs->cgroup);
6219 if (!f) {
6220 ret = -EBADF;
6221 goto err;
6222 }
6223 sb = f->f_path.dentry->d_sb;
6224
6225 dst_cgrp = cgroup_get_from_file(f);
6226 if (IS_ERR(dst_cgrp)) {
6227 ret = PTR_ERR(dst_cgrp);
6228 dst_cgrp = NULL;
6229 goto err;
6230 }
6231
6232 if (cgroup_is_dead(dst_cgrp)) {
6233 ret = -ENODEV;
6234 goto err;
6235 }
6236
6237
6238
6239
6240
6241
6242 ret = cgroup_may_write(dst_cgrp, sb);
6243 if (ret)
6244 goto err;
6245
6246
6247
6248
6249
6250
6251
6252
6253
6254
6255
6256
6257
6258
6259
6260 ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb,
6261 !(kargs->flags & CLONE_THREAD),
6262 current->nsproxy->cgroup_ns);
6263 if (ret)
6264 goto err;
6265
6266 kargs->cset = find_css_set(cset, dst_cgrp);
6267 if (!kargs->cset) {
6268 ret = -ENOMEM;
6269 goto err;
6270 }
6271
6272 put_css_set(cset);
6273 fput(f);
6274 kargs->cgrp = dst_cgrp;
6275 return ret;
6276
6277 err:
6278 cgroup_threadgroup_change_end(current);
6279 mutex_unlock(&cgroup_mutex);
6280 if (f)
6281 fput(f);
6282 if (dst_cgrp)
6283 cgroup_put(dst_cgrp);
6284 put_css_set(cset);
6285 if (kargs->cset)
6286 put_css_set(kargs->cset);
6287 return ret;
6288 }
6289
6290
6291
6292
6293
6294
6295
6296
6297 static void cgroup_css_set_put_fork(struct kernel_clone_args *kargs)
6298 __releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex)
6299 {
6300 cgroup_threadgroup_change_end(current);
6301
6302 if (kargs->flags & CLONE_INTO_CGROUP) {
6303 struct cgroup *cgrp = kargs->cgrp;
6304 struct css_set *cset = kargs->cset;
6305
6306 mutex_unlock(&cgroup_mutex);
6307
6308 if (cset) {
6309 put_css_set(cset);
6310 kargs->cset = NULL;
6311 }
6312
6313 if (cgrp) {
6314 cgroup_put(cgrp);
6315 kargs->cgrp = NULL;
6316 }
6317 }
6318 }
6319
6320
6321
6322
6323
6324
6325
6326
6327
6328
6329
6330
6331 int cgroup_can_fork(struct task_struct *child, struct kernel_clone_args *kargs)
6332 {
6333 struct cgroup_subsys *ss;
6334 int i, j, ret;
6335
6336 ret = cgroup_css_set_fork(kargs);
6337 if (ret)
6338 return ret;
6339
6340 do_each_subsys_mask(ss, i, have_canfork_callback) {
6341 ret = ss->can_fork(child, kargs->cset);
6342 if (ret)
6343 goto out_revert;
6344 } while_each_subsys_mask();
6345
6346 return 0;
6347
6348 out_revert:
6349 for_each_subsys(ss, j) {
6350 if (j >= i)
6351 break;
6352 if (ss->cancel_fork)
6353 ss->cancel_fork(child, kargs->cset);
6354 }
6355
6356 cgroup_css_set_put_fork(kargs);
6357
6358 return ret;
6359 }
6360
6361
6362
6363
6364
6365
6366
6367
6368
6369
6370 void cgroup_cancel_fork(struct task_struct *child,
6371 struct kernel_clone_args *kargs)
6372 {
6373 struct cgroup_subsys *ss;
6374 int i;
6375
6376 for_each_subsys(ss, i)
6377 if (ss->cancel_fork)
6378 ss->cancel_fork(child, kargs->cset);
6379
6380 cgroup_css_set_put_fork(kargs);
6381 }
6382
6383
6384
6385
6386
6387
6388
6389
6390
6391 void cgroup_post_fork(struct task_struct *child,
6392 struct kernel_clone_args *kargs)
6393 __releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex)
6394 {
6395 unsigned long cgrp_flags = 0;
6396 bool kill = false;
6397 struct cgroup_subsys *ss;
6398 struct css_set *cset;
6399 int i;
6400
6401 cset = kargs->cset;
6402 kargs->cset = NULL;
6403
6404 spin_lock_irq(&css_set_lock);
6405
6406
6407 if (likely(child->pid)) {
6408 if (kargs->cgrp)
6409 cgrp_flags = kargs->cgrp->flags;
6410 else
6411 cgrp_flags = cset->dfl_cgrp->flags;
6412
6413 WARN_ON_ONCE(!list_empty(&child->cg_list));
6414 cset->nr_tasks++;
6415 css_set_move_task(child, NULL, cset, false);
6416 } else {
6417 put_css_set(cset);
6418 cset = NULL;
6419 }
6420
6421 if (!(child->flags & PF_KTHREAD)) {
6422 if (unlikely(test_bit(CGRP_FREEZE, &cgrp_flags))) {
6423
6424
6425
6426
6427
6428 spin_lock(&child->sighand->siglock);
6429 WARN_ON_ONCE(child->frozen);
6430 child->jobctl |= JOBCTL_TRAP_FREEZE;
6431 spin_unlock(&child->sighand->siglock);
6432
6433
6434
6435
6436
6437
6438
6439 }
6440
6441
6442
6443
6444
6445
6446 kill = test_bit(CGRP_KILL, &cgrp_flags);
6447 }
6448
6449 spin_unlock_irq(&css_set_lock);
6450
6451
6452
6453
6454
6455
6456 do_each_subsys_mask(ss, i, have_fork_callback) {
6457 ss->fork(child);
6458 } while_each_subsys_mask();
6459
6460
6461 if (kargs->flags & CLONE_NEWCGROUP) {
6462 struct css_set *rcset = child->nsproxy->cgroup_ns->root_cset;
6463
6464 get_css_set(cset);
6465 child->nsproxy->cgroup_ns->root_cset = cset;
6466 put_css_set(rcset);
6467 }
6468
6469
6470 if (unlikely(kill))
6471 do_send_sig_info(SIGKILL, SEND_SIG_NOINFO, child, PIDTYPE_TGID);
6472
6473 cgroup_css_set_put_fork(kargs);
6474 }
6475
6476
6477
6478
6479
6480
6481
6482
6483 void cgroup_exit(struct task_struct *tsk)
6484 {
6485 struct cgroup_subsys *ss;
6486 struct css_set *cset;
6487 int i;
6488
6489 spin_lock_irq(&css_set_lock);
6490
6491 WARN_ON_ONCE(list_empty(&tsk->cg_list));
6492 cset = task_css_set(tsk);
6493 css_set_move_task(tsk, cset, NULL, false);
6494 list_add_tail(&tsk->cg_list, &cset->dying_tasks);
6495 cset->nr_tasks--;
6496
6497 WARN_ON_ONCE(cgroup_task_frozen(tsk));
6498 if (unlikely(!(tsk->flags & PF_KTHREAD) &&
6499 test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags)))
6500 cgroup_update_frozen(task_dfl_cgroup(tsk));
6501
6502 spin_unlock_irq(&css_set_lock);
6503
6504
6505 do_each_subsys_mask(ss, i, have_exit_callback) {
6506 ss->exit(tsk);
6507 } while_each_subsys_mask();
6508 }
6509
6510 void cgroup_release(struct task_struct *task)
6511 {
6512 struct cgroup_subsys *ss;
6513 int ssid;
6514
6515 do_each_subsys_mask(ss, ssid, have_release_callback) {
6516 ss->release(task);
6517 } while_each_subsys_mask();
6518
6519 spin_lock_irq(&css_set_lock);
6520 css_set_skip_task_iters(task_css_set(task), task);
6521 list_del_init(&task->cg_list);
6522 spin_unlock_irq(&css_set_lock);
6523 }
6524
6525 void cgroup_free(struct task_struct *task)
6526 {
6527 struct css_set *cset = task_css_set(task);
6528 put_css_set(cset);
6529 }
6530
6531 static int __init cgroup_disable(char *str)
6532 {
6533 struct cgroup_subsys *ss;
6534 char *token;
6535 int i;
6536
6537 while ((token = strsep(&str, ",")) != NULL) {
6538 if (!*token)
6539 continue;
6540
6541 for_each_subsys(ss, i) {
6542 if (strcmp(token, ss->name) &&
6543 strcmp(token, ss->legacy_name))
6544 continue;
6545
6546 static_branch_disable(cgroup_subsys_enabled_key[i]);
6547 pr_info("Disabling %s control group subsystem\n",
6548 ss->name);
6549 }
6550
6551 for (i = 0; i < OPT_FEATURE_COUNT; i++) {
6552 if (strcmp(token, cgroup_opt_feature_names[i]))
6553 continue;
6554 cgroup_feature_disable_mask |= 1 << i;
6555 pr_info("Disabling %s control group feature\n",
6556 cgroup_opt_feature_names[i]);
6557 break;
6558 }
6559 }
6560 return 1;
6561 }
6562 __setup("cgroup_disable=", cgroup_disable);
6563
6564 void __init __weak enable_debug_cgroup(void) { }
6565
6566 static int __init enable_cgroup_debug(char *str)
6567 {
6568 cgroup_debug = true;
6569 enable_debug_cgroup();
6570 return 1;
6571 }
6572 __setup("cgroup_debug", enable_cgroup_debug);
6573
6574
6575
6576
6577
6578
6579
6580
6581
6582
6583 struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
6584 struct cgroup_subsys *ss)
6585 {
6586 struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
6587 struct file_system_type *s_type = dentry->d_sb->s_type;
6588 struct cgroup_subsys_state *css = NULL;
6589 struct cgroup *cgrp;
6590
6591
6592 if ((s_type != &cgroup_fs_type && s_type != &cgroup2_fs_type) ||
6593 !kn || kernfs_type(kn) != KERNFS_DIR)
6594 return ERR_PTR(-EBADF);
6595
6596 rcu_read_lock();
6597
6598
6599
6600
6601
6602
6603 cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
6604 if (cgrp)
6605 css = cgroup_css(cgrp, ss);
6606
6607 if (!css || !css_tryget_online(css))
6608 css = ERR_PTR(-ENOENT);
6609
6610 rcu_read_unlock();
6611 return css;
6612 }
6613
6614
6615
6616
6617
6618
6619
6620
6621
6622 struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
6623 {
6624 WARN_ON_ONCE(!rcu_read_lock_held());
6625 return idr_find(&ss->css_idr, id);
6626 }
6627
6628
6629
6630
6631
6632
6633
6634
6635
6636
6637 struct cgroup *cgroup_get_from_path(const char *path)
6638 {
6639 struct kernfs_node *kn;
6640 struct cgroup *cgrp = ERR_PTR(-ENOENT);
6641
6642 kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
6643 if (!kn)
6644 goto out;
6645
6646 if (kernfs_type(kn) != KERNFS_DIR) {
6647 cgrp = ERR_PTR(-ENOTDIR);
6648 goto out_kernfs;
6649 }
6650
6651 rcu_read_lock();
6652
6653 cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
6654 if (!cgrp || !cgroup_tryget(cgrp))
6655 cgrp = ERR_PTR(-ENOENT);
6656
6657 rcu_read_unlock();
6658
6659 out_kernfs:
6660 kernfs_put(kn);
6661 out:
6662 return cgrp;
6663 }
6664 EXPORT_SYMBOL_GPL(cgroup_get_from_path);
6665
6666
6667
6668
6669
6670
6671
6672
6673
6674
6675 struct cgroup *cgroup_get_from_fd(int fd)
6676 {
6677 struct cgroup *cgrp;
6678 struct file *f;
6679
6680 f = fget_raw(fd);
6681 if (!f)
6682 return ERR_PTR(-EBADF);
6683
6684 cgrp = cgroup_get_from_file(f);
6685 fput(f);
6686 return cgrp;
6687 }
6688 EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
6689
6690 static u64 power_of_ten(int power)
6691 {
6692 u64 v = 1;
6693 while (power--)
6694 v *= 10;
6695 return v;
6696 }
6697
6698
6699
6700
6701
6702
6703
6704
6705
6706
6707
6708
6709
6710
6711
6712 int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v)
6713 {
6714 s64 whole, frac = 0;
6715 int fstart = 0, fend = 0, flen;
6716
6717 if (!sscanf(input, "%lld.%n%lld%n", &whole, &fstart, &frac, &fend))
6718 return -EINVAL;
6719 if (frac < 0)
6720 return -EINVAL;
6721
6722 flen = fend > fstart ? fend - fstart : 0;
6723 if (flen < dec_shift)
6724 frac *= power_of_ten(dec_shift - flen);
6725 else
6726 frac = DIV_ROUND_CLOSEST_ULL(frac, power_of_ten(flen - dec_shift));
6727
6728 *v = whole * power_of_ten(dec_shift) + frac;
6729 return 0;
6730 }
6731
6732
6733
6734
6735
6736 #ifdef CONFIG_SOCK_CGROUP_DATA
6737
6738 void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
6739 {
6740 struct cgroup *cgroup;
6741
6742 rcu_read_lock();
6743
6744 if (in_interrupt()) {
6745 cgroup = &cgrp_dfl_root.cgrp;
6746 cgroup_get(cgroup);
6747 goto out;
6748 }
6749
6750 while (true) {
6751 struct css_set *cset;
6752
6753 cset = task_css_set(current);
6754 if (likely(cgroup_tryget(cset->dfl_cgrp))) {
6755 cgroup = cset->dfl_cgrp;
6756 break;
6757 }
6758 cpu_relax();
6759 }
6760 out:
6761 skcd->cgroup = cgroup;
6762 cgroup_bpf_get(cgroup);
6763 rcu_read_unlock();
6764 }
6765
6766 void cgroup_sk_clone(struct sock_cgroup_data *skcd)
6767 {
6768 struct cgroup *cgrp = sock_cgroup_ptr(skcd);
6769
6770
6771
6772
6773
6774
6775 cgroup_get(cgrp);
6776 cgroup_bpf_get(cgrp);
6777 }
6778
6779 void cgroup_sk_free(struct sock_cgroup_data *skcd)
6780 {
6781 struct cgroup *cgrp = sock_cgroup_ptr(skcd);
6782
6783 cgroup_bpf_put(cgrp);
6784 cgroup_put(cgrp);
6785 }
6786
6787 #endif
6788
6789 #ifdef CONFIG_SYSFS
6790 static ssize_t show_delegatable_files(struct cftype *files, char *buf,
6791 ssize_t size, const char *prefix)
6792 {
6793 struct cftype *cft;
6794 ssize_t ret = 0;
6795
6796 for (cft = files; cft && cft->name[0] != '\0'; cft++) {
6797 if (!(cft->flags & CFTYPE_NS_DELEGATABLE))
6798 continue;
6799
6800 if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
6801 continue;
6802
6803 if (prefix)
6804 ret += snprintf(buf + ret, size - ret, "%s.", prefix);
6805
6806 ret += snprintf(buf + ret, size - ret, "%s\n", cft->name);
6807
6808 if (WARN_ON(ret >= size))
6809 break;
6810 }
6811
6812 return ret;
6813 }
6814
6815 static ssize_t delegate_show(struct kobject *kobj, struct kobj_attribute *attr,
6816 char *buf)
6817 {
6818 struct cgroup_subsys *ss;
6819 int ssid;
6820 ssize_t ret = 0;
6821
6822 ret = show_delegatable_files(cgroup_base_files, buf, PAGE_SIZE - ret,
6823 NULL);
6824
6825 for_each_subsys(ss, ssid)
6826 ret += show_delegatable_files(ss->dfl_cftypes, buf + ret,
6827 PAGE_SIZE - ret,
6828 cgroup_subsys_name[ssid]);
6829
6830 return ret;
6831 }
6832 static struct kobj_attribute cgroup_delegate_attr = __ATTR_RO(delegate);
6833
6834 static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
6835 char *buf)
6836 {
6837 return snprintf(buf, PAGE_SIZE,
6838 "nsdelegate\n"
6839 "favordynmods\n"
6840 "memory_localevents\n"
6841 "memory_recursiveprot\n");
6842 }
6843 static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);
6844
6845 static struct attribute *cgroup_sysfs_attrs[] = {
6846 &cgroup_delegate_attr.attr,
6847 &cgroup_features_attr.attr,
6848 NULL,
6849 };
6850
6851 static const struct attribute_group cgroup_sysfs_attr_group = {
6852 .attrs = cgroup_sysfs_attrs,
6853 .name = "cgroup",
6854 };
6855
6856 static int __init cgroup_sysfs_init(void)
6857 {
6858 return sysfs_create_group(kernel_kobj, &cgroup_sysfs_attr_group);
6859 }
6860 subsys_initcall(cgroup_sysfs_init);
6861
6862 #endif