#include "cgroup-internal.h"

#include <linux/ctype.h>
#include <linux/kmod.h>
#include <linux/sort.h>
#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/magic.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/delayacct.h>
#include <linux/pid_namespace.h>
#include <linux/cgroupstats.h>
#include <linux/fs_parser.h>

#include <trace/events/cgroup.h>

/*
 * pidlists linger the following amount before being destroyed.  The goal
 * is avoiding frequent destruction in the middle of consecutive read calls.
 * Expiring in the middle is a performance problem, not a correctness one.
 * 1 sec should be enough.
 */
#define CGROUP_PIDLIST_DESTROY_DELAY	HZ

/* Controllers blocked by the commandline in v1 */
static u16 cgroup_no_v1_mask;

/* disable named v1 mounts */
static bool cgroup_no_v1_named;

/*
 * pidlist destructions need to be flushed on cgroup destruction.  Use a
 * separate workqueue as flush domain.
 */
static struct workqueue_struct *cgroup_pidlist_destroy_wq;

/* protects cgroup_root->release_agent_path */
static DEFINE_SPINLOCK(release_agent_path_lock);

bool cgroup1_ssid_disabled(int ssid)
{
	return cgroup_no_v1_mask & (1 << ssid);
}

/**
 * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
 * @from: attach to all cgroups of a given task
 * @tsk: the task to be attached
 *
 * Return: %0 on success or a negative errno code on failure
 */
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
{
	struct cgroup_root *root;
	int retval = 0;

	mutex_lock(&cgroup_mutex);
	cpus_read_lock();
	percpu_down_write(&cgroup_threadgroup_rwsem);
	for_each_root(root) {
		struct cgroup *from_cgrp;

		spin_lock_irq(&css_set_lock);
		from_cgrp = task_cgroup_from_root(from, root);
		spin_unlock_irq(&css_set_lock);

		retval = cgroup_attach_task(from_cgrp, tsk, false);
		if (retval)
			break;
	}
	percpu_up_write(&cgroup_threadgroup_rwsem);
	cpus_read_unlock();
	mutex_unlock(&cgroup_mutex);

	return retval;
}
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);

/**
 * cgroup_transfer_tasks - move tasks from one cgroup to another
 * @to: cgroup to which the tasks will be moved
 * @from: cgroup in which the tasks currently reside
 *
 * Locking rules between cgroup_post_fork() and the migration path
 * guarantee that new tasks are always appended to the tail of the list.
 * That ensures that we can safely move tasks from @from to @to in a
 * single pass without missing or duplicating any.
 */
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
{
	DEFINE_CGROUP_MGCTX(mgctx);
	struct cgrp_cset_link *link;
	struct css_task_iter it;
	struct task_struct *task;
	int ret;

	if (cgroup_on_dfl(to))
		return -EINVAL;

	ret = cgroup_migrate_vet_dst(to);
	if (ret)
		return ret;

	mutex_lock(&cgroup_mutex);

	percpu_down_write(&cgroup_threadgroup_rwsem);

	/* all tasks in @from are being moved, all csets are source */
	spin_lock_irq(&css_set_lock);
	list_for_each_entry(link, &from->cset_links, cset_link)
		cgroup_migrate_add_src(link->cset, to, &mgctx);
	spin_unlock_irq(&css_set_lock);

	ret = cgroup_migrate_prepare_dst(&mgctx);
	if (ret)
		goto out_err;

	/*
	 * Migrate tasks one-by-one until @from is empty.  This fails iff
	 * ->can_attach() fails.
	 */
	do {
		css_task_iter_start(&from->self, 0, &it);

		do {
			task = css_task_iter_next(&it);
		} while (task && (task->flags & PF_EXITING));

		if (task)
			get_task_struct(task);
		css_task_iter_end(&it);

		if (task) {
			ret = cgroup_migrate(task, false, &mgctx);
			if (!ret)
				TRACE_CGROUP_PATH(transfer_tasks, to, task, false);
			put_task_struct(task);
		}
	} while (task && !ret);
out_err:
	cgroup_migrate_finish(&mgctx);
	percpu_up_write(&cgroup_threadgroup_rwsem);
	mutex_unlock(&cgroup_mutex);
	return ret;
}

/*
 * Stuff for reading the 'tasks'/'procs' files.
 *
 * Reading these files can return large amounts of data if a cgroup has
 * *lots* of attached tasks, so it may need several calls to read(), but
 * we cannot guarantee that the information we produce is correct unless
 * we produce it entirely atomically.  Hence the pidlist machinery below.
 */

/* which pidlist file are we talking about? */
enum cgroup_filetype {
	CGROUP_FILE_PROCS,
	CGROUP_FILE_TASKS,
};

/*
 * A pidlist is a list of pids that virtually represents the contents of
 * one of the cgroup files ("procs" or "tasks").  We keep a list of such
 * pidlists, one pair (procs, tasks) for each pid namespace that's
 * relevant to the cgroup.
 */
struct cgroup_pidlist {
	/*
	 * used to find which pidlist is wanted.  doesn't change as long as
	 * this particular list stays in the list.
	 */
	struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
	/* array of pids (tgids for the procs file) */
	pid_t *list;
	/* how many elements the above list has */
	int length;
	/* each of these is stored in a list by its cgroup */
	struct list_head links;
	/* pointer to the cgroup we belong to, for list removal purposes */
	struct cgroup *owner;
	/* for delayed destruction */
	struct delayed_work destroy_dwork;
};

/*
 * Used to destroy all pidlists lingering waiting for destroy timer.  None
 * should be left afterwards.
 */
void cgroup1_pidlist_destroy_all(struct cgroup *cgrp)
{
	struct cgroup_pidlist *l, *tmp_l;

	mutex_lock(&cgrp->pidlist_mutex);
	list_for_each_entry_safe(l, tmp_l, &cgrp->pidlists, links)
		mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, 0);
	mutex_unlock(&cgrp->pidlist_mutex);

	flush_workqueue(cgroup_pidlist_destroy_wq);
	BUG_ON(!list_empty(&cgrp->pidlists));
}

static void cgroup_pidlist_destroy_work_fn(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct cgroup_pidlist *l = container_of(dwork, struct cgroup_pidlist,
						destroy_dwork);
	struct cgroup_pidlist *tofree = NULL;

	mutex_lock(&l->owner->pidlist_mutex);

	/*
	 * Destroy iff we didn't get queued again.  The state won't change
	 * as destroy_dwork can only be queued while locked.
	 */
	if (!delayed_work_pending(dwork)) {
		list_del(&l->links);
		kvfree(l->list);
		put_pid_ns(l->key.ns);
		tofree = l;
	}

	mutex_unlock(&l->owner->pidlist_mutex);
	kfree(tofree);
}

/*
 * pidlist_uniq - given a sorted pid list, strip out all duplicate entries.
 * Returns the number of unique elements.
 */
static int pidlist_uniq(pid_t *list, int length)
{
	int src, dest = 1;

	/*
	 * we presume the 0th element is unique, so src starts at 1.  trivial
	 * edge cases first; no work needs to be done for either
	 */
	if (length == 0 || length == 1)
		return length;
	/* src and dest walk down the list; dest counts unique elements */
	for (src = 1; src < length; src++) {
		/* find next unique element */
		while (list[src] == list[src-1]) {
			src++;
			if (src == length)
				goto after;
		}
		/* dest always points to where the next unique element goes */
		list[dest] = list[src];
		dest++;
	}
after:
	return dest;
}
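
/*
 * Worked example for pidlist_uniq() (illustrative; assumes sorted input,
 * as produced by the sort() call in pidlist_array_load()):
 *
 *	pid_t pids[] = { 1, 1, 3, 3, 5 };
 *	int n = pidlist_uniq(pids, 5);
 *	// n == 3 and pids[0..2] == { 1, 3, 5 }; entries past n are stale.
 *
 * Unsorted input would leave duplicates behind, which is why the array
 * is always sorted first.
 */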

/*
 * The two pid files - tasks and cgroup.procs - guaranteed that the result
 * is sorted, which forced this whole pidlist fiasco.  As pid order differs
 * per namespace, each namespace needs a differently sorted list, making it
 * impossible to use, for example, a single rbtree of member tasks sorted by
 * task pointer.  As pidlists can be fairly large, allocating one per open
 * file is dangerous, so cgroup had to implement a shared pool of pidlists
 * keyed by cgroup and namespace.
 */
static int cmppid(const void *a, const void *b)
{
	return *(pid_t *)a - *(pid_t *)b;
}

static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
						  enum cgroup_filetype type)
{
	struct cgroup_pidlist *l;
	/* don't need task_nsproxy() if we're looking at ourself */
	struct pid_namespace *ns = task_active_pid_ns(current);

	lockdep_assert_held(&cgrp->pidlist_mutex);

	list_for_each_entry(l, &cgrp->pidlists, links)
		if (l->key.type == type && l->key.ns == ns)
			return l;
	return NULL;
}

/*
 * Find the pidlist for @type in the current task's pid namespace,
 * creating and linking a new one if it doesn't exist yet.  Returns NULL
 * if allocation fails.  Must be called with cgrp->pidlist_mutex held.
 */
static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp,
							 enum cgroup_filetype type)
{
	struct cgroup_pidlist *l;

	lockdep_assert_held(&cgrp->pidlist_mutex);

	l = cgroup_pidlist_find(cgrp, type);
	if (l)
		return l;

	/* entry not found; create a new one */
	l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
	if (!l)
		return l;

	INIT_DELAYED_WORK(&l->destroy_dwork, cgroup_pidlist_destroy_work_fn);
	l->key.type = type;
	/* don't need task_nsproxy() if we're looking at ourself */
	l->key.ns = get_pid_ns(task_active_pid_ns(current));
	l->owner = cgrp;
	list_add(&l->links, &cgrp->pidlists);
	return l;
}

/*
 * Load a cgroup's pidarray with either procs' tgids or tasks' pids
 */
static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
			      struct cgroup_pidlist **lp)
{
	pid_t *array;
	int length;
	int pid, n = 0;
	struct css_task_iter it;
	struct task_struct *tsk;
	struct cgroup_pidlist *l;

	lockdep_assert_held(&cgrp->pidlist_mutex);

	/*
	 * If cgroup gets more users after we read count, we won't have
	 * enough space - tough.  This race is indistinguishable to the
	 * caller from the case that the additional cgroup users didn't
	 * show up until sometime later on.
	 */
	length = cgroup_task_count(cgrp);
	array = kvmalloc_array(length, sizeof(pid_t), GFP_KERNEL);
	if (!array)
		return -ENOMEM;
	/* now, populate the array */
	css_task_iter_start(&cgrp->self, 0, &it);
	while ((tsk = css_task_iter_next(&it))) {
		if (unlikely(n == length))
			break;
		/* get tgid or pid for procs or tasks file respectively */
		if (type == CGROUP_FILE_PROCS)
			pid = task_tgid_vnr(tsk);
		else
			pid = task_pid_vnr(tsk);
		if (pid > 0) /* make sure to only use valid results */
			array[n++] = pid;
	}
	css_task_iter_end(&it);
	length = n;
	/* now sort & (if procs) strip out duplicates */
	sort(array, length, sizeof(pid_t), cmppid, NULL);
	if (type == CGROUP_FILE_PROCS)
		length = pidlist_uniq(array, length);

	l = cgroup_pidlist_find_create(cgrp, type);
	if (!l) {
		kvfree(array);
		return -ENOMEM;
	}

	/* store array, freeing old if necessary */
	kvfree(l->list);
	l->list = array;
	l->length = length;
	*lp = l;
	return 0;
}

/*
 * seq_file methods for the tasks/procs files.  The seq_file position is
 * the next pid to display; the seq_file iterator is a pointer to the pid
 * in the cgroup->l->list array.
 */

static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
{
	/*
	 * Initially we receive a position value that corresponds to one
	 * more than the last pid shown (or 0 on the first call or after a
	 * seek to the start).  Use a binary-search to find the next pid to
	 * display, if any.
	 */
	struct kernfs_open_file *of = s->private;
	struct cgroup_file_ctx *ctx = of->priv;
	struct cgroup *cgrp = seq_css(s)->cgroup;
	struct cgroup_pidlist *l;
	enum cgroup_filetype type = seq_cft(s)->private;
	int index = 0, pid = *pos;
	int *iter, ret;

	mutex_lock(&cgrp->pidlist_mutex);

	/*
	 * !NULL @ctx->procs1.pidlist indicates that this isn't the first
	 * start() after open.  If the matching pidlist is still around,
	 * look it up and reuse it.  Note that @ctx->procs1.pidlist can't
	 * be dereferenced directly - it may already have been destroyed.
	 */
	if (ctx->procs1.pidlist)
		ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type);

	/*
	 * Either this is the first start() after open or the matching
	 * pidlist has been destroyed inbetween.  Create a new one.
	 */
	if (!ctx->procs1.pidlist) {
		ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist);
		if (ret)
			return ERR_PTR(ret);
	}
	l = ctx->procs1.pidlist;

	if (pid) {
		int end = l->length;

		while (index < end) {
			int mid = (index + end) / 2;
			if (l->list[mid] == pid) {
				index = mid;
				break;
			} else if (l->list[mid] <= pid)
				index = mid + 1;
			else
				end = mid;
		}
	}
	/* If we're off the end of the array, we're done */
	if (index >= l->length)
		return NULL;
	/* Update the abstract position to be the actual pid that we found */
	iter = l->list + index;
	*pos = *iter;
	return iter;
}

static void cgroup_pidlist_stop(struct seq_file *s, void *v)
{
	struct kernfs_open_file *of = s->private;
	struct cgroup_file_ctx *ctx = of->priv;
	struct cgroup_pidlist *l = ctx->procs1.pidlist;

	if (l)
		mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork,
				 CGROUP_PIDLIST_DESTROY_DELAY);
	mutex_unlock(&seq_css(s)->cgroup->pidlist_mutex);
}

static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
{
	struct kernfs_open_file *of = s->private;
	struct cgroup_file_ctx *ctx = of->priv;
	struct cgroup_pidlist *l = ctx->procs1.pidlist;
	pid_t *p = v;
	pid_t *end = l->list + l->length;

	/*
	 * Advance to the next pid in the array.  If this goes off the
	 * end, we're done.
	 */
	p++;
	if (p >= end) {
		(*pos)++;
		return NULL;
	} else {
		*pos = *p;
		return p;
	}
}

static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	seq_printf(s, "%d\n", *(int *)v);

	return 0;
}
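
/*
 * Reading "cgroup.procs" or "tasks" thus walks the shared pidlist via the
 * seq_file iterator above.  For example (paths illustrative):
 *
 *	$ cat /sys/fs/cgroup/cpu/mygroup/cgroup.procs
 *	42
 *	1337
 *
 * Output is one numeric (v)pid per line, sorted, and deduplicated for the
 * procs file.  The pidlist lingers for CGROUP_PIDLIST_DESTROY_DELAY after
 * the last read so that consecutive read() calls can reuse it.
 */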

static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
				     char *buf, size_t nbytes, loff_t off,
				     bool threadgroup)
{
	struct cgroup *cgrp;
	struct task_struct *task;
	const struct cred *cred, *tcred;
	ssize_t ret;
	bool locked;

	cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!cgrp)
		return -ENODEV;

	task = cgroup_procs_write_start(buf, threadgroup, &locked);
	ret = PTR_ERR_OR_ZERO(task);
	if (ret)
		goto out_unlock;

	/*
	 * Even if we're attaching all tasks in the thread group, we only
	 * need to check permissions on one of them.  Check permissions
	 * using the credentials from file open to protect against
	 * inherited fd attacks.
	 */
	cred = of->file->f_cred;
	tcred = get_task_cred(task);
	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
	    !uid_eq(cred->euid, tcred->uid) &&
	    !uid_eq(cred->euid, tcred->suid))
		ret = -EACCES;
	put_cred(tcred);
	if (ret)
		goto out_finish;

	ret = cgroup_attach_task(cgrp, task, threadgroup);

out_finish:
	cgroup_procs_write_finish(task, locked);
out_unlock:
	cgroup_kn_unlock(of->kn);

	return ret ?: nbytes;
}

static ssize_t cgroup1_procs_write(struct kernfs_open_file *of,
				   char *buf, size_t nbytes, loff_t off)
{
	return __cgroup1_procs_write(of, buf, nbytes, off, true);
}

static ssize_t cgroup1_tasks_write(struct kernfs_open_file *of,
				   char *buf, size_t nbytes, loff_t off)
{
	return __cgroup1_procs_write(of, buf, nbytes, off, false);
}

static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
					  char *buf, size_t nbytes, loff_t off)
{
	struct cgroup *cgrp;
	struct cgroup_file_ctx *ctx;

	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);

	/*
	 * Release agent gets called with all capabilities,
	 * require capabilities to set release agent.
	 */
	ctx = of->priv;
	if ((ctx->ns->user_ns != &init_user_ns) ||
	    !file_ns_capable(of->file, &init_user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!cgrp)
		return -ENODEV;
	spin_lock(&release_agent_path_lock);
	strlcpy(cgrp->root->release_agent_path, strstrip(buf),
		sizeof(cgrp->root->release_agent_path));
	spin_unlock(&release_agent_path_lock);
	cgroup_kn_unlock(of->kn);
	return nbytes;
}

static int cgroup_release_agent_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;

	spin_lock(&release_agent_path_lock);
	seq_puts(seq, cgrp->root->release_agent_path);
	spin_unlock(&release_agent_path_lock);
	seq_putc(seq, '\n');
	return 0;
}

static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
{
	seq_puts(seq, "0\n");
	return 0;
}

static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
					 struct cftype *cft)
{
	return notify_on_release(css->cgroup);
}

static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
					  struct cftype *cft, u64 val)
{
	if (val)
		set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
	else
		clear_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
	return 0;
}

static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
				      struct cftype *cft)
{
	return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
}

static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
				       struct cftype *cft, u64 val)
{
	if (val)
		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
	else
		clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
	return 0;
}

/* cgroup core interface files for the legacy hierarchies */
struct cftype cgroup1_base_files[] = {
	{
		.name = "cgroup.procs",
		.seq_start = cgroup_pidlist_start,
		.seq_next = cgroup_pidlist_next,
		.seq_stop = cgroup_pidlist_stop,
		.seq_show = cgroup_pidlist_show,
		.private = CGROUP_FILE_PROCS,
		.write = cgroup1_procs_write,
	},
	{
		.name = "cgroup.clone_children",
		.read_u64 = cgroup_clone_children_read,
		.write_u64 = cgroup_clone_children_write,
	},
	{
		.name = "cgroup.sane_behavior",
		.flags = CFTYPE_ONLY_ON_ROOT,
		.seq_show = cgroup_sane_behavior_show,
	},
	{
		.name = "tasks",
		.seq_start = cgroup_pidlist_start,
		.seq_next = cgroup_pidlist_next,
		.seq_stop = cgroup_pidlist_stop,
		.seq_show = cgroup_pidlist_show,
		.private = CGROUP_FILE_TASKS,
		.write = cgroup1_tasks_write,
	},
	{
		.name = "notify_on_release",
		.read_u64 = cgroup_read_notify_on_release,
		.write_u64 = cgroup_write_notify_on_release,
	},
	{
		.name = "release_agent",
		.flags = CFTYPE_ONLY_ON_ROOT,
		.seq_show = cgroup_release_agent_show,
		.write = cgroup_release_agent_write,
		.max_write_len = PATH_MAX - 1,
	},
	{ }	/* terminate */
};
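
/*
 * With the table above, every v1 cgroup directory exposes cgroup.procs,
 * cgroup.clone_children, tasks and notify_on_release, while
 * cgroup.sane_behavior and release_agent appear only at the hierarchy
 * root due to CFTYPE_ONLY_ON_ROOT.  Controller-specific files are added
 * separately by each subsystem.
 */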

/* Display information about each subsystem and each hierarchy */
int proc_cgroupstats_show(struct seq_file *m, void *v)
{
	struct cgroup_subsys *ss;
	int i;

	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
	/*
	 * Grab the subsystem state racily.  These are informational
	 * values only; taking cgroup_mutex just to read them isn't
	 * worth it.
	 */
	for_each_subsys(ss, i)
		seq_printf(m, "%s\t%d\t%d\t%d\n",
			   ss->legacy_name, ss->root->hierarchy_id,
			   atomic_read(&ss->root->nr_cgrps),
			   cgroup_ssid_enabled(i));

	return 0;
}
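
/*
 * The above backs /proc/cgroups.  Illustrative output (hierarchy IDs and
 * cgroup counts vary per system; values below are made up):
 *
 *	#subsys_name	hierarchy	num_cgroups	enabled
 *	cpuset	3	1	1
 *	cpu	4	12	1
 *	memory	6	87	1
 */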

/**
 * cgroupstats_build - build and fill cgroupstats
 * @stats: cgroupstats to fill information into
 * @dentry: A dentry entry belonging to the cgroup for which stats have
 * been requested.
 *
 * Build and fill cgroupstats so that taskstats can export it to user
 * space.
 *
 * Return: %0 on success or a negative errno code on failure
 */
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
{
	struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
	struct cgroup *cgrp;
	struct css_task_iter it;
	struct task_struct *tsk;

	/* it should be a kernfs_node belonging to cgroupfs and a directory */
	if (dentry->d_sb->s_type != &cgroup_fs_type || !kn ||
	    kernfs_type(kn) != KERNFS_DIR)
		return -EINVAL;

	/*
	 * We aren't being called from kernfs and there's no guarantee on
	 * @kn->priv's validity.  For this and css_tryget_online_from_dir(),
	 * @kn->priv is RCU safe.  Let's do the RCU dancing.
	 */
	rcu_read_lock();
	cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
	if (!cgrp || !cgroup_tryget(cgrp)) {
		rcu_read_unlock();
		return -ENOENT;
	}
	rcu_read_unlock();

	css_task_iter_start(&cgrp->self, 0, &it);
	while ((tsk = css_task_iter_next(&it))) {
		switch (READ_ONCE(tsk->__state)) {
		case TASK_RUNNING:
			stats->nr_running++;
			break;
		case TASK_INTERRUPTIBLE:
			stats->nr_sleeping++;
			break;
		case TASK_UNINTERRUPTIBLE:
			stats->nr_uninterruptible++;
			break;
		case TASK_STOPPED:
			stats->nr_stopped++;
			break;
		default:
			if (tsk->in_iowait)
				stats->nr_io_wait++;
			break;
		}
	}
	css_task_iter_end(&it);

	cgroup_put(cgrp);
	return 0;
}

void cgroup1_check_for_release(struct cgroup *cgrp)
{
	if (notify_on_release(cgrp) && !cgroup_is_populated(cgrp) &&
	    !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp))
		schedule_work(&cgrp->release_agent_work);
}

/*
 * Notify userspace when a cgroup is released, by running the
 * configured release agent with the name of the cgroup (path
 * relative to the root of cgroup file system) as the argument.
 *
 * Most likely, this user command will try to rmdir this cgroup.
 *
 * This races with the possibility that some other task will be
 * attached to this cgroup before it is removed, or that some other
 * user task will 'mkdir' a child cgroup of this cgroup.  That's ok.
 * The presumed 'rmdir' will fail quietly if this cgroup is no longer
 * unused, and this cgroup will be reprieved from its death sentence,
 * to continue to serve a useful existence.  Next time it's released,
 * we will get notified again, if it still has 'notify_on_release' set.
 *
 * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
 * means only wait until the task is successfully execve()'d.  The
 * separate release agent task is forked by call_usermodehelper(),
 * then control in this thread returns here, without waiting for the
 * release agent task.  We don't bother to wait because the caller of
 * this routine has no use for the exit status of the release agent
 * task, so no sense holding our caller up for that.
 */
void cgroup1_release_agent(struct work_struct *work)
{
	struct cgroup *cgrp =
		container_of(work, struct cgroup, release_agent_work);
	char *pathbuf, *agentbuf;
	char *argv[3], *envp[3];
	int ret;

	/* snoop agent path and exit early if empty */
	if (!cgrp->root->release_agent_path[0])
		return;

	/* prepare argument buffers */
	pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
	agentbuf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!pathbuf || !agentbuf)
		goto out_free;

	spin_lock(&release_agent_path_lock);
	strlcpy(agentbuf, cgrp->root->release_agent_path, PATH_MAX);
	spin_unlock(&release_agent_path_lock);
	if (!agentbuf[0])
		goto out_free;

	ret = cgroup_path_ns(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
	if (ret < 0 || ret >= PATH_MAX)
		goto out_free;

	argv[0] = agentbuf;
	argv[1] = pathbuf;
	argv[2] = NULL;

	/* minimal command environment */
	envp[0] = "HOME=/";
	envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
	envp[2] = NULL;

	call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
out_free:
	kfree(agentbuf);
	kfree(pathbuf);
}
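
/*
 * Illustrative userspace setup for the release-agent mechanism above
 * ("/sbin/my_agent", "mygrp" and the mount point are hypothetical):
 *
 *	echo /sbin/my_agent > /sys/fs/cgroup/myhier/release_agent
 *	echo 1 > /sys/fs/cgroup/myhier/mygrp/notify_on_release
 *
 * When the last task leaves "mygrp" and it has no online children, the
 * kernel runs "/sbin/my_agent /mygrp", passing the cgroup path relative
 * to the hierarchy root as argv[1].
 */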

/*
 * cgroup1_rename - Only allow simple rename of directories in place.
 */
static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
			  const char *new_name_str)
{
	struct cgroup *cgrp = kn->priv;
	int ret;

	/* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */
	if (strchr(new_name_str, '\n'))
		return -EINVAL;

	if (kernfs_type(kn) != KERNFS_DIR)
		return -ENOTDIR;
	if (kn->parent != new_parent)
		return -EIO;

	/*
	 * We're gonna grab cgroup_mutex which nests outside kernfs
	 * active_ref.  kernfs_rename() doesn't require active_ref
	 * protection.  Break them before grabbing cgroup_mutex.
	 */
	kernfs_break_active_protection(new_parent);
	kernfs_break_active_protection(kn);

	mutex_lock(&cgroup_mutex);

	ret = kernfs_rename(kn, new_parent, new_name_str);
	if (!ret)
		TRACE_CGROUP_PATH(rename, cgrp);

	mutex_unlock(&cgroup_mutex);

	kernfs_unbreak_active_protection(kn);
	kernfs_unbreak_active_protection(new_parent);
	return ret;
}

static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
{
	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
	struct cgroup_subsys *ss;
	int ssid;

	for_each_subsys(ss, ssid)
		if (root->subsys_mask & (1 << ssid))
			seq_show_option(seq, ss->legacy_name, NULL);
	if (root->flags & CGRP_ROOT_NOPREFIX)
		seq_puts(seq, ",noprefix");
	if (root->flags & CGRP_ROOT_XATTR)
		seq_puts(seq, ",xattr");
	if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
		seq_puts(seq, ",cpuset_v2_mode");
	if (root->flags & CGRP_ROOT_FAVOR_DYNMODS)
		seq_puts(seq, ",favordynmods");

	spin_lock(&release_agent_path_lock);
	if (strlen(root->release_agent_path))
		seq_show_option(seq, "release_agent",
				root->release_agent_path);
	spin_unlock(&release_agent_path_lock);

	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags))
		seq_puts(seq, ",clone_children");
	if (strlen(root->name))
		seq_show_option(seq, "name", root->name);
	return 0;
}

enum cgroup1_param {
	Opt_all,
	Opt_clone_children,
	Opt_cpuset_v2_mode,
	Opt_name,
	Opt_none,
	Opt_noprefix,
	Opt_release_agent,
	Opt_xattr,
	Opt_favordynmods,
	Opt_nofavordynmods,
};

const struct fs_parameter_spec cgroup1_fs_parameters[] = {
	fsparam_flag  ("all",		Opt_all),
	fsparam_flag  ("clone_children", Opt_clone_children),
	fsparam_flag  ("cpuset_v2_mode", Opt_cpuset_v2_mode),
	fsparam_string("name",		Opt_name),
	fsparam_flag  ("none",		Opt_none),
	fsparam_flag  ("noprefix",	Opt_noprefix),
	fsparam_string("release_agent",	Opt_release_agent),
	fsparam_flag  ("xattr",		Opt_xattr),
	fsparam_flag  ("favordynmods",	Opt_favordynmods),
	fsparam_flag  ("nofavordynmods", Opt_nofavordynmods),
	{}
};
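
/*
 * Example v1 mounts handled by the option table above (mount points and
 * the hierarchy name are illustrative):
 *
 *	mount -t cgroup -o cpuset,noprefix cpuset /dev/cpuset
 *	mount -t cgroup -o none,name=mytree none /mnt/mytree
 *	mount -t cgroup -o all cgroup /mnt/all
 *
 * Controller names select subsystems, "name=" labels a hierarchy, and
 * "none" mounts a hierarchy with no controllers attached.
 */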

int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	struct cgroup_subsys *ss;
	struct fs_parse_result result;
	int opt, i;

	opt = fs_parse(fc, cgroup1_fs_parameters, param, &result);
	if (opt == -ENOPARAM) {
		int ret;

		ret = vfs_parse_fs_param_source(fc, param);
		if (ret != -ENOPARAM)
			return ret;
		for_each_subsys(ss, i) {
			if (strcmp(param->key, ss->legacy_name))
				continue;
			if (!cgroup_ssid_enabled(i) || cgroup1_ssid_disabled(i))
				return invalfc(fc, "Disabled controller '%s'",
					       param->key);
			ctx->subsys_mask |= (1 << i);
			return 0;
		}
		return invalfc(fc, "Unknown subsys name '%s'", param->key);
	}
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_none:
		/* Explicitly have no subsystems */
		ctx->none = true;
		break;
	case Opt_all:
		ctx->all_ss = true;
		break;
	case Opt_noprefix:
		ctx->flags |= CGRP_ROOT_NOPREFIX;
		break;
	case Opt_clone_children:
		ctx->cpuset_clone_children = true;
		break;
	case Opt_cpuset_v2_mode:
		ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE;
		break;
	case Opt_xattr:
		ctx->flags |= CGRP_ROOT_XATTR;
		break;
	case Opt_favordynmods:
		ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS;
		break;
	case Opt_nofavordynmods:
		ctx->flags &= ~CGRP_ROOT_FAVOR_DYNMODS;
		break;
	case Opt_release_agent:
		/* Specifying two release agents is forbidden */
		if (ctx->release_agent)
			return invalfc(fc, "release_agent respecified");
		/*
		 * Release agent gets called with all capabilities,
		 * require capabilities to set release agent.
		 */
		if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN))
			return invalfc(fc, "Setting release_agent not allowed");
		ctx->release_agent = param->string;
		param->string = NULL;
		break;
	case Opt_name:
		/* blocked by boot param? */
		if (cgroup_no_v1_named)
			return -ENOENT;
		/* Can't specify an empty name */
		if (!param->size)
			return invalfc(fc, "Empty name");
		if (param->size > MAX_CGROUP_ROOT_NAMELEN - 1)
			return invalfc(fc, "Name too long");
		/* Must match [\w.-]+ */
		for (i = 0; i < param->size; i++) {
			char c = param->string[i];
			if (isalnum(c))
				continue;
			if ((c == '.') || (c == '-') || (c == '_'))
				continue;
			return invalfc(fc, "Invalid name");
		}
		/* Specifying two names is forbidden */
		if (ctx->name)
			return invalfc(fc, "name respecified");
		ctx->name = param->string;
		param->string = NULL;
		break;
	}
	return 0;
}

static int check_cgroupfs_options(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	u16 mask = U16_MAX;
	u16 enabled = 0;
	struct cgroup_subsys *ss;
	int i;

#ifdef CONFIG_CPUSETS
	mask = ~((u16)1 << cpuset_cgrp_id);
#endif
	for_each_subsys(ss, i)
		if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i))
			enabled |= 1 << i;

	ctx->subsys_mask &= enabled;

	/*
	 * In absence of 'none', 'name=' and subsystem name options,
	 * let's default to 'all'.
	 */
	if (!ctx->subsys_mask && !ctx->none && !ctx->name)
		ctx->all_ss = true;

	if (ctx->all_ss) {
		/* Mutually exclusive option 'all' + subsystem name */
		if (ctx->subsys_mask)
			return invalfc(fc, "subsys name conflicts with all");
		/* 'all' => select all the subsystems */
		ctx->subsys_mask = enabled;
	}

	/*
	 * We either have to specify by name or by subsystems. (So all
	 * empty hierarchies must have a name).
	 */
	if (!ctx->subsys_mask && !ctx->name)
		return invalfc(fc, "Need name or subsystem set");

	/*
	 * Option noprefix was introduced just for backward compatibility
	 * with the old cpuset, so we allow noprefix only if mounting just
	 * the cpuset subsystem.
	 */
	if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask))
		return invalfc(fc, "noprefix used incorrectly");

	/* Can't specify "none" and some subsystems */
	if (ctx->subsys_mask && ctx->none)
		return invalfc(fc, "none used incorrectly");

	return 0;
}
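
/*
 * A few option combinations and how the checks above treat them
 * (illustrative):
 *
 *	-o cpuset,noprefix	valid: noprefix with only cpuset mounted
 *	-o cpu,noprefix		invalid: "noprefix used incorrectly"
 *	-o none,name=mytree	valid: named, controller-less hierarchy
 *	-o all,cpu		invalid: "subsys name conflicts with all"
 *	(no options)		defaults to 'all'
 */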

int cgroup1_reconfigure(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	struct kernfs_root *kf_root = kernfs_root_from_sb(fc->root->d_sb);
	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
	int ret = 0;
	u16 added_mask, removed_mask;

	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);

	/* See what subsystems are wanted */
	ret = check_cgroupfs_options(fc);
	if (ret)
		goto out_unlock;

	if (ctx->subsys_mask != root->subsys_mask || ctx->release_agent)
		pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n",
			task_tgid_nr(current), current->comm);

	added_mask = ctx->subsys_mask & ~root->subsys_mask;
	removed_mask = root->subsys_mask & ~ctx->subsys_mask;

	/* Don't allow flags or name to change at remount */
	if ((ctx->flags ^ root->flags) ||
	    (ctx->name && strcmp(ctx->name, root->name))) {
		errorfc(fc, "option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"",
			ctx->flags, ctx->name ?: "", root->flags, root->name);
		ret = -EINVAL;
		goto out_unlock;
	}

	/* remounting is not allowed for populated hierarchies */
	if (!list_empty(&root->cgrp.self.children)) {
		ret = -EBUSY;
		goto out_unlock;
	}

	ret = rebind_subsystems(root, added_mask);
	if (ret)
		goto out_unlock;

	WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask));

	if (ctx->release_agent) {
		spin_lock(&release_agent_path_lock);
		strcpy(root->release_agent_path, ctx->release_agent);
		spin_unlock(&release_agent_path_lock);
	}

	trace_cgroup_remount(root);

out_unlock:
	mutex_unlock(&cgroup_mutex);
	return ret;
}

struct kernfs_syscall_ops cgroup1_kf_syscall_ops = {
	.rename			= cgroup1_rename,
	.show_options		= cgroup1_show_options,
	.mkdir			= cgroup_mkdir,
	.rmdir			= cgroup_rmdir,
	.show_path		= cgroup_show_path,
};

/*
 * The guts of cgroup1 mount - find or create cgroup_root to use.
 * Called with cgroup_mutex held; returns 0 on success, -E... on
 * error and positive - in case when the candidate is busy dying.
 * On success it stashes a reference to cgroup_root into given
 * cgroup_fs_context; that reference is *NOT* counting towards the
 * cgroup_root refcount.
 */
static int cgroup1_root_to_use(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	struct cgroup_root *root;
	struct cgroup_subsys *ss;
	int i, ret;

	/* First find the desired set of subsystems */
	ret = check_cgroupfs_options(fc);
	if (ret)
		return ret;

	/*
	 * Destruction of cgroup root is asynchronous, so subsystems may
	 * still be dying after the previous unmount.  Let's drain the
	 * dying subsystems.  We just need to ensure that the ones
	 * unmounted previously finish dying and don't care about new ones
	 * starting.  Testing ref liveliness is good enough.
	 */
	for_each_subsys(ss, i) {
		if (!(ctx->subsys_mask & (1 << i)) ||
		    ss->root == &cgrp_dfl_root)
			continue;

		if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt))
			return 1;	/* restart */
		cgroup_put(&ss->root->cgrp);
	}

	for_each_root(root) {
		bool name_match = false;

		if (root == &cgrp_dfl_root)
			continue;

		/*
		 * If we asked for a name then it must match.  Also, if
		 * name matches but subsys_mask doesn't, we should fail.
		 * Remember whether name matched.
		 */
		if (ctx->name) {
			if (strcmp(ctx->name, root->name))
				continue;
			name_match = true;
		}

		/*
		 * If we asked for subsystems (or explicitly for no
		 * subsystems) then they must match.
		 */
		if ((ctx->subsys_mask || ctx->none) &&
		    (ctx->subsys_mask != root->subsys_mask)) {
			if (!name_match)
				continue;
			return -EBUSY;
		}

		if (root->flags ^ ctx->flags)
			pr_warn("new mount options do not match the existing superblock, will be ignored\n");

		ctx->root = root;
		return 0;
	}

	/*
	 * No such thing, create a new one.  name= matching without subsys
	 * specification is allowed for already existing hierarchies but we
	 * can't create new one without subsys specification.
	 */
	if (!ctx->subsys_mask && !ctx->none)
		return invalfc(fc, "No subsys list or none specified");

	/* Hierarchies may only be created in the initial cgroup namespace. */
	if (ctx->ns != &init_cgroup_ns)
		return -EPERM;

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root)
		return -ENOMEM;

	ctx->root = root;
	init_cgroup_root(ctx);

	ret = cgroup_setup_root(root, ctx->subsys_mask);
	if (!ret)
		cgroup_favor_dynmods(root, ctx->flags & CGRP_ROOT_FAVOR_DYNMODS);
	else
		cgroup_free_root(root);

	return ret;
}

int cgroup1_get_tree(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	int ret;

	/* Check if the caller has permission to mount. */
	if (!ns_capable(ctx->ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);

	ret = cgroup1_root_to_use(fc);
	if (!ret && !percpu_ref_tryget_live(&ctx->root->cgrp.self.refcnt))
		ret = 1;	/* restart */

	mutex_unlock(&cgroup_mutex);

	if (!ret)
		ret = cgroup_do_get_tree(fc);

	if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) {
		fc_drop_locked(fc);
		ret = 1;
	}

	if (unlikely(ret > 0)) {
		msleep(10);
		return restart_syscall();
	}
	return ret;
}

static int __init cgroup1_wq_init(void)
{
	/*
	 * Used to destroy pidlists and separate to serve as flush domain.
	 * Cap @max_active to 1 too.
	 */
	cgroup_pidlist_destroy_wq = alloc_workqueue("cgroup_pidlist_destroy",
						    0, 1);
	BUG_ON(!cgroup_pidlist_destroy_wq);
	return 0;
}
core_initcall(cgroup1_wq_init);

static int __init cgroup_no_v1(char *str)
{
	struct cgroup_subsys *ss;
	char *token;
	int i;

	while ((token = strsep(&str, ",")) != NULL) {
		if (!*token)
			continue;

		if (!strcmp(token, "all")) {
			cgroup_no_v1_mask = U16_MAX;
			continue;
		}

		if (!strcmp(token, "named")) {
			cgroup_no_v1_named = true;
			continue;
		}

		for_each_subsys(ss, i) {
			if (strcmp(token, ss->name) &&
			    strcmp(token, ss->legacy_name))
				continue;

			cgroup_no_v1_mask |= 1 << i;
		}
	}
	return 1;
}
__setup("cgroup_no_v1=", cgroup_no_v1);
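
/*
 * Example kernel command lines handled by the parser above (illustrative):
 *
 *	cgroup_no_v1=memory,cpu		block v1 mounts of memory and cpu
 *	cgroup_no_v1=all		block all v1 controllers
 *	cgroup_no_v1=named		block named (name=) v1 hierarchies
 */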