0001
0002
0003
0004
0005
0006
0007
0008
0009 #include <linux/kernel.h>
0010 #include <linux/taskstats_kern.h>
0011 #include <linux/tsacct_kern.h>
0012 #include <linux/acct.h>
0013 #include <linux/delayacct.h>
0014 #include <linux/cpumask.h>
0015 #include <linux/percpu.h>
0016 #include <linux/slab.h>
0017 #include <linux/cgroupstats.h>
0018 #include <linux/cgroup.h>
0019 #include <linux/fs.h>
0020 #include <linux/file.h>
0021 #include <linux/pid_namespace.h>
0022 #include <net/genetlink.h>
0023 #include <linux/atomic.h>
0024 #include <linux/sched/cputime.h>
0025
0026
0027
0028
0029
0030 #define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS)
0031
0032 static DEFINE_PER_CPU(__u32, taskstats_seqnum);
0033 static int family_registered;
0034 struct kmem_cache *taskstats_cache;
0035
0036 static struct genl_family family;
0037
0038 static const struct nla_policy taskstats_cmd_get_policy[] = {
0039 [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 },
0040 [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
0041 [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
0042 [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
0043
0044 static const struct nla_policy cgroupstats_cmd_get_policy[] = {
0045 [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 },
0046 };
0047
0048 struct listener {
0049 struct list_head list;
0050 pid_t pid;
0051 char valid;
0052 };
0053
0054 struct listener_list {
0055 struct rw_semaphore sem;
0056 struct list_head list;
0057 };
0058 static DEFINE_PER_CPU(struct listener_list, listener_array);
0059
/*
 * Operation selector passed as the last argument of add_del_listener().
 */
enum actions {
	REGISTER,	/* add the caller as a listener on each cpu in the mask */
	DEREGISTER,	/* remove the caller from each cpu in the mask */
	CPU_DONT_CARE	/* not referenced elsewhere in this file */
};
0065
0066 static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
0067 size_t size)
0068 {
0069 struct sk_buff *skb;
0070 void *reply;
0071
0072
0073
0074
0075 skb = genlmsg_new(size, GFP_KERNEL);
0076 if (!skb)
0077 return -ENOMEM;
0078
0079 if (!info) {
0080 int seq = this_cpu_inc_return(taskstats_seqnum) - 1;
0081
0082 reply = genlmsg_put(skb, 0, seq, &family, 0, cmd);
0083 } else
0084 reply = genlmsg_put_reply(skb, info, &family, 0, cmd);
0085 if (reply == NULL) {
0086 nlmsg_free(skb);
0087 return -EINVAL;
0088 }
0089
0090 *skbp = skb;
0091 return 0;
0092 }
0093
0094
0095
0096
/*
 * send_reply - finalise a message and send it back to the requester
 * @skb:  message built via prepare_reply()
 * @info: request the message answers
 *
 * Returns whatever genlmsg_reply() returns.
 */
static int send_reply(struct sk_buff *skb, struct genl_info *info)
{
	struct genlmsghdr *hdr = nlmsg_data(nlmsg_hdr(skb));

	genlmsg_end(skb, genlmsg_data(hdr));

	return genlmsg_reply(skb, info);
}
0106
0107
0108
0109
0110 static void send_cpu_listeners(struct sk_buff *skb,
0111 struct listener_list *listeners)
0112 {
0113 struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
0114 struct listener *s, *tmp;
0115 struct sk_buff *skb_next, *skb_cur = skb;
0116 void *reply = genlmsg_data(genlhdr);
0117 int delcount = 0;
0118
0119 genlmsg_end(skb, reply);
0120
0121 down_read(&listeners->sem);
0122 list_for_each_entry(s, &listeners->list, list) {
0123 int rc;
0124
0125 skb_next = NULL;
0126 if (!list_is_last(&s->list, &listeners->list)) {
0127 skb_next = skb_clone(skb_cur, GFP_KERNEL);
0128 if (!skb_next)
0129 break;
0130 }
0131 rc = genlmsg_unicast(&init_net, skb_cur, s->pid);
0132 if (rc == -ECONNREFUSED) {
0133 s->valid = 0;
0134 delcount++;
0135 }
0136 skb_cur = skb_next;
0137 }
0138 up_read(&listeners->sem);
0139
0140 if (skb_cur)
0141 nlmsg_free(skb_cur);
0142
0143 if (!delcount)
0144 return;
0145
0146
0147 down_write(&listeners->sem);
0148 list_for_each_entry_safe(s, tmp, &listeners->list, list) {
0149 if (!s->valid) {
0150 list_del(&s->list);
0151 kfree(s);
0152 }
0153 }
0154 up_write(&listeners->sem);
0155 }
0156
0157 static void exe_add_tsk(struct taskstats *stats, struct task_struct *tsk)
0158 {
0159
0160 struct file *exe_file = get_task_exe_file(tsk);
0161
0162 if (exe_file) {
0163
0164 stats->ac_exe_dev =
0165 huge_encode_dev(exe_file->f_inode->i_sb->s_dev);
0166 stats->ac_exe_inode = exe_file->f_inode->i_ino;
0167 fput(exe_file);
0168 } else {
0169 stats->ac_exe_dev = 0;
0170 stats->ac_exe_inode = 0;
0171 }
0172 }
0173
0174 static void fill_stats(struct user_namespace *user_ns,
0175 struct pid_namespace *pid_ns,
0176 struct task_struct *tsk, struct taskstats *stats)
0177 {
0178 memset(stats, 0, sizeof(*stats));
0179
0180
0181
0182
0183
0184
0185
0186 delayacct_add_tsk(stats, tsk);
0187
0188
0189 stats->version = TASKSTATS_VERSION;
0190 stats->nvcsw = tsk->nvcsw;
0191 stats->nivcsw = tsk->nivcsw;
0192 bacct_add_tsk(user_ns, pid_ns, stats, tsk);
0193
0194
0195 xacct_add_tsk(stats, tsk);
0196
0197
0198 exe_add_tsk(stats, tsk);
0199 }
0200
0201 static int fill_stats_for_pid(pid_t pid, struct taskstats *stats)
0202 {
0203 struct task_struct *tsk;
0204
0205 tsk = find_get_task_by_vpid(pid);
0206 if (!tsk)
0207 return -ESRCH;
0208 fill_stats(current_user_ns(), task_active_pid_ns(current), tsk, stats);
0209 put_task_struct(tsk);
0210 return 0;
0211 }
0212
0213 static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats)
0214 {
0215 struct task_struct *tsk, *first;
0216 unsigned long flags;
0217 int rc = -ESRCH;
0218 u64 delta, utime, stime;
0219 u64 start_time;
0220
0221
0222
0223
0224
0225 rcu_read_lock();
0226 first = find_task_by_vpid(tgid);
0227
0228 if (!first || !lock_task_sighand(first, &flags))
0229 goto out;
0230
0231 if (first->signal->stats)
0232 memcpy(stats, first->signal->stats, sizeof(*stats));
0233 else
0234 memset(stats, 0, sizeof(*stats));
0235
0236 tsk = first;
0237 start_time = ktime_get_ns();
0238 do {
0239 if (tsk->exit_state)
0240 continue;
0241
0242
0243
0244
0245
0246
0247 delayacct_add_tsk(stats, tsk);
0248
0249
0250 delta = start_time - tsk->start_time;
0251
0252 do_div(delta, NSEC_PER_USEC);
0253 stats->ac_etime += delta;
0254
0255 task_cputime(tsk, &utime, &stime);
0256 stats->ac_utime += div_u64(utime, NSEC_PER_USEC);
0257 stats->ac_stime += div_u64(stime, NSEC_PER_USEC);
0258
0259 stats->nvcsw += tsk->nvcsw;
0260 stats->nivcsw += tsk->nivcsw;
0261 } while_each_thread(first, tsk);
0262
0263 unlock_task_sighand(first, &flags);
0264 rc = 0;
0265 out:
0266 rcu_read_unlock();
0267
0268 stats->version = TASKSTATS_VERSION;
0269
0270
0271
0272
0273 return rc;
0274 }
0275
0276 static void fill_tgid_exit(struct task_struct *tsk)
0277 {
0278 unsigned long flags;
0279
0280 spin_lock_irqsave(&tsk->sighand->siglock, flags);
0281 if (!tsk->signal->stats)
0282 goto ret;
0283
0284
0285
0286
0287
0288
0289
0290 delayacct_add_tsk(tsk->signal->stats, tsk);
0291 ret:
0292 spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
0293 return;
0294 }
0295
0296 static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd)
0297 {
0298 struct listener_list *listeners;
0299 struct listener *s, *tmp, *s2;
0300 unsigned int cpu;
0301 int ret = 0;
0302
0303 if (!cpumask_subset(mask, cpu_possible_mask))
0304 return -EINVAL;
0305
0306 if (current_user_ns() != &init_user_ns)
0307 return -EINVAL;
0308
0309 if (task_active_pid_ns(current) != &init_pid_ns)
0310 return -EINVAL;
0311
0312 if (isadd == REGISTER) {
0313 for_each_cpu(cpu, mask) {
0314 s = kmalloc_node(sizeof(struct listener),
0315 GFP_KERNEL, cpu_to_node(cpu));
0316 if (!s) {
0317 ret = -ENOMEM;
0318 goto cleanup;
0319 }
0320 s->pid = pid;
0321 s->valid = 1;
0322
0323 listeners = &per_cpu(listener_array, cpu);
0324 down_write(&listeners->sem);
0325 list_for_each_entry(s2, &listeners->list, list) {
0326 if (s2->pid == pid && s2->valid)
0327 goto exists;
0328 }
0329 list_add(&s->list, &listeners->list);
0330 s = NULL;
0331 exists:
0332 up_write(&listeners->sem);
0333 kfree(s);
0334 }
0335 return 0;
0336 }
0337
0338
0339 cleanup:
0340 for_each_cpu(cpu, mask) {
0341 listeners = &per_cpu(listener_array, cpu);
0342 down_write(&listeners->sem);
0343 list_for_each_entry_safe(s, tmp, &listeners->list, list) {
0344 if (s->pid == pid) {
0345 list_del(&s->list);
0346 kfree(s);
0347 break;
0348 }
0349 }
0350 up_write(&listeners->sem);
0351 }
0352 return ret;
0353 }
0354
0355 static int parse(struct nlattr *na, struct cpumask *mask)
0356 {
0357 char *data;
0358 int len;
0359 int ret;
0360
0361 if (na == NULL)
0362 return 1;
0363 len = nla_len(na);
0364 if (len > TASKSTATS_CPUMASK_MAXLEN)
0365 return -E2BIG;
0366 if (len < 1)
0367 return -EINVAL;
0368 data = kmalloc(len, GFP_KERNEL);
0369 if (!data)
0370 return -ENOMEM;
0371 nla_strscpy(data, na, len);
0372 ret = cpulist_parse(data, mask);
0373 kfree(data);
0374 return ret;
0375 }
0376
0377 static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
0378 {
0379 struct nlattr *na, *ret;
0380 int aggr;
0381
0382 aggr = (type == TASKSTATS_TYPE_PID)
0383 ? TASKSTATS_TYPE_AGGR_PID
0384 : TASKSTATS_TYPE_AGGR_TGID;
0385
0386 na = nla_nest_start_noflag(skb, aggr);
0387 if (!na)
0388 goto err;
0389
0390 if (nla_put(skb, type, sizeof(pid), &pid) < 0) {
0391 nla_nest_cancel(skb, na);
0392 goto err;
0393 }
0394 ret = nla_reserve_64bit(skb, TASKSTATS_TYPE_STATS,
0395 sizeof(struct taskstats), TASKSTATS_TYPE_NULL);
0396 if (!ret) {
0397 nla_nest_cancel(skb, na);
0398 goto err;
0399 }
0400 nla_nest_end(skb, na);
0401
0402 return nla_data(ret);
0403 err:
0404 return NULL;
0405 }
0406
0407 static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
0408 {
0409 int rc = 0;
0410 struct sk_buff *rep_skb;
0411 struct cgroupstats *stats;
0412 struct nlattr *na;
0413 size_t size;
0414 u32 fd;
0415 struct fd f;
0416
0417 na = info->attrs[CGROUPSTATS_CMD_ATTR_FD];
0418 if (!na)
0419 return -EINVAL;
0420
0421 fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]);
0422 f = fdget(fd);
0423 if (!f.file)
0424 return 0;
0425
0426 size = nla_total_size(sizeof(struct cgroupstats));
0427
0428 rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb,
0429 size);
0430 if (rc < 0)
0431 goto err;
0432
0433 na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS,
0434 sizeof(struct cgroupstats));
0435 if (na == NULL) {
0436 nlmsg_free(rep_skb);
0437 rc = -EMSGSIZE;
0438 goto err;
0439 }
0440
0441 stats = nla_data(na);
0442 memset(stats, 0, sizeof(*stats));
0443
0444 rc = cgroupstats_build(stats, f.file->f_path.dentry);
0445 if (rc < 0) {
0446 nlmsg_free(rep_skb);
0447 goto err;
0448 }
0449
0450 rc = send_reply(rep_skb, info);
0451
0452 err:
0453 fdput(f);
0454 return rc;
0455 }
0456
0457 static int cmd_attr_register_cpumask(struct genl_info *info)
0458 {
0459 cpumask_var_t mask;
0460 int rc;
0461
0462 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
0463 return -ENOMEM;
0464 rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask);
0465 if (rc < 0)
0466 goto out;
0467 rc = add_del_listener(info->snd_portid, mask, REGISTER);
0468 out:
0469 free_cpumask_var(mask);
0470 return rc;
0471 }
0472
0473 static int cmd_attr_deregister_cpumask(struct genl_info *info)
0474 {
0475 cpumask_var_t mask;
0476 int rc;
0477
0478 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
0479 return -ENOMEM;
0480 rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask);
0481 if (rc < 0)
0482 goto out;
0483 rc = add_del_listener(info->snd_portid, mask, DEREGISTER);
0484 out:
0485 free_cpumask_var(mask);
0486 return rc;
0487 }
0488
0489 static size_t taskstats_packet_size(void)
0490 {
0491 size_t size;
0492
0493 size = nla_total_size(sizeof(u32)) +
0494 nla_total_size_64bit(sizeof(struct taskstats)) +
0495 nla_total_size(0);
0496
0497 return size;
0498 }
0499
0500 static int cmd_attr_pid(struct genl_info *info)
0501 {
0502 struct taskstats *stats;
0503 struct sk_buff *rep_skb;
0504 size_t size;
0505 u32 pid;
0506 int rc;
0507
0508 size = taskstats_packet_size();
0509
0510 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
0511 if (rc < 0)
0512 return rc;
0513
0514 rc = -EINVAL;
0515 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
0516 stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid);
0517 if (!stats)
0518 goto err;
0519
0520 rc = fill_stats_for_pid(pid, stats);
0521 if (rc < 0)
0522 goto err;
0523 return send_reply(rep_skb, info);
0524 err:
0525 nlmsg_free(rep_skb);
0526 return rc;
0527 }
0528
0529 static int cmd_attr_tgid(struct genl_info *info)
0530 {
0531 struct taskstats *stats;
0532 struct sk_buff *rep_skb;
0533 size_t size;
0534 u32 tgid;
0535 int rc;
0536
0537 size = taskstats_packet_size();
0538
0539 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
0540 if (rc < 0)
0541 return rc;
0542
0543 rc = -EINVAL;
0544 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
0545 stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid);
0546 if (!stats)
0547 goto err;
0548
0549 rc = fill_stats_for_tgid(tgid, stats);
0550 if (rc < 0)
0551 goto err;
0552 return send_reply(rep_skb, info);
0553 err:
0554 nlmsg_free(rep_skb);
0555 return rc;
0556 }
0557
0558 static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
0559 {
0560 if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK])
0561 return cmd_attr_register_cpumask(info);
0562 else if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK])
0563 return cmd_attr_deregister_cpumask(info);
0564 else if (info->attrs[TASKSTATS_CMD_ATTR_PID])
0565 return cmd_attr_pid(info);
0566 else if (info->attrs[TASKSTATS_CMD_ATTR_TGID])
0567 return cmd_attr_tgid(info);
0568 else
0569 return -EINVAL;
0570 }
0571
0572 static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk)
0573 {
0574 struct signal_struct *sig = tsk->signal;
0575 struct taskstats *stats_new, *stats;
0576
0577
0578 stats = smp_load_acquire(&sig->stats);
0579 if (stats || thread_group_empty(tsk))
0580 return stats;
0581
0582
0583 stats_new = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);
0584
0585 spin_lock_irq(&tsk->sighand->siglock);
0586 stats = sig->stats;
0587 if (!stats) {
0588
0589
0590
0591
0592 smp_store_release(&sig->stats, stats_new);
0593 stats = stats_new;
0594 stats_new = NULL;
0595 }
0596 spin_unlock_irq(&tsk->sighand->siglock);
0597
0598 if (stats_new)
0599 kmem_cache_free(taskstats_cache, stats_new);
0600
0601 return stats;
0602 }
0603
0604
0605 void taskstats_exit(struct task_struct *tsk, int group_dead)
0606 {
0607 int rc;
0608 struct listener_list *listeners;
0609 struct taskstats *stats;
0610 struct sk_buff *rep_skb;
0611 size_t size;
0612 int is_thread_group;
0613
0614 if (!family_registered)
0615 return;
0616
0617
0618
0619
0620 size = taskstats_packet_size();
0621
0622 is_thread_group = !!taskstats_tgid_alloc(tsk);
0623 if (is_thread_group) {
0624
0625 size = 2 * size;
0626
0627 fill_tgid_exit(tsk);
0628 }
0629
0630 listeners = raw_cpu_ptr(&listener_array);
0631 if (list_empty(&listeners->list))
0632 return;
0633
0634 rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, size);
0635 if (rc < 0)
0636 return;
0637
0638 stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID,
0639 task_pid_nr_ns(tsk, &init_pid_ns));
0640 if (!stats)
0641 goto err;
0642
0643 fill_stats(&init_user_ns, &init_pid_ns, tsk, stats);
0644 if (group_dead)
0645 stats->ac_flag |= AGROUP;
0646
0647
0648
0649
0650 if (!is_thread_group || !group_dead)
0651 goto send;
0652
0653 stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID,
0654 task_tgid_nr_ns(tsk, &init_pid_ns));
0655 if (!stats)
0656 goto err;
0657
0658 memcpy(stats, tsk->signal->stats, sizeof(*stats));
0659
0660 send:
0661 send_cpu_listeners(rep_skb, listeners);
0662 return;
0663 err:
0664 nlmsg_free(rep_skb);
0665 }
0666
0667 static const struct genl_ops taskstats_ops[] = {
0668 {
0669 .cmd = TASKSTATS_CMD_GET,
0670 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
0671 .doit = taskstats_user_cmd,
0672 .policy = taskstats_cmd_get_policy,
0673 .maxattr = ARRAY_SIZE(taskstats_cmd_get_policy) - 1,
0674 .flags = GENL_ADMIN_PERM,
0675 },
0676 {
0677 .cmd = CGROUPSTATS_CMD_GET,
0678 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
0679 .doit = cgroupstats_user_cmd,
0680 .policy = cgroupstats_cmd_get_policy,
0681 .maxattr = ARRAY_SIZE(cgroupstats_cmd_get_policy) - 1,
0682 },
0683 };
0684
0685 static struct genl_family family __ro_after_init = {
0686 .name = TASKSTATS_GENL_NAME,
0687 .version = TASKSTATS_GENL_VERSION,
0688 .module = THIS_MODULE,
0689 .ops = taskstats_ops,
0690 .n_ops = ARRAY_SIZE(taskstats_ops),
0691 .netnsok = true,
0692 };
0693
0694
0695 void __init taskstats_init_early(void)
0696 {
0697 unsigned int i;
0698
0699 taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC);
0700 for_each_possible_cpu(i) {
0701 INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
0702 init_rwsem(&(per_cpu(listener_array, i).sem));
0703 }
0704 }
0705
0706 static int __init taskstats_init(void)
0707 {
0708 int rc;
0709
0710 rc = genl_register_family(&family);
0711 if (rc)
0712 return rc;
0713
0714 family_registered = 1;
0715 pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
0716 return 0;
0717 }
0718
0719
0720
0721
0722
0723 late_initcall(taskstats_init);