#include <linux/init.h>
#include <linux/namei.h>
#include <linux/pid_namespace.h>
#include <linux/fs.h>
#include <linux/fdtable.h>
#include <linux/filter.h>
#include <linux/btf_ids.h>
#include "mmap_unlock_work.h"

struct bpf_iter_seq_task_common {
	struct pid_namespace *ns;
};

struct bpf_iter_seq_task_info {
	/* The first field must be struct bpf_iter_seq_task_common.
	 * This is assumed by the {init, fini}_seq_pidns() callback
	 * functions, which operate on priv_data through this common header.
	 */
	struct bpf_iter_seq_task_common common;
	u32 tid;
};

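/* Find the next task with pid >= *tid in namespace @ns, take a reference
 * on it, and update *tid to the pid actually found.  When
 * @skip_if_dup_files is set, threads that share their files table with
 * the thread group leader are skipped, so the task_file iterator does
 * not walk the same fd table more than once.
 */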
static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
					     u32 *tid,
					     bool skip_if_dup_files)
{
	struct task_struct *task = NULL;
	struct pid *pid;

	rcu_read_lock();
retry:
	pid = find_ge_pid(*tid, ns);
	if (pid) {
		*tid = pid_nr_ns(pid, ns);
		task = get_pid_task(pid, PIDTYPE_PID);
		if (!task) {
			++*tid;
			goto retry;
		} else if (skip_if_dup_files && !thread_group_leader(task) &&
			   task->files == task->group_leader->files) {
			put_task_struct(task);
			task = NULL;
			++*tid;
			goto retry;
		}
	}
	rcu_read_unlock();

	return task;
}

static void *task_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_task_info *info = seq->private;
	struct task_struct *task;

	task = task_seq_get_next(info->common.ns, &info->tid, false);
	if (!task)
		return NULL;

	if (*pos == 0)
		++*pos;
	return task;
}

static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_task_info *info = seq->private;
	struct task_struct *task;

	++*pos;
	++info->tid;
	put_task_struct((struct task_struct *)v);
	task = task_seq_get_next(info->common.ns, &info->tid, false);
	if (!task)
		return NULL;

	return task;
}

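/* Context passed to BPF programs attached to the task iterator.
 * ->task is NULL on the final invocation from task_seq_stop().
 */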
struct bpf_iter__task {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct task_struct *, task);
};

DEFINE_BPF_ITER_FUNC(task, struct bpf_iter_meta *meta, struct task_struct *task)

static int __task_seq_show(struct seq_file *seq, struct task_struct *task,
			   bool in_stop)
{
	struct bpf_iter_meta meta;
	struct bpf_iter__task ctx;
	struct bpf_prog *prog;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, in_stop);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.task = task;
	return bpf_iter_run_prog(prog, &ctx);
}

static int task_seq_show(struct seq_file *seq, void *v)
{
	return __task_seq_show(seq, v, false);
}

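/* ->stop is called with v == NULL when iteration is complete; run the
 * program one last time with a NULL task so it can finalize its output.
 */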
static void task_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		(void)__task_seq_show(seq, v, true);
	else
		put_task_struct((struct task_struct *)v);
}

static const struct seq_operations task_seq_ops = {
	.start = task_seq_start,
	.next = task_seq_next,
	.stop = task_seq_stop,
	.show = task_seq_show,
};

struct bpf_iter_seq_task_file_info {
	/* The first field must be struct bpf_iter_seq_task_common.
	 * This is assumed by the {init, fini}_seq_pidns() callback
	 * functions, which operate on priv_data through this common header.
	 */
	struct bpf_iter_seq_task_common common;
	struct task_struct *task;
	u32 tid;
	u32 fd;
};

static struct file *
task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
{
	struct pid_namespace *ns = info->common.ns;
	u32 curr_tid = info->tid;
	struct task_struct *curr_task;
	unsigned int curr_fd = info->fd;

	/* If this function returns a non-NULL file object,
	 * it held a reference to the task/file.
	 * Otherwise, it does not hold any reference.
	 */
again:
	if (info->task) {
		curr_task = info->task;
		curr_fd = info->fd;
	} else {
		curr_task = task_seq_get_next(ns, &curr_tid, true);
		if (!curr_task) {
			info->task = NULL;
			info->tid = curr_tid;
			return NULL;
		}

		/* set info->task and info->tid */
		info->task = curr_task;
		if (curr_tid == info->tid) {
			curr_fd = info->fd;
		} else {
			info->tid = curr_tid;
			curr_fd = 0;
		}
	}

	rcu_read_lock();
	for (;; curr_fd++) {
		struct file *f;
		f = task_lookup_next_fd_rcu(curr_task, &curr_fd);
		if (!f)
			break;
		if (!get_file_rcu(f))
			continue;

		/* set info->fd */
		info->fd = curr_fd;
		rcu_read_unlock();
		return f;
	}

	/* the current task is done, go to the next task */
	rcu_read_unlock();
	put_task_struct(curr_task);
	info->task = NULL;
	info->fd = 0;
	curr_tid = ++(info->tid);
	goto again;
}

static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;
	struct file *file;

	info->task = NULL;
	file = task_file_seq_get_next(info);
	if (file && *pos == 0)
		++*pos;

	return file;
}

static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;

	++*pos;
	++info->fd;
	fput((struct file *)v);
	return task_file_seq_get_next(info);
}

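/* Context for task_file programs.  fd sits between two pointer members
 * and is forced to 8-byte alignment so that the following file pointer
 * stays naturally aligned in the 64-bit BPF context layout.
 */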
struct bpf_iter__task_file {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct task_struct *, task);
	u32 fd __aligned(8);
	__bpf_md_ptr(struct file *, file);
};

DEFINE_BPF_ITER_FUNC(task_file, struct bpf_iter_meta *meta,
		     struct task_struct *task, u32 fd,
		     struct file *file)

static int __task_file_seq_show(struct seq_file *seq, struct file *file,
				bool in_stop)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;
	struct bpf_iter__task_file ctx;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, in_stop);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.task = info->task;
	ctx.fd = info->fd;
	ctx.file = file;
	return bpf_iter_run_prog(prog, &ctx);
}

static int task_file_seq_show(struct seq_file *seq, void *v)
{
	return __task_file_seq_show(seq, v, false);
}

static void task_file_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;

	if (!v) {
		(void)__task_file_seq_show(seq, v, true);
	} else {
		fput((struct file *)v);
		put_task_struct(info->task);
		info->task = NULL;
	}
}

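/* Pin the pid namespace of the task that creates the iterator; all
 * iteration is done relative to this namespace until fini releases it.
 */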
static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_task_common *common = priv_data;

	common->ns = get_pid_ns(task_active_pid_ns(current));
	return 0;
}

static void fini_seq_pidns(void *priv_data)
{
	struct bpf_iter_seq_task_common *common = priv_data;

	put_pid_ns(common->ns);
}

static const struct seq_operations task_file_seq_ops = {
	.start = task_file_seq_start,
	.next = task_file_seq_next,
	.stop = task_file_seq_stop,
	.show = task_file_seq_show,
};

struct bpf_iter_seq_task_vma_info {
	/* The first field must be struct bpf_iter_seq_task_common.
	 * This is assumed by the {init, fini}_seq_pidns() callback
	 * functions, which operate on priv_data through this common header.
	 */
	struct bpf_iter_seq_task_common common;
	struct task_struct *task;
	struct vm_area_struct *vma;
	u32 tid;
	unsigned long prev_vm_start;
	unsigned long prev_vm_end;
};

enum bpf_task_vma_iter_find_op {
	task_vma_iter_first_vma,   /* use mm->mmap */
	task_vma_iter_next_vma,    /* use curr_vma->vm_next */
	task_vma_iter_find_vma,    /* use find_vma() to find next vma */
};

static struct vm_area_struct *
task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
{
	struct pid_namespace *ns = info->common.ns;
	enum bpf_task_vma_iter_find_op op;
	struct vm_area_struct *curr_vma;
	struct task_struct *curr_task;
	u32 curr_tid = info->tid;

	/* If this function returns a non-NULL vma, it holds a reference to
	 * the task_struct, and holds read lock on vma->mm->mmap_lock.
	 * If this function returns NULL, it does not hold any reference or
	 * lock.
	 */
	if (info->task) {
		curr_task = info->task;
		curr_vma = info->vma;
		/* In case of lock contention, drop mmap_lock to unblock
		 * the writer.
		 *
		 * After relock, call find_vma(mm, prev_vm_end - 1) to find
		 * the new vma to process.
		 *
		 *   +------+------+-----------+
		 *   | VMA1 | VMA2 | VMA3      |
		 *   +------+------+-----------+
		 *   |      |      |           |
		 *  4k     8k     16k         400k
		 *
		 * For example, curr_vma == VMA2. Before unlock, we set
		 *
		 *    prev_vm_start = 8k
		 *    prev_vm_end   = 16k
		 *
		 * There are a few cases:
		 *
		 * 1) VMA2 is freed, but VMA3 exists.
		 *
		 *    find_vma() will return VMA3, just process VMA3.
		 *
		 * 2) VMA2 still exists.
		 *
		 *    find_vma() will return VMA2, process VMA2->next.
		 *
		 * 3) no more vma in this mm.
		 *
		 *    Process the next task.
		 *
		 * 4) find_vma() returns a different vma, VMA2'.
		 *
		 *    4.1) If VMA2' covers the same range as VMA2, skip
		 *         VMA2', because that range was already processed.
		 *
		 *    4.2) If VMA2' covers a different range, process VMA2'.
		 */
		if (mmap_lock_is_contended(curr_task->mm)) {
			info->prev_vm_start = curr_vma->vm_start;
			info->prev_vm_end = curr_vma->vm_end;
			op = task_vma_iter_find_vma;
			mmap_read_unlock(curr_task->mm);
			if (mmap_read_lock_killable(curr_task->mm))
				goto finish;
		} else {
			op = task_vma_iter_next_vma;
		}
	} else {
again:
		curr_task = task_seq_get_next(ns, &curr_tid, true);
		if (!curr_task) {
			info->tid = curr_tid + 1;
			goto finish;
		}

		if (curr_tid != info->tid) {
			info->tid = curr_tid;
			/* new task, process the first vma */
			op = task_vma_iter_first_vma;
		} else {
			/* Found the same tid, which means the user space
			 * finished data in the previous buffer and read
			 * more.  We dropped mmap_lock before returning
			 * to user space, so it is necessary to use
			 * find_vma() to find the next vma to process.
			 */
			op = task_vma_iter_find_vma;
		}

		if (!curr_task->mm)
			goto next_task;

		if (mmap_read_lock_killable(curr_task->mm))
			goto finish;
	}

	switch (op) {
	case task_vma_iter_first_vma:
		curr_vma = curr_task->mm->mmap;
		break;
	case task_vma_iter_next_vma:
		curr_vma = curr_vma->vm_next;
		break;
	case task_vma_iter_find_vma:
		/* We dropped mmap_lock so it is necessary to use find_vma
		 * to find the next vma.  This is similar to the mechanism
		 * in show_smaps_rollup().
		 */
		curr_vma = find_vma(curr_task->mm, info->prev_vm_end - 1);
		/* Cases 1) and 4.2) above: just use curr_vma.  Skip the
		 * vma only when it covers exactly the range we already
		 * processed, i.e. cases 2) and 4.1) above.
		 */
		if (curr_vma &&
		    curr_vma->vm_start == info->prev_vm_start &&
		    curr_vma->vm_end == info->prev_vm_end)
			curr_vma = curr_vma->vm_next;
		break;
	}
	if (!curr_vma) {
		/* case 3) above: no more vma in this mm */
		mmap_read_unlock(curr_task->mm);
		goto next_task;
	}
	info->task = curr_task;
	info->vma = curr_vma;
	return curr_vma;

next_task:
	put_task_struct(curr_task);
	info->task = NULL;
	curr_tid++;
	goto again;

finish:
	if (curr_task)
		put_task_struct(curr_task);
	info->task = NULL;
	info->vma = NULL;
	return NULL;
}

static void *task_vma_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;
	struct vm_area_struct *vma;

	vma = task_vma_seq_get_next(info);
	if (vma && *pos == 0)
		++*pos;

	return vma;
}

static void *task_vma_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;

	++*pos;
	return task_vma_seq_get_next(info);
}

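/* Context for task_vma programs; task and vma are both NULL on the
 * final invocation from task_vma_seq_stop().
 */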
struct bpf_iter__task_vma {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct task_struct *, task);
	__bpf_md_ptr(struct vm_area_struct *, vma);
};

DEFINE_BPF_ITER_FUNC(task_vma, struct bpf_iter_meta *meta,
		     struct task_struct *task, struct vm_area_struct *vma)

static int __task_vma_seq_show(struct seq_file *seq, bool in_stop)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;
	struct bpf_iter__task_vma ctx;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, in_stop);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.task = info->task;
	ctx.vma = info->vma;
	return bpf_iter_run_prog(prog, &ctx);
}

static int task_vma_seq_show(struct seq_file *seq, void *v)
{
	return __task_vma_seq_show(seq, false);
}

static void task_vma_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;

	if (!v) {
		(void)__task_vma_seq_show(seq, true);
	} else {
		/* info->vma has not been seen by the BPF program.  If the
		 * user space reads more, task_vma_seq_get_next should
		 * return this vma again.  Set prev_vm_start to ~0UL,
		 * so that we don't skip the vma returned by the next
		 * find_vma() (case task_vma_iter_find_vma in
		 * task_vma_seq_get_next()).
		 */
		info->prev_vm_start = ~0UL;
		info->prev_vm_end = info->vma->vm_end;
		mmap_read_unlock(info->task->mm);
		put_task_struct(info->task);
		info->task = NULL;
	}
}

static const struct seq_operations task_vma_seq_ops = {
	.start = task_vma_seq_start,
	.next = task_vma_seq_next,
	.stop = task_vma_seq_stop,
	.show = task_vma_seq_show,
};

static const struct bpf_iter_seq_info task_seq_info = {
	.seq_ops = &task_seq_ops,
	.init_seq_private = init_seq_pidns,
	.fini_seq_private = fini_seq_pidns,
	.seq_priv_size = sizeof(struct bpf_iter_seq_task_info),
};

static struct bpf_iter_reg task_reg_info = {
	.target = "task",
	.feature = BPF_ITER_RESCHED,
	.ctx_arg_info_size = 1,
	.ctx_arg_info = {
		{ offsetof(struct bpf_iter__task, task),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info = &task_seq_info,
};

static const struct bpf_iter_seq_info task_file_seq_info = {
	.seq_ops = &task_file_seq_ops,
	.init_seq_private = init_seq_pidns,
	.fini_seq_private = fini_seq_pidns,
	.seq_priv_size = sizeof(struct bpf_iter_seq_task_file_info),
};

static struct bpf_iter_reg task_file_reg_info = {
	.target = "task_file",
	.feature = BPF_ITER_RESCHED,
	.ctx_arg_info_size = 2,
	.ctx_arg_info = {
		{ offsetof(struct bpf_iter__task_file, task),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__task_file, file),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info = &task_file_seq_info,
};

static const struct bpf_iter_seq_info task_vma_seq_info = {
	.seq_ops = &task_vma_seq_ops,
	.init_seq_private = init_seq_pidns,
	.fini_seq_private = fini_seq_pidns,
	.seq_priv_size = sizeof(struct bpf_iter_seq_task_vma_info),
};

static struct bpf_iter_reg task_vma_reg_info = {
	.target = "task_vma",
	.feature = BPF_ITER_RESCHED,
	.ctx_arg_info_size = 2,
	.ctx_arg_info = {
		{ offsetof(struct bpf_iter__task_vma, task),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__task_vma, vma),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info = &task_vma_seq_info,
};

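/* Kernel side of the bpf_find_vma() helper: look up the vma containing
 * address @start in @task's mm and, if the mmap_lock can be taken
 * without blocking, invoke @callback_fn(task, vma, callback_ctx).
 * Returns -EBUSY when the lock (or the per-CPU irq_work used to release
 * it from restricted contexts) is unavailable.
 */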
BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start,
	   bpf_callback_t, callback_fn, void *, callback_ctx, u64, flags)
{
	struct mmap_unlock_irq_work *work = NULL;
	struct vm_area_struct *vma;
	bool irq_work_busy = false;
	struct mm_struct *mm;
	int ret = -ENOENT;

	if (flags)
		return -EINVAL;

	if (!task)
		return -ENOENT;

	mm = task->mm;
	if (!mm)
		return -ENOENT;

	irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);

	if (irq_work_busy || !mmap_read_trylock(mm))
		return -EBUSY;

	vma = find_vma(mm, start);

	if (vma && vma->vm_start <= start && vma->vm_end > start) {
		callback_fn((u64)(long)task, (u64)(long)vma,
			    (u64)(long)callback_ctx, 0, 0);
		ret = 0;
	}
	bpf_mmap_unlock_mm(work, mm);
	return ret;
}

const struct bpf_func_proto bpf_find_vma_proto = {
	.func = bpf_find_vma,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_BTF_ID,
	.arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.arg2_type = ARG_ANYTHING,
	.arg3_type = ARG_PTR_TO_FUNC,
	.arg4_type = ARG_PTR_TO_STACK_OR_NULL,
	.arg5_type = ARG_ANYTHING,
};

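/* Per-CPU irq_work used to release a task's mmap_lock asynchronously;
 * do_mmap_read_unlock() drops the read lock on behalf of the original
 * owner via mmap_read_unlock_non_owner().
 */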
DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);

static void do_mmap_read_unlock(struct irq_work *entry)
{
	struct mmap_unlock_irq_work *work;

	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
		return;

	work = container_of(entry, struct mmap_unlock_irq_work, irq_work);
	mmap_read_unlock_non_owner(work->mm);
}

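/* Wire up the irq_work handler on every possible CPU, fill in the BTF
 * ids that cannot be resolved at compile time, and register the task,
 * task_file and task_vma iterator targets.
 */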
static int __init task_iter_init(void)
{
	struct mmap_unlock_irq_work *work;
	int ret, cpu;

	for_each_possible_cpu(cpu) {
		work = per_cpu_ptr(&mmap_unlock_work, cpu);
		init_irq_work(&work->irq_work, do_mmap_read_unlock);
	}

	task_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
	ret = bpf_iter_reg_target(&task_reg_info);
	if (ret)
		return ret;

	task_file_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
	task_file_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_FILE];
	ret = bpf_iter_reg_target(&task_file_reg_info);
	if (ret)
		return ret;

	task_vma_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
	task_vma_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
	return bpf_iter_reg_target(&task_vma_reg_info);
}
late_initcall(task_iter_init);