0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016 #define pr_fmt(fmt) "seccomp: " fmt
0017
0018 #include <linux/refcount.h>
0019 #include <linux/audit.h>
0020 #include <linux/compat.h>
0021 #include <linux/coredump.h>
0022 #include <linux/kmemleak.h>
0023 #include <linux/nospec.h>
0024 #include <linux/prctl.h>
0025 #include <linux/sched.h>
0026 #include <linux/sched/task_stack.h>
0027 #include <linux/seccomp.h>
0028 #include <linux/slab.h>
0029 #include <linux/syscalls.h>
0030 #include <linux/sysctl.h>
0031
0032
/*
 * Internal mode value one past SECCOMP_MODE_FILTER.  This file stores it in
 * current->seccomp.mode on the kill paths, and __secure_computing() treats a
 * task that is still running in this mode as a bug.
 */
#define SECCOMP_MODE_DEAD	(SECCOMP_MODE_FILTER + 1)
0034
0035 #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
0036 #include <asm/syscall.h>
0037 #endif
0038
0039 #ifdef CONFIG_SECCOMP_FILTER
0040 #include <linux/file.h>
0041 #include <linux/filter.h>
0042 #include <linux/pid.h>
0043 #include <linux/ptrace.h>
0044 #include <linux/capability.h>
0045 #include <linux/uaccess.h>
0046 #include <linux/anon_inodes.h>
0047 #include <linux/lockdep.h>
0048
0049
0050
0051
0052
0053
0054
/*
 * ID_VALID ioctl number encoded with SECCOMP_IOR.  The name marks the
 * direction as wrong; presumably kept so existing callers keep working —
 * NOTE(review): confirm against the ioctl dispatcher.
 */
#define SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR	SECCOMP_IOR(2, __u64)
0056
/* Lifecycle of a user notification, stored in seccomp_knotif.state. */
enum notify_state {
	SECCOMP_NOTIFY_INIT,	/* queued, not yet handed to a listener */
	SECCOMP_NOTIFY_SENT,	/* copied out by seccomp_notify_recv() */
	SECCOMP_NOTIFY_REPLIED,	/* answered, or the listener detached */
};
0062
0063 struct seccomp_knotif {
0064
0065 struct task_struct *task;
0066
0067
0068 u64 id;
0069
0070
0071
0072
0073
0074
0075 const struct seccomp_data *data;
0076
0077
0078
0079
0080
0081
0082
0083
0084
0085 enum notify_state state;
0086
0087
0088 int error;
0089 long val;
0090 u32 flags;
0091
0092
0093
0094
0095
0096 struct completion ready;
0097
0098 struct list_head list;
0099
0100
0101 struct list_head addfd;
0102 };
0103
0104
0105
0106
0107
0108
0109
0110
0111
0112
0113
0114
0115
0116
0117
0118
0119
0120 struct seccomp_kaddfd {
0121 struct file *file;
0122 int fd;
0123 unsigned int flags;
0124 __u32 ioctl_flags;
0125
0126 union {
0127 bool setfd;
0128
0129 int ret;
0130 };
0131 struct completion completion;
0132 struct list_head list;
0133 };
0134
0135
0136
0137
0138
0139
0140
0141
0142
0143
0144
0145
0146
0147 struct notification {
0148 struct semaphore request;
0149 u64 next_id;
0150 struct list_head notifications;
0151 };
0152
0153 #ifdef SECCOMP_ARCH_NATIVE
0154
0155
0156
0157
0158
0159
0160
0161
0162
0163
0164
0165 struct action_cache {
0166 DECLARE_BITMAP(allow_native, SECCOMP_ARCH_NATIVE_NR);
0167 #ifdef SECCOMP_ARCH_COMPAT
0168 DECLARE_BITMAP(allow_compat, SECCOMP_ARCH_COMPAT_NR);
0169 #endif
0170 };
0171 #else
/* Without SECCOMP_ARCH_NATIVE there is nothing to cache. */
struct action_cache {
};
0173
0174 static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
0175 const struct seccomp_data *sd)
0176 {
0177 return false;
0178 }
0179
/* Cache-less stub: there is no bitmap to build. */
static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter)
{
	/* Intentionally empty. */
}
0183 #endif
0184
0185
0186
0187
0188
0189
0190
0191
0192
0193
0194
0195
0196
0197
0198
0199
0200
0201
0202
0203
0204
0205
0206
0207
0208
0209
0210
0211
0212
0213
0214
0215
0216
0217
0218
0219
0220
0221 struct seccomp_filter {
0222 refcount_t refs;
0223 refcount_t users;
0224 bool log;
0225 bool wait_killable_recv;
0226 struct action_cache cache;
0227 struct seccomp_filter *prev;
0228 struct bpf_prog *prog;
0229 struct notification *notif;
0230 struct mutex notify_lock;
0231 wait_queue_head_t wqh;
0232 };
0233
0234
/*
 * Cap on the instruction total checked by seccomp_attach_filter(): as many
 * struct sock_filter entries as fit in 256 KiB (1 << 18 bytes).
 */
#define MAX_INSNS_PER_PATH	((1 << 18) / sizeof(struct sock_filter))
0236
0237
0238
0239
0240
0241 static void populate_seccomp_data(struct seccomp_data *sd)
0242 {
0243
0244
0245
0246
0247 struct task_struct *task = current;
0248 struct pt_regs *regs = task_pt_regs(task);
0249 unsigned long args[6];
0250
0251 sd->nr = syscall_get_nr(task, regs);
0252 sd->arch = syscall_get_arch(task);
0253 syscall_get_arguments(task, regs, args);
0254 sd->args[0] = args[0];
0255 sd->args[1] = args[1];
0256 sd->args[2] = args[2];
0257 sd->args[3] = args[3];
0258 sd->args[4] = args[4];
0259 sd->args[5] = args[5];
0260 sd->instruction_pointer = KSTK_EIP(task);
0261 }
0262
0263
0264
0265
0266
0267
0268
0269
0270
0271
0272
0273
0274
0275 static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
0276 {
0277 int pc;
0278 for (pc = 0; pc < flen; pc++) {
0279 struct sock_filter *ftest = &filter[pc];
0280 u16 code = ftest->code;
0281 u32 k = ftest->k;
0282
0283 switch (code) {
0284 case BPF_LD | BPF_W | BPF_ABS:
0285 ftest->code = BPF_LDX | BPF_W | BPF_ABS;
0286
0287 if (k >= sizeof(struct seccomp_data) || k & 3)
0288 return -EINVAL;
0289 continue;
0290 case BPF_LD | BPF_W | BPF_LEN:
0291 ftest->code = BPF_LD | BPF_IMM;
0292 ftest->k = sizeof(struct seccomp_data);
0293 continue;
0294 case BPF_LDX | BPF_W | BPF_LEN:
0295 ftest->code = BPF_LDX | BPF_IMM;
0296 ftest->k = sizeof(struct seccomp_data);
0297 continue;
0298
0299 case BPF_RET | BPF_K:
0300 case BPF_RET | BPF_A:
0301 case BPF_ALU | BPF_ADD | BPF_K:
0302 case BPF_ALU | BPF_ADD | BPF_X:
0303 case BPF_ALU | BPF_SUB | BPF_K:
0304 case BPF_ALU | BPF_SUB | BPF_X:
0305 case BPF_ALU | BPF_MUL | BPF_K:
0306 case BPF_ALU | BPF_MUL | BPF_X:
0307 case BPF_ALU | BPF_DIV | BPF_K:
0308 case BPF_ALU | BPF_DIV | BPF_X:
0309 case BPF_ALU | BPF_AND | BPF_K:
0310 case BPF_ALU | BPF_AND | BPF_X:
0311 case BPF_ALU | BPF_OR | BPF_K:
0312 case BPF_ALU | BPF_OR | BPF_X:
0313 case BPF_ALU | BPF_XOR | BPF_K:
0314 case BPF_ALU | BPF_XOR | BPF_X:
0315 case BPF_ALU | BPF_LSH | BPF_K:
0316 case BPF_ALU | BPF_LSH | BPF_X:
0317 case BPF_ALU | BPF_RSH | BPF_K:
0318 case BPF_ALU | BPF_RSH | BPF_X:
0319 case BPF_ALU | BPF_NEG:
0320 case BPF_LD | BPF_IMM:
0321 case BPF_LDX | BPF_IMM:
0322 case BPF_MISC | BPF_TAX:
0323 case BPF_MISC | BPF_TXA:
0324 case BPF_LD | BPF_MEM:
0325 case BPF_LDX | BPF_MEM:
0326 case BPF_ST:
0327 case BPF_STX:
0328 case BPF_JMP | BPF_JA:
0329 case BPF_JMP | BPF_JEQ | BPF_K:
0330 case BPF_JMP | BPF_JEQ | BPF_X:
0331 case BPF_JMP | BPF_JGE | BPF_K:
0332 case BPF_JMP | BPF_JGE | BPF_X:
0333 case BPF_JMP | BPF_JGT | BPF_K:
0334 case BPF_JMP | BPF_JGT | BPF_X:
0335 case BPF_JMP | BPF_JSET | BPF_K:
0336 case BPF_JMP | BPF_JSET | BPF_X:
0337 continue;
0338 default:
0339 return -EINVAL;
0340 }
0341 }
0342 return 0;
0343 }
0344
0345 #ifdef SECCOMP_ARCH_NATIVE
0346 static inline bool seccomp_cache_check_allow_bitmap(const void *bitmap,
0347 size_t bitmap_size,
0348 int syscall_nr)
0349 {
0350 if (unlikely(syscall_nr < 0 || syscall_nr >= bitmap_size))
0351 return false;
0352 syscall_nr = array_index_nospec(syscall_nr, bitmap_size);
0353
0354 return test_bit(syscall_nr, bitmap);
0355 }
0356
0357
0358
0359
0360
0361
0362
0363
0364 static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
0365 const struct seccomp_data *sd)
0366 {
0367 int syscall_nr = sd->nr;
0368 const struct action_cache *cache = &sfilter->cache;
0369
0370 #ifndef SECCOMP_ARCH_COMPAT
0371
0372 return seccomp_cache_check_allow_bitmap(cache->allow_native,
0373 SECCOMP_ARCH_NATIVE_NR,
0374 syscall_nr);
0375 #else
0376 if (likely(sd->arch == SECCOMP_ARCH_NATIVE))
0377 return seccomp_cache_check_allow_bitmap(cache->allow_native,
0378 SECCOMP_ARCH_NATIVE_NR,
0379 syscall_nr);
0380 if (likely(sd->arch == SECCOMP_ARCH_COMPAT))
0381 return seccomp_cache_check_allow_bitmap(cache->allow_compat,
0382 SECCOMP_ARCH_COMPAT_NR,
0383 syscall_nr);
0384 #endif
0385
0386 WARN_ON_ONCE(true);
0387 return false;
0388 }
0389 #endif
0390
0391
0392
0393
0394
0395
0396
0397
0398
0399
0400 #define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL)))
0401 static u32 seccomp_run_filters(const struct seccomp_data *sd,
0402 struct seccomp_filter **match)
0403 {
0404 u32 ret = SECCOMP_RET_ALLOW;
0405
0406 struct seccomp_filter *f =
0407 READ_ONCE(current->seccomp.filter);
0408
0409
0410 if (WARN_ON(f == NULL))
0411 return SECCOMP_RET_KILL_PROCESS;
0412
0413 if (seccomp_cache_check_allow(f, sd))
0414 return SECCOMP_RET_ALLOW;
0415
0416
0417
0418
0419
0420 for (; f; f = f->prev) {
0421 u32 cur_ret = bpf_prog_run_pin_on_cpu(f->prog, sd);
0422
0423 if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
0424 ret = cur_ret;
0425 *match = f;
0426 }
0427 }
0428 return ret;
0429 }
0430 #endif
0431
0432 static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
0433 {
0434 assert_spin_locked(¤t->sighand->siglock);
0435
0436 if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
0437 return false;
0438
0439 return true;
0440 }
0441
0442 void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }
0443
0444 static inline void seccomp_assign_mode(struct task_struct *task,
0445 unsigned long seccomp_mode,
0446 unsigned long flags)
0447 {
0448 assert_spin_locked(&task->sighand->siglock);
0449
0450 task->seccomp.mode = seccomp_mode;
0451
0452
0453
0454
0455 smp_mb__before_atomic();
0456
0457 if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
0458 arch_seccomp_spec_mitigate(task);
0459 set_task_syscall_work(task, SECCOMP);
0460 }
0461
0462 #ifdef CONFIG_SECCOMP_FILTER
0463
0464 static int is_ancestor(struct seccomp_filter *parent,
0465 struct seccomp_filter *child)
0466 {
0467
0468 if (parent == NULL)
0469 return 1;
0470 for (; child; child = child->prev)
0471 if (child == parent)
0472 return 1;
0473 return 0;
0474 }
0475
0476
0477
0478
0479
0480
0481
0482
0483
0484
0485 static inline pid_t seccomp_can_sync_threads(void)
0486 {
0487 struct task_struct *thread, *caller;
0488
0489 BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex));
0490 assert_spin_locked(¤t->sighand->siglock);
0491
0492
0493 caller = current;
0494 for_each_thread(caller, thread) {
0495 pid_t failed;
0496
0497
0498 if (thread == caller)
0499 continue;
0500
0501 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
0502 (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
0503 is_ancestor(thread->seccomp.filter,
0504 caller->seccomp.filter)))
0505 continue;
0506
0507
0508 failed = task_pid_vnr(thread);
0509
0510 if (WARN_ON(failed == 0))
0511 failed = -ESRCH;
0512 return failed;
0513 }
0514
0515 return 0;
0516 }
0517
0518 static inline void seccomp_filter_free(struct seccomp_filter *filter)
0519 {
0520 if (filter) {
0521 bpf_prog_destroy(filter->prog);
0522 kfree(filter);
0523 }
0524 }
0525
0526 static void __seccomp_filter_orphan(struct seccomp_filter *orig)
0527 {
0528 while (orig && refcount_dec_and_test(&orig->users)) {
0529 if (waitqueue_active(&orig->wqh))
0530 wake_up_poll(&orig->wqh, EPOLLHUP);
0531 orig = orig->prev;
0532 }
0533 }
0534
0535 static void __put_seccomp_filter(struct seccomp_filter *orig)
0536 {
0537
0538 while (orig && refcount_dec_and_test(&orig->refs)) {
0539 struct seccomp_filter *freeme = orig;
0540 orig = orig->prev;
0541 seccomp_filter_free(freeme);
0542 }
0543 }
0544
/*
 * Release a task's hold on @orig: first give up the "users" count (which may
 * notify pollers), then drop the memory reference.
 */
static void __seccomp_filter_release(struct seccomp_filter *orig)
{
	/* Notify about any unused filters in the task's former filter tree. */
	__seccomp_filter_orphan(orig);

	/* Finally drop all references to the task's former tree. */
	__put_seccomp_filter(orig);
}
0552
0553
0554
0555
0556
0557
0558
0559
0560
0561
0562 void seccomp_filter_release(struct task_struct *tsk)
0563 {
0564 struct seccomp_filter *orig = tsk->seccomp.filter;
0565
0566
0567 WARN_ON(tsk->sighand != NULL);
0568
0569
0570 tsk->seccomp.filter = NULL;
0571 __seccomp_filter_release(orig);
0572 }
0573
0574
0575
0576
0577
0578
0579
0580
0581
0582 static inline void seccomp_sync_threads(unsigned long flags)
0583 {
0584 struct task_struct *thread, *caller;
0585
0586 BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex));
0587 assert_spin_locked(¤t->sighand->siglock);
0588
0589
0590 caller = current;
0591 for_each_thread(caller, thread) {
0592
0593 if (thread == caller)
0594 continue;
0595
0596
0597 get_seccomp_filter(caller);
0598
0599
0600
0601
0602
0603
0604 __seccomp_filter_release(thread->seccomp.filter);
0605
0606
0607 smp_store_release(&thread->seccomp.filter,
0608 caller->seccomp.filter);
0609 atomic_set(&thread->seccomp.filter_count,
0610 atomic_read(&caller->seccomp.filter_count));
0611
0612
0613
0614
0615
0616
0617
0618 if (task_no_new_privs(caller))
0619 task_set_no_new_privs(thread);
0620
0621
0622
0623
0624
0625
0626
0627 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
0628 seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
0629 flags);
0630 }
0631 }
0632
0633
0634
0635
0636
0637
0638
0639 static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
0640 {
0641 struct seccomp_filter *sfilter;
0642 int ret;
0643 const bool save_orig =
0644 #if defined(CONFIG_CHECKPOINT_RESTORE) || defined(SECCOMP_ARCH_NATIVE)
0645 true;
0646 #else
0647 false;
0648 #endif
0649
0650 if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
0651 return ERR_PTR(-EINVAL);
0652
0653 BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
0654
0655
0656
0657
0658
0659
0660
0661 if (!task_no_new_privs(current) &&
0662 !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
0663 return ERR_PTR(-EACCES);
0664
0665
0666 sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN);
0667 if (!sfilter)
0668 return ERR_PTR(-ENOMEM);
0669
0670 mutex_init(&sfilter->notify_lock);
0671 ret = bpf_prog_create_from_user(&sfilter->prog, fprog,
0672 seccomp_check_filter, save_orig);
0673 if (ret < 0) {
0674 kfree(sfilter);
0675 return ERR_PTR(ret);
0676 }
0677
0678 refcount_set(&sfilter->refs, 1);
0679 refcount_set(&sfilter->users, 1);
0680 init_waitqueue_head(&sfilter->wqh);
0681
0682 return sfilter;
0683 }
0684
0685
0686
0687
0688
0689
0690
0691 static struct seccomp_filter *
0692 seccomp_prepare_user_filter(const char __user *user_filter)
0693 {
0694 struct sock_fprog fprog;
0695 struct seccomp_filter *filter = ERR_PTR(-EFAULT);
0696
0697 #ifdef CONFIG_COMPAT
0698 if (in_compat_syscall()) {
0699 struct compat_sock_fprog fprog32;
0700 if (copy_from_user(&fprog32, user_filter, sizeof(fprog32)))
0701 goto out;
0702 fprog.len = fprog32.len;
0703 fprog.filter = compat_ptr(fprog32.filter);
0704 } else
0705 #endif
0706 if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
0707 goto out;
0708 filter = seccomp_prepare_filter(&fprog);
0709 out:
0710 return filter;
0711 }
0712
0713 #ifdef SECCOMP_ARCH_NATIVE
0714
0715
0716
0717
0718
0719
0720 static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
0721 struct seccomp_data *sd)
0722 {
0723 unsigned int reg_value = 0;
0724 unsigned int pc;
0725 bool op_res;
0726
0727 if (WARN_ON_ONCE(!fprog))
0728 return false;
0729
0730 for (pc = 0; pc < fprog->len; pc++) {
0731 struct sock_filter *insn = &fprog->filter[pc];
0732 u16 code = insn->code;
0733 u32 k = insn->k;
0734
0735 switch (code) {
0736 case BPF_LD | BPF_W | BPF_ABS:
0737 switch (k) {
0738 case offsetof(struct seccomp_data, nr):
0739 reg_value = sd->nr;
0740 break;
0741 case offsetof(struct seccomp_data, arch):
0742 reg_value = sd->arch;
0743 break;
0744 default:
0745
0746 return false;
0747 }
0748 break;
0749 case BPF_RET | BPF_K:
0750
0751 return k == SECCOMP_RET_ALLOW;
0752 case BPF_JMP | BPF_JA:
0753 pc += insn->k;
0754 break;
0755 case BPF_JMP | BPF_JEQ | BPF_K:
0756 case BPF_JMP | BPF_JGE | BPF_K:
0757 case BPF_JMP | BPF_JGT | BPF_K:
0758 case BPF_JMP | BPF_JSET | BPF_K:
0759 switch (BPF_OP(code)) {
0760 case BPF_JEQ:
0761 op_res = reg_value == k;
0762 break;
0763 case BPF_JGE:
0764 op_res = reg_value >= k;
0765 break;
0766 case BPF_JGT:
0767 op_res = reg_value > k;
0768 break;
0769 case BPF_JSET:
0770 op_res = !!(reg_value & k);
0771 break;
0772 default:
0773
0774 return false;
0775 }
0776
0777 pc += op_res ? insn->jt : insn->jf;
0778 break;
0779 case BPF_ALU | BPF_AND | BPF_K:
0780 reg_value &= k;
0781 break;
0782 default:
0783
0784 return false;
0785 }
0786 }
0787
0788
0789 WARN_ON(1);
0790 return false;
0791 }
0792
0793 static void seccomp_cache_prepare_bitmap(struct seccomp_filter *sfilter,
0794 void *bitmap, const void *bitmap_prev,
0795 size_t bitmap_size, int arch)
0796 {
0797 struct sock_fprog_kern *fprog = sfilter->prog->orig_prog;
0798 struct seccomp_data sd;
0799 int nr;
0800
0801 if (bitmap_prev) {
0802
0803 bitmap_copy(bitmap, bitmap_prev, bitmap_size);
0804 } else {
0805
0806 bitmap_fill(bitmap, bitmap_size);
0807 }
0808
0809 for (nr = 0; nr < bitmap_size; nr++) {
0810
0811 if (!test_bit(nr, bitmap))
0812 continue;
0813
0814 sd.nr = nr;
0815 sd.arch = arch;
0816
0817
0818 if (seccomp_is_const_allow(fprog, &sd))
0819 continue;
0820
0821
0822
0823
0824
0825 __clear_bit(nr, bitmap);
0826 }
0827 }
0828
0829
0830
0831
0832
0833
0834
0835 static void seccomp_cache_prepare(struct seccomp_filter *sfilter)
0836 {
0837 struct action_cache *cache = &sfilter->cache;
0838 const struct action_cache *cache_prev =
0839 sfilter->prev ? &sfilter->prev->cache : NULL;
0840
0841 seccomp_cache_prepare_bitmap(sfilter, cache->allow_native,
0842 cache_prev ? cache_prev->allow_native : NULL,
0843 SECCOMP_ARCH_NATIVE_NR,
0844 SECCOMP_ARCH_NATIVE);
0845
0846 #ifdef SECCOMP_ARCH_COMPAT
0847 seccomp_cache_prepare_bitmap(sfilter, cache->allow_compat,
0848 cache_prev ? cache_prev->allow_compat : NULL,
0849 SECCOMP_ARCH_COMPAT_NR,
0850 SECCOMP_ARCH_COMPAT);
0851 #endif
0852 }
0853 #endif
0854
0855
0856
0857
0858
0859
0860
0861
0862
0863
0864
0865
0866
0867 static long seccomp_attach_filter(unsigned int flags,
0868 struct seccomp_filter *filter)
0869 {
0870 unsigned long total_insns;
0871 struct seccomp_filter *walker;
0872
0873 assert_spin_locked(¤t->sighand->siglock);
0874
0875
0876 total_insns = filter->prog->len;
0877 for (walker = current->seccomp.filter; walker; walker = walker->prev)
0878 total_insns += walker->prog->len + 4;
0879 if (total_insns > MAX_INSNS_PER_PATH)
0880 return -ENOMEM;
0881
0882
0883 if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
0884 int ret;
0885
0886 ret = seccomp_can_sync_threads();
0887 if (ret) {
0888 if (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
0889 return -ESRCH;
0890 else
0891 return ret;
0892 }
0893 }
0894
0895
0896 if (flags & SECCOMP_FILTER_FLAG_LOG)
0897 filter->log = true;
0898
0899
0900 if (flags & SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV)
0901 filter->wait_killable_recv = true;
0902
0903
0904
0905
0906
0907 filter->prev = current->seccomp.filter;
0908 seccomp_cache_prepare(filter);
0909 current->seccomp.filter = filter;
0910 atomic_inc(¤t->seccomp.filter_count);
0911
0912
0913 if (flags & SECCOMP_FILTER_FLAG_TSYNC)
0914 seccomp_sync_threads(flags);
0915
0916 return 0;
0917 }
0918
0919 static void __get_seccomp_filter(struct seccomp_filter *filter)
0920 {
0921 refcount_inc(&filter->refs);
0922 }
0923
0924
0925 void get_seccomp_filter(struct task_struct *tsk)
0926 {
0927 struct seccomp_filter *orig = tsk->seccomp.filter;
0928 if (!orig)
0929 return;
0930 __get_seccomp_filter(orig);
0931 refcount_inc(&orig->users);
0932 }
0933
0934 #endif
0935
0936
0937 #define SECCOMP_LOG_KILL_PROCESS (1 << 0)
0938 #define SECCOMP_LOG_KILL_THREAD (1 << 1)
0939 #define SECCOMP_LOG_TRAP (1 << 2)
0940 #define SECCOMP_LOG_ERRNO (1 << 3)
0941 #define SECCOMP_LOG_TRACE (1 << 4)
0942 #define SECCOMP_LOG_LOG (1 << 5)
0943 #define SECCOMP_LOG_ALLOW (1 << 6)
0944 #define SECCOMP_LOG_USER_NOTIF (1 << 7)
0945
0946 static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
0947 SECCOMP_LOG_KILL_THREAD |
0948 SECCOMP_LOG_TRAP |
0949 SECCOMP_LOG_ERRNO |
0950 SECCOMP_LOG_USER_NOTIF |
0951 SECCOMP_LOG_TRACE |
0952 SECCOMP_LOG_LOG;
0953
0954 static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
0955 bool requested)
0956 {
0957 bool log = false;
0958
0959 switch (action) {
0960 case SECCOMP_RET_ALLOW:
0961 break;
0962 case SECCOMP_RET_TRAP:
0963 log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP;
0964 break;
0965 case SECCOMP_RET_ERRNO:
0966 log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO;
0967 break;
0968 case SECCOMP_RET_TRACE:
0969 log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
0970 break;
0971 case SECCOMP_RET_USER_NOTIF:
0972 log = requested && seccomp_actions_logged & SECCOMP_LOG_USER_NOTIF;
0973 break;
0974 case SECCOMP_RET_LOG:
0975 log = seccomp_actions_logged & SECCOMP_LOG_LOG;
0976 break;
0977 case SECCOMP_RET_KILL_THREAD:
0978 log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
0979 break;
0980 case SECCOMP_RET_KILL_PROCESS:
0981 default:
0982 log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
0983 }
0984
0985
0986
0987
0988
0989
0990
0991 if (!log)
0992 return;
0993
0994 audit_seccomp(syscall, signr, action);
0995 }
0996
0997
0998
0999
1000
1001
1002 static const int mode1_syscalls[] = {
1003 __NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
1004 -1,
1005 };
1006
1007 static void __secure_computing_strict(int this_syscall)
1008 {
1009 const int *allowed_syscalls = mode1_syscalls;
1010 #ifdef CONFIG_COMPAT
1011 if (in_compat_syscall())
1012 allowed_syscalls = get_compat_mode1_syscalls();
1013 #endif
1014 do {
1015 if (*allowed_syscalls == this_syscall)
1016 return;
1017 } while (*++allowed_syscalls != -1);
1018
1019 #ifdef SECCOMP_DEBUG
1020 dump_stack();
1021 #endif
1022 current->seccomp.mode = SECCOMP_MODE_DEAD;
1023 seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
1024 do_exit(SIGKILL);
1025 }
1026
1027 #ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
1028 void secure_computing_strict(int this_syscall)
1029 {
1030 int mode = current->seccomp.mode;
1031
1032 if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
1033 unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
1034 return;
1035
1036 if (mode == SECCOMP_MODE_DISABLED)
1037 return;
1038 else if (mode == SECCOMP_MODE_STRICT)
1039 __secure_computing_strict(this_syscall);
1040 else
1041 BUG();
1042 }
1043 #else
1044
1045 #ifdef CONFIG_SECCOMP_FILTER
1046 static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
1047 {
1048
1049
1050
1051
1052 lockdep_assert_held(&filter->notify_lock);
1053 return filter->notif->next_id++;
1054 }
1055
1056 static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n)
1057 {
1058 int fd;
1059
1060
1061
1062
1063
1064 list_del_init(&addfd->list);
1065 if (!addfd->setfd)
1066 fd = receive_fd(addfd->file, addfd->flags);
1067 else
1068 fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
1069 addfd->ret = fd;
1070
1071 if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) {
1072
1073 if (fd < 0) {
1074 n->state = SECCOMP_NOTIFY_SENT;
1075 } else {
1076
1077 n->flags = 0;
1078 n->error = 0;
1079 n->val = fd;
1080 }
1081 }
1082
1083
1084
1085
1086
1087 complete(&addfd->completion);
1088 }
1089
1090 static bool should_sleep_killable(struct seccomp_filter *match,
1091 struct seccomp_knotif *n)
1092 {
1093 return match->wait_killable_recv && n->state == SECCOMP_NOTIFY_SENT;
1094 }
1095
1096 static int seccomp_do_user_notification(int this_syscall,
1097 struct seccomp_filter *match,
1098 const struct seccomp_data *sd)
1099 {
1100 int err;
1101 u32 flags = 0;
1102 long ret = 0;
1103 struct seccomp_knotif n = {};
1104 struct seccomp_kaddfd *addfd, *tmp;
1105
1106 mutex_lock(&match->notify_lock);
1107 err = -ENOSYS;
1108 if (!match->notif)
1109 goto out;
1110
1111 n.task = current;
1112 n.state = SECCOMP_NOTIFY_INIT;
1113 n.data = sd;
1114 n.id = seccomp_next_notify_id(match);
1115 init_completion(&n.ready);
1116 list_add_tail(&n.list, &match->notif->notifications);
1117 INIT_LIST_HEAD(&n.addfd);
1118
1119 up(&match->notif->request);
1120 wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
1121
1122
1123
1124
1125 do {
1126 bool wait_killable = should_sleep_killable(match, &n);
1127
1128 mutex_unlock(&match->notify_lock);
1129 if (wait_killable)
1130 err = wait_for_completion_killable(&n.ready);
1131 else
1132 err = wait_for_completion_interruptible(&n.ready);
1133 mutex_lock(&match->notify_lock);
1134
1135 if (err != 0) {
1136
1137
1138
1139
1140 if (!wait_killable && should_sleep_killable(match, &n))
1141 continue;
1142
1143 goto interrupted;
1144 }
1145
1146 addfd = list_first_entry_or_null(&n.addfd,
1147 struct seccomp_kaddfd, list);
1148
1149 if (addfd)
1150 seccomp_handle_addfd(addfd, &n);
1151
1152 } while (n.state != SECCOMP_NOTIFY_REPLIED);
1153
1154 ret = n.val;
1155 err = n.error;
1156 flags = n.flags;
1157
1158 interrupted:
1159
1160 list_for_each_entry_safe(addfd, tmp, &n.addfd, list) {
1161
1162 addfd->ret = -ESRCH;
1163 list_del_init(&addfd->list);
1164 complete(&addfd->completion);
1165 }
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177 if (match->notif)
1178 list_del(&n.list);
1179 out:
1180 mutex_unlock(&match->notify_lock);
1181
1182
1183 if (flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE)
1184 return 0;
1185
1186 syscall_set_return_value(current, current_pt_regs(),
1187 err, ret);
1188 return -1;
1189 }
1190
1191 static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
1192 const bool recheck_after_trace)
1193 {
1194 u32 filter_ret, action;
1195 struct seccomp_filter *match = NULL;
1196 int data;
1197 struct seccomp_data sd_local;
1198
1199
1200
1201
1202
1203 smp_rmb();
1204
1205 if (!sd) {
1206 populate_seccomp_data(&sd_local);
1207 sd = &sd_local;
1208 }
1209
1210 filter_ret = seccomp_run_filters(sd, &match);
1211 data = filter_ret & SECCOMP_RET_DATA;
1212 action = filter_ret & SECCOMP_RET_ACTION_FULL;
1213
1214 switch (action) {
1215 case SECCOMP_RET_ERRNO:
1216
1217 if (data > MAX_ERRNO)
1218 data = MAX_ERRNO;
1219 syscall_set_return_value(current, current_pt_regs(),
1220 -data, 0);
1221 goto skip;
1222
1223 case SECCOMP_RET_TRAP:
1224
1225 syscall_rollback(current, current_pt_regs());
1226
1227 force_sig_seccomp(this_syscall, data, false);
1228 goto skip;
1229
1230 case SECCOMP_RET_TRACE:
1231
1232 if (recheck_after_trace)
1233 return 0;
1234
1235
1236 if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
1237 syscall_set_return_value(current,
1238 current_pt_regs(),
1239 -ENOSYS, 0);
1240 goto skip;
1241 }
1242
1243
1244 ptrace_event(PTRACE_EVENT_SECCOMP, data);
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255 if (fatal_signal_pending(current))
1256 goto skip;
1257
1258 this_syscall = syscall_get_nr(current, current_pt_regs());
1259 if (this_syscall < 0)
1260 goto skip;
1261
1262
1263
1264
1265
1266
1267
1268 if (__seccomp_filter(this_syscall, NULL, true))
1269 return -1;
1270
1271 return 0;
1272
1273 case SECCOMP_RET_USER_NOTIF:
1274 if (seccomp_do_user_notification(this_syscall, match, sd))
1275 goto skip;
1276
1277 return 0;
1278
1279 case SECCOMP_RET_LOG:
1280 seccomp_log(this_syscall, 0, action, true);
1281 return 0;
1282
1283 case SECCOMP_RET_ALLOW:
1284
1285
1286
1287
1288
1289 return 0;
1290
1291 case SECCOMP_RET_KILL_THREAD:
1292 case SECCOMP_RET_KILL_PROCESS:
1293 default:
1294 current->seccomp.mode = SECCOMP_MODE_DEAD;
1295 seccomp_log(this_syscall, SIGSYS, action, true);
1296
1297 if (action != SECCOMP_RET_KILL_THREAD ||
1298 (atomic_read(¤t->signal->live) == 1)) {
1299
1300 syscall_rollback(current, current_pt_regs());
1301
1302 force_sig_seccomp(this_syscall, data, true);
1303 } else {
1304 do_exit(SIGSYS);
1305 }
1306 return -1;
1307 }
1308
1309 unreachable();
1310
1311 skip:
1312 seccomp_log(this_syscall, 0, action, match ? match->log : false);
1313 return -1;
1314 }
1315 #else
1316 static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
1317 const bool recheck_after_trace)
1318 {
1319 BUG();
1320
1321 return -1;
1322 }
1323 #endif
1324
1325 int __secure_computing(const struct seccomp_data *sd)
1326 {
1327 int mode = current->seccomp.mode;
1328 int this_syscall;
1329
1330 if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
1331 unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
1332 return 0;
1333
1334 this_syscall = sd ? sd->nr :
1335 syscall_get_nr(current, current_pt_regs());
1336
1337 switch (mode) {
1338 case SECCOMP_MODE_STRICT:
1339 __secure_computing_strict(this_syscall);
1340 return 0;
1341 case SECCOMP_MODE_FILTER:
1342 return __seccomp_filter(this_syscall, sd, false);
1343
1344 case SECCOMP_MODE_DEAD:
1345 WARN_ON_ONCE(1);
1346 do_exit(SIGKILL);
1347 return -1;
1348 default:
1349 BUG();
1350 }
1351 }
1352 #endif
1353
1354 long prctl_get_seccomp(void)
1355 {
1356 return current->seccomp.mode;
1357 }
1358
1359
1360
1361
1362
1363
1364
1365
1366 static long seccomp_set_mode_strict(void)
1367 {
1368 const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
1369 long ret = -EINVAL;
1370
1371 spin_lock_irq(¤t->sighand->siglock);
1372
1373 if (!seccomp_may_assign_mode(seccomp_mode))
1374 goto out;
1375
1376 #ifdef TIF_NOTSC
1377 disable_TSC();
1378 #endif
1379 seccomp_assign_mode(current, seccomp_mode, 0);
1380 ret = 0;
1381
1382 out:
1383 spin_unlock_irq(¤t->sighand->siglock);
1384
1385 return ret;
1386 }
1387
1388 #ifdef CONFIG_SECCOMP_FILTER
1389 static void seccomp_notify_free(struct seccomp_filter *filter)
1390 {
1391 kfree(filter->notif);
1392 filter->notif = NULL;
1393 }
1394
1395 static void seccomp_notify_detach(struct seccomp_filter *filter)
1396 {
1397 struct seccomp_knotif *knotif;
1398
1399 if (!filter)
1400 return;
1401
1402 mutex_lock(&filter->notify_lock);
1403
1404
1405
1406
1407
1408 list_for_each_entry(knotif, &filter->notif->notifications, list) {
1409 if (knotif->state == SECCOMP_NOTIFY_REPLIED)
1410 continue;
1411
1412 knotif->state = SECCOMP_NOTIFY_REPLIED;
1413 knotif->error = -ENOSYS;
1414 knotif->val = 0;
1415
1416
1417
1418
1419
1420
1421 complete(&knotif->ready);
1422 }
1423
1424 seccomp_notify_free(filter);
1425 mutex_unlock(&filter->notify_lock);
1426 }
1427
1428 static int seccomp_notify_release(struct inode *inode, struct file *file)
1429 {
1430 struct seccomp_filter *filter = file->private_data;
1431
1432 seccomp_notify_detach(filter);
1433 __put_seccomp_filter(filter);
1434 return 0;
1435 }
1436
1437
1438 static inline struct seccomp_knotif *
1439 find_notification(struct seccomp_filter *filter, u64 id)
1440 {
1441 struct seccomp_knotif *cur;
1442
1443 lockdep_assert_held(&filter->notify_lock);
1444
1445 list_for_each_entry(cur, &filter->notif->notifications, list) {
1446 if (cur->id == id)
1447 return cur;
1448 }
1449
1450 return NULL;
1451 }
1452
1453
1454 static long seccomp_notify_recv(struct seccomp_filter *filter,
1455 void __user *buf)
1456 {
1457 struct seccomp_knotif *knotif = NULL, *cur;
1458 struct seccomp_notif unotif;
1459 ssize_t ret;
1460
1461
1462 ret = check_zeroed_user(buf, sizeof(unotif));
1463 if (ret < 0)
1464 return ret;
1465 if (!ret)
1466 return -EINVAL;
1467
1468 memset(&unotif, 0, sizeof(unotif));
1469
1470 ret = down_interruptible(&filter->notif->request);
1471 if (ret < 0)
1472 return ret;
1473
1474 mutex_lock(&filter->notify_lock);
1475 list_for_each_entry(cur, &filter->notif->notifications, list) {
1476 if (cur->state == SECCOMP_NOTIFY_INIT) {
1477 knotif = cur;
1478 break;
1479 }
1480 }
1481
1482
1483
1484
1485
1486
1487 if (!knotif) {
1488 ret = -ENOENT;
1489 goto out;
1490 }
1491
1492 unotif.id = knotif->id;
1493 unotif.pid = task_pid_vnr(knotif->task);
1494 unotif.data = *(knotif->data);
1495
1496 knotif->state = SECCOMP_NOTIFY_SENT;
1497 wake_up_poll(&filter->wqh, EPOLLOUT | EPOLLWRNORM);
1498 ret = 0;
1499 out:
1500 mutex_unlock(&filter->notify_lock);
1501
1502 if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) {
1503 ret = -EFAULT;
1504
1505
1506
1507
1508
1509
1510
1511 mutex_lock(&filter->notify_lock);
1512 knotif = find_notification(filter, unotif.id);
1513 if (knotif) {
1514
1515 if (should_sleep_killable(filter, knotif))
1516 complete(&knotif->ready);
1517 knotif->state = SECCOMP_NOTIFY_INIT;
1518 up(&filter->notif->request);
1519 }
1520 mutex_unlock(&filter->notify_lock);
1521 }
1522
1523 return ret;
1524 }
1525
1526 static long seccomp_notify_send(struct seccomp_filter *filter,
1527 void __user *buf)
1528 {
1529 struct seccomp_notif_resp resp = {};
1530 struct seccomp_knotif *knotif;
1531 long ret;
1532
1533 if (copy_from_user(&resp, buf, sizeof(resp)))
1534 return -EFAULT;
1535
1536 if (resp.flags & ~SECCOMP_USER_NOTIF_FLAG_CONTINUE)
1537 return -EINVAL;
1538
1539 if ((resp.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE) &&
1540 (resp.error || resp.val))
1541 return -EINVAL;
1542
1543 ret = mutex_lock_interruptible(&filter->notify_lock);
1544 if (ret < 0)
1545 return ret;
1546
1547 knotif = find_notification(filter, resp.id);
1548 if (!knotif) {
1549 ret = -ENOENT;
1550 goto out;
1551 }
1552
1553
1554 if (knotif->state != SECCOMP_NOTIFY_SENT) {
1555 ret = -EINPROGRESS;
1556 goto out;
1557 }
1558
1559 ret = 0;
1560 knotif->state = SECCOMP_NOTIFY_REPLIED;
1561 knotif->error = resp.error;
1562 knotif->val = resp.val;
1563 knotif->flags = resp.flags;
1564 complete(&knotif->ready);
1565 out:
1566 mutex_unlock(&filter->notify_lock);
1567 return ret;
1568 }
1569
1570 static long seccomp_notify_id_valid(struct seccomp_filter *filter,
1571 void __user *buf)
1572 {
1573 struct seccomp_knotif *knotif;
1574 u64 id;
1575 long ret;
1576
1577 if (copy_from_user(&id, buf, sizeof(id)))
1578 return -EFAULT;
1579
1580 ret = mutex_lock_interruptible(&filter->notify_lock);
1581 if (ret < 0)
1582 return ret;
1583
1584 knotif = find_notification(filter, id);
1585 if (knotif && knotif->state == SECCOMP_NOTIFY_SENT)
1586 ret = 0;
1587 else
1588 ret = -ENOENT;
1589
1590 mutex_unlock(&filter->notify_lock);
1591 return ret;
1592 }
1593
/*
 * Handle SECCOMP_IOCTL_NOTIF_ADDFD: queue a request to add a file descriptor
 * on the notification named by addfd.id and wait for its result.
 *
 * @filter: filter owning the notification list
 * @uaddfd: userspace pointer to a struct seccomp_notif_addfd
 * @size:   size of the userspace structure, taken from the ioctl number
 *
 * Returns the value stored in kaddfd.ret once the request has been completed,
 * or a negative errno: -EINVAL for a bad size or flag combination, -EBADF if
 * addfd.srcfd does not resolve to a file, -ENOENT if no notification matches,
 * -EINPROGRESS if the notification is not in the SENT state, -EBUSY if
 * SECCOMP_ADDFD_FLAG_SEND is requested while other addfd requests are queued,
 * or the error from copy_struct_from_user(), mutex_lock_interruptible() or
 * wait_for_completion_interruptible().
 */
static long seccomp_notify_addfd(struct seccomp_filter *filter,
				 struct seccomp_notif_addfd __user *uaddfd,
				 unsigned int size)
{
	struct seccomp_notif_addfd addfd;
	struct seccomp_knotif *knotif;
	struct seccomp_kaddfd kaddfd;
	int ret;

	BUILD_BUG_ON(sizeof(addfd) < SECCOMP_NOTIFY_ADDFD_SIZE_VER0);
	BUILD_BUG_ON(sizeof(addfd) != SECCOMP_NOTIFY_ADDFD_SIZE_LATEST);

	/* Reject structures smaller than version 0 or a page or larger. */
	if (size < SECCOMP_NOTIFY_ADDFD_SIZE_VER0 || size >= PAGE_SIZE)
		return -EINVAL;

	ret = copy_struct_from_user(&addfd, sizeof(addfd), uaddfd, size);
	if (ret)
		return ret;

	/* O_CLOEXEC is the only flag accepted for the new descriptor. */
	if (addfd.newfd_flags & ~O_CLOEXEC)
		return -EINVAL;

	if (addfd.flags & ~(SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND))
		return -EINVAL;

	/* A specific target fd number is only meaningful together with SETFD. */
	if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
		return -EINVAL;

	/* Take a reference on the source file; dropped by fput() at "out". */
	kaddfd.file = fget(addfd.srcfd);
	if (!kaddfd.file)
		return -EBADF;

	kaddfd.ioctl_flags = addfd.flags;
	kaddfd.flags = addfd.newfd_flags;
	kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD;
	kaddfd.fd = addfd.newfd;
	init_completion(&kaddfd.completion);

	ret = mutex_lock_interruptible(&filter->notify_lock);
	if (ret < 0)
		goto out;

	knotif = find_notification(filter, addfd.id);
	if (!knotif) {
		ret = -ENOENT;
		goto out_unlock;
	}

	/*
	 * Requests are only accepted while the notification is in the SENT
	 * state, i.e. it has been received by the listener and has not been
	 * replied to yet.
	 */
	if (knotif->state != SECCOMP_NOTIFY_SENT) {
		ret = -EINPROGRESS;
		goto out_unlock;
	}

	if (addfd.flags & SECCOMP_ADDFD_FLAG_SEND) {
		/*
		 * SEND also marks the notification as replied, so refuse it
		 * while earlier addfd requests are still queued on this
		 * notification.
		 */
		if (!list_empty(&knotif->addfd)) {
			ret = -EBUSY;
			goto out_unlock;
		}

		/* Allow exactly one reply: no further SEND/ADDFD after this. */
		knotif->state = SECCOMP_NOTIFY_REPLIED;
	}

	/*
	 * Queue the on-stack request and signal knotif->ready.
	 * NOTE(review): presumably this wakes the task blocked on the
	 * notification so it can process the queued request - confirm
	 * against the waiter side, which is outside this block.
	 */
	list_add(&kaddfd.list, &knotif->addfd);
	complete(&knotif->ready);
	mutex_unlock(&filter->notify_lock);

	/* Now we wait for the request to be completed. */
	ret = wait_for_completion_interruptible(&kaddfd.completion);
	if (ret == 0) {
		/*
		 * The completion fired without the wait being interrupted;
		 * kaddfd.ret shares a union with kaddfd.setfd and now holds
		 * the result to hand back to the caller.
		 */
		ret = kaddfd.ret;
		goto out;
	}

	mutex_lock(&filter->notify_lock);
	/*
	 * The wait was interrupted. An empty kaddfd.list means the entry is
	 * no longer linked on knotif->addfd, so the stored result is used.
	 * NOTE(review): this relies on the consumer unlinking the entry with
	 * list_del_init() - confirm. Otherwise the request is still queued
	 * and is removed here, leaving ret as the interruption error.
	 */
	if (list_empty(&kaddfd.list))
		ret = kaddfd.ret;
	else
		list_del(&kaddfd.list);

out_unlock:
	mutex_unlock(&filter->notify_lock);
out:
	fput(kaddfd.file);

	return ret;
}
1707
1708 static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
1709 unsigned long arg)
1710 {
1711 struct seccomp_filter *filter = file->private_data;
1712 void __user *buf = (void __user *)arg;
1713
1714
1715 switch (cmd) {
1716 case SECCOMP_IOCTL_NOTIF_RECV:
1717 return seccomp_notify_recv(filter, buf);
1718 case SECCOMP_IOCTL_NOTIF_SEND:
1719 return seccomp_notify_send(filter, buf);
1720 case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
1721 case SECCOMP_IOCTL_NOTIF_ID_VALID:
1722 return seccomp_notify_id_valid(filter, buf);
1723 }
1724
1725
1726 #define EA_IOCTL(cmd) ((cmd) & ~(IOC_INOUT | IOCSIZE_MASK))
1727 switch (EA_IOCTL(cmd)) {
1728 case EA_IOCTL(SECCOMP_IOCTL_NOTIF_ADDFD):
1729 return seccomp_notify_addfd(filter, buf, _IOC_SIZE(cmd));
1730 default:
1731 return -EINVAL;
1732 }
1733 }
1734
1735 static __poll_t seccomp_notify_poll(struct file *file,
1736 struct poll_table_struct *poll_tab)
1737 {
1738 struct seccomp_filter *filter = file->private_data;
1739 __poll_t ret = 0;
1740 struct seccomp_knotif *cur;
1741
1742 poll_wait(file, &filter->wqh, poll_tab);
1743
1744 if (mutex_lock_interruptible(&filter->notify_lock) < 0)
1745 return EPOLLERR;
1746
1747 list_for_each_entry(cur, &filter->notif->notifications, list) {
1748 if (cur->state == SECCOMP_NOTIFY_INIT)
1749 ret |= EPOLLIN | EPOLLRDNORM;
1750 if (cur->state == SECCOMP_NOTIFY_SENT)
1751 ret |= EPOLLOUT | EPOLLWRNORM;
1752 if ((ret & EPOLLIN) && (ret & EPOLLOUT))
1753 break;
1754 }
1755
1756 mutex_unlock(&filter->notify_lock);
1757
1758 if (refcount_read(&filter->users) == 0)
1759 ret |= EPOLLHUP;
1760
1761 return ret;
1762 }
1763
1764 static const struct file_operations seccomp_notify_ops = {
1765 .poll = seccomp_notify_poll,
1766 .release = seccomp_notify_release,
1767 .unlocked_ioctl = seccomp_notify_ioctl,
1768 .compat_ioctl = seccomp_notify_ioctl,
1769 };
1770
1771 static struct file *init_listener(struct seccomp_filter *filter)
1772 {
1773 struct file *ret;
1774
1775 ret = ERR_PTR(-ENOMEM);
1776 filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL);
1777 if (!filter->notif)
1778 goto out;
1779
1780 sema_init(&filter->notif->request, 0);
1781 filter->notif->next_id = get_random_u64();
1782 INIT_LIST_HEAD(&filter->notif->notifications);
1783
1784 ret = anon_inode_getfile("seccomp notify", &seccomp_notify_ops,
1785 filter, O_RDWR);
1786 if (IS_ERR(ret))
1787 goto out_notif;
1788
1789
1790 __get_seccomp_filter(filter);
1791
1792 out_notif:
1793 if (IS_ERR(ret))
1794 seccomp_notify_free(filter);
1795 out:
1796 return ret;
1797 }
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807 static bool has_duplicate_listener(struct seccomp_filter *new_child)
1808 {
1809 struct seccomp_filter *cur;
1810
1811
1812 lockdep_assert_held(¤t->sighand->siglock);
1813
1814 if (!new_child->notif)
1815 return false;
1816 for (cur = current->seccomp.filter; cur; cur = cur->prev) {
1817 if (cur->notif)
1818 return true;
1819 }
1820
1821 return false;
1822 }
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837 static long seccomp_set_mode_filter(unsigned int flags,
1838 const char __user *filter)
1839 {
1840 const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
1841 struct seccomp_filter *prepared = NULL;
1842 long ret = -EINVAL;
1843 int listener = -1;
1844 struct file *listener_f = NULL;
1845
1846
1847 if (flags & ~SECCOMP_FILTER_FLAG_MASK)
1848 return -EINVAL;
1849
1850
1851
1852
1853
1854
1855
1856
1857 if ((flags & SECCOMP_FILTER_FLAG_TSYNC) &&
1858 (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) &&
1859 ((flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) == 0))
1860 return -EINVAL;
1861
1862
1863
1864
1865
1866 if ((flags & SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV) &&
1867 ((flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) == 0))
1868 return -EINVAL;
1869
1870
1871 prepared = seccomp_prepare_user_filter(filter);
1872 if (IS_ERR(prepared))
1873 return PTR_ERR(prepared);
1874
1875 if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
1876 listener = get_unused_fd_flags(O_CLOEXEC);
1877 if (listener < 0) {
1878 ret = listener;
1879 goto out_free;
1880 }
1881
1882 listener_f = init_listener(prepared);
1883 if (IS_ERR(listener_f)) {
1884 put_unused_fd(listener);
1885 ret = PTR_ERR(listener_f);
1886 goto out_free;
1887 }
1888 }
1889
1890
1891
1892
1893
1894 if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
1895 mutex_lock_killable(¤t->signal->cred_guard_mutex))
1896 goto out_put_fd;
1897
1898 spin_lock_irq(¤t->sighand->siglock);
1899
1900 if (!seccomp_may_assign_mode(seccomp_mode))
1901 goto out;
1902
1903 if (has_duplicate_listener(prepared)) {
1904 ret = -EBUSY;
1905 goto out;
1906 }
1907
1908 ret = seccomp_attach_filter(flags, prepared);
1909 if (ret)
1910 goto out;
1911
1912 prepared = NULL;
1913
1914 seccomp_assign_mode(current, seccomp_mode, flags);
1915 out:
1916 spin_unlock_irq(¤t->sighand->siglock);
1917 if (flags & SECCOMP_FILTER_FLAG_TSYNC)
1918 mutex_unlock(¤t->signal->cred_guard_mutex);
1919 out_put_fd:
1920 if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
1921 if (ret) {
1922 listener_f->private_data = NULL;
1923 fput(listener_f);
1924 put_unused_fd(listener);
1925 seccomp_notify_detach(prepared);
1926 } else {
1927 fd_install(listener, listener_f);
1928 ret = listener;
1929 }
1930 }
1931 out_free:
1932 seccomp_filter_free(prepared);
1933 return ret;
1934 }
1935 #else
/*
 * Stub used when CONFIG_SECCOMP_FILTER is not enabled: filter mode is not
 * available, so every request fails with -EINVAL.
 */
static inline long seccomp_set_mode_filter(unsigned int flags,
					   const char __user *filter)
{
	return -EINVAL;
}
1941 #endif
1942
1943 static long seccomp_get_action_avail(const char __user *uaction)
1944 {
1945 u32 action;
1946
1947 if (copy_from_user(&action, uaction, sizeof(action)))
1948 return -EFAULT;
1949
1950 switch (action) {
1951 case SECCOMP_RET_KILL_PROCESS:
1952 case SECCOMP_RET_KILL_THREAD:
1953 case SECCOMP_RET_TRAP:
1954 case SECCOMP_RET_ERRNO:
1955 case SECCOMP_RET_USER_NOTIF:
1956 case SECCOMP_RET_TRACE:
1957 case SECCOMP_RET_LOG:
1958 case SECCOMP_RET_ALLOW:
1959 break;
1960 default:
1961 return -EOPNOTSUPP;
1962 }
1963
1964 return 0;
1965 }
1966
1967 static long seccomp_get_notif_sizes(void __user *usizes)
1968 {
1969 struct seccomp_notif_sizes sizes = {
1970 .seccomp_notif = sizeof(struct seccomp_notif),
1971 .seccomp_notif_resp = sizeof(struct seccomp_notif_resp),
1972 .seccomp_data = sizeof(struct seccomp_data),
1973 };
1974
1975 if (copy_to_user(usizes, &sizes, sizeof(sizes)))
1976 return -EFAULT;
1977
1978 return 0;
1979 }
1980
1981
1982 static long do_seccomp(unsigned int op, unsigned int flags,
1983 void __user *uargs)
1984 {
1985 switch (op) {
1986 case SECCOMP_SET_MODE_STRICT:
1987 if (flags != 0 || uargs != NULL)
1988 return -EINVAL;
1989 return seccomp_set_mode_strict();
1990 case SECCOMP_SET_MODE_FILTER:
1991 return seccomp_set_mode_filter(flags, uargs);
1992 case SECCOMP_GET_ACTION_AVAIL:
1993 if (flags != 0)
1994 return -EINVAL;
1995
1996 return seccomp_get_action_avail(uargs);
1997 case SECCOMP_GET_NOTIF_SIZES:
1998 if (flags != 0)
1999 return -EINVAL;
2000
2001 return seccomp_get_notif_sizes(uargs);
2002 default:
2003 return -EINVAL;
2004 }
2005 }
2006
/*
 * seccomp(2) system call: forwards its three arguments unchanged to
 * do_seccomp() and returns that function's result.
 */
SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
		void __user *, uargs)
{
	return do_seccomp(op, flags, uargs);
}
2012
2013
2014
2015
2016
2017
2018
2019
2020 long prctl_set_seccomp(unsigned long seccomp_mode, void __user *filter)
2021 {
2022 unsigned int op;
2023 void __user *uargs;
2024
2025 switch (seccomp_mode) {
2026 case SECCOMP_MODE_STRICT:
2027 op = SECCOMP_SET_MODE_STRICT;
2028
2029
2030
2031
2032
2033 uargs = NULL;
2034 break;
2035 case SECCOMP_MODE_FILTER:
2036 op = SECCOMP_SET_MODE_FILTER;
2037 uargs = filter;
2038 break;
2039 default:
2040 return -EINVAL;
2041 }
2042
2043
2044 return do_seccomp(op, 0, uargs);
2045 }
2046
2047 #if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
2048 static struct seccomp_filter *get_nth_filter(struct task_struct *task,
2049 unsigned long filter_off)
2050 {
2051 struct seccomp_filter *orig, *filter;
2052 unsigned long count;
2053
2054
2055
2056
2057
2058 spin_lock_irq(&task->sighand->siglock);
2059
2060 if (task->seccomp.mode != SECCOMP_MODE_FILTER) {
2061 spin_unlock_irq(&task->sighand->siglock);
2062 return ERR_PTR(-EINVAL);
2063 }
2064
2065 orig = task->seccomp.filter;
2066 __get_seccomp_filter(orig);
2067 spin_unlock_irq(&task->sighand->siglock);
2068
2069 count = 0;
2070 for (filter = orig; filter; filter = filter->prev)
2071 count++;
2072
2073 if (filter_off >= count) {
2074 filter = ERR_PTR(-ENOENT);
2075 goto out;
2076 }
2077
2078 count -= filter_off;
2079 for (filter = orig; filter && count > 1; filter = filter->prev)
2080 count--;
2081
2082 if (WARN_ON(count != 1 || !filter)) {
2083 filter = ERR_PTR(-ENOENT);
2084 goto out;
2085 }
2086
2087 __get_seccomp_filter(filter);
2088
2089 out:
2090 __put_seccomp_filter(orig);
2091 return filter;
2092 }
2093
2094 long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
2095 void __user *data)
2096 {
2097 struct seccomp_filter *filter;
2098 struct sock_fprog_kern *fprog;
2099 long ret;
2100
2101 if (!capable(CAP_SYS_ADMIN) ||
2102 current->seccomp.mode != SECCOMP_MODE_DISABLED) {
2103 return -EACCES;
2104 }
2105
2106 filter = get_nth_filter(task, filter_off);
2107 if (IS_ERR(filter))
2108 return PTR_ERR(filter);
2109
2110 fprog = filter->prog->orig_prog;
2111 if (!fprog) {
2112
2113
2114
2115
2116 ret = -EMEDIUMTYPE;
2117 goto out;
2118 }
2119
2120 ret = fprog->len;
2121 if (!data)
2122 goto out;
2123
2124 if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
2125 ret = -EFAULT;
2126
2127 out:
2128 __put_seccomp_filter(filter);
2129 return ret;
2130 }
2131
2132 long seccomp_get_metadata(struct task_struct *task,
2133 unsigned long size, void __user *data)
2134 {
2135 long ret;
2136 struct seccomp_filter *filter;
2137 struct seccomp_metadata kmd = {};
2138
2139 if (!capable(CAP_SYS_ADMIN) ||
2140 current->seccomp.mode != SECCOMP_MODE_DISABLED) {
2141 return -EACCES;
2142 }
2143
2144 size = min_t(unsigned long, size, sizeof(kmd));
2145
2146 if (size < sizeof(kmd.filter_off))
2147 return -EINVAL;
2148
2149 if (copy_from_user(&kmd.filter_off, data, sizeof(kmd.filter_off)))
2150 return -EFAULT;
2151
2152 filter = get_nth_filter(task, kmd.filter_off);
2153 if (IS_ERR(filter))
2154 return PTR_ERR(filter);
2155
2156 if (filter->log)
2157 kmd.flags |= SECCOMP_FILTER_FLAG_LOG;
2158
2159 ret = size;
2160 if (copy_to_user(data, &kmd, size))
2161 ret = -EFAULT;
2162
2163 __put_seccomp_filter(filter);
2164 return ret;
2165 }
2166 #endif
2167
2168 #ifdef CONFIG_SYSCTL
2169
2170
/* Human readable action names, shared by the sysctl strings and tables. */
#define SECCOMP_RET_KILL_PROCESS_NAME	"kill_process"
#define SECCOMP_RET_KILL_THREAD_NAME	"kill_thread"
#define SECCOMP_RET_TRAP_NAME		"trap"
#define SECCOMP_RET_ERRNO_NAME		"errno"
#define SECCOMP_RET_USER_NOTIF_NAME	"user_notif"
#define SECCOMP_RET_TRACE_NAME		"trace"
#define SECCOMP_RET_LOG_NAME		"log"
#define SECCOMP_RET_ALLOW_NAME		"allow"

/*
 * Space-separated list of every action name, built by string literal
 * concatenation. Its size is also used to dimension the temporary name
 * buffers in the sysctl handlers.
 */
static const char seccomp_actions_avail[] =
				SECCOMP_RET_KILL_PROCESS_NAME	" "
				SECCOMP_RET_KILL_THREAD_NAME	" "
				SECCOMP_RET_TRAP_NAME		" "
				SECCOMP_RET_ERRNO_NAME		" "
				SECCOMP_RET_USER_NOTIF_NAME	" "
				SECCOMP_RET_TRACE_NAME		" "
				SECCOMP_RET_LOG_NAME		" "
				SECCOMP_RET_ALLOW_NAME;
2189
2190 struct seccomp_log_name {
2191 u32 log;
2192 const char *name;
2193 };
2194
2195 static const struct seccomp_log_name seccomp_log_names[] = {
2196 { SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME },
2197 { SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
2198 { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
2199 { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
2200 { SECCOMP_LOG_USER_NOTIF, SECCOMP_RET_USER_NOTIF_NAME },
2201 { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
2202 { SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME },
2203 { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
2204 { }
2205 };
2206
2207 static bool seccomp_names_from_actions_logged(char *names, size_t size,
2208 u32 actions_logged,
2209 const char *sep)
2210 {
2211 const struct seccomp_log_name *cur;
2212 bool append_sep = false;
2213
2214 for (cur = seccomp_log_names; cur->name && size; cur++) {
2215 ssize_t ret;
2216
2217 if (!(actions_logged & cur->log))
2218 continue;
2219
2220 if (append_sep) {
2221 ret = strscpy(names, sep, size);
2222 if (ret < 0)
2223 return false;
2224
2225 names += ret;
2226 size -= ret;
2227 } else
2228 append_sep = true;
2229
2230 ret = strscpy(names, cur->name, size);
2231 if (ret < 0)
2232 return false;
2233
2234 names += ret;
2235 size -= ret;
2236 }
2237
2238 return true;
2239 }
2240
2241 static bool seccomp_action_logged_from_name(u32 *action_logged,
2242 const char *name)
2243 {
2244 const struct seccomp_log_name *cur;
2245
2246 for (cur = seccomp_log_names; cur->name; cur++) {
2247 if (!strcmp(cur->name, name)) {
2248 *action_logged = cur->log;
2249 return true;
2250 }
2251 }
2252
2253 return false;
2254 }
2255
2256 static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names)
2257 {
2258 char *name;
2259
2260 *actions_logged = 0;
2261 while ((name = strsep(&names, " ")) && *name) {
2262 u32 action_logged = 0;
2263
2264 if (!seccomp_action_logged_from_name(&action_logged, name))
2265 return false;
2266
2267 *actions_logged |= action_logged;
2268 }
2269
2270 return true;
2271 }
2272
2273 static int read_actions_logged(struct ctl_table *ro_table, void *buffer,
2274 size_t *lenp, loff_t *ppos)
2275 {
2276 char names[sizeof(seccomp_actions_avail)];
2277 struct ctl_table table;
2278
2279 memset(names, 0, sizeof(names));
2280
2281 if (!seccomp_names_from_actions_logged(names, sizeof(names),
2282 seccomp_actions_logged, " "))
2283 return -EINVAL;
2284
2285 table = *ro_table;
2286 table.data = names;
2287 table.maxlen = sizeof(names);
2288 return proc_dostring(&table, 0, buffer, lenp, ppos);
2289 }
2290
2291 static int write_actions_logged(struct ctl_table *ro_table, void *buffer,
2292 size_t *lenp, loff_t *ppos, u32 *actions_logged)
2293 {
2294 char names[sizeof(seccomp_actions_avail)];
2295 struct ctl_table table;
2296 int ret;
2297
2298 if (!capable(CAP_SYS_ADMIN))
2299 return -EPERM;
2300
2301 memset(names, 0, sizeof(names));
2302
2303 table = *ro_table;
2304 table.data = names;
2305 table.maxlen = sizeof(names);
2306 ret = proc_dostring(&table, 1, buffer, lenp, ppos);
2307 if (ret)
2308 return ret;
2309
2310 if (!seccomp_actions_logged_from_names(actions_logged, table.data))
2311 return -EINVAL;
2312
2313 if (*actions_logged & SECCOMP_LOG_ALLOW)
2314 return -EINVAL;
2315
2316 seccomp_actions_logged = *actions_logged;
2317 return 0;
2318 }
2319
2320 static void audit_actions_logged(u32 actions_logged, u32 old_actions_logged,
2321 int ret)
2322 {
2323 char names[sizeof(seccomp_actions_avail)];
2324 char old_names[sizeof(seccomp_actions_avail)];
2325 const char *new = names;
2326 const char *old = old_names;
2327
2328 if (!audit_enabled)
2329 return;
2330
2331 memset(names, 0, sizeof(names));
2332 memset(old_names, 0, sizeof(old_names));
2333
2334 if (ret)
2335 new = "?";
2336 else if (!actions_logged)
2337 new = "(none)";
2338 else if (!seccomp_names_from_actions_logged(names, sizeof(names),
2339 actions_logged, ","))
2340 new = "?";
2341
2342 if (!old_actions_logged)
2343 old = "(none)";
2344 else if (!seccomp_names_from_actions_logged(old_names,
2345 sizeof(old_names),
2346 old_actions_logged, ","))
2347 old = "?";
2348
2349 return audit_seccomp_actions_logged(new, old, !ret);
2350 }
2351
2352 static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
2353 void *buffer, size_t *lenp,
2354 loff_t *ppos)
2355 {
2356 int ret;
2357
2358 if (write) {
2359 u32 actions_logged = 0;
2360 u32 old_actions_logged = seccomp_actions_logged;
2361
2362 ret = write_actions_logged(ro_table, buffer, lenp, ppos,
2363 &actions_logged);
2364 audit_actions_logged(actions_logged, old_actions_logged, ret);
2365 } else
2366 ret = read_actions_logged(ro_table, buffer, lenp, ppos);
2367
2368 return ret;
2369 }
2370
/* Directory path of the seccomp sysctls: "kernel" / "seccomp". */
static struct ctl_path seccomp_sysctl_path[] = {
	{ .procname = "kernel", },
	{ .procname = "seccomp", },
	{ }
};

/*
 * seccomp sysctl entries.
 *
 * "actions_avail" is a read-only (0444) string served directly from
 * seccomp_actions_avail by proc_dostring().
 *
 * "actions_logged" (0644) sets no .data/.maxlen here: its handler works on a
 * temporary copy of the table entry that points at an on-stack buffer.
 */
static struct ctl_table seccomp_sysctl_table[] = {
	{
		.procname	= "actions_avail",
		.data		= (void *) &seccomp_actions_avail,
		.maxlen		= sizeof(seccomp_actions_avail),
		.mode		= 0444,
		.proc_handler	= proc_dostring,
	},
	{
		.procname	= "actions_logged",
		.mode		= 0644,
		.proc_handler	= seccomp_actions_logged_handler,
	},
	{ }
};
2392
2393 static int __init seccomp_sysctl_init(void)
2394 {
2395 struct ctl_table_header *hdr;
2396
2397 hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
2398 if (!hdr)
2399 pr_warn("sysctl registration failed\n");
2400 else
2401 kmemleak_not_leak(hdr);
2402
2403 return 0;
2404 }
2405
2406 device_initcall(seccomp_sysctl_init)
2407
2408 #endif
2409
2410 #ifdef CONFIG_SECCOMP_CACHE_DEBUG
2411
2412 static void proc_pid_seccomp_cache_arch(struct seq_file *m, const char *name,
2413 const void *bitmap, size_t bitmap_size)
2414 {
2415 int nr;
2416
2417 for (nr = 0; nr < bitmap_size; nr++) {
2418 bool cached = test_bit(nr, bitmap);
2419 char *status = cached ? "ALLOW" : "FILTER";
2420
2421 seq_printf(m, "%s %d %s\n", name, nr, status);
2422 }
2423 }
2424
2425 int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns,
2426 struct pid *pid, struct task_struct *task)
2427 {
2428 struct seccomp_filter *f;
2429 unsigned long flags;
2430
2431
2432
2433
2434
2435 if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
2436 return -EACCES;
2437
2438 if (!lock_task_sighand(task, &flags))
2439 return -ESRCH;
2440
2441 f = READ_ONCE(task->seccomp.filter);
2442 if (!f) {
2443 unlock_task_sighand(task, &flags);
2444 return 0;
2445 }
2446
2447
2448 __get_seccomp_filter(f);
2449 unlock_task_sighand(task, &flags);
2450
2451 proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_NATIVE_NAME,
2452 f->cache.allow_native,
2453 SECCOMP_ARCH_NATIVE_NR);
2454
2455 #ifdef SECCOMP_ARCH_COMPAT
2456 proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_COMPAT_NAME,
2457 f->cache.allow_compat,
2458 SECCOMP_ARCH_COMPAT_NR);
2459 #endif
2460
2461 __put_seccomp_filter(f);
2462 return 0;
2463 }
2464 #endif