0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026 #include <linux/kernel_read_file.h>
0027 #include <linux/slab.h>
0028 #include <linux/file.h>
0029 #include <linux/fdtable.h>
0030 #include <linux/mm.h>
0031 #include <linux/vmacache.h>
0032 #include <linux/stat.h>
0033 #include <linux/fcntl.h>
0034 #include <linux/swap.h>
0035 #include <linux/string.h>
0036 #include <linux/init.h>
0037 #include <linux/sched/mm.h>
0038 #include <linux/sched/coredump.h>
0039 #include <linux/sched/signal.h>
0040 #include <linux/sched/numa_balancing.h>
0041 #include <linux/sched/task.h>
0042 #include <linux/pagemap.h>
0043 #include <linux/perf_event.h>
0044 #include <linux/highmem.h>
0045 #include <linux/spinlock.h>
0046 #include <linux/key.h>
0047 #include <linux/personality.h>
0048 #include <linux/binfmts.h>
0049 #include <linux/utsname.h>
0050 #include <linux/pid_namespace.h>
0051 #include <linux/module.h>
0052 #include <linux/namei.h>
0053 #include <linux/mount.h>
0054 #include <linux/security.h>
0055 #include <linux/syscalls.h>
0056 #include <linux/tsacct_kern.h>
0057 #include <linux/cn_proc.h>
0058 #include <linux/audit.h>
0059 #include <linux/kmod.h>
0060 #include <linux/fsnotify.h>
0061 #include <linux/fs_struct.h>
0062 #include <linux/oom.h>
0063 #include <linux/compat.h>
0064 #include <linux/vmalloc.h>
0065 #include <linux/io_uring.h>
0066 #include <linux/syscall_user_dispatch.h>
0067 #include <linux/coredump.h>
0068
0069 #include <linux/uaccess.h>
0070 #include <asm/mmu_context.h>
0071 #include <asm/tlb.h>
0072
0073 #include <trace/events/task.h>
0074 #include "internal.h"
0075
0076 #include <trace/events/sched.h>
0077
/* Forward declaration; the definition lies outside this part of the file. */
static int bprm_creds_from_file(struct linux_binprm *bprm);

/*
 * Dumpable mode that begin_new_exec() passes to set_dumpable() when the
 * binary is flagged BINPRM_FLAGS_ENFORCE_NONDUMP or the effective and real
 * uid/gid of the caller differ.
 */
int suid_dumpable = 0;

/* Registered binary format handlers; the list is guarded by binfmt_lock. */
static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);
0084
0085 void __register_binfmt(struct linux_binfmt * fmt, int insert)
0086 {
0087 write_lock(&binfmt_lock);
0088 insert ? list_add(&fmt->lh, &formats) :
0089 list_add_tail(&fmt->lh, &formats);
0090 write_unlock(&binfmt_lock);
0091 }
0092
0093 EXPORT_SYMBOL(__register_binfmt);
0094
0095 void unregister_binfmt(struct linux_binfmt * fmt)
0096 {
0097 write_lock(&binfmt_lock);
0098 list_del(&fmt->lh);
0099 write_unlock(&binfmt_lock);
0100 }
0101
0102 EXPORT_SYMBOL(unregister_binfmt);
0103
0104 static inline void put_binfmt(struct linux_binfmt * fmt)
0105 {
0106 module_put(fmt->module);
0107 }
0108
0109 bool path_noexec(const struct path *path)
0110 {
0111 return (path->mnt->mnt_flags & MNT_NOEXEC) ||
0112 (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
0113 }
0114
0115 #ifdef CONFIG_USELIB
0116
0117
0118
0119
0120
0121
0122 SYSCALL_DEFINE1(uselib, const char __user *, library)
0123 {
0124 struct linux_binfmt *fmt;
0125 struct file *file;
0126 struct filename *tmp = getname(library);
0127 int error = PTR_ERR(tmp);
0128 static const struct open_flags uselib_flags = {
0129 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
0130 .acc_mode = MAY_READ | MAY_EXEC,
0131 .intent = LOOKUP_OPEN,
0132 .lookup_flags = LOOKUP_FOLLOW,
0133 };
0134
0135 if (IS_ERR(tmp))
0136 goto out;
0137
0138 file = do_filp_open(AT_FDCWD, tmp, &uselib_flags);
0139 putname(tmp);
0140 error = PTR_ERR(file);
0141 if (IS_ERR(file))
0142 goto out;
0143
0144
0145
0146
0147
0148
0149 error = -EACCES;
0150 if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
0151 path_noexec(&file->f_path)))
0152 goto exit;
0153
0154 fsnotify_open(file);
0155
0156 error = -ENOEXEC;
0157
0158 read_lock(&binfmt_lock);
0159 list_for_each_entry(fmt, &formats, lh) {
0160 if (!fmt->load_shlib)
0161 continue;
0162 if (!try_module_get(fmt->module))
0163 continue;
0164 read_unlock(&binfmt_lock);
0165 error = fmt->load_shlib(file);
0166 read_lock(&binfmt_lock);
0167 put_binfmt(fmt);
0168 if (error != -ENOEXEC)
0169 break;
0170 }
0171 read_unlock(&binfmt_lock);
0172 exit:
0173 fput(file);
0174 out:
0175 return error;
0176 }
0177 #endif
0178
0179 #ifdef CONFIG_MMU
0180
0181
0182
0183
0184
0185
0186 static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
0187 {
0188 struct mm_struct *mm = current->mm;
0189 long diff = (long)(pages - bprm->vma_pages);
0190
0191 if (!mm || !diff)
0192 return;
0193
0194 bprm->vma_pages = pages;
0195 add_mm_counter(mm, MM_ANONPAGES, diff);
0196 }
0197
0198 static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
0199 int write)
0200 {
0201 struct page *page;
0202 int ret;
0203 unsigned int gup_flags = FOLL_FORCE;
0204
0205 #ifdef CONFIG_STACK_GROWSUP
0206 if (write) {
0207 ret = expand_downwards(bprm->vma, pos);
0208 if (ret < 0)
0209 return NULL;
0210 }
0211 #endif
0212
0213 if (write)
0214 gup_flags |= FOLL_WRITE;
0215
0216
0217
0218
0219
0220 mmap_read_lock(bprm->mm);
0221 ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags,
0222 &page, NULL, NULL);
0223 mmap_read_unlock(bprm->mm);
0224 if (ret <= 0)
0225 return NULL;
0226
0227 if (write)
0228 acct_arg_size(bprm, vma_pages(bprm->vma));
0229
0230 return page;
0231 }
0232
/* Drop the page reference obtained through get_arg_page(). */
static void put_arg_page(struct page *page)
{
	put_page(page);
}
0237
/*
 * CONFIG_MMU variant: intentionally empty.  In this configuration
 * get_arg_page() pins pages of bprm->mm instead of filling bprm->page[],
 * so there is no per-bprm page array to release here.
 */
static void free_arg_pages(struct linux_binprm *bprm)
{
}
0241
/*
 * Flush the cache for the argument page @page, which is mapped at user
 * address @pos inside bprm->vma.
 */
static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
		struct page *page)
{
	flush_cache_page(bprm->vma, pos, page_to_pfn(page));
}
0247
0248 static int __bprm_mm_init(struct linux_binprm *bprm)
0249 {
0250 int err;
0251 struct vm_area_struct *vma = NULL;
0252 struct mm_struct *mm = bprm->mm;
0253
0254 bprm->vma = vma = vm_area_alloc(mm);
0255 if (!vma)
0256 return -ENOMEM;
0257 vma_set_anonymous(vma);
0258
0259 if (mmap_write_lock_killable(mm)) {
0260 err = -EINTR;
0261 goto err_free;
0262 }
0263
0264
0265
0266
0267
0268
0269
0270 BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
0271 vma->vm_end = STACK_TOP_MAX;
0272 vma->vm_start = vma->vm_end - PAGE_SIZE;
0273 vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
0274 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
0275
0276 err = insert_vm_struct(mm, vma);
0277 if (err)
0278 goto err;
0279
0280 mm->stack_vm = mm->total_vm = 1;
0281 mmap_write_unlock(mm);
0282 bprm->p = vma->vm_end - sizeof(void *);
0283 return 0;
0284 err:
0285 mmap_write_unlock(mm);
0286 err_free:
0287 bprm->vma = NULL;
0288 vm_area_free(vma);
0289 return err;
0290 }
0291
0292 static bool valid_arg_len(struct linux_binprm *bprm, long len)
0293 {
0294 return len <= MAX_ARG_STRLEN;
0295 }
0296
0297 #else
0298
/* !CONFIG_MMU variant: argument pages are not accounted; nothing to do. */
static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
{
}
0302
0303 static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
0304 int write)
0305 {
0306 struct page *page;
0307
0308 page = bprm->page[pos / PAGE_SIZE];
0309 if (!page && write) {
0310 page = alloc_page(GFP_HIGHUSER|__GFP_ZERO);
0311 if (!page)
0312 return NULL;
0313 bprm->page[pos / PAGE_SIZE] = page;
0314 }
0315
0316 return page;
0317 }
0318
/*
 * !CONFIG_MMU variant: intentionally empty.  get_arg_page() takes no extra
 * reference here; the pages stay in bprm->page[] until free_arg_page().
 */
static void put_arg_page(struct page *page)
{
}
0322
0323 static void free_arg_page(struct linux_binprm *bprm, int i)
0324 {
0325 if (bprm->page[i]) {
0326 __free_page(bprm->page[i]);
0327 bprm->page[i] = NULL;
0328 }
0329 }
0330
0331 static void free_arg_pages(struct linux_binprm *bprm)
0332 {
0333 int i;
0334
0335 for (i = 0; i < MAX_ARG_PAGES; i++)
0336 free_arg_page(bprm, i);
0337 }
0338
/* !CONFIG_MMU variant: no cache flushing is performed; nothing to do. */
static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
		struct page *page)
{
}
0343
0344 static int __bprm_mm_init(struct linux_binprm *bprm)
0345 {
0346 bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
0347 return 0;
0348 }
0349
0350 static bool valid_arg_len(struct linux_binprm *bprm, long len)
0351 {
0352 return len <= bprm->p;
0353 }
0354
0355 #endif
0356
0357
0358
0359
0360
0361
0362
0363 static int bprm_mm_init(struct linux_binprm *bprm)
0364 {
0365 int err;
0366 struct mm_struct *mm = NULL;
0367
0368 bprm->mm = mm = mm_alloc();
0369 err = -ENOMEM;
0370 if (!mm)
0371 goto err;
0372
0373
0374 task_lock(current->group_leader);
0375 bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
0376 task_unlock(current->group_leader);
0377
0378 err = __bprm_mm_init(bprm);
0379 if (err)
0380 goto err;
0381
0382 return 0;
0383
0384 err:
0385 if (mm) {
0386 bprm->mm = NULL;
0387 mmdrop(mm);
0388 }
0389
0390 return err;
0391 }
0392
/*
 * A user-space argv/envp style pointer array.
 *
 * With CONFIG_COMPAT, @is_compat selects which union member is valid:
 * @ptr.compat (array of compat_uptr_t) when set, @ptr.native otherwise.
 * Entries are fetched with get_user_arg_ptr().
 */
struct user_arg_ptr {
#ifdef CONFIG_COMPAT
	bool is_compat;
#endif
	union {
		const char __user *const __user *native;
#ifdef CONFIG_COMPAT
		const compat_uptr_t __user *compat;
#endif
	} ptr;
};
0404
0405 static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
0406 {
0407 const char __user *native;
0408
0409 #ifdef CONFIG_COMPAT
0410 if (unlikely(argv.is_compat)) {
0411 compat_uptr_t compat;
0412
0413 if (get_user(compat, argv.ptr.compat + nr))
0414 return ERR_PTR(-EFAULT);
0415
0416 return compat_ptr(compat);
0417 }
0418 #endif
0419
0420 if (get_user(native, argv.ptr.native + nr))
0421 return ERR_PTR(-EFAULT);
0422
0423 return native;
0424 }
0425
0426
0427
0428
0429 static int count(struct user_arg_ptr argv, int max)
0430 {
0431 int i = 0;
0432
0433 if (argv.ptr.native != NULL) {
0434 for (;;) {
0435 const char __user *p = get_user_arg_ptr(argv, i);
0436
0437 if (!p)
0438 break;
0439
0440 if (IS_ERR(p))
0441 return -EFAULT;
0442
0443 if (i >= max)
0444 return -E2BIG;
0445 ++i;
0446
0447 if (fatal_signal_pending(current))
0448 return -ERESTARTNOHAND;
0449 cond_resched();
0450 }
0451 }
0452 return i;
0453 }
0454
0455 static int count_strings_kernel(const char *const *argv)
0456 {
0457 int i;
0458
0459 if (!argv)
0460 return 0;
0461
0462 for (i = 0; argv[i]; ++i) {
0463 if (i >= MAX_ARG_STRINGS)
0464 return -E2BIG;
0465 if (fatal_signal_pending(current))
0466 return -ERESTARTNOHAND;
0467 cond_resched();
0468 }
0469 return i;
0470 }
0471
0472 static int bprm_stack_limits(struct linux_binprm *bprm)
0473 {
0474 unsigned long limit, ptr_size;
0475
0476
0477
0478
0479
0480
0481
0482
0483
0484 limit = _STK_LIM / 4 * 3;
0485 limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
0486
0487
0488
0489
0490 limit = max_t(unsigned long, limit, ARG_MAX);
0491
0492
0493
0494
0495
0496
0497
0498
0499
0500
0501
0502
0503
0504 ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *);
0505 if (limit <= ptr_size)
0506 return -E2BIG;
0507 limit -= ptr_size;
0508
0509 bprm->argmin = bprm->p - limit;
0510 return 0;
0511 }
0512
0513
0514
0515
0516
0517
0518 static int copy_strings(int argc, struct user_arg_ptr argv,
0519 struct linux_binprm *bprm)
0520 {
0521 struct page *kmapped_page = NULL;
0522 char *kaddr = NULL;
0523 unsigned long kpos = 0;
0524 int ret;
0525
0526 while (argc-- > 0) {
0527 const char __user *str;
0528 int len;
0529 unsigned long pos;
0530
0531 ret = -EFAULT;
0532 str = get_user_arg_ptr(argv, argc);
0533 if (IS_ERR(str))
0534 goto out;
0535
0536 len = strnlen_user(str, MAX_ARG_STRLEN);
0537 if (!len)
0538 goto out;
0539
0540 ret = -E2BIG;
0541 if (!valid_arg_len(bprm, len))
0542 goto out;
0543
0544
0545 pos = bprm->p;
0546 str += len;
0547 bprm->p -= len;
0548 #ifdef CONFIG_MMU
0549 if (bprm->p < bprm->argmin)
0550 goto out;
0551 #endif
0552
0553 while (len > 0) {
0554 int offset, bytes_to_copy;
0555
0556 if (fatal_signal_pending(current)) {
0557 ret = -ERESTARTNOHAND;
0558 goto out;
0559 }
0560 cond_resched();
0561
0562 offset = pos % PAGE_SIZE;
0563 if (offset == 0)
0564 offset = PAGE_SIZE;
0565
0566 bytes_to_copy = offset;
0567 if (bytes_to_copy > len)
0568 bytes_to_copy = len;
0569
0570 offset -= bytes_to_copy;
0571 pos -= bytes_to_copy;
0572 str -= bytes_to_copy;
0573 len -= bytes_to_copy;
0574
0575 if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
0576 struct page *page;
0577
0578 page = get_arg_page(bprm, pos, 1);
0579 if (!page) {
0580 ret = -E2BIG;
0581 goto out;
0582 }
0583
0584 if (kmapped_page) {
0585 flush_dcache_page(kmapped_page);
0586 kunmap_local(kaddr);
0587 put_arg_page(kmapped_page);
0588 }
0589 kmapped_page = page;
0590 kaddr = kmap_local_page(kmapped_page);
0591 kpos = pos & PAGE_MASK;
0592 flush_arg_page(bprm, kpos, kmapped_page);
0593 }
0594 if (copy_from_user(kaddr+offset, str, bytes_to_copy)) {
0595 ret = -EFAULT;
0596 goto out;
0597 }
0598 }
0599 }
0600 ret = 0;
0601 out:
0602 if (kmapped_page) {
0603 flush_dcache_page(kmapped_page);
0604 kunmap_local(kaddr);
0605 put_arg_page(kmapped_page);
0606 }
0607 return ret;
0608 }
0609
0610
0611
0612
0613 int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
0614 {
0615 int len = strnlen(arg, MAX_ARG_STRLEN) + 1 ;
0616 unsigned long pos = bprm->p;
0617
0618 if (len == 0)
0619 return -EFAULT;
0620 if (!valid_arg_len(bprm, len))
0621 return -E2BIG;
0622
0623
0624 arg += len;
0625 bprm->p -= len;
0626 if (IS_ENABLED(CONFIG_MMU) && bprm->p < bprm->argmin)
0627 return -E2BIG;
0628
0629 while (len > 0) {
0630 unsigned int bytes_to_copy = min_t(unsigned int, len,
0631 min_not_zero(offset_in_page(pos), PAGE_SIZE));
0632 struct page *page;
0633
0634 pos -= bytes_to_copy;
0635 arg -= bytes_to_copy;
0636 len -= bytes_to_copy;
0637
0638 page = get_arg_page(bprm, pos, 1);
0639 if (!page)
0640 return -E2BIG;
0641 flush_arg_page(bprm, pos & PAGE_MASK, page);
0642 memcpy_to_page(page, offset_in_page(pos), arg, bytes_to_copy);
0643 put_arg_page(page);
0644 }
0645
0646 return 0;
0647 }
0648 EXPORT_SYMBOL(copy_string_kernel);
0649
0650 static int copy_strings_kernel(int argc, const char *const *argv,
0651 struct linux_binprm *bprm)
0652 {
0653 while (argc-- > 0) {
0654 int ret = copy_string_kernel(argv[argc], bprm);
0655 if (ret < 0)
0656 return ret;
0657 if (fatal_signal_pending(current))
0658 return -ERESTARTNOHAND;
0659 cond_resched();
0660 }
0661 return 0;
0662 }
0663
0664 #ifdef CONFIG_MMU
0665
0666
0667
0668
0669
0670
0671
0672
0673
0674
0675
0676
0677
/*
 * Move @vma, together with its page tables, down by @shift bytes.
 *
 * The sequence is: grow the VMA downwards so it spans both the old and the
 * new range, move the page tables to the new start, free the page-table
 * range that is no longer covered, and finally shrink the VMA to the new
 * range.
 *
 * Returns 0 on success, -EFAULT if @vma is not the VMA that find_vma()
 * reports for the new start address, or -ENOMEM if growing the VMA or
 * moving the page tables fails.
 */
static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long old_start = vma->vm_start;
	unsigned long old_end = vma->vm_end;
	unsigned long length = old_end - old_start;
	unsigned long new_start = old_start - shift;
	unsigned long new_end = old_end - shift;
	struct mmu_gather tlb;

	/* A shift that wraps the address range is a caller bug. */
	BUG_ON(new_start > new_end);

	/* Nothing else may be mapped between the new start and the VMA. */
	if (vma != find_vma(mm, new_start))
		return -EFAULT;

	/* Temporarily cover [new_start, old_end). */
	if (vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL))
		return -ENOMEM;

	/* move_page_tables() returns how many bytes it actually moved. */
	if (length != move_page_tables(vma, old_start,
				       vma, new_start, length, false))
		return -ENOMEM;

	lru_add_drain();
	tlb_gather_mmu(&tlb, mm);
	if (new_end > old_start) {
		/* Old and new ranges overlap: free only [new_end, old_end). */
		free_pgd_range(&tlb, new_end, old_end, new_end,
			vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
	} else {
		/* Ranges are disjoint: free the whole old range. */
		free_pgd_range(&tlb, old_start, old_end, new_end,
			vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
	}
	tlb_finish_mmu(&tlb);

	/* Shrink to the final range; the return value is not checked here. */
	vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL);

	return 0;
}
0738
0739
0740
0741
0742
/*
 * Finish setting up the stack VMA that was created for @bprm: apply the
 * final protection flags, move it to its final location relative to
 * @stack_top, and pre-expand it.
 *
 * @bprm:             exec context; bprm->vma is the temporary stack VMA
 * @stack_top:        requested top-of-stack address
 * @executable_stack: EXSTACK_ENABLE_X forces VM_EXEC on, EXSTACK_DISABLE_X
 *                    forces it off, any other value keeps VM_STACK_FLAGS
 *
 * bprm->p, bprm->exec and (if set) bprm->loader are adjusted by the amount
 * the stack is shifted; mm->arg_start and mm->start_stack are set.
 *
 * Returns 0 on success or a negative errno: -ENOMEM if the stack does not
 * fit, -EINTR if taking the mmap write lock was interrupted, the error
 * from mprotect_fixup() or shift_arg_pages(), or -EFAULT if expand_stack()
 * fails.
 */
int setup_arg_pages(struct linux_binprm *bprm,
		    unsigned long stack_top,
		    int executable_stack)
{
	unsigned long ret;
	unsigned long stack_shift;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = bprm->vma;
	struct vm_area_struct *prev = NULL;
	unsigned long vm_flags;
	unsigned long stack_base;
	unsigned long stack_size;
	unsigned long stack_expand;
	unsigned long rlim_stack;
	struct mmu_gather tlb;

#ifdef CONFIG_STACK_GROWSUP
	/* Derive the stack base from the hard stack rlimit. */
	stack_base = bprm->rlim_stack.rlim_max;

	stack_base = calc_max_stack_size(stack_base);

	/* Leave room for the STACK_RND_MASK-sized randomisation range. */
	stack_base += (STACK_RND_MASK << PAGE_SHIFT);

	/* The argument pages must fit below that limit. */
	if (vma->vm_end - vma->vm_start > stack_base)
		return -ENOMEM;

	stack_base = PAGE_ALIGN(stack_top - stack_base);

	stack_shift = vma->vm_start - stack_base;
	mm->arg_start = bprm->p - stack_shift;
	bprm->p = vma->vm_end - stack_shift;
#else
	stack_top = arch_align_stack(stack_top);
	stack_top = PAGE_ALIGN(stack_top);

	/* The shifted VMA must stay above mmap_min_addr. */
	if (unlikely(stack_top < mmap_min_addr) ||
	    unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
		return -ENOMEM;

	stack_shift = vma->vm_end - stack_top;

	bprm->p -= stack_shift;
	mm->arg_start = bprm->p;
#endif

	/* Addresses recorded in bprm move together with the stack. */
	if (bprm->loader)
		bprm->loader -= stack_shift;
	bprm->exec -= stack_shift;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	vm_flags = VM_STACK_FLAGS;

	/* Apply the executable-stack request on top of the default flags. */
	if (unlikely(executable_stack == EXSTACK_ENABLE_X))
		vm_flags |= VM_EXEC;
	else if (executable_stack == EXSTACK_DISABLE_X)
		vm_flags &= ~VM_EXEC;
	vm_flags |= mm->def_flags;
	/* Keep the "incomplete" marker set until the shift below is done. */
	vm_flags |= VM_STACK_INCOMPLETE_SETUP;

	tlb_gather_mmu(&tlb, mm);
	ret = mprotect_fixup(&tlb, vma, &prev, vma->vm_start, vma->vm_end,
			vm_flags);
	tlb_finish_mmu(&tlb);

	if (ret)
		goto out_unlock;
	BUG_ON(prev != vma);

	if (unlikely(vm_flags & VM_EXEC)) {
		pr_warn_once("process '%pD4' started with executable stack\n",
			     bprm->file);
	}

	/* Move the stack pages down to their final location, if needed. */
	if (stack_shift) {
		ret = shift_arg_pages(vma, stack_shift);
		if (ret)
			goto out_unlock;
	}

	/* The VMA is now in place; clear the "incomplete" marker. */
	vma->vm_flags &= ~VM_STACK_INCOMPLETE_SETUP;

	stack_expand = 131072UL; /* 128 KiB of extra stack to map up front */
	stack_size = vma->vm_end - vma->vm_start;

	/* Never expand beyond the (page-aligned) soft stack rlimit. */
	rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
#ifdef CONFIG_STACK_GROWSUP
	if (stack_size + stack_expand > rlim_stack)
		stack_base = vma->vm_start + rlim_stack;
	else
		stack_base = vma->vm_end + stack_expand;
#else
	if (stack_size + stack_expand > rlim_stack)
		stack_base = vma->vm_end - rlim_stack;
	else
		stack_base = vma->vm_start - stack_expand;
#endif
	current->mm->start_stack = bprm->p;
	ret = expand_stack(vma, stack_base);
	if (ret)
		ret = -EFAULT;

out_unlock:
	mmap_write_unlock(mm);
	return ret;
}
EXPORT_SYMBOL(setup_arg_pages);
0864
0865 #else
0866
0867
0868
0869
0870
0871 int transfer_args_to_stack(struct linux_binprm *bprm,
0872 unsigned long *sp_location)
0873 {
0874 unsigned long index, stop, sp;
0875 int ret = 0;
0876
0877 stop = bprm->p >> PAGE_SHIFT;
0878 sp = *sp_location;
0879
0880 for (index = MAX_ARG_PAGES - 1; index >= stop; index--) {
0881 unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0;
0882 char *src = kmap_local_page(bprm->page[index]) + offset;
0883 sp -= PAGE_SIZE - offset;
0884 if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0)
0885 ret = -EFAULT;
0886 kunmap_local(src);
0887 if (ret)
0888 goto out;
0889 }
0890
0891 *sp_location = sp;
0892
0893 out:
0894 return ret;
0895 }
0896 EXPORT_SYMBOL(transfer_args_to_stack);
0897
0898 #endif
0899
0900 static struct file *do_open_execat(int fd, struct filename *name, int flags)
0901 {
0902 struct file *file;
0903 int err;
0904 struct open_flags open_exec_flags = {
0905 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
0906 .acc_mode = MAY_EXEC,
0907 .intent = LOOKUP_OPEN,
0908 .lookup_flags = LOOKUP_FOLLOW,
0909 };
0910
0911 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
0912 return ERR_PTR(-EINVAL);
0913 if (flags & AT_SYMLINK_NOFOLLOW)
0914 open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
0915 if (flags & AT_EMPTY_PATH)
0916 open_exec_flags.lookup_flags |= LOOKUP_EMPTY;
0917
0918 file = do_filp_open(fd, name, &open_exec_flags);
0919 if (IS_ERR(file))
0920 goto out;
0921
0922
0923
0924
0925
0926
0927 err = -EACCES;
0928 if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
0929 path_noexec(&file->f_path)))
0930 goto exit;
0931
0932 err = deny_write_access(file);
0933 if (err)
0934 goto exit;
0935
0936 if (name->name[0] != '\0')
0937 fsnotify_open(file);
0938
0939 out:
0940 return file;
0941
0942 exit:
0943 fput(file);
0944 return ERR_PTR(err);
0945 }
0946
0947 struct file *open_exec(const char *name)
0948 {
0949 struct filename *filename = getname_kernel(name);
0950 struct file *f = ERR_CAST(filename);
0951
0952 if (!IS_ERR(filename)) {
0953 f = do_open_execat(AT_FDCWD, filename, 0);
0954 putname(filename);
0955 }
0956 return f;
0957 }
0958 EXPORT_SYMBOL(open_exec);
0959
0960 #if defined(CONFIG_HAVE_AOUT) || defined(CONFIG_BINFMT_FLAT) || \
0961 defined(CONFIG_BINFMT_ELF_FDPIC)
0962 ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
0963 {
0964 ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
0965 if (res > 0)
0966 flush_icache_user_range(addr, addr + len);
0967 return res;
0968 }
0969 EXPORT_SYMBOL(read_code);
0970 #endif
0971
0972
0973
0974
0975
0976
/*
 * Switch the current task from its old mm (if any) to the new @mm.
 *
 * On success the task's signal->exec_update_lock is left held for writing;
 * in this file it is released by setup_new_exec() or by the error path of
 * begin_new_exec().
 *
 * Returns 0 on success, or the error from down_write_killable() or
 * mmap_read_lock_killable(); in both error cases exec_update_lock is not
 * held on return.
 */
static int exec_mmap(struct mm_struct *mm)
{
	struct task_struct *tsk;
	struct mm_struct *old_mm, *active_mm;
	int ret;

	/* Release the old mm's exec-time state before switching. */
	tsk = current;
	old_mm = current->mm;
	exec_mm_release(tsk, old_mm);
	if (old_mm)
		sync_mm_rss(old_mm);

	ret = down_write_killable(&tsk->signal->exec_update_lock);
	if (ret)
		return ret;

	if (old_mm) {
		/*
		 * Hold the old mm's mmap lock for reading across the switch;
		 * it is dropped again after task_unlock() below.
		 */
		ret = mmap_read_lock_killable(old_mm);
		if (ret) {
			up_write(&tsk->signal->exec_update_lock);
			return ret;
		}
	}

	task_lock(tsk);
	membarrier_exec_mmap(mm);

	local_irq_disable();
	active_mm = tsk->active_mm;
	tsk->active_mm = mm;
	tsk->mm = mm;

	/*
	 * Interrupts are re-enabled either before or after activate_mm(),
	 * depending on CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM.
	 */
	if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
		local_irq_enable();
	activate_mm(active_mm, mm);
	if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
		local_irq_enable();
	tsk->mm->vmacache_seqnum = 0;
	vmacache_flush(tsk);
	task_unlock(tsk);
	if (old_mm) {
		mmap_read_unlock(old_mm);
		BUG_ON(active_mm != old_mm);
		setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
		mm_update_next_owner(old_mm);
		mmput(old_mm);
		return 0;
	}
	/* No old mm: only the borrowed active_mm reference is dropped. */
	mmdrop(active_mm);
	return 0;
}
1040
/*
 * Make @tsk the only thread, and the leader, of its thread group.
 *
 * All other threads are zapped and waited for.  If @tsk is not the group
 * leader it additionally waits for the leader to exit and then takes over
 * the leader's identity (tids, TGID/PGID/SID pids, task list and sibling
 * list position, start times) before releasing the old leader.
 *
 * Returns 0 on success, or -EAGAIN if the group is already exiting, another
 * thread is already exec'ing, or a fatal signal arrives while waiting.
 */
static int de_thread(struct task_struct *tsk)
{
	struct signal_struct *sig = tsk->signal;
	struct sighand_struct *oldsighand = tsk->sighand;
	spinlock_t *lock = &oldsighand->siglock;

	if (thread_group_empty(tsk))
		goto no_thread_group;

	/* Check for a concurrent group exit or exec under siglock. */
	spin_lock_irq(lock);
	if ((sig->flags & SIGNAL_GROUP_EXIT) || sig->group_exec_task) {
		/* Someone else got there first; back out. */
		spin_unlock_irq(lock);
		return -EAGAIN;
	}

	sig->group_exec_task = tsk;
	/* presumably the number of other threads still to exit — TODO confirm */
	sig->notify_count = zap_other_threads(tsk);
	if (!thread_group_leader(tsk))
		sig->notify_count--;

	/* Sleep (killable) until notify_count has dropped to zero. */
	while (sig->notify_count) {
		__set_current_state(TASK_KILLABLE);
		spin_unlock_irq(lock);
		schedule();
		if (__fatal_signal_pending(tsk))
			goto killed;
		spin_lock_irq(lock);
	}
	spin_unlock_irq(lock);

	/* A non-leader must wait for the leader to exit and then replace it. */
	if (!thread_group_leader(tsk)) {
		struct task_struct *leader = tsk->group_leader;

		for (;;) {
			cgroup_threadgroup_change_begin(tsk);
			write_lock_irq(&tasklist_lock);
			/* -1 marks that we are waiting for the leader itself. */
			sig->notify_count = -1;
			if (likely(leader->exit_state))
				break;
			__set_current_state(TASK_KILLABLE);
			write_unlock_irq(&tasklist_lock);
			cgroup_threadgroup_change_end(tsk);
			schedule();
			if (__fatal_signal_pending(tsk))
				goto killed;
		}

		/* Inherit the old leader's start times. */
		tsk->start_time = leader->start_time;
		tsk->start_boottime = leader->start_boottime;

		BUG_ON(!same_thread_group(leader, tsk));

		/* Swap tids and take over the group-wide pids. */
		exchange_tids(tsk, leader);
		transfer_pid(leader, tsk, PIDTYPE_TGID);
		transfer_pid(leader, tsk, PIDTYPE_PGID);
		transfer_pid(leader, tsk, PIDTYPE_SID);

		/* Take the leader's place on the task and sibling lists. */
		list_replace_rcu(&leader->tasks, &tsk->tasks);
		list_replace_init(&leader->sibling, &tsk->sibling);

		tsk->group_leader = tsk;
		leader->group_leader = tsk;

		tsk->exit_signal = SIGCHLD;
		leader->exit_signal = -1;

		/* The old leader must be a zombie by now; mark it dead. */
		BUG_ON(leader->exit_state != EXIT_ZOMBIE);
		leader->exit_state = EXIT_DEAD;

		/* Wake the old leader's parent if the leader is being traced. */
		if (unlikely(leader->ptrace))
			__wake_up_parent(leader, leader->parent);
		write_unlock_irq(&tasklist_lock);
		cgroup_threadgroup_change_end(tsk);

		release_task(leader);
	}

	sig->group_exec_task = NULL;
	sig->notify_count = 0;

no_thread_group:
	/* The surviving task always reports its exit with SIGCHLD. */
	tsk->exit_signal = SIGCHLD;

	BUG_ON(!thread_group_leader(tsk));
	return 0;

killed:
	/* Reset the exec markers under tasklist_lock before giving up. */
	read_lock(&tasklist_lock);
	sig->group_exec_task = NULL;
	sig->notify_count = 0;
	read_unlock(&tasklist_lock);
	return -EAGAIN;
}
1176
1177
1178
1179
1180
1181
1182
1183
1184 static int unshare_sighand(struct task_struct *me)
1185 {
1186 struct sighand_struct *oldsighand = me->sighand;
1187
1188 if (refcount_read(&oldsighand->count) != 1) {
1189 struct sighand_struct *newsighand;
1190
1191
1192
1193
1194 newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
1195 if (!newsighand)
1196 return -ENOMEM;
1197
1198 refcount_set(&newsighand->count, 1);
1199 memcpy(newsighand->action, oldsighand->action,
1200 sizeof(newsighand->action));
1201
1202 write_lock_irq(&tasklist_lock);
1203 spin_lock(&oldsighand->siglock);
1204 rcu_assign_pointer(me->sighand, newsighand);
1205 spin_unlock(&oldsighand->siglock);
1206 write_unlock_irq(&tasklist_lock);
1207
1208 __cleanup_sighand(oldsighand);
1209 }
1210 return 0;
1211 }
1212
/*
 * Copy the command name of @tsk into @buf (of size @buf_size) while holding
 * the task lock, and return @buf.  strscpy_pad() is used for the copy.
 */
char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
{
	task_lock(tsk);

	strscpy_pad(buf, tsk->comm, buf_size);
	task_unlock(tsk);
	return buf;
}
EXPORT_SYMBOL_GPL(__get_task_comm);
1222
1223
1224
1225
1226
1227
/*
 * Set the command name of @tsk to @buf.
 *
 * The rename is traced and the copy into tsk->comm is done under the task
 * lock; perf is notified afterwards, with @exec passed through to
 * perf_event_comm().
 */
void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
{
	task_lock(tsk);
	trace_task_rename(tsk, buf);
	strscpy_pad(tsk->comm, buf, sizeof(tsk->comm));
	task_unlock(tsk);
	perf_event_comm(tsk, exec);
}
1236
1237
1238
1239
1240
1241
1242
/*
 * Commit the current task to the new executable described by @bprm.
 *
 * Computes the new credentials, kills the other threads, switches to the
 * new mm, resets per-process state (timers, signal handlers, flags,
 * close-on-exec files, alternate signal stack, dumpability, comm) and
 * installs the new credentials.
 *
 * bprm->point_of_no_return is set before the first irreversible step.
 * Once exec_mmap() has succeeded, signal->exec_update_lock is held; it is
 * released here on the later error paths and stays held on success.
 *
 * Returns 0 on success or a negative errno from the failing step.
 */
int begin_new_exec(struct linux_binprm * bprm)
{
	struct task_struct *me = current;
	int retval;

	/* Work out the credentials the new program will run with. */
	retval = bprm_creds_from_file(bprm);
	if (retval)
		return retval;

	/* From here on a failure can no longer be reported as a plain error. */
	bprm->point_of_no_return = true;

	/* Become the only thread, and the leader, of the thread group. */
	retval = de_thread(me);
	if (retval)
		goto out;

	/* Cancel this task's outstanding io_uring work. */
	io_uring_task_cancel();

	/* Stop sharing the file descriptor table. */
	retval = unshare_files();
	if (retval)
		goto out;

	/* Record the executable in the new mm before switching to it. */
	retval = set_mm_exe_file(bprm->mm, bprm->file);
	if (retval)
		goto out;

	/* Unreadable binaries force the non-dumpable treatment below. */
	would_dump(bprm, bprm->file);
	if (bprm->have_execfd)
		would_dump(bprm, bprm->executable);

	/* Undo the argument page accounting charged to the old mm. */
	acct_arg_size(bprm, 0);
	retval = exec_mmap(bprm->mm);
	if (retval)
		goto out;

	/* The mm now belongs to the task, no longer to bprm. */
	bprm->mm = NULL;

#ifdef CONFIG_POSIX_TIMERS
	spin_lock_irq(&me->sighand->siglock);
	posix_cpu_timers_exit(me);
	spin_unlock_irq(&me->sighand->siglock);
	exit_itimers(me);
	flush_itimer_signals();
#endif

	/* Make the signal handler table private to this task. */
	retval = unshare_sighand(me);
	if (retval)
		goto out_unlock;

	me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC |
					PF_NOFREEZE | PF_NO_SETAFFINITY);
	flush_thread();
	me->personality &= ~bprm->per_clear;

	clear_syscall_work_syscall_user_dispatch(me);

	/* Close the descriptors that are marked close-on-exec. */
	do_close_on_exec(me->files);

	if (bprm->secureexec) {
		/* Do not deliver a parent-death signal across a secure exec. */
		me->pdeath_signal = 0;

		/* Cap the soft stack limit at _STK_LIM for a secure exec. */
		if (bprm->rlim_stack.rlim_cur > _STK_LIM)
			bprm->rlim_stack.rlim_cur = _STK_LIM;
	}

	/* Forget any alternate signal stack. */
	me->sas_ss_sp = me->sas_ss_size = 0;

	/*
	 * Use the suid_dumpable mode when dumping must be restricted or the
	 * effective ids differ from the real ids; otherwise the process is
	 * dumpable as an ordinary user process.
	 */
	if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ||
	    !(uid_eq(current_euid(), current_uid()) &&
	      gid_eq(current_egid(), current_gid())))
		set_dumpable(current->mm, suid_dumpable);
	else
		set_dumpable(current->mm, SUID_DUMP_USER);

	perf_event_exec();
	__set_task_comm(me, kbasename(bprm->filename), true);

	/* Bump the exec id and reset the signal handlers. */
	WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1);
	flush_signal_handlers(me, 0);

	retval = set_cred_ucounts(bprm->cred);
	if (retval < 0)
		goto out_unlock;

	/* Let the security module act just before the credentials change. */
	security_bprm_committing_creds(bprm);

	commit_creds(bprm->cred);
	bprm->cred = NULL;

	/* Drop perf events when the task is not dumpable as an ordinary user. */
	if (get_dumpable(me->mm) != SUID_DUMP_USER)
		perf_event_exit_task(me);

	/* Let the security module act after the credentials have changed. */
	security_bprm_committed_creds(bprm);

	/* Hand the executable to the new program as a file descriptor. */
	if (bprm->have_execfd) {
		retval = get_unused_fd_flags(0);
		if (retval < 0)
			goto out_unlock;
		fd_install(retval, bprm->executable);
		bprm->executable = NULL;
		bprm->execfd = retval;
	}
	return 0;

out_unlock:
	up_write(&me->signal->exec_update_lock);
out:
	return retval;
}
EXPORT_SYMBOL(begin_new_exec);
1410
1411 void would_dump(struct linux_binprm *bprm, struct file *file)
1412 {
1413 struct inode *inode = file_inode(file);
1414 struct user_namespace *mnt_userns = file_mnt_user_ns(file);
1415 if (inode_permission(mnt_userns, inode, MAY_READ) < 0) {
1416 struct user_namespace *old, *user_ns;
1417 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
1418
1419
1420 user_ns = old = bprm->mm->user_ns;
1421 while ((user_ns != &init_user_ns) &&
1422 !privileged_wrt_inode_uidgid(user_ns, mnt_userns, inode))
1423 user_ns = user_ns->parent;
1424
1425 if (old != user_ns) {
1426 bprm->mm->user_ns = get_user_ns(user_ns);
1427 put_user_ns(old);
1428 }
1429 }
1430 }
1431 EXPORT_SYMBOL(would_dump);
1432
1433 void setup_new_exec(struct linux_binprm * bprm)
1434 {
1435
1436 struct task_struct *me = current;
1437
1438 arch_pick_mmap_layout(me->mm, &bprm->rlim_stack);
1439
1440 arch_setup_new_exec();
1441
1442
1443
1444
1445
1446 me->mm->task_size = TASK_SIZE;
1447 up_write(&me->signal->exec_update_lock);
1448 mutex_unlock(&me->signal->cred_guard_mutex);
1449 }
1450 EXPORT_SYMBOL(setup_new_exec);
1451
1452
1453 void finalize_exec(struct linux_binprm *bprm)
1454 {
1455
1456 task_lock(current->group_leader);
1457 current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
1458 task_unlock(current->group_leader);
1459 }
1460 EXPORT_SYMBOL(finalize_exec);
1461
1462
1463
1464
1465
1466
1467
1468 static int prepare_bprm_creds(struct linux_binprm *bprm)
1469 {
1470 if (mutex_lock_interruptible(¤t->signal->cred_guard_mutex))
1471 return -ERESTARTNOINTR;
1472
1473 bprm->cred = prepare_exec_creds();
1474 if (likely(bprm->cred))
1475 return 0;
1476
1477 mutex_unlock(¤t->signal->cred_guard_mutex);
1478 return -ENOMEM;
1479 }
1480
1481 static void free_bprm(struct linux_binprm *bprm)
1482 {
1483 if (bprm->mm) {
1484 acct_arg_size(bprm, 0);
1485 mmput(bprm->mm);
1486 }
1487 free_arg_pages(bprm);
1488 if (bprm->cred) {
1489 mutex_unlock(¤t->signal->cred_guard_mutex);
1490 abort_creds(bprm->cred);
1491 }
1492 if (bprm->file) {
1493 allow_write_access(bprm->file);
1494 fput(bprm->file);
1495 }
1496 if (bprm->executable)
1497 fput(bprm->executable);
1498
1499 if (bprm->interp != bprm->filename)
1500 kfree(bprm->interp);
1501 kfree(bprm->fdpath);
1502 kfree(bprm);
1503 }
1504
1505 static struct linux_binprm *alloc_bprm(int fd, struct filename *filename)
1506 {
1507 struct linux_binprm *bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1508 int retval = -ENOMEM;
1509 if (!bprm)
1510 goto out;
1511
1512 if (fd == AT_FDCWD || filename->name[0] == '/') {
1513 bprm->filename = filename->name;
1514 } else {
1515 if (filename->name[0] == '\0')
1516 bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
1517 else
1518 bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
1519 fd, filename->name);
1520 if (!bprm->fdpath)
1521 goto out_free;
1522
1523 bprm->filename = bprm->fdpath;
1524 }
1525 bprm->interp = bprm->filename;
1526
1527 retval = bprm_mm_init(bprm);
1528 if (retval)
1529 goto out_free;
1530 return bprm;
1531
1532 out_free:
1533 free_bprm(bprm);
1534 out:
1535 return ERR_PTR(retval);
1536 }
1537
1538 int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
1539 {
1540
1541 if (bprm->interp != bprm->filename)
1542 kfree(bprm->interp);
1543 bprm->interp = kstrdup(interp, GFP_KERNEL);
1544 if (!bprm->interp)
1545 return -ENOMEM;
1546 return 0;
1547 }
1548 EXPORT_SYMBOL(bprm_change_interp);
1549
1550
1551
1552
1553
1554
1555 static void check_unsafe_exec(struct linux_binprm *bprm)
1556 {
1557 struct task_struct *p = current, *t;
1558 unsigned n_fs;
1559
1560 if (p->ptrace)
1561 bprm->unsafe |= LSM_UNSAFE_PTRACE;
1562
1563
1564
1565
1566
1567 if (task_no_new_privs(current))
1568 bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
1569
1570 t = p;
1571 n_fs = 1;
1572 spin_lock(&p->fs->lock);
1573 rcu_read_lock();
1574 while_each_thread(p, t) {
1575 if (t->fs == p->fs)
1576 n_fs++;
1577 }
1578 rcu_read_unlock();
1579
1580 if (p->fs->users > n_fs)
1581 bprm->unsafe |= LSM_UNSAFE_SHARE;
1582 else
1583 p->fs->in_exec = 1;
1584 spin_unlock(&p->fs->lock);
1585 }
1586
1587 static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
1588 {
1589
1590 struct user_namespace *mnt_userns;
1591 struct inode *inode;
1592 unsigned int mode;
1593 kuid_t uid;
1594 kgid_t gid;
1595
1596 if (!mnt_may_suid(file->f_path.mnt))
1597 return;
1598
1599 if (task_no_new_privs(current))
1600 return;
1601
1602 inode = file->f_path.dentry->d_inode;
1603 mode = READ_ONCE(inode->i_mode);
1604 if (!(mode & (S_ISUID|S_ISGID)))
1605 return;
1606
1607 mnt_userns = file_mnt_user_ns(file);
1608
1609
1610 inode_lock(inode);
1611
1612
1613 mode = inode->i_mode;
1614 uid = i_uid_into_mnt(mnt_userns, inode);
1615 gid = i_gid_into_mnt(mnt_userns, inode);
1616 inode_unlock(inode);
1617
1618
1619 if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
1620 !kgid_has_mapping(bprm->cred->user_ns, gid))
1621 return;
1622
1623 if (mode & S_ISUID) {
1624 bprm->per_clear |= PER_CLEAR_ON_SETID;
1625 bprm->cred->euid = uid;
1626 }
1627
1628 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1629 bprm->per_clear |= PER_CLEAR_ON_SETID;
1630 bprm->cred->egid = gid;
1631 }
1632 }
1633
1634
1635
1636
1637 static int bprm_creds_from_file(struct linux_binprm *bprm)
1638 {
1639
1640 struct file *file = bprm->execfd_creds ? bprm->executable : bprm->file;
1641
1642 bprm_fill_uid(bprm, file);
1643 return security_bprm_creds_from_file(bprm, file);
1644 }
1645
1646
1647
1648
1649
1650
1651
1652 static int prepare_binprm(struct linux_binprm *bprm)
1653 {
1654 loff_t pos = 0;
1655
1656 memset(bprm->buf, 0, BINPRM_BUF_SIZE);
1657 return kernel_read(bprm->file, bprm->buf, BINPRM_BUF_SIZE, &pos);
1658 }
1659
1660
1661
1662
1663
1664
1665 int remove_arg_zero(struct linux_binprm *bprm)
1666 {
1667 int ret = 0;
1668 unsigned long offset;
1669 char *kaddr;
1670 struct page *page;
1671
1672 if (!bprm->argc)
1673 return 0;
1674
1675 do {
1676 offset = bprm->p & ~PAGE_MASK;
1677 page = get_arg_page(bprm, bprm->p, 0);
1678 if (!page) {
1679 ret = -EFAULT;
1680 goto out;
1681 }
1682 kaddr = kmap_local_page(page);
1683
1684 for (; offset < PAGE_SIZE && kaddr[offset];
1685 offset++, bprm->p++)
1686 ;
1687
1688 kunmap_local(kaddr);
1689 put_arg_page(page);
1690 } while (offset == PAGE_SIZE);
1691
1692 bprm->p++;
1693 bprm->argc--;
1694 ret = 0;
1695
1696 out:
1697 return ret;
1698 }
1699 EXPORT_SYMBOL(remove_arg_zero);
1700
/*
 * Return non-zero if @c is a tab, a newline, or lies in the range
 * 0x20..0x7e.  Implemented as a function rather than a macro so that the
 * argument expression is evaluated exactly once.
 */
static inline int printable(int c)
{
	return c == '\t' || c == '\n' || (0x20 <= c && c <= 0x7e);
}
1702
1703
1704
1705 static int search_binary_handler(struct linux_binprm *bprm)
1706 {
1707 bool need_retry = IS_ENABLED(CONFIG_MODULES);
1708 struct linux_binfmt *fmt;
1709 int retval;
1710
1711 retval = prepare_binprm(bprm);
1712 if (retval < 0)
1713 return retval;
1714
1715 retval = security_bprm_check(bprm);
1716 if (retval)
1717 return retval;
1718
1719 retval = -ENOENT;
1720 retry:
1721 read_lock(&binfmt_lock);
1722 list_for_each_entry(fmt, &formats, lh) {
1723 if (!try_module_get(fmt->module))
1724 continue;
1725 read_unlock(&binfmt_lock);
1726
1727 retval = fmt->load_binary(bprm);
1728
1729 read_lock(&binfmt_lock);
1730 put_binfmt(fmt);
1731 if (bprm->point_of_no_return || (retval != -ENOEXEC)) {
1732 read_unlock(&binfmt_lock);
1733 return retval;
1734 }
1735 }
1736 read_unlock(&binfmt_lock);
1737
1738 if (need_retry) {
1739 if (printable(bprm->buf[0]) && printable(bprm->buf[1]) &&
1740 printable(bprm->buf[2]) && printable(bprm->buf[3]))
1741 return retval;
1742 if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0)
1743 return retval;
1744 need_retry = false;
1745 goto retry;
1746 }
1747
1748 return retval;
1749 }
1750
1751 static int exec_binprm(struct linux_binprm *bprm)
1752 {
1753 pid_t old_pid, old_vpid;
1754 int ret, depth;
1755
1756
1757 old_pid = current->pid;
1758 rcu_read_lock();
1759 old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
1760 rcu_read_unlock();
1761
1762
1763 for (depth = 0;; depth++) {
1764 struct file *exec;
1765 if (depth > 5)
1766 return -ELOOP;
1767
1768 ret = search_binary_handler(bprm);
1769 if (ret < 0)
1770 return ret;
1771 if (!bprm->interpreter)
1772 break;
1773
1774 exec = bprm->file;
1775 bprm->file = bprm->interpreter;
1776 bprm->interpreter = NULL;
1777
1778 allow_write_access(exec);
1779 if (unlikely(bprm->have_execfd)) {
1780 if (bprm->executable) {
1781 fput(exec);
1782 return -ENOEXEC;
1783 }
1784 bprm->executable = exec;
1785 } else
1786 fput(exec);
1787 }
1788
1789 audit_bprm(bprm);
1790 trace_sched_process_exec(current, old_pid, bprm);
1791 ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
1792 proc_exec_connector(current);
1793 return 0;
1794 }
1795
1796
1797
1798
1799 static int bprm_execve(struct linux_binprm *bprm,
1800 int fd, struct filename *filename, int flags)
1801 {
1802 struct file *file;
1803 int retval;
1804
1805 retval = prepare_bprm_creds(bprm);
1806 if (retval)
1807 return retval;
1808
1809 check_unsafe_exec(bprm);
1810 current->in_execve = 1;
1811
1812 file = do_open_execat(fd, filename, flags);
1813 retval = PTR_ERR(file);
1814 if (IS_ERR(file))
1815 goto out_unmark;
1816
1817 sched_exec();
1818
1819 bprm->file = file;
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829 if (bprm->fdpath && get_close_on_exec(fd))
1830 bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
1831
1832
1833 retval = security_bprm_creds_for_exec(bprm);
1834 if (retval)
1835 goto out;
1836
1837 retval = exec_binprm(bprm);
1838 if (retval < 0)
1839 goto out;
1840
1841
1842 current->fs->in_exec = 0;
1843 current->in_execve = 0;
1844 rseq_execve(current);
1845 acct_update_integrals(current);
1846 task_numa_free(current, false);
1847 return retval;
1848
1849 out:
1850
1851
1852
1853
1854
1855
1856 if (bprm->point_of_no_return && !fatal_signal_pending(current))
1857 force_fatal_sig(SIGSEGV);
1858
1859 out_unmark:
1860 current->fs->in_exec = 0;
1861 current->in_execve = 0;
1862
1863 return retval;
1864 }
1865
1866 static int do_execveat_common(int fd, struct filename *filename,
1867 struct user_arg_ptr argv,
1868 struct user_arg_ptr envp,
1869 int flags)
1870 {
1871 struct linux_binprm *bprm;
1872 int retval;
1873
1874 if (IS_ERR(filename))
1875 return PTR_ERR(filename);
1876
1877
1878
1879
1880
1881
1882
1883 if ((current->flags & PF_NPROC_EXCEEDED) &&
1884 is_ucounts_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
1885 retval = -EAGAIN;
1886 goto out_ret;
1887 }
1888
1889
1890
1891 current->flags &= ~PF_NPROC_EXCEEDED;
1892
1893 bprm = alloc_bprm(fd, filename);
1894 if (IS_ERR(bprm)) {
1895 retval = PTR_ERR(bprm);
1896 goto out_ret;
1897 }
1898
1899 retval = count(argv, MAX_ARG_STRINGS);
1900 if (retval == 0)
1901 pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n",
1902 current->comm, bprm->filename);
1903 if (retval < 0)
1904 goto out_free;
1905 bprm->argc = retval;
1906
1907 retval = count(envp, MAX_ARG_STRINGS);
1908 if (retval < 0)
1909 goto out_free;
1910 bprm->envc = retval;
1911
1912 retval = bprm_stack_limits(bprm);
1913 if (retval < 0)
1914 goto out_free;
1915
1916 retval = copy_string_kernel(bprm->filename, bprm);
1917 if (retval < 0)
1918 goto out_free;
1919 bprm->exec = bprm->p;
1920
1921 retval = copy_strings(bprm->envc, envp, bprm);
1922 if (retval < 0)
1923 goto out_free;
1924
1925 retval = copy_strings(bprm->argc, argv, bprm);
1926 if (retval < 0)
1927 goto out_free;
1928
1929
1930
1931
1932
1933
1934
1935 if (bprm->argc == 0) {
1936 retval = copy_string_kernel("", bprm);
1937 if (retval < 0)
1938 goto out_free;
1939 bprm->argc = 1;
1940 }
1941
1942 retval = bprm_execve(bprm, fd, filename, flags);
1943 out_free:
1944 free_bprm(bprm);
1945
1946 out_ret:
1947 putname(filename);
1948 return retval;
1949 }
1950
1951 int kernel_execve(const char *kernel_filename,
1952 const char *const *argv, const char *const *envp)
1953 {
1954 struct filename *filename;
1955 struct linux_binprm *bprm;
1956 int fd = AT_FDCWD;
1957 int retval;
1958
1959
1960 if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
1961 return -EINVAL;
1962
1963 filename = getname_kernel(kernel_filename);
1964 if (IS_ERR(filename))
1965 return PTR_ERR(filename);
1966
1967 bprm = alloc_bprm(fd, filename);
1968 if (IS_ERR(bprm)) {
1969 retval = PTR_ERR(bprm);
1970 goto out_ret;
1971 }
1972
1973 retval = count_strings_kernel(argv);
1974 if (WARN_ON_ONCE(retval == 0))
1975 retval = -EINVAL;
1976 if (retval < 0)
1977 goto out_free;
1978 bprm->argc = retval;
1979
1980 retval = count_strings_kernel(envp);
1981 if (retval < 0)
1982 goto out_free;
1983 bprm->envc = retval;
1984
1985 retval = bprm_stack_limits(bprm);
1986 if (retval < 0)
1987 goto out_free;
1988
1989 retval = copy_string_kernel(bprm->filename, bprm);
1990 if (retval < 0)
1991 goto out_free;
1992 bprm->exec = bprm->p;
1993
1994 retval = copy_strings_kernel(bprm->envc, envp, bprm);
1995 if (retval < 0)
1996 goto out_free;
1997
1998 retval = copy_strings_kernel(bprm->argc, argv, bprm);
1999 if (retval < 0)
2000 goto out_free;
2001
2002 retval = bprm_execve(bprm, fd, filename, 0);
2003 out_free:
2004 free_bprm(bprm);
2005 out_ret:
2006 putname(filename);
2007 return retval;
2008 }
2009
2010 static int do_execve(struct filename *filename,
2011 const char __user *const __user *__argv,
2012 const char __user *const __user *__envp)
2013 {
2014 struct user_arg_ptr argv = { .ptr.native = __argv };
2015 struct user_arg_ptr envp = { .ptr.native = __envp };
2016 return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
2017 }
2018
2019 static int do_execveat(int fd, struct filename *filename,
2020 const char __user *const __user *__argv,
2021 const char __user *const __user *__envp,
2022 int flags)
2023 {
2024 struct user_arg_ptr argv = { .ptr.native = __argv };
2025 struct user_arg_ptr envp = { .ptr.native = __envp };
2026
2027 return do_execveat_common(fd, filename, argv, envp, flags);
2028 }
2029
2030 #ifdef CONFIG_COMPAT
2031 static int compat_do_execve(struct filename *filename,
2032 const compat_uptr_t __user *__argv,
2033 const compat_uptr_t __user *__envp)
2034 {
2035 struct user_arg_ptr argv = {
2036 .is_compat = true,
2037 .ptr.compat = __argv,
2038 };
2039 struct user_arg_ptr envp = {
2040 .is_compat = true,
2041 .ptr.compat = __envp,
2042 };
2043 return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
2044 }
2045
2046 static int compat_do_execveat(int fd, struct filename *filename,
2047 const compat_uptr_t __user *__argv,
2048 const compat_uptr_t __user *__envp,
2049 int flags)
2050 {
2051 struct user_arg_ptr argv = {
2052 .is_compat = true,
2053 .ptr.compat = __argv,
2054 };
2055 struct user_arg_ptr envp = {
2056 .is_compat = true,
2057 .ptr.compat = __envp,
2058 };
2059 return do_execveat_common(fd, filename, argv, envp, flags);
2060 }
2061 #endif
2062
2063 void set_binfmt(struct linux_binfmt *new)
2064 {
2065 struct mm_struct *mm = current->mm;
2066
2067 if (mm->binfmt)
2068 module_put(mm->binfmt->module);
2069
2070 mm->binfmt = new;
2071 if (new)
2072 __module_get(new->module);
2073 }
2074 EXPORT_SYMBOL(set_binfmt);
2075
2076
2077
2078
2079 void set_dumpable(struct mm_struct *mm, int value)
2080 {
2081 if (WARN_ON((unsigned)value > SUID_DUMP_ROOT))
2082 return;
2083
2084 set_mask_bits(&mm->flags, MMF_DUMPABLE_MASK, value);
2085 }
2086
2087 SYSCALL_DEFINE3(execve,
2088 const char __user *, filename,
2089 const char __user *const __user *, argv,
2090 const char __user *const __user *, envp)
2091 {
2092 return do_execve(getname(filename), argv, envp);
2093 }
2094
2095 SYSCALL_DEFINE5(execveat,
2096 int, fd, const char __user *, filename,
2097 const char __user *const __user *, argv,
2098 const char __user *const __user *, envp,
2099 int, flags)
2100 {
2101 return do_execveat(fd,
2102 getname_uflags(filename, flags),
2103 argv, envp, flags);
2104 }
2105
2106 #ifdef CONFIG_COMPAT
2107 COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
2108 const compat_uptr_t __user *, argv,
2109 const compat_uptr_t __user *, envp)
2110 {
2111 return compat_do_execve(getname(filename), argv, envp);
2112 }
2113
2114 COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
2115 const char __user *, filename,
2116 const compat_uptr_t __user *, argv,
2117 const compat_uptr_t __user *, envp,
2118 int, flags)
2119 {
2120 return compat_do_execveat(fd,
2121 getname_uflags(filename, flags),
2122 argv, envp, flags);
2123 }
2124 #endif
2125
2126 #ifdef CONFIG_SYSCTL
2127
2128 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2129 void *buffer, size_t *lenp, loff_t *ppos)
2130 {
2131 int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2132
2133 if (!error)
2134 validate_coredump_safety();
2135 return error;
2136 }
2137
2138 static struct ctl_table fs_exec_sysctls[] = {
2139 {
2140 .procname = "suid_dumpable",
2141 .data = &suid_dumpable,
2142 .maxlen = sizeof(int),
2143 .mode = 0644,
2144 .proc_handler = proc_dointvec_minmax_coredump,
2145 .extra1 = SYSCTL_ZERO,
2146 .extra2 = SYSCTL_TWO,
2147 },
2148 { }
2149 };
2150
/*
 * Register fs_exec_sysctls under the "fs" sysctl path.  Always returns 0;
 * the result of the registration itself is not inspected here.
 */
static int __init init_fs_exec_sysctls(void)
{
	register_sysctl_init("fs", fs_exec_sysctls);
	return 0;
}

/* Hooked up as an fs_initcall. */
fs_initcall(init_fs_exec_sysctls);
2158 #endif