Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 #include <linux/slab.h>
0003 #include <linux/file.h>
0004 #include <linux/fdtable.h>
0005 #include <linux/freezer.h>
0006 #include <linux/mm.h>
0007 #include <linux/stat.h>
0008 #include <linux/fcntl.h>
0009 #include <linux/swap.h>
0010 #include <linux/ctype.h>
0011 #include <linux/string.h>
0012 #include <linux/init.h>
0013 #include <linux/pagemap.h>
0014 #include <linux/perf_event.h>
0015 #include <linux/highmem.h>
0016 #include <linux/spinlock.h>
0017 #include <linux/key.h>
0018 #include <linux/personality.h>
0019 #include <linux/binfmts.h>
0020 #include <linux/coredump.h>
0021 #include <linux/sched/coredump.h>
0022 #include <linux/sched/signal.h>
0023 #include <linux/sched/task_stack.h>
0024 #include <linux/utsname.h>
0025 #include <linux/pid_namespace.h>
0026 #include <linux/module.h>
0027 #include <linux/namei.h>
0028 #include <linux/mount.h>
0029 #include <linux/security.h>
0030 #include <linux/syscalls.h>
0031 #include <linux/tsacct_kern.h>
0032 #include <linux/cn_proc.h>
0033 #include <linux/audit.h>
0034 #include <linux/kmod.h>
0035 #include <linux/fsnotify.h>
0036 #include <linux/fs_struct.h>
0037 #include <linux/pipe_fs_i.h>
0038 #include <linux/oom.h>
0039 #include <linux/compat.h>
0040 #include <linux/fs.h>
0041 #include <linux/path.h>
0042 #include <linux/timekeeping.h>
0043 #include <linux/sysctl.h>
0044 #include <linux/elf.h>
0045 
0046 #include <linux/uaccess.h>
0047 #include <asm/mmu_context.h>
0048 #include <asm/tlb.h>
0049 #include <asm/exec.h>
0050 
0051 #include <trace/events/task.h>
0052 #include "internal.h"
0053 
0054 #include <trace/events/sched.h>
0055 
static bool dump_vma_snapshot(struct coredump_params *cprm);
static void free_vma_snapshot(struct coredump_params *cprm);

/* Tunables exposed through sysctl (see coredump_sysctls below). */
static int core_uses_pid;
static unsigned int core_pipe_limit;
static char core_pattern[CORENAME_MAX_SIZE] = "core";
/* High-water mark for name buffer sizing; updated racily in expand_corename(). */
static int core_name_size = CORENAME_MAX_SIZE;

/* Dynamically grown buffer holding the expanded core-file name. */
struct core_name {
	char *corename;		/* kmalloc'd buffer */
	int used, size;		/* bytes used / allocated capacity */
};
0068 
0069 static int expand_corename(struct core_name *cn, int size)
0070 {
0071     char *corename = krealloc(cn->corename, size, GFP_KERNEL);
0072 
0073     if (!corename)
0074         return -ENOMEM;
0075 
0076     if (size > core_name_size) /* racy but harmless */
0077         core_name_size = size;
0078 
0079     cn->size = ksize(corename);
0080     cn->corename = corename;
0081     return 0;
0082 }
0083 
0084 static __printf(2, 0) int cn_vprintf(struct core_name *cn, const char *fmt,
0085                      va_list arg)
0086 {
0087     int free, need;
0088     va_list arg_copy;
0089 
0090 again:
0091     free = cn->size - cn->used;
0092 
0093     va_copy(arg_copy, arg);
0094     need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy);
0095     va_end(arg_copy);
0096 
0097     if (need < free) {
0098         cn->used += need;
0099         return 0;
0100     }
0101 
0102     if (!expand_corename(cn, cn->size + need - free + 1))
0103         goto again;
0104 
0105     return -ENOMEM;
0106 }
0107 
0108 static __printf(2, 3) int cn_printf(struct core_name *cn, const char *fmt, ...)
0109 {
0110     va_list arg;
0111     int ret;
0112 
0113     va_start(arg, fmt);
0114     ret = cn_vprintf(cn, fmt, arg);
0115     va_end(arg);
0116 
0117     return ret;
0118 }
0119 
/*
 * Like cn_printf(), but afterwards sanitizes the newly appended path
 * component so it cannot escape its directory: "." and ".." are defused
 * by overwriting the first byte with '!', an empty component becomes
 * "!", and every '/' is rewritten to '!'.
 */
static __printf(2, 3)
int cn_esc_printf(struct core_name *cn, const char *fmt, ...)
{
	int cur = cn->used;	/* start of the component just appended */
	va_list arg;
	int ret;

	va_start(arg, fmt);
	ret = cn_vprintf(cn, fmt, arg);
	va_end(arg);

	if (ret == 0) {
		/*
		 * Ensure that this coredump name component can't cause the
		 * resulting corefile path to consist of a ".." or ".".
		 */
		if ((cn->used - cur == 1 && cn->corename[cur] == '.') ||
				(cn->used - cur == 2 && cn->corename[cur] == '.'
				&& cn->corename[cur+1] == '.'))
			cn->corename[cur] = '!';

		/*
		 * Empty names are fishy and could be used to create a "//" in a
		 * corefile name, causing the coredump to happen one directory
		 * level too high. Enforce that all components of the core
		 * pattern are at least one character long.
		 */
		if (cn->used == cur)
			ret = cn_printf(cn, "!");
	}

	/* Escape '/' even on the error path, for whatever was appended. */
	for (; cur < cn->used; ++cur) {
		if (cn->corename[cur] == '/')
			cn->corename[cur] = '!';
	}
	return ret;
}
0157 
/*
 * Append the path of the executable backing current->mm to @cn (just the
 * final path component when @name_only), escaped via cn_esc_printf().
 * Falls back to "<comm> (path unknown)" when no exe file is recorded.
 * Returns 0 or a negative errno.
 */
static int cn_print_exe_file(struct core_name *cn, bool name_only)
{
	struct file *exe_file;
	char *pathbuf, *path, *ptr;
	int ret;

	exe_file = get_mm_exe_file(current->mm);
	if (!exe_file)
		return cn_esc_printf(cn, "%s (path unknown)", current->comm);

	pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!pathbuf) {
		ret = -ENOMEM;
		goto put_exe_file;
	}

	/* file_path() builds the path at the tail of pathbuf. */
	path = file_path(exe_file, pathbuf, PATH_MAX);
	if (IS_ERR(path)) {
		ret = PTR_ERR(path);
		goto free_buf;
	}

	if (name_only) {
		/* Strip everything up to and including the last '/'. */
		ptr = strrchr(path, '/');
		if (ptr)
			path = ptr + 1;
	}
	ret = cn_esc_printf(cn, "%s", path);

free_buf:
	kfree(pathbuf);
put_exe_file:
	fput(exe_file);
	return ret;
}
0193 
/* format_corename will inspect the pattern parameter, and output a
 * name into corename, which must have space for at least
 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
 *
 * For a "|helper" pattern, *argv is allocated and filled with offsets
 * into cn->corename delimiting the NUL-separated helper arguments
 * (*argc entries); the caller owns and frees *argv.
 *
 * Returns 1 when the pattern names a pipe helper, 0 for a plain file,
 * or a negative errno on failure.
 */
static int format_corename(struct core_name *cn, struct coredump_params *cprm,
			   size_t **argv, int *argc)
{
	const struct cred *cred = current_cred();
	const char *pat_ptr = core_pattern;
	int ispipe = (*pat_ptr == '|');
	bool was_space = false;
	int pid_in_pattern = 0;
	int err = 0;

	cn->used = 0;
	cn->corename = NULL;
	if (expand_corename(cn, core_name_size))
		return -ENOMEM;
	cn->corename[0] = '\0';

	if (ispipe) {
		/* Worst case: every other pattern byte starts an argument. */
		int argvs = sizeof(core_pattern) / 2;
		(*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL);
		if (!(*argv))
			return -ENOMEM;
		(*argv)[(*argc)++] = 0;
		++pat_ptr;
		/* A bare "|" with no helper path is an error. */
		if (!(*pat_ptr))
			return -ENOMEM;
	}

	/* Repeat as long as we have more pattern to process and more output
	   space */
	while (*pat_ptr) {
		/*
		 * Split on spaces before doing template expansion so that
		 * %e and %E don't get split if they have spaces in them
		 */
		if (ispipe) {
			if (isspace(*pat_ptr)) {
				if (cn->used != 0)
					was_space = true;
				pat_ptr++;
				continue;
			} else if (was_space) {
				/* Terminate previous arg, record next offset. */
				was_space = false;
				err = cn_printf(cn, "%c", '\0');
				if (err)
					return err;
				(*argv)[(*argc)++] = cn->used;
			}
		}
		if (*pat_ptr != '%') {
			err = cn_printf(cn, "%c", *pat_ptr++);
		} else {
			switch (*++pat_ptr) {
			/* single % at the end, drop that */
			case 0:
				goto out;
			/* Double percent, output one percent */
			case '%':
				err = cn_printf(cn, "%c", '%');
				break;
			/* pid */
			case 'p':
				pid_in_pattern = 1;
				err = cn_printf(cn, "%d",
					      task_tgid_vnr(current));
				break;
			/* global pid */
			case 'P':
				err = cn_printf(cn, "%d",
					      task_tgid_nr(current));
				break;
			/* tid as seen in the task's pid namespace */
			case 'i':
				err = cn_printf(cn, "%d",
					      task_pid_vnr(current));
				break;
			/* global tid */
			case 'I':
				err = cn_printf(cn, "%d",
					      task_pid_nr(current));
				break;
			/* uid */
			case 'u':
				err = cn_printf(cn, "%u",
						from_kuid(&init_user_ns,
							  cred->uid));
				break;
			/* gid */
			case 'g':
				err = cn_printf(cn, "%u",
						from_kgid(&init_user_ns,
							  cred->gid));
				break;
			/* dumpability mode of the crashing process */
			case 'd':
				err = cn_printf(cn, "%d",
					__get_dumpable(cprm->mm_flags));
				break;
			/* signal that caused the coredump */
			case 's':
				err = cn_printf(cn, "%d",
						cprm->siginfo->si_signo);
				break;
			/* UNIX time of coredump */
			case 't': {
				time64_t time;

				time = ktime_get_real_seconds();
				err = cn_printf(cn, "%lld", time);
				break;
			}
			/* hostname */
			case 'h':
				down_read(&uts_sem);
				err = cn_esc_printf(cn, "%s",
					      utsname()->nodename);
				up_read(&uts_sem);
				break;
			/* executable, could be changed by prctl PR_SET_NAME etc */
			case 'e':
				err = cn_esc_printf(cn, "%s", current->comm);
				break;
			/* file name of executable */
			case 'f':
				err = cn_print_exe_file(cn, true);
				break;
			/* full path of executable */
			case 'E':
				err = cn_print_exe_file(cn, false);
				break;
			/* core limit size */
			case 'c':
				err = cn_printf(cn, "%lu",
					      rlimit(RLIMIT_CORE));
				break;
			/* unknown specifiers are silently dropped */
			default:
				break;
			}
			++pat_ptr;
		}

		if (err)
			return err;
	}

out:
	/* Backward compatibility with core_uses_pid:
	 *
	 * If core_pattern does not include a %p (as is the default)
	 * and core_uses_pid is set, then .%pid will be appended to
	 * the filename. Do not do this for piped commands. */
	if (!ispipe && !pid_in_pattern && core_uses_pid) {
		err = cn_printf(cn, ".%d", task_tgid_vnr(current));
		if (err)
			return err;
	}
	return ispipe;
}
0351 
/*
 * Mark the whole thread group of @start as exiting with @exit_code and
 * wake every other live thread with a SIGKILL so it heads into the
 * coredump exit path.  Threads already past PF_POSTCOREDUMP, and the
 * dumper itself, are skipped.  Returns the number of threads the dumper
 * must wait for.  Caller holds ->siglock (see zap_threads()).
 */
static int zap_process(struct task_struct *start, int exit_code)
{
	struct task_struct *t;
	int nr = 0;

	/* ignore all signals except SIGKILL, see prepare_signal() */
	start->signal->flags = SIGNAL_GROUP_EXIT;
	start->signal->group_exit_code = exit_code;
	start->signal->group_stop_count = 0;

	for_each_thread(start, t) {
		task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
		if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
			sigaddset(&t->pending.signal, SIGKILL);
			signal_wake_up(t, 1);
			nr++;
		}
	}

	return nr;
}
0373 
/*
 * Install @core_state on the signal struct and kill the sibling threads,
 * unless the group is already exiting or execing — in that case the dump
 * is abandoned.  Returns the number of threads to wait for (>= 0), or
 * -EAGAIN when another exit/exec beat us to it.
 */
static int zap_threads(struct task_struct *tsk,
			struct core_state *core_state, int exit_code)
{
	struct signal_struct *signal = tsk->signal;
	int nr = -EAGAIN;

	spin_lock_irq(&tsk->sighand->siglock);
	if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) {
		signal->core_state = core_state;
		nr = zap_process(tsk, exit_code);
		/* The dumper must not be interrupted by its own pending signal. */
		clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
		tsk->flags |= PF_DUMPCORE;
		atomic_set(&core_state->nr_threads, nr);
	}
	spin_unlock_irq(&tsk->sighand->siglock);
	return nr;
}
0391 
/*
 * Kill the other threads and wait until they have all checked in
 * (core_state->startup) and then gone fully inactive, so their register
 * state is stable in memory before the dump is written.  Returns the
 * number of waiting threads, or a negative errno if no dump should occur.
 */
static int coredump_wait(int exit_code, struct core_state *core_state)
{
	struct task_struct *tsk = current;
	int core_waiters = -EBUSY;

	init_completion(&core_state->startup);
	core_state->dumper.task = tsk;
	core_state->dumper.next = NULL;

	core_waiters = zap_threads(tsk, core_state, exit_code);
	if (core_waiters > 0) {
		struct core_thread *ptr;

		/* Don't let the freezer count us as a blocker while we wait. */
		freezer_do_not_count();
		wait_for_completion(&core_state->startup);
		freezer_count();
		/*
		 * Wait for all the threads to become inactive, so that
		 * all the thread context (extended register state, like
		 * fpu etc) gets copied to the memory.
		 */
		ptr = core_state->dumper.next;
		while (ptr != NULL) {
			wait_task_inactive(ptr->task, 0);
			ptr = ptr->next;
		}
	}

	return core_waiters;
}
0422 
/*
 * Tear down the core_state set up by coredump_wait(): record success in
 * group_exit_code (the 0x80 "dumped" bit) and release every parked
 * thread so it can finish exiting.
 */
static void coredump_finish(bool core_dumped)
{
	struct core_thread *curr, *next;
	struct task_struct *task;

	spin_lock_irq(&current->sighand->siglock);
	if (core_dumped && !__fatal_signal_pending(current))
		current->signal->group_exit_code |= 0x80;
	next = current->signal->core_state->dumper.next;
	current->signal->core_state = NULL;
	spin_unlock_irq(&current->sighand->siglock);

	while ((curr = next) != NULL) {
		/* Read ->next before publishing ->task = NULL (see below). */
		next = curr->next;
		task = curr->task;
		/*
		 * see coredump_task_exit(), curr->task must not see
		 * ->task == NULL before we read ->next.
		 */
		smp_mb();
		curr->task = NULL;
		wake_up_process(task);
	}
}
0447 
0448 static bool dump_interrupted(void)
0449 {
0450     /*
0451      * SIGKILL or freezing() interrupt the coredumping. Perhaps we
0452      * can do try_to_freeze() and check __fatal_signal_pending(),
0453      * but then we need to teach dump_write() to restart and clear
0454      * TIF_SIGPENDING.
0455      */
0456     return fatal_signal_pending(current) || freezing(current);
0457 }
0458 
/*
 * Join the dump pipe as an extra reader and block until the usermode
 * helper — the real reader — has gone away (pipe->readers drops back to
 * just us), so the helper can consume the whole dump before we proceed
 * with teardown.  Called only when core_pipe_limit is set.
 */
static void wait_for_dump_helpers(struct file *file)
{
	struct pipe_inode_info *pipe = file->private_data;

	pipe_lock(pipe);
	pipe->readers++;
	pipe->writers--;
	/* Tell the helper that the writer side is done. */
	wake_up_interruptible_sync(&pipe->rd_wait);
	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	pipe_unlock(pipe);

	/*
	 * We actually want wait_event_freezable() but then we need
	 * to clear TIF_SIGPENDING and improve dump_interrupted().
	 */
	wait_event_interruptible(pipe->rd_wait, pipe->readers == 1);

	pipe_lock(pipe);
	pipe->readers--;
	pipe->writers++;
	pipe_unlock(pipe);
}
0481 
/*
 * umh_pipe_setup
 * helper function to customize the process used
 * to collect the core in userspace.  Specifically
 * it sets up a pipe and installs it as fd 0 (stdin)
 * for the process.  Returns 0 on success, or
 * PTR_ERR on failure.
 * Note that it also sets the core limit to 1.  This
 * is a special value that we use to trap recursive
 * core dumps
 */
static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
{
	struct file *files[2];
	struct coredump_params *cp = (struct coredump_params *)info->data;
	int err = create_pipe_files(files, 0);
	if (err)
		return err;

	/* Write end stays with the dumper; do_coredump() writes through it. */
	cp->file = files[1];

	/* Read end becomes the helper's stdin. */
	err = replace_fd(0, files[0], 0);
	fput(files[0]);
	/* and disallow core files too */
	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};

	return err;
}
0510 
/*
 * do_coredump - write a core dump for the current (crashing) task.
 * @siginfo: describes the fatal signal that triggered the dump.
 *
 * Expands core_pattern into either a "|helper" pipe command or a
 * filesystem path, quiesces the other threads, and asks the binfmt
 * handler to write the dump.  All failures silently abort the dump;
 * cleanup is via the chained labels at the bottom.
 */
void do_coredump(const kernel_siginfo_t *siginfo)
{
	struct core_state core_state;
	struct core_name cn;
	struct mm_struct *mm = current->mm;
	struct linux_binfmt * binfmt;
	const struct cred *old_cred;
	struct cred *cred;
	int retval = 0;
	int ispipe;
	size_t *argv = NULL;
	int argc = 0;
	/* require nonrelative corefile path and be extra careful */
	bool need_suid_safe = false;
	bool core_dumped = false;
	/* Counts concurrent pipe dumps, checked against core_pipe_limit. */
	static atomic_t core_dump_count = ATOMIC_INIT(0);
	struct coredump_params cprm = {
		.siginfo = siginfo,
		.regs = signal_pt_regs(),
		.limit = rlimit(RLIMIT_CORE),
		/*
		 * We must use the same mm->flags while dumping core to avoid
		 * inconsistency of bit flags, since this flag is not protected
		 * by any locks.
		 */
		.mm_flags = mm->flags,
		.vma_meta = NULL,
	};

	audit_core_dumps(siginfo->si_signo);

	binfmt = mm->binfmt;
	if (!binfmt || !binfmt->core_dump)
		goto fail;
	if (!__get_dumpable(cprm.mm_flags))
		goto fail;

	cred = prepare_creds();
	if (!cred)
		goto fail;
	/*
	 * We cannot trust fsuid as being the "true" uid of the process
	 * nor do we know its entire history. We only know it was tainted
	 * so we dump it as root in mode 2, and only into a controlled
	 * environment (pipe handler or fully qualified path).
	 */
	if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {
		/* Setuid core dump mode */
		cred->fsuid = GLOBAL_ROOT_UID;	/* Dump root private */
		need_suid_safe = true;
	}

	/* Quiesce all other threads; <0 means another exit/exec won the race. */
	retval = coredump_wait(siginfo->si_signo, &core_state);
	if (retval < 0)
		goto fail_creds;

	old_cred = override_creds(cred);

	/* ispipe: >0 for a "|helper" pattern, 0 for a file, <0 on error. */
	ispipe = format_corename(&cn, &cprm, &argv, &argc);

	if (ispipe) {
		int argi;
		int dump_count;
		char **helper_argv;
		struct subprocess_info *sub_info;

		if (ispipe < 0) {
			printk(KERN_WARNING "format_corename failed\n");
			printk(KERN_WARNING "Aborting core\n");
			goto fail_unlock;
		}

		if (cprm.limit == 1) {
			/* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
			 *
			 * Normally core limits are irrelevant to pipes, since
			 * we're not writing to the file system, but we use
			 * cprm.limit of 1 here as a special value, this is a
			 * consistent way to catch recursive crashes.
			 * We can still crash if the core_pattern binary sets
			 * RLIM_CORE = !1, but it runs as root, and can do
			 * lots of stupid things.
			 *
			 * Note that we use task_tgid_vnr here to grab the pid
			 * of the process group leader.  That way we get the
			 * right pid if a thread in a multi-threaded
			 * core_pattern process dies.
			 */
			printk(KERN_WARNING
				"Process %d(%s) has RLIMIT_CORE set to 1\n",
				task_tgid_vnr(current), current->comm);
			printk(KERN_WARNING "Aborting core\n");
			goto fail_unlock;
		}
		cprm.limit = RLIM_INFINITY;

		dump_count = atomic_inc_return(&core_dump_count);
		if (core_pipe_limit && (core_pipe_limit < dump_count)) {
			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
			       task_tgid_vnr(current), current->comm);
			printk(KERN_WARNING "Skipping core dump\n");
			goto fail_dropcount;
		}

		/* Build argv from the offsets format_corename() recorded. */
		helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv),
					    GFP_KERNEL);
		if (!helper_argv) {
			printk(KERN_WARNING "%s failed to allocate memory\n",
			       __func__);
			goto fail_dropcount;
		}
		for (argi = 0; argi < argc; argi++)
			helper_argv[argi] = cn.corename + argv[argi];
		helper_argv[argi] = NULL;

		retval = -ENOMEM;
		sub_info = call_usermodehelper_setup(helper_argv[0],
						helper_argv, NULL, GFP_KERNEL,
						umh_pipe_setup, NULL, &cprm);
		if (sub_info)
			retval = call_usermodehelper_exec(sub_info,
							  UMH_WAIT_EXEC);

		kfree(helper_argv);
		if (retval) {
			printk(KERN_INFO "Core dump to |%s pipe failed\n",
			       cn.corename);
			goto close_fail;
		}
	} else {
		struct user_namespace *mnt_userns;
		struct inode *inode;
		/* O_EXCL: never follow or reuse a pre-existing file. */
		int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
				 O_LARGEFILE | O_EXCL;

		if (cprm.limit < binfmt->min_coredump)
			goto fail_unlock;

		if (need_suid_safe && cn.corename[0] != '/') {
			printk(KERN_WARNING "Pid %d(%s) can only dump core "\
				"to fully qualified path!\n",
				task_tgid_vnr(current), current->comm);
			printk(KERN_WARNING "Skipping core dump\n");
			goto fail_unlock;
		}

		/*
		 * Unlink the file if it exists unless this is a SUID
		 * binary - in that case, we're running around with root
		 * privs and don't want to unlink another user's coredump.
		 */
		if (!need_suid_safe) {
			/*
			 * If it doesn't exist, that's fine. If there's some
			 * other problem, we'll catch it at the filp_open().
			 */
			do_unlinkat(AT_FDCWD, getname_kernel(cn.corename));
		}

		/*
		 * There is a race between unlinking and creating the
		 * file, but if that causes an EEXIST here, that's
		 * fine - another process raced with us while creating
		 * the corefile, and the other process won. To userspace,
		 * what matters is that at least one of the two processes
		 * writes its coredump successfully, not which one.
		 */
		if (need_suid_safe) {
			/*
			 * Using user namespaces, normal user tasks can change
			 * their current->fs->root to point to arbitrary
			 * directories. Since the intention of the "only dump
			 * with a fully qualified path" rule is to control where
			 * coredumps may be placed using root privileges,
			 * current->fs->root must not be used. Instead, use the
			 * root directory of init_task.
			 */
			struct path root;

			task_lock(&init_task);
			get_fs_root(init_task.fs, &root);
			task_unlock(&init_task);
			cprm.file = file_open_root(&root, cn.corename,
						   open_flags, 0600);
			path_put(&root);
		} else {
			cprm.file = filp_open(cn.corename, open_flags, 0600);
		}
		if (IS_ERR(cprm.file))
			goto fail_unlock;

		/* Refuse multi-linked or already-unlinked targets. */
		inode = file_inode(cprm.file);
		if (inode->i_nlink > 1)
			goto close_fail;
		if (d_unhashed(cprm.file->f_path.dentry))
			goto close_fail;
		/*
		 * AK: actually i see no reason to not allow this for named
		 * pipes etc, but keep the previous behaviour for now.
		 */
		if (!S_ISREG(inode->i_mode))
			goto close_fail;
		/*
		 * Don't dump core if the filesystem changed owner or mode
		 * of the file during file creation. This is an issue when
		 * a process dumps core while its cwd is e.g. on a vfat
		 * filesystem.
		 */
		mnt_userns = file_mnt_user_ns(cprm.file);
		if (!uid_eq(i_uid_into_mnt(mnt_userns, inode),
			    current_fsuid())) {
			pr_info_ratelimited("Core dump to %s aborted: cannot preserve file owner\n",
					    cn.corename);
			goto close_fail;
		}
		if ((inode->i_mode & 0677) != 0600) {
			pr_info_ratelimited("Core dump to %s aborted: cannot preserve file permissions\n",
					    cn.corename);
			goto close_fail;
		}
		if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
			goto close_fail;
		if (do_truncate(mnt_userns, cprm.file->f_path.dentry,
				0, 0, cprm.file))
			goto close_fail;
	}

	/* get us an unshared descriptor table; almost always a no-op */
	/* The cell spufs coredump code reads the file descriptor tables */
	retval = unshare_files();
	if (retval)
		goto close_fail;
	if (!dump_interrupted()) {
		/*
		 * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would
		 * have this set to NULL.
		 */
		if (!cprm.file) {
			pr_info("Core dump to |%s disabled\n", cn.corename);
			goto close_fail;
		}
		if (!dump_vma_snapshot(&cprm))
			goto close_fail;

		file_start_write(cprm.file);
		core_dumped = binfmt->core_dump(&cprm);
		/*
		 * Ensures that file size is big enough to contain the current
		 * file postion. This prevents gdb from complaining about
		 * a truncated file if the last "write" to the file was
		 * dump_skip.
		 */
		if (cprm.to_skip) {
			cprm.to_skip--;
			dump_emit(&cprm, "", 1);
		}
		file_end_write(cprm.file);
		free_vma_snapshot(&cprm);
	}
	if (ispipe && core_pipe_limit)
		wait_for_dump_helpers(cprm.file);
close_fail:
	if (cprm.file)
		filp_close(cprm.file, NULL);
fail_dropcount:
	if (ispipe)
		atomic_dec(&core_dump_count);
fail_unlock:
	kfree(argv);
	kfree(cn.corename);
	coredump_finish(core_dumped);
	revert_creds(old_cred);
fail_creds:
	put_cred(cred);
fail:
	return;
}
0788 
0789 /*
0790  * Core dumping helper functions.  These are the only things you should
0791  * do on a core-file: use only these functions to write out all the
0792  * necessary info.
0793  */
0794 static int __dump_emit(struct coredump_params *cprm, const void *addr, int nr)
0795 {
0796     struct file *file = cprm->file;
0797     loff_t pos = file->f_pos;
0798     ssize_t n;
0799     if (cprm->written + nr > cprm->limit)
0800         return 0;
0801 
0802 
0803     if (dump_interrupted())
0804         return 0;
0805     n = __kernel_write(file, addr, nr, &pos);
0806     if (n != nr)
0807         return 0;
0808     file->f_pos = pos;
0809     cprm->written += n;
0810     cprm->pos += n;
0811 
0812     return 1;
0813 }
0814 
0815 static int __dump_skip(struct coredump_params *cprm, size_t nr)
0816 {
0817     static char zeroes[PAGE_SIZE];
0818     struct file *file = cprm->file;
0819     if (file->f_mode & FMODE_LSEEK) {
0820         if (dump_interrupted() ||
0821             vfs_llseek(file, nr, SEEK_CUR) < 0)
0822             return 0;
0823         cprm->pos += nr;
0824         return 1;
0825     } else {
0826         while (nr > PAGE_SIZE) {
0827             if (!__dump_emit(cprm, zeroes, PAGE_SIZE))
0828                 return 0;
0829             nr -= PAGE_SIZE;
0830         }
0831         return __dump_emit(cprm, zeroes, nr);
0832     }
0833 }
0834 
/*
 * Write one whole page to the dump via an iov_iter, flushing any pending
 * skip first.  Same limit/interrupt semantics as __dump_emit().
 * Returns 1 on success, 0 on failure.
 */
static int dump_emit_page(struct coredump_params *cprm, struct page *page)
{
	struct bio_vec bvec = {
		.bv_page	= page,
		.bv_offset	= 0,
		.bv_len		= PAGE_SIZE,
	};
	struct iov_iter iter;
	struct file *file = cprm->file;
	loff_t pos;
	ssize_t n;

	/* Materialize any deferred padding before the page data. */
	if (cprm->to_skip) {
		if (!__dump_skip(cprm, cprm->to_skip))
			return 0;
		cprm->to_skip = 0;
	}
	if (cprm->written + PAGE_SIZE > cprm->limit)
		return 0;
	if (dump_interrupted())
		return 0;
	pos = file->f_pos;
	iov_iter_bvec(&iter, WRITE, &bvec, 1, PAGE_SIZE);
	n = __kernel_write_iter(cprm->file, &iter, &pos);
	if (n != PAGE_SIZE)
		return 0;
	file->f_pos = pos;
	cprm->written += PAGE_SIZE;
	cprm->pos += PAGE_SIZE;

	return 1;
}
0867 
/*
 * Public emit entry point for binfmt handlers: flush any deferred skip,
 * then write @nr bytes from @addr.  Returns 1 on success, 0 on failure.
 */
int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
{
	if (cprm->to_skip) {
		if (!__dump_skip(cprm, cprm->to_skip))
			return 0;
		cprm->to_skip = 0;
	}
	return __dump_emit(cprm, addr, nr);
}
EXPORT_SYMBOL(dump_emit);
0878 
/*
 * Defer a skip so the next emit lands at absolute dump offset @pos.
 * NOTE(review): assumes @pos >= cprm->pos (forward seeks only) — confirm
 * against callers.
 */
void dump_skip_to(struct coredump_params *cprm, unsigned long pos)
{
	cprm->to_skip = pos - cprm->pos;
}
EXPORT_SYMBOL(dump_skip_to);
0884 
/*
 * Defer @nr bytes of padding; it is materialized lazily (hole or zeroes)
 * by the next emit via __dump_skip().
 */
void dump_skip(struct coredump_params *cprm, size_t nr)
{
	cprm->to_skip += nr;
}
EXPORT_SYMBOL(dump_skip);
0890 
#ifdef CONFIG_ELF_CORE
/*
 * Dump the user address range [start, start + len), one page at a time.
 * Absent pages become deferred skips, keeping the core file sparse.
 * Returns 1 on success, 0 if any page write failed.
 */
int dump_user_range(struct coredump_params *cprm, unsigned long start,
		    unsigned long len)
{
	unsigned long addr;

	for (addr = start; addr < start + len; addr += PAGE_SIZE) {
		struct page *page;

		/*
		 * To avoid having to allocate page tables for virtual address
		 * ranges that have never been used yet, and also to make it
		 * easy to generate sparse core files, use a helper that returns
		 * NULL when encountering an empty page table entry that would
		 * otherwise have been filled with the zero page.
		 */
		page = get_dump_page(addr);
		if (page) {
			int stop = !dump_emit_page(cprm, page);
			put_page(page);
			if (stop)
				return 0;
		} else {
			dump_skip(cprm, PAGE_SIZE);
		}
	}
	return 1;
}
#endif
0920 
0921 int dump_align(struct coredump_params *cprm, int align)
0922 {
0923     unsigned mod = (cprm->pos + cprm->to_skip) & (align - 1);
0924     if (align & (align - 1))
0925         return 0;
0926     if (mod)
0927         cprm->to_skip += align - mod;
0928     return 1;
0929 }
0930 EXPORT_SYMBOL(dump_align);
0931 
0932 #ifdef CONFIG_SYSCTL
0933 
0934 void validate_coredump_safety(void)
0935 {
0936     if (suid_dumpable == SUID_DUMP_ROOT &&
0937         core_pattern[0] != '/' && core_pattern[0] != '|') {
0938         pr_warn(
0939 "Unsafe core_pattern used with fs.suid_dumpable=2.\n"
0940 "Pipe handler or fully qualified core dump path required.\n"
0941 "Set kernel.core_pattern before fs.suid_dumpable.\n"
0942         );
0943     }
0944 }
0945 
0946 static int proc_dostring_coredump(struct ctl_table *table, int write,
0947           void *buffer, size_t *lenp, loff_t *ppos)
0948 {
0949     int error = proc_dostring(table, write, buffer, lenp, ppos);
0950 
0951     if (!error)
0952         validate_coredump_safety();
0953     return error;
0954 }
0955 
/*
 * Coredump tunables exposed under /proc/sys/kernel/ (registered below
 * by init_fs_coredump_sysctls()).
 */
static struct ctl_table coredump_sysctls[] = {
    {
        /* kernel.core_uses_pid — presumably tags core names with the
         * pid; consumed by the core-name formatting code (not visible
         * in this chunk). */
        .procname   = "core_uses_pid",
        .data       = &core_uses_pid,
        .maxlen     = sizeof(int),
        .mode       = 0644,
        .proc_handler   = proc_dointvec,
    },
    {
        /* kernel.core_pattern — core file naming template (defaults to
         * "core"); writes go through the validating handler so the
         * suid_dumpable safety warning fires when appropriate. */
        .procname   = "core_pattern",
        .data       = core_pattern,
        .maxlen     = CORENAME_MAX_SIZE,
        .mode       = 0644,
        .proc_handler   = proc_dostring_coredump,
    },
    {
        /* kernel.core_pipe_limit — limit related to pipe-based core
         * handlers; exact semantics enforced elsewhere in this file. */
        .procname   = "core_pipe_limit",
        .data       = &core_pipe_limit,
        .maxlen     = sizeof(unsigned int),
        .mode       = 0644,
        .proc_handler   = proc_dointvec,
    },
    { }     /* terminating sentinel entry */
};
0980 
/* Register the coredump sysctl table under "kernel" at fs-initcall time. */
static int __init init_fs_coredump_sysctls(void)
{
    register_sysctl_init("kernel", coredump_sysctls);
    return 0;
}
fs_initcall(init_fs_coredump_sysctls);
0987 #endif /* CONFIG_SYSCTL */
0988 
0989 /*
0990  * The purpose of always_dump_vma() is to make sure that special kernel mappings
0991  * that are useful for post-mortem analysis are included in every core dump.
0992  * In that way we ensure that the core dump is fully interpretable later
0993  * without matching up the same kernel and hardware config to see what PC values
0994  * meant. These special mappings include - vDSO, vsyscall, and other
0995  * architecture specific mappings
0996  */
0997 static bool always_dump_vma(struct vm_area_struct *vma)
0998 {
0999     /* Any vsyscall mappings? */
1000     if (vma == get_gate_vma(vma->vm_mm))
1001         return true;
1002 
1003     /*
1004      * Assume that all vmas with a .name op should always be dumped.
1005      * If this changes, a new vm_ops field can easily be added.
1006      */
1007     if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1008         return true;
1009 
1010     /*
1011      * arch_vma_name() returns non-NULL for special architecture mappings,
1012      * such as vDSO sections.
1013      */
1014     if (arch_vma_name(vma))
1015         return true;
1016 
1017     return false;
1018 }
1019 
1020 #define DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER 1
1021 
/*
 * Decide how much of @vma's contents should be included in a core dump.
 *
 * Returns 0 (omit the vma's data), PAGE_SIZE (dump just the first page),
 * the full vma length, or DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER, which is
 * resolved to 0 or PAGE_SIZE later in dump_vma_snapshot() once the mmap
 * lock has been dropped and copy_from_user() is usable.
 *
 * @mm_flags carries the per-mm MMF_DUMP_* coredump filter bits; the
 * checks below are precedence-ordered, so their sequence matters.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

    /* always dump the vdso and vsyscall sections */
    if (always_dump_vma(vma))
        goto whole;

    /* Explicitly marked as not-to-be-dumped (e.g. MADV_DONTDUMP). */
    if (vma->vm_flags & VM_DONTDUMP)
        return 0;

    /* support for DAX */
    if (vma_is_dax(vma)) {
        if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
            goto whole;
        if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
            goto whole;
        return 0;
    }

    /* Hugetlb memory check */
    if (is_vm_hugetlb_page(vma)) {
        if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
            goto whole;
        if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
            goto whole;
        return 0;
    }

    /* Do not dump I/O mapped devices or special mappings */
    if (vma->vm_flags & VM_IO)
        return 0;

    /* By default, dump shared memory if mapped from an anonymous file. */
    if (vma->vm_flags & VM_SHARED) {
        /* i_nlink == 0 means the backing file is unlinked/anonymous. */
        if (file_inode(vma->vm_file)->i_nlink == 0 ?
            FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
            goto whole;
        return 0;
    }

    /* Dump segments that have been written to.  */
    if ((!IS_ENABLED(CONFIG_MMU) || vma->anon_vma) && FILTER(ANON_PRIVATE))
        goto whole;
    if (vma->vm_file == NULL)
        return 0;

    if (FILTER(MAPPED_PRIVATE))
        goto whole;

    /*
     * If this is the beginning of an executable file mapping,
     * dump the first page to aid in determining what was mapped here.
     */
    if (FILTER(ELF_HEADERS) &&
        vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
        /* Any execute bit set (0111) marks the file as executable. */
        if ((READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0)
            return PAGE_SIZE;

        /*
         * ELF libraries aren't always executable.
         * We'll want to check whether the mapping starts with the ELF
         * magic, but not now - we're holding the mmap lock,
         * so copy_from_user() doesn't work here.
         * Use a placeholder instead, and fix it up later in
         * dump_vma_snapshot().
         */
        return DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER;
    }

#undef  FILTER

    return 0;

whole:
    return vma->vm_end - vma->vm_start;
}
1103 
1104 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1105                     struct vm_area_struct *gate_vma)
1106 {
1107     struct vm_area_struct *ret = tsk->mm->mmap;
1108 
1109     if (ret)
1110         return ret;
1111     return gate_vma;
1112 }
1113 
1114 /*
1115  * Helper function for iterating across a vma list.  It ensures that the caller
1116  * will visit `gate_vma' prior to terminating the search.
1117  */
1118 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1119                        struct vm_area_struct *gate_vma)
1120 {
1121     struct vm_area_struct *ret;
1122 
1123     ret = this_vma->vm_next;
1124     if (ret)
1125         return ret;
1126     if (this_vma == gate_vma)
1127         return NULL;
1128     return gate_vma;
1129 }
1130 
1131 static void free_vma_snapshot(struct coredump_params *cprm)
1132 {
1133     if (cprm->vma_meta) {
1134         int i;
1135         for (i = 0; i < cprm->vma_count; i++) {
1136             struct file *file = cprm->vma_meta[i].file;
1137             if (file)
1138                 fput(file);
1139         }
1140         kvfree(cprm->vma_meta);
1141         cprm->vma_meta = NULL;
1142     }
1143 }
1144 
/*
 * Under the mmap_lock, take a snapshot of relevant information about the
 * task's VMAs into cprm->vma_meta / vma_count / vma_data_size, so the
 * actual dumping can proceed without holding the lock.
 *
 * Returns true on success.  On failure (fatal signal while waiting for
 * the lock, or allocation failure) returns false with no snapshot
 * allocated.  On success the caller owns the snapshot and must release
 * it with free_vma_snapshot() (each vma's file gets a pinned reference).
 */
static bool dump_vma_snapshot(struct coredump_params *cprm)
{
    struct vm_area_struct *vma, *gate_vma;
    struct mm_struct *mm = current->mm;
    int i;

    /*
     * Once the stack expansion code is fixed to not change VMA bounds
     * under mmap_lock in read mode, this can be changed to take the
     * mmap_lock in read mode.
     */
    if (mmap_write_lock_killable(mm))
        return false;

    cprm->vma_data_size = 0;
    gate_vma = get_gate_vma(mm);
    /* +1 slot when the architecture exposes a gate (vsyscall) vma. */
    cprm->vma_count = mm->map_count + (gate_vma ? 1 : 0);

    cprm->vma_meta = kvmalloc_array(cprm->vma_count, sizeof(*cprm->vma_meta), GFP_KERNEL);
    if (!cprm->vma_meta) {
        mmap_write_unlock(mm);
        return false;
    }

    /* Phase 1 (under the lock): copy the fields we need out of each vma. */
    for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
            vma = next_vma(vma, gate_vma), i++) {
        struct core_vma_metadata *m = cprm->vma_meta + i;

        m->start = vma->vm_start;
        m->end = vma->vm_end;
        m->flags = vma->vm_flags;
        m->dump_size = vma_dump_size(vma, cprm->mm_flags);
        m->pgoff = vma->vm_pgoff;

        /* Pin the backing file; released in free_vma_snapshot(). */
        m->file = vma->vm_file;
        if (m->file)
            get_file(m->file);
    }

    mmap_write_unlock(mm);

    /*
     * Phase 2 (lock dropped): resolve the ELF-header placeholders left by
     * vma_dump_size() — copy_from_user() could not be used while the
     * mmap lock was held — and total up the dump size.
     */
    for (i = 0; i < cprm->vma_count; i++) {
        struct core_vma_metadata *m = cprm->vma_meta + i;

        if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) {
            char elfmag[SELFMAG];

            /* Dump the first page only if it starts with the ELF magic. */
            if (copy_from_user(elfmag, (void __user *)m->start, SELFMAG) ||
                    memcmp(elfmag, ELFMAG, SELFMAG) != 0) {
                m->dump_size = 0;
            } else {
                m->dump_size = PAGE_SIZE;
            }
        }

        cprm->vma_data_size += m->dump_size;
    }

    return true;
}