#include <linux/slab.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/freezer.h>
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/swap.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/perf_event.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/key.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
#include <linux/coredump.h>
#include <linux/utsname.h>
#include <linux/pid_namespace.h>
#include <linux/module.h>
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/audit.h>
#include <linux/tracehook.h>
#include <linux/kmod.h>
#include <linux/fsnotify.h>
#include <linux/fs_struct.h>
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
#include <linux/compat.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/path.h>
#include <linux/timekeeping.h>

#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/tlb.h>
#include <asm/exec.h>

#include <trace/events/task.h>
#include "internal.h"

#include <trace/events/sched.h>

int core_uses_pid;
unsigned int core_pipe_limit;
char core_pattern[CORENAME_MAX_SIZE] = "core";
static int core_name_size = CORENAME_MAX_SIZE;

struct core_name {
    char *corename;
    int used, size;
};

/* The maximal length of core_pattern is also specified in sysctl.c */

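/*
 * Grow cn->corename to at least @size bytes with krealloc() and record
 * the actual usable allocation size back into cn->size.
 */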
static int expand_corename(struct core_name *cn, int size)
{
    char *corename = krealloc(cn->corename, size, GFP_KERNEL);

    if (!corename)
        return -ENOMEM;

    if (size > core_name_size) /* racy but harmless */
        core_name_size = size;

    cn->size = ksize(corename);
    cn->corename = corename;
    return 0;
}

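/*
 * Append a vsnprintf()-formatted string to cn->corename, growing the
 * buffer and retrying if the formatted output does not fit.
 */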
static __printf(2, 0) int cn_vprintf(struct core_name *cn, const char *fmt,
                     va_list arg)
{
    int free, need;
    va_list arg_copy;

again:
    free = cn->size - cn->used;

    va_copy(arg_copy, arg);
    need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy);
    va_end(arg_copy);

    if (need < free) {
        cn->used += need;
        return 0;
    }

    if (!expand_corename(cn, cn->size + need - free + 1))
        goto again;

    return -ENOMEM;
}

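/* Variadic wrapper around cn_vprintf(). */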
static __printf(2, 3) int cn_printf(struct core_name *cn, const char *fmt, ...)
{
    va_list arg;
    int ret;

    va_start(arg, fmt);
    ret = cn_vprintf(cn, fmt, arg);
    va_end(arg);

    return ret;
}

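/*
 * Like cn_printf(), but sanitize the appended component: ".", ".." and
 * empty output are defused, and any '/' is replaced with '!', so a single
 * pattern component cannot redirect the core file into another directory.
 */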
static __printf(2, 3)
int cn_esc_printf(struct core_name *cn, const char *fmt, ...)
{
    int cur = cn->used;
    va_list arg;
    int ret;

    va_start(arg, fmt);
    ret = cn_vprintf(cn, fmt, arg);
    va_end(arg);

    if (ret == 0) {
        /*
         * Ensure that this coredump name component can't cause the
         * resulting corefile path to consist of a ".." or ".".
         */
        if ((cn->used - cur == 1 && cn->corename[cur] == '.') ||
                (cn->used - cur == 2 && cn->corename[cur] == '.'
                && cn->corename[cur+1] == '.'))
            cn->corename[cur] = '!';

        /*
         * Empty names are fishy and could be used to create a "//" in a
         * corefile name, causing the coredump to happen one directory
         * level too high. Enforce that all components of the core
         * pattern are at least one character long.
         */
        if (cn->used == cur)
            ret = cn_printf(cn, "!");
    }

    for (; cur < cn->used; ++cur) {
        if (cn->corename[cur] == '/')
            cn->corename[cur] = '!';
    }
    return ret;
}

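/*
 * Append the path of the dumping task's executable (or its comm name when
 * the path cannot be resolved), escaped via cn_esc_printf().
 */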
static int cn_print_exe_file(struct core_name *cn)
{
    struct file *exe_file;
    char *pathbuf, *path;
    int ret;

    exe_file = get_mm_exe_file(current->mm);
    if (!exe_file)
        return cn_esc_printf(cn, "%s (path unknown)", current->comm);

    pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
    if (!pathbuf) {
        ret = -ENOMEM;
        goto put_exe_file;
    }

    path = file_path(exe_file, pathbuf, PATH_MAX);
    if (IS_ERR(path)) {
        ret = PTR_ERR(path);
        goto free_buf;
    }

    ret = cn_esc_printf(cn, "%s", path);

free_buf:
    kfree(pathbuf);
put_exe_file:
    fput(exe_file);
    return ret;
}

/* format_corename will inspect the pattern parameter, and output a
 * name into cn->corename, growing the buffer as needed. Returns 1 if
 * core_pattern names a pipe helper, 0 for a regular file name, or a
 * negative error code.
 */
static int format_corename(struct core_name *cn, struct coredump_params *cprm)
{
    const struct cred *cred = current_cred();
    const char *pat_ptr = core_pattern;
    int ispipe = (*pat_ptr == '|');
    int pid_in_pattern = 0;
    int err = 0;

    cn->used = 0;
    cn->corename = NULL;
    if (expand_corename(cn, core_name_size))
        return -ENOMEM;
    cn->corename[0] = '\0';

    if (ispipe)
        ++pat_ptr;

    /* Repeat as long as we have more pattern to process and more output
       space */
    while (*pat_ptr) {
        if (*pat_ptr != '%') {
            err = cn_printf(cn, "%c", *pat_ptr++);
        } else {
            switch (*++pat_ptr) {
            /* single % at the end, drop that */
            case 0:
                goto out;
            /* Double percent, output one percent */
            case '%':
                err = cn_printf(cn, "%c", '%');
                break;
            /* pid */
            case 'p':
                pid_in_pattern = 1;
                err = cn_printf(cn, "%d",
                          task_tgid_vnr(current));
                break;
            /* global pid */
            case 'P':
                err = cn_printf(cn, "%d",
                          task_tgid_nr(current));
                break;
            case 'i':
                err = cn_printf(cn, "%d",
                          task_pid_vnr(current));
                break;
            case 'I':
                err = cn_printf(cn, "%d",
                          task_pid_nr(current));
                break;
            /* uid */
            case 'u':
                err = cn_printf(cn, "%u",
                        from_kuid(&init_user_ns,
                              cred->uid));
                break;
            /* gid */
            case 'g':
                err = cn_printf(cn, "%u",
                        from_kgid(&init_user_ns,
                              cred->gid));
                break;
            case 'd':
                err = cn_printf(cn, "%d",
                    __get_dumpable(cprm->mm_flags));
                break;
            /* signal that caused the coredump */
            case 's':
                err = cn_printf(cn, "%d",
                        cprm->siginfo->si_signo);
                break;
            /* UNIX time of coredump */
            case 't': {
                time64_t time;

                time = ktime_get_real_seconds();
                err = cn_printf(cn, "%lld", time);
                break;
            }
            /* hostname */
            case 'h':
                down_read(&uts_sem);
                err = cn_esc_printf(cn, "%s",
                          utsname()->nodename);
                up_read(&uts_sem);
                break;
            /* executable */
            case 'e':
                err = cn_esc_printf(cn, "%s", current->comm);
                break;
            case 'E':
                err = cn_print_exe_file(cn);
                break;
            /* core limit size */
            case 'c':
                err = cn_printf(cn, "%lu",
                          rlimit(RLIMIT_CORE));
                break;
            default:
                break;
            }
            ++pat_ptr;
        }

        if (err)
            return err;
    }

out:
    /* Backward compatibility with core_uses_pid:
     *
     * If core_pattern does not include a %p (as is the default)
     * and core_uses_pid is set, then .%pid will be appended to
     * the filename. Do not do this for piped commands. */
    if (!ispipe && !pid_in_pattern && core_uses_pid) {
        err = cn_printf(cn, ".%d", task_tgid_vnr(current));
        if (err)
            return err;
    }
    return ispipe;
}

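/*
 * Put @start's thread group into group-exit/coredump state and send
 * SIGKILL to every other thread in it that still has a ->mm. Returns
 * the number of threads that were woken and must report in before the
 * dump can proceed.
 */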
static int zap_process(struct task_struct *start, int exit_code, int flags)
{
    struct task_struct *t;
    int nr = 0;

    /* ignore all signals except SIGKILL, see prepare_signal() */
    start->signal->flags = SIGNAL_GROUP_COREDUMP | flags;
    start->signal->group_exit_code = exit_code;
    start->signal->group_stop_count = 0;

    for_each_thread(start, t) {
        task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
        if (t != current && t->mm) {
            sigaddset(&t->pending.signal, SIGKILL);
            signal_wake_up(t, 1);
            nr++;
        }
    }

    return nr;
}

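/*
 * Kill every task that shares @mm (the dumping thread group plus any
 * CLONE_VM processes) and return the number of threads the dumper has
 * to wait for, or -EAGAIN if a group exit is already in progress.
 */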
static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
            struct core_state *core_state, int exit_code)
{
    struct task_struct *g, *p;
    unsigned long flags;
    int nr = -EAGAIN;

    spin_lock_irq(&tsk->sighand->siglock);
    if (!signal_group_exit(tsk->signal)) {
        mm->core_state = core_state;
        tsk->signal->group_exit_task = tsk;
        nr = zap_process(tsk, exit_code, 0);
        clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
    }
    spin_unlock_irq(&tsk->sighand->siglock);
    if (unlikely(nr < 0))
        return nr;

    tsk->flags |= PF_DUMPCORE;
    if (atomic_read(&mm->mm_users) == nr + 1)
        goto done;
    /*
     * We should find and kill all tasks which use this mm, and we should
     * count them correctly into ->nr_threads. We don't take tasklist
     * lock, but this is safe wrt:
     *
     * fork:
     *  None of sub-threads can fork after zap_process(leader). All
     *  processes which were created before this point should be
     *  visible to zap_threads() because copy_process() adds the new
     *  process to the tail of init_task.tasks list, and lock/unlock
     *  of ->siglock provides a memory barrier.
     *
     * do_exit:
     *  The caller holds mm->mmap_sem. This means that the task which
     *  uses this mm can't pass exit_mm(), so it can't exit or clear
     *  its ->mm.
     *
     * de_thread:
     *  It does list_replace_rcu(&leader->tasks, &current->tasks),
     *  we must see either old or new leader, this does not matter.
     *  However, it can change p->sighand, so lock_task_sighand(p)
     *  must be used. Since p->mm != NULL and we hold ->mmap_sem
     *  it can't fail.
     *
     *  Note also that "g" can be the old leader with ->mm == NULL
     *  and already unhashed and thus removed from ->thread_group.
     *  This is OK, __unhash_process()->list_del_rcu() does not
     *  clear the ->next pointer, we will find the new leader via
     *  next_thread().
     */
    rcu_read_lock();
    for_each_process(g) {
        if (g == tsk->group_leader)
            continue;
        if (g->flags & PF_KTHREAD)
            continue;

        for_each_thread(g, p) {
            if (unlikely(!p->mm))
                continue;
            if (unlikely(p->mm == mm)) {
                lock_task_sighand(p, &flags);
                nr += zap_process(p, exit_code,
                            SIGNAL_GROUP_EXIT);
                unlock_task_sighand(p, &flags);
            }
            break;
        }
    }
    rcu_read_unlock();
done:
    atomic_set(&core_state->nr_threads, nr);
    return nr;
}

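/*
 * Stop every other user of current->mm and wait until they have reached
 * exit_mm() and gone inactive, so that all thread state is stable for
 * the dump. Returns the number of waiting threads, or a negative error.
 */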
static int coredump_wait(int exit_code, struct core_state *core_state)
{
    struct task_struct *tsk = current;
    struct mm_struct *mm = tsk->mm;
    int core_waiters = -EBUSY;

    init_completion(&core_state->startup);
    core_state->dumper.task = tsk;
    core_state->dumper.next = NULL;

    if (down_write_killable(&mm->mmap_sem))
        return -EINTR;

    if (!mm->core_state)
        core_waiters = zap_threads(tsk, mm, core_state, exit_code);
    up_write(&mm->mmap_sem);

    if (core_waiters > 0) {
        struct core_thread *ptr;

        freezer_do_not_count();
        wait_for_completion(&core_state->startup);
        freezer_count();
        /*
         * Wait for all the threads to become inactive, so that
         * all the thread context (extended register state, like
         * fpu etc) gets copied to the memory.
         */
        ptr = core_state->dumper.next;
        while (ptr != NULL) {
            wait_task_inactive(ptr->task, 0);
            ptr = ptr->next;
        }
    }

    return core_waiters;
}

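/*
 * Tear down the core_state set up by coredump_wait(): mark the group as
 * exiting, record a successful dump in the exit code, and wake up the
 * threads parked in exit_mm().
 */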
static void coredump_finish(struct mm_struct *mm, bool core_dumped)
{
    struct core_thread *curr, *next;
    struct task_struct *task;

    spin_lock_irq(&current->sighand->siglock);
    if (core_dumped && !__fatal_signal_pending(current))
        current->signal->group_exit_code |= 0x80;
    current->signal->group_exit_task = NULL;
    current->signal->flags = SIGNAL_GROUP_EXIT;
    spin_unlock_irq(&current->sighand->siglock);

    next = mm->core_state->dumper.next;
    while ((curr = next) != NULL) {
        next = curr->next;
        task = curr->task;
        /*
         * see exit_mm(), curr->task must not see
         * ->task == NULL before we read ->next.
         */
        smp_mb();
        curr->task = NULL;
        wake_up_process(task);
    }

    mm->core_state = NULL;
}

static bool dump_interrupted(void)
{
    /*
     * SIGKILL or freezing() interrupt the coredumping. Perhaps we
     * can do try_to_freeze() and check __fatal_signal_pending(),
     * but then we need to teach dump_write() to restart and clear
     * TIF_SIGPENDING.
     */
    return signal_pending(current);
}

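/*
 * Temporarily join the dump pipe as a reader so we can wait until the
 * usermode helper has consumed the core dump and closed its end.
 */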
static void wait_for_dump_helpers(struct file *file)
{
    struct pipe_inode_info *pipe = file->private_data;

    pipe_lock(pipe);
    pipe->readers++;
    pipe->writers--;
    wake_up_interruptible_sync(&pipe->wait);
    kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
    pipe_unlock(pipe);

    /*
     * We actually want wait_event_freezable() but then we need
     * to clear TIF_SIGPENDING and improve dump_interrupted().
     */
    wait_event_interruptible(pipe->wait, pipe->readers == 1);

    pipe_lock(pipe);
    pipe->readers--;
    pipe->writers++;
    pipe_unlock(pipe);
}

/*
 * umh_pipe_setup
 * helper function to customize the process used
 * to collect the core in userspace.  Specifically
 * it sets up a pipe and installs it as fd 0 (stdin)
 * for the process.  Returns 0 on success, or a
 * negative errno on failure.
 * Note that it also sets the core limit to 1.  This
 * is a special value that we use to trap recursive
 * core dumps
 */
static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
{
    struct file *files[2];
    struct coredump_params *cp = (struct coredump_params *)info->data;
    int err = create_pipe_files(files, 0);
    if (err)
        return err;

    cp->file = files[1];

    err = replace_fd(0, files[0], 0);
    fput(files[0]);
    /* and disallow core files too */
    current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};

    return err;
}

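/*
 * Main entry point, called from the signal delivery path when a process
 * must dump core. Builds the core file name from core_pattern, then
 * either pipes the dump to a usermode helper or writes it to a file and
 * lets the binfmt handler emit the actual core image.
 */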
void do_coredump(const siginfo_t *siginfo)
{
    struct core_state core_state;
    struct core_name cn;
    struct mm_struct *mm = current->mm;
    struct linux_binfmt * binfmt;
    const struct cred *old_cred;
    struct cred *cred;
    int retval = 0;
    int ispipe;
    struct files_struct *displaced;
    /* require nonrelative corefile path and be extra careful */
    bool need_suid_safe = false;
    bool core_dumped = false;
    static atomic_t core_dump_count = ATOMIC_INIT(0);
    struct coredump_params cprm = {
        .siginfo = siginfo,
        .regs = signal_pt_regs(),
        .limit = rlimit(RLIMIT_CORE),
        /*
         * We must use the same mm->flags while dumping core to avoid
         * inconsistency of bit flags, since this flag is not protected
         * by any locks.
         */
        .mm_flags = mm->flags,
    };

    audit_core_dumps(siginfo->si_signo);

    binfmt = mm->binfmt;
    if (!binfmt || !binfmt->core_dump)
        goto fail;
    if (!__get_dumpable(cprm.mm_flags))
        goto fail;

    cred = prepare_creds();
    if (!cred)
        goto fail;
    /*
     * We cannot trust fsuid as being the "true" uid of the process
     * nor do we know its entire history. We only know it was tainted
     * so we dump it as root in mode 2, and only into a controlled
     * environment (pipe handler or fully qualified path).
     */
    if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {
        /* Setuid core dump mode */
        cred->fsuid = GLOBAL_ROOT_UID;  /* Dump root private */
        need_suid_safe = true;
    }

    retval = coredump_wait(siginfo->si_signo, &core_state);
    if (retval < 0)
        goto fail_creds;

    old_cred = override_creds(cred);

    ispipe = format_corename(&cn, &cprm);

    if (ispipe) {
        int dump_count;
        char **helper_argv;
        struct subprocess_info *sub_info;

        if (ispipe < 0) {
            printk(KERN_WARNING "format_corename failed\n");
            printk(KERN_WARNING "Aborting core\n");
            goto fail_unlock;
        }

        if (cprm.limit == 1) {
            /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
             *
             * Normally core limits are irrelevant to pipes, since
             * we're not writing to the file system, but we use
             * cprm.limit of 1 here as a special value, this is a
             * consistent way to catch recursive crashes.
             * We can still crash if the core_pattern binary sets
             * RLIMIT_CORE to something other than 1, but then it
             * runs as root, and can do lots of stupid things.
             *
             * Note that we use task_tgid_vnr here to grab the pid
             * of the process group leader.  That way we get the
             * right pid if a thread in a multi-threaded
             * core_pattern process dies.
             */
            printk(KERN_WARNING
                "Process %d(%s) has RLIMIT_CORE set to 1\n",
                task_tgid_vnr(current), current->comm);
            printk(KERN_WARNING "Aborting core\n");
            goto fail_unlock;
        }
        cprm.limit = RLIM_INFINITY;

        dump_count = atomic_inc_return(&core_dump_count);
        if (core_pipe_limit && (core_pipe_limit < dump_count)) {
            printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
                   task_tgid_vnr(current), current->comm);
            printk(KERN_WARNING "Skipping core dump\n");
            goto fail_dropcount;
        }

        helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);
        if (!helper_argv) {
            printk(KERN_WARNING "%s failed to allocate memory\n",
                   __func__);
            goto fail_dropcount;
        }

        retval = -ENOMEM;
        sub_info = call_usermodehelper_setup(helper_argv[0],
                        helper_argv, NULL, GFP_KERNEL,
                        umh_pipe_setup, NULL, &cprm);
        if (sub_info)
            retval = call_usermodehelper_exec(sub_info,
                              UMH_WAIT_EXEC);

        argv_free(helper_argv);
        if (retval) {
            printk(KERN_INFO "Core dump to |%s pipe failed\n",
                   cn.corename);
            goto close_fail;
        }
    } else {
        struct inode *inode;
        int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
                 O_LARGEFILE | O_EXCL;

        if (cprm.limit < binfmt->min_coredump)
            goto fail_unlock;

        if (need_suid_safe && cn.corename[0] != '/') {
            printk(KERN_WARNING "Pid %d(%s) can only dump core "\
                "to fully qualified path!\n",
                task_tgid_vnr(current), current->comm);
            printk(KERN_WARNING "Skipping core dump\n");
            goto fail_unlock;
        }

        /*
         * Unlink the file if it exists unless this is a SUID
         * binary - in that case, we're running around with root
         * privs and don't want to unlink another user's coredump.
         */
        if (!need_suid_safe) {
            mm_segment_t old_fs;

            old_fs = get_fs();
            set_fs(KERNEL_DS);
            /*
             * If it doesn't exist, that's fine. If there's some
             * other problem, we'll catch it at the filp_open().
             */
            (void) sys_unlink((const char __user *)cn.corename);
            set_fs(old_fs);
        }

        /*
         * There is a race between unlinking and creating the
         * file, but if that causes an EEXIST here, that's
         * fine - another process raced with us while creating
         * the corefile, and the other process won. To userspace,
         * what matters is that at least one of the two processes
         * writes its coredump successfully, not which one.
         */
        if (need_suid_safe) {
            /*
             * Using user namespaces, normal user tasks can change
             * their current->fs->root to point to arbitrary
             * directories. Since the intention of the "only dump
             * with a fully qualified path" rule is to control where
             * coredumps may be placed using root privileges,
             * current->fs->root must not be used. Instead, use the
             * root directory of init_task.
             */
            struct path root;

            task_lock(&init_task);
            get_fs_root(init_task.fs, &root);
            task_unlock(&init_task);
            cprm.file = file_open_root(root.dentry, root.mnt,
                cn.corename, open_flags, 0600);
            path_put(&root);
        } else {
            cprm.file = filp_open(cn.corename, open_flags, 0600);
        }
        if (IS_ERR(cprm.file))
            goto fail_unlock;

        inode = file_inode(cprm.file);
        if (inode->i_nlink > 1)
            goto close_fail;
        if (d_unhashed(cprm.file->f_path.dentry))
            goto close_fail;
        /*
         * AK: actually i see no reason to not allow this for named
         * pipes etc, but keep the previous behaviour for now.
         */
        if (!S_ISREG(inode->i_mode))
            goto close_fail;
        /*
         * Don't dump core if the filesystem changed owner or mode
         * of the file during file creation. This is an issue when
         * a process dumps core while its cwd is e.g. on a vfat
         * filesystem.
         */
        if (!uid_eq(inode->i_uid, current_fsuid()))
            goto close_fail;
        if ((inode->i_mode & 0677) != 0600)
            goto close_fail;
        if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
            goto close_fail;
        if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
            goto close_fail;
    }

    /* get us an unshared descriptor table; almost always a no-op */
    retval = unshare_files(&displaced);
    if (retval)
        goto close_fail;
    if (displaced)
        put_files_struct(displaced);
    if (!dump_interrupted()) {
        file_start_write(cprm.file);
        core_dumped = binfmt->core_dump(&cprm);
        file_end_write(cprm.file);
    }
    if (ispipe && core_pipe_limit)
        wait_for_dump_helpers(cprm.file);
close_fail:
    if (cprm.file)
        filp_close(cprm.file, NULL);
fail_dropcount:
    if (ispipe)
        atomic_dec(&core_dump_count);
fail_unlock:
    kfree(cn.corename);
    coredump_finish(mm, core_dumped);
    revert_creds(old_cred);
fail_creds:
    put_cred(cred);
fail:
    return;
}

/*
 * Core dumping helper functions.  These are the only things you should
 * do on a core-file: use only these functions to write out all the
 * necessary info.
 */
int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
{
    struct file *file = cprm->file;
    loff_t pos = file->f_pos;
    ssize_t n;
    if (cprm->written + nr > cprm->limit)
        return 0;
    while (nr) {
        if (dump_interrupted())
            return 0;
        n = __kernel_write(file, addr, nr, &pos);
        if (n <= 0)
            return 0;
        file->f_pos = pos;
        cprm->written += n;
        cprm->pos += n;
        nr -= n;
    }
    return 1;
}
EXPORT_SYMBOL(dump_emit);

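/*
 * Advance the core file position by @nr bytes, seeking forward when the
 * target file supports llseek and writing zeroes otherwise. Returns 1 on
 * success, 0 if interrupted or on failure.
 */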
int dump_skip(struct coredump_params *cprm, size_t nr)
{
    static char zeroes[PAGE_SIZE];
    struct file *file = cprm->file;
    if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
        if (dump_interrupted() ||
            file->f_op->llseek(file, nr, SEEK_CUR) < 0)
            return 0;
        cprm->pos += nr;
        return 1;
    } else {
        while (nr > PAGE_SIZE) {
            if (!dump_emit(cprm, zeroes, PAGE_SIZE))
                return 0;
            nr -= PAGE_SIZE;
        }
        return dump_emit(cprm, zeroes, nr);
    }
}
EXPORT_SYMBOL(dump_skip);

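/*
 * Pad the core file up to the next @align-byte boundary with a skip.
 * @align must be a power of two; otherwise 0 is returned.
 */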
int dump_align(struct coredump_params *cprm, int align)
{
    unsigned mod = cprm->pos & (align - 1);
    if (align & (align - 1))
        return 0;
    return mod ? dump_skip(cprm, align - mod) : 1;
}
EXPORT_SYMBOL(dump_align);

/*
 * Ensures that file size is big enough to contain the current file
 * position. This prevents gdb from complaining about a truncated file
 * if the last "write" to the file was dump_skip.
 */
void dump_truncate(struct coredump_params *cprm)
{
    struct file *file = cprm->file;
    loff_t offset;

    if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
        offset = file->f_op->llseek(file, 0, SEEK_CUR);
        if (i_size_read(file->f_mapping->host) < offset)
            do_truncate(file->f_path.dentry, offset, 0, file);
    }
}
EXPORT_SYMBOL(dump_truncate);