/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                int, int, unsigned long);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

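/*
 * Worked example, assuming ELF_MIN_ALIGN == 0x1000:
 *
 *   ELF_PAGESTART(0x08048123)  == 0x08048000   (round down to page)
 *   ELF_PAGEOFFSET(0x08048123) == 0x123        (offset within page)
 *   ELF_PAGEALIGN(0x08048123)  == 0x08049000   (round up to page)
 */
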
static struct linux_binfmt elf_format = {
    .module     = THIS_MODULE,
    .load_binary    = load_elf_binary,
    .load_shlib = load_elf_library,
    .core_dump  = elf_core_dump,
    .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
    start = ELF_PAGEALIGN(start);
    end = ELF_PAGEALIGN(end);
    if (end > start) {
        int error = vm_brk(start, end - start);
        if (error)
            return error;
    }
    current->mm->start_brk = current->mm->brk = end;
    return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These pages would
   otherwise contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
    unsigned long nbyte;

    nbyte = ELF_PAGEOFFSET(elf_bss);
    if (nbyte) {
        nbyte = ELF_MIN_ALIGN - nbyte;
        if (clear_user((void __user *) elf_bss, nbyte))
            return -EFAULT;
    }
    return 0;
}
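
/*
 * Worked example, assuming ELF_MIN_ALIGN == 0x1000: for
 * elf_bss == 0x0804a123, ELF_PAGEOFFSET() yields 0x123, so clear_user()
 * zeroes 0x1000 - 0x123 == 0xedd bytes, i.e. the tail of the page from
 * 0x0804a123 up to (but not including) 0x0804b000.
 */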

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
    ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
    elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
    old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
    (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
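
/*
 * Worked example for the common grows-down case: with p == 0xbffff000,
 * STACK_ALLOC(p, 16) lowers p to 0xbfffeff0 and returns that address.
 * STACK_ROUND() then masks the final pointer down to a 16-byte
 * boundary, which keeps the initial stack 16-byte aligned.
 */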

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
        unsigned long load_addr, unsigned long interp_load_addr)
{
    unsigned long p = bprm->p;
    int argc = bprm->argc;
    int envc = bprm->envc;
    elf_addr_t __user *argv;
    elf_addr_t __user *envp;
    elf_addr_t __user *sp;
    elf_addr_t __user *u_platform;
    elf_addr_t __user *u_base_platform;
    elf_addr_t __user *u_rand_bytes;
    const char *k_platform = ELF_PLATFORM;
    const char *k_base_platform = ELF_BASE_PLATFORM;
    unsigned char k_rand_bytes[16];
    int items;
    elf_addr_t *elf_info;
    int ei_index = 0;
    const struct cred *cred = current_cred();
    struct vm_area_struct *vma;

    /*
     * In some cases (e.g. Hyper-Threading), we want to avoid L1
     * evictions by the processes running on the same package. One
     * thing we can do is to shuffle the initial stack for them.
     */

    p = arch_align_stack(p);

    /*
     * If this architecture has a platform capability string, copy it
     * to userspace.  In some cases (Sparc), this info is impossible
     * for userspace to get any other way, in others (i386) it is
     * merely difficult.
     */
    u_platform = NULL;
    if (k_platform) {
        size_t len = strlen(k_platform) + 1;

        u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
        if (__copy_to_user(u_platform, k_platform, len))
            return -EFAULT;
    }

    /*
     * If this architecture has a "base" platform capability
     * string, copy it to userspace.
     */
    u_base_platform = NULL;
    if (k_base_platform) {
        size_t len = strlen(k_base_platform) + 1;

        u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
        if (__copy_to_user(u_base_platform, k_base_platform, len))
            return -EFAULT;
    }

    /*
     * Generate 16 random bytes for userspace PRNG seeding.
     */
    get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
    u_rand_bytes = (elf_addr_t __user *)
               STACK_ALLOC(p, sizeof(k_rand_bytes));
    if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
        return -EFAULT;

    /* Create the ELF interpreter info */
    elf_info = (elf_addr_t *)current->mm->saved_auxv;
    /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
    do { \
        elf_info[ei_index++] = id; \
        elf_info[ei_index++] = val; \
    } while (0)

#ifdef ARCH_DLINFO
    /*
     * ARCH_DLINFO must come first so PPC can do its special alignment of
     * AUXV.
     * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
     * ARCH_DLINFO changes
     */
    ARCH_DLINFO;
#endif
    NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
    NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
    NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
    NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
    NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
    NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
    NEW_AUX_ENT(AT_BASE, interp_load_addr);
    NEW_AUX_ENT(AT_FLAGS, 0);
    NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
    NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
    NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
    NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
    NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
    NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
    NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
    NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
    NEW_AUX_ENT(AT_EXECFN, bprm->exec);
    if (k_platform) {
        NEW_AUX_ENT(AT_PLATFORM,
                (elf_addr_t)(unsigned long)u_platform);
    }
    if (k_base_platform) {
        NEW_AUX_ENT(AT_BASE_PLATFORM,
                (elf_addr_t)(unsigned long)u_base_platform);
    }
    if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
        NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
    }
#undef NEW_AUX_ENT
    /* AT_NULL is zero; clear the rest too */
    memset(&elf_info[ei_index], 0,
           sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

    /* And advance past the AT_NULL entry.  */
    ei_index += 2;

    sp = STACK_ADD(p, ei_index);

    items = (argc + 1) + (envc + 1) + 1;
    bprm->p = STACK_ROUND(sp, items);

    /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
    sp = (elf_addr_t __user *)bprm->p - items - ei_index;
    bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
    sp = (elf_addr_t __user *)bprm->p;
#endif


    /*
     * Grow the stack manually; some architectures have a limit on how
     * far ahead a user-space access may be in order to grow the stack.
     */
    vma = find_extend_vma(current->mm, bprm->p);
    if (!vma)
        return -EFAULT;

    /* Now, let's put argc (and argv, envp if appropriate) on the stack */
    if (__put_user(argc, sp++))
        return -EFAULT;
    argv = sp;
    envp = argv + argc + 1;

    /* Populate argv and envp */
    p = current->mm->arg_end = current->mm->arg_start;
    while (argc-- > 0) {
        size_t len;
        if (__put_user((elf_addr_t)p, argv++))
            return -EFAULT;
        len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
        if (!len || len > MAX_ARG_STRLEN)
            return -EINVAL;
        p += len;
    }
    if (__put_user(0, argv))
        return -EFAULT;
    current->mm->arg_end = current->mm->env_start = p;
    while (envc-- > 0) {
        size_t len;
        if (__put_user((elf_addr_t)p, envp++))
            return -EFAULT;
        len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
        if (!len || len > MAX_ARG_STRLEN)
            return -EINVAL;
        p += len;
    }
    if (__put_user(0, envp))
        return -EFAULT;
    current->mm->env_end = p;

    /* Put the elf_info on the stack in the right place.  */
    sp = (elf_addr_t __user *)envp + 1;
    if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
        return -EFAULT;
    return 0;
}
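
/*
 * The resulting initial stack, from low to high addresses (grows-down
 * case):
 *
 *   argc | argv[0..argc-1] | NULL | envp[0..envc-1] | NULL |
 *   auxv pairs ... AT_NULL | padding | platform strings, random bytes |
 *   argument and environment strings
 *
 * A minimal userspace sketch of reading the aux vector built above, as
 * a separate program, assuming glibc's getauxval(3):
 */
#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
    /* AT_PAGESZ comes from NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE). */
    unsigned long pagesz = getauxval(AT_PAGESZ);
    /* AT_RANDOM points at the 16 bytes copied from k_rand_bytes. */
    unsigned char *rnd = (unsigned char *)getauxval(AT_RANDOM);
    /* AT_PLATFORM is only present if the arch defines ELF_PLATFORM. */
    const char *plat = (const char *)getauxval(AT_PLATFORM);

    printf("AT_PAGESZ = %lu\n", pagesz);
    if (rnd)
        printf("AT_RANDOM[0] = %#x\n", rnd[0]);
    if (plat)
        printf("AT_PLATFORM = %s\n", plat);
    return 0;
}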

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
        struct elf_phdr *eppnt, int prot, int type,
        unsigned long total_size)
{
    unsigned long map_addr;
    unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
    unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
    addr = ELF_PAGESTART(addr);
    size = ELF_PAGEALIGN(size);

    /* mmap() will return -EINVAL if given a zero size, but a
     * segment with zero filesize is perfectly valid */
    if (!size)
        return addr;

    /*
     * total_size is the size of the ELF (interpreter) image.
     * The _first_ mmap needs to know the full size, otherwise
     * randomization might put this image into an overlapping
     * position with the ELF binary image (since size < total_size).
     * So we first map the 'big' image and then unmap the remainder
     * at the end (this unmapping is needed for ELF images with holes).
     */
    if (total_size) {
        total_size = ELF_PAGEALIGN(total_size);
        map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
        if (!BAD_ADDR(map_addr))
            vm_munmap(map_addr+size, total_size-size);
    } else
        map_addr = vm_mmap(filep, addr, size, prot, type, off);

    return(map_addr);
}

#endif /* !elf_map */
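
/*
 * Worked example of the map-then-trim trick above: for an interpreter
 * whose first PT_LOAD has a page-aligned size of 0x2000 and whose
 * total_size is 0x5000, the first call maps 0x5000 bytes at map_addr
 * and vm_munmap(map_addr + 0x2000, 0x3000) trims the tail, so the
 * remaining segments can later be mapped MAP_FIXED into the reserved
 * range without colliding with other mappings.
 */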

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
    int i, first_idx = -1, last_idx = -1;

    for (i = 0; i < nr; i++) {
        if (cmds[i].p_type == PT_LOAD) {
            last_idx = i;
            if (first_idx == -1)
                first_idx = i;
        }
    }
    if (first_idx == -1)
        return 0;

    return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
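
/*
 * Worked example: with two PT_LOAD entries, the first at p_vaddr
 * 0x400000 and the last at p_vaddr 0x600e10 with p_memsz 0x430, the
 * result is 0x600e10 + 0x430 - ELF_PAGESTART(0x400000) == 0x201240,
 * i.e. the span from the first segment's page to the end of the last.
 */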

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
                       struct file *elf_file)
{
    struct elf_phdr *elf_phdata = NULL;
    int retval, size, err = -1;

    /*
     * If the size of this structure has changed, then punt, since
     * we will be doing the wrong thing.
     */
    if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
        goto out;

    /* Sanity check the number of program headers... */
    if (elf_ex->e_phnum < 1 ||
        elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
        goto out;

    /* ...and their total size. */
    size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
    if (size > ELF_MIN_ALIGN)
        goto out;

    elf_phdata = kmalloc(size, GFP_KERNEL);
    if (!elf_phdata)
        goto out;

    /* Read in the program headers */
    retval = kernel_read(elf_file, elf_ex->e_phoff,
                 (char *)elf_phdata, size);
    if (retval != size) {
        err = (retval < 0) ? retval : -EIO;
        goto out;
    }

    /* Success! */
    err = 0;
out:
    if (err) {
        kfree(elf_phdata);
        elf_phdata = NULL;
    }
    return elf_phdata;
}
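
/*
 * A minimal userspace analogue of load_elf_phdrs(), as a separate
 * program, assuming a 64-bit ELF file and glibc's <elf.h>:
 */
#include <elf.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    Elf64_Ehdr eh;
    Elf64_Phdr *ph;
    ssize_t want;
    int fd;

    if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
        return 1;
    if (read(fd, &eh, sizeof(eh)) != sizeof(eh))
        return 1;
    /* The same sanity checks as the kernel: entry size and count. */
    if (eh.e_phentsize != sizeof(Elf64_Phdr) || eh.e_phnum < 1)
        return 1;
    want = eh.e_phnum * sizeof(*ph);
    ph = malloc(want);
    if (!ph || pread(fd, ph, want, eh.e_phoff) != want)
        return 1;
    for (int i = 0; i < eh.e_phnum; i++)
        if (ph[i].p_type == PT_LOAD)
            printf("PT_LOAD vaddr=%#llx filesz=%#llx memsz=%#llx\n",
                   (unsigned long long)ph[i].p_vaddr,
                   (unsigned long long)ph[i].p_filesz,
                   (unsigned long long)ph[i].p_memsz);
    free(ph);
    close(fd);
    return 0;
}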

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (i.e. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:   The main ELF header
 * @phdr:   The program header to check
 * @elf:    The open ELF file
 * @is_interp:  True if the phdr is from the interpreter of the ELF being
 *      loaded, else false.
 * @state:  Architecture-specific state preserved throughout the process
 *      of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
                   struct elf_phdr *phdr,
                   struct file *elf, bool is_interp,
                   struct arch_elf_state *state)
{
    /* Dummy implementation, always proceed */
    return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:   The main ELF header
 * @has_interp: True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state:  Architecture-specific state preserved throughout the process
 *      of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called
 * after all program headers to be checked by arch_elf_pt_proc have been
 * checked.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
                 struct elfhdr *interp_ehdr,
                 struct arch_elf_state *state)
{
    /* Dummy implementation, always proceed */
    return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
        struct file *interpreter, unsigned long *interp_map_addr,
        unsigned long no_base, struct elf_phdr *interp_elf_phdata)
{
    struct elf_phdr *eppnt;
    unsigned long load_addr = 0;
    int load_addr_set = 0;
    unsigned long last_bss = 0, elf_bss = 0;
    unsigned long error = ~0UL;
    unsigned long total_size;
    int i;

    /* First of all, some simple consistency checks */
    if (interp_elf_ex->e_type != ET_EXEC &&
        interp_elf_ex->e_type != ET_DYN)
        goto out;
    if (!elf_check_arch(interp_elf_ex))
        goto out;
    if (!interpreter->f_op->mmap)
        goto out;

    total_size = total_mapping_size(interp_elf_phdata,
                    interp_elf_ex->e_phnum);
    if (!total_size) {
        error = -EINVAL;
        goto out;
    }

    eppnt = interp_elf_phdata;
    for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
        if (eppnt->p_type == PT_LOAD) {
            int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
            int elf_prot = 0;
            unsigned long vaddr = 0;
            unsigned long k, map_addr;

            if (eppnt->p_flags & PF_R)
                elf_prot = PROT_READ;
            if (eppnt->p_flags & PF_W)
                elf_prot |= PROT_WRITE;
            if (eppnt->p_flags & PF_X)
                elf_prot |= PROT_EXEC;
            vaddr = eppnt->p_vaddr;
            if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                elf_type |= MAP_FIXED;
            else if (no_base && interp_elf_ex->e_type == ET_DYN)
                load_addr = -vaddr;

            map_addr = elf_map(interpreter, load_addr + vaddr,
                    eppnt, elf_prot, elf_type, total_size);
            total_size = 0;
            if (!*interp_map_addr)
                *interp_map_addr = map_addr;
            error = map_addr;
            if (BAD_ADDR(map_addr))
                goto out;

            if (!load_addr_set &&
                interp_elf_ex->e_type == ET_DYN) {
                load_addr = map_addr - ELF_PAGESTART(vaddr);
                load_addr_set = 1;
            }

            /*
             * Check to see if the section's size will overflow the
             * allowed task size. Note that p_filesz must always be
             * <= p_memsz so it's only necessary to check p_memsz.
             */
            k = load_addr + eppnt->p_vaddr;
            if (BAD_ADDR(k) ||
                eppnt->p_filesz > eppnt->p_memsz ||
                eppnt->p_memsz > TASK_SIZE ||
                TASK_SIZE - eppnt->p_memsz < k) {
                error = -ENOMEM;
                goto out;
            }

            /*
             * Find the end of the file mapping for this phdr, and
             * keep track of the largest address we see for this.
             */
            k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
            if (k > elf_bss)
                elf_bss = k;

            /*
             * Do the same thing for the memory mapping - between
             * elf_bss and last_bss is the bss section.
             */
            k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
            if (k > last_bss)
                last_bss = k;
        }
    }

    /*
     * Now fill out the bss section: first pad the last page from
     * the file up to the page boundary, and zero it from elf_bss
     * up to the end of the page.
     */
    if (padzero(elf_bss)) {
        error = -EFAULT;
        goto out;
    }
    /*
     * Next, align both the file and mem bss up to the page size,
     * since this is where elf_bss was just zeroed up to, and where
     * last_bss will end after the vm_brk() below.
     */
    elf_bss = ELF_PAGEALIGN(elf_bss);
    last_bss = ELF_PAGEALIGN(last_bss);
    /* Finally, if there is still more bss to allocate, do it. */
    if (last_bss > elf_bss) {
        error = vm_brk(elf_bss, last_bss - elf_bss);
        if (error)
            goto out;
    }

    error = load_addr;
out:
    return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
    unsigned long random_variable = 0;

    if ((current->flags & PF_RANDOMIZE) &&
        !(current->personality & ADDR_NO_RANDOMIZE)) {
        random_variable = get_random_long();
        random_variable &= STACK_RND_MASK;
        random_variable <<= PAGE_SHIFT;
    }
#ifdef CONFIG_STACK_GROWSUP
    return PAGE_ALIGN(stack_top) + random_variable;
#else
    return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
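
/*
 * Worked example, assuming PAGE_SHIFT == 12 and the default
 * STACK_RND_MASK of 0x7ff: the random offset is at most
 * 0x7ff << 12 == 0x7ff000 bytes, so the stack top lands anywhere in a
 * window of just under 8 MiB below (or above, for grows-up stacks)
 * PAGE_ALIGN(stack_top).
 */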

static int load_elf_binary(struct linux_binprm *bprm)
{
    struct file *interpreter = NULL; /* to shut gcc up */
    unsigned long load_addr = 0, load_bias = 0;
    int load_addr_set = 0;
    char * elf_interpreter = NULL;
    unsigned long error;
    struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
    unsigned long elf_bss, elf_brk;
    int retval, i;
    unsigned long elf_entry;
    unsigned long interp_load_addr = 0;
    unsigned long start_code, end_code, start_data, end_data;
    unsigned long reloc_func_desc __maybe_unused = 0;
    int executable_stack = EXSTACK_DEFAULT;
    struct pt_regs *regs = current_pt_regs();
    struct {
        struct elfhdr elf_ex;
        struct elfhdr interp_elf_ex;
    } *loc;
    struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;

    loc = kmalloc(sizeof(*loc), GFP_KERNEL);
    if (!loc) {
        retval = -ENOMEM;
        goto out_ret;
    }

    /* Get the exec-header */
    loc->elf_ex = *((struct elfhdr *)bprm->buf);

    retval = -ENOEXEC;
    /* First of all, some simple consistency checks */
    if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
        goto out;

    if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
        goto out;
    if (!elf_check_arch(&loc->elf_ex))
        goto out;
    if (!bprm->file->f_op->mmap)
        goto out;

    elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
    if (!elf_phdata)
        goto out;

    elf_ppnt = elf_phdata;
    elf_bss = 0;
    elf_brk = 0;

    start_code = ~0UL;
    end_code = 0;
    start_data = 0;
    end_data = 0;

    for (i = 0; i < loc->elf_ex.e_phnum; i++) {
        if (elf_ppnt->p_type == PT_INTERP) {
            /* This is the program interpreter used for
             * shared libraries - for now assume that this
             * is an a.out format binary
             */
            retval = -ENOEXEC;
            if (elf_ppnt->p_filesz > PATH_MAX ||
                elf_ppnt->p_filesz < 2)
                goto out_free_ph;

            retval = -ENOMEM;
            elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                          GFP_KERNEL);
            if (!elf_interpreter)
                goto out_free_ph;

            retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                         elf_interpreter,
                         elf_ppnt->p_filesz);
            if (retval != elf_ppnt->p_filesz) {
                if (retval >= 0)
                    retval = -EIO;
                goto out_free_interp;
            }
            /* make sure path is NUL terminated */
            retval = -ENOEXEC;
            if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                goto out_free_interp;

            interpreter = open_exec(elf_interpreter);
            retval = PTR_ERR(interpreter);
            if (IS_ERR(interpreter))
                goto out_free_interp;

            /*
             * If the binary is not readable then enforce
             * mm->dumpable = 0 regardless of the interpreter's
             * permissions.
             */
            would_dump(bprm, interpreter);

            /* Get the exec headers */
            retval = kernel_read(interpreter, 0,
                         (void *)&loc->interp_elf_ex,
                         sizeof(loc->interp_elf_ex));
            if (retval != sizeof(loc->interp_elf_ex)) {
                if (retval >= 0)
                    retval = -EIO;
                goto out_free_dentry;
            }

            break;
        }
        elf_ppnt++;
    }

    elf_ppnt = elf_phdata;
    for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
        switch (elf_ppnt->p_type) {
        case PT_GNU_STACK:
            if (elf_ppnt->p_flags & PF_X)
                executable_stack = EXSTACK_ENABLE_X;
            else
                executable_stack = EXSTACK_DISABLE_X;
            break;

        case PT_LOPROC ... PT_HIPROC:
            retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
                          bprm->file, false,
                          &arch_state);
            if (retval)
                goto out_free_dentry;
            break;
        }

    /* Some simple consistency checks for the interpreter */
    if (elf_interpreter) {
        retval = -ELIBBAD;
        /* Not an ELF interpreter */
        if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
            goto out_free_dentry;
        /* Verify the interpreter has a valid arch */
        if (!elf_check_arch(&loc->interp_elf_ex))
            goto out_free_dentry;

        /* Load the interpreter program headers */
        interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
                           interpreter);
        if (!interp_elf_phdata)
            goto out_free_dentry;

        /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
        elf_ppnt = interp_elf_phdata;
        for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
            switch (elf_ppnt->p_type) {
            case PT_LOPROC ... PT_HIPROC:
                retval = arch_elf_pt_proc(&loc->interp_elf_ex,
                              elf_ppnt, interpreter,
                              true, &arch_state);
                if (retval)
                    goto out_free_dentry;
                break;
            }
    }

    /*
     * Allow arch code to reject the ELF at this point, whilst it's
     * still possible to return an error to the code that invoked
     * the exec syscall.
     */
    retval = arch_check_elf(&loc->elf_ex,
                !!interpreter, &loc->interp_elf_ex,
                &arch_state);
    if (retval)
        goto out_free_dentry;

    /* Flush all traces of the currently running executable */
    retval = flush_old_exec(bprm);
    if (retval)
        goto out_free_dentry;

    /* Do this immediately, since STACK_TOP as used in setup_arg_pages
       may depend on the personality.  */
    SET_PERSONALITY2(loc->elf_ex, &arch_state);
    if (elf_read_implies_exec(loc->elf_ex, executable_stack))
        current->personality |= READ_IMPLIES_EXEC;

    if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
        current->flags |= PF_RANDOMIZE;

    setup_new_exec(bprm);
    install_exec_creds(bprm);

    /* Do this so that we can load the interpreter, if need be.  We will
       change some of these later */
    retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                 executable_stack);
    if (retval < 0)
        goto out_free_dentry;

    current->mm->start_stack = bprm->p;

    /* Now we do a little grungy work by mmapping the ELF image into
       the correct location in memory. */
    for (i = 0, elf_ppnt = elf_phdata;
        i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
        int elf_prot = 0, elf_flags;
        unsigned long k, vaddr;
        unsigned long total_size = 0;

        if (elf_ppnt->p_type != PT_LOAD)
            continue;

        if (unlikely (elf_brk > elf_bss)) {
            unsigned long nbyte;

            /* There was a PT_LOAD segment with p_memsz > p_filesz
               before this one. Map anonymous pages, if needed,
               and clear the area.  */
            retval = set_brk(elf_bss + load_bias,
                     elf_brk + load_bias);
            if (retval)
                goto out_free_dentry;
            nbyte = ELF_PAGEOFFSET(elf_bss);
            if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (nbyte > elf_brk - elf_bss)
                    nbyte = elf_brk - elf_bss;
                if (clear_user((void __user *)elf_bss +
                            load_bias, nbyte)) {
                    /*
                     * This bss-zeroing can fail if the ELF
                     * file specifies odd protections. So
                     * we don't check the return value
                     */
                }
            }
        }

        if (elf_ppnt->p_flags & PF_R)
            elf_prot |= PROT_READ;
        if (elf_ppnt->p_flags & PF_W)
            elf_prot |= PROT_WRITE;
        if (elf_ppnt->p_flags & PF_X)
            elf_prot |= PROT_EXEC;

        elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

        vaddr = elf_ppnt->p_vaddr;
        if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
            elf_flags |= MAP_FIXED;
        } else if (loc->elf_ex.e_type == ET_DYN) {
            /* Try and get dynamic programs out of the way of the
             * default mmap base, as well as whatever program they
             * might try to exec.  This is because the brk will
             * follow the loader, and is not movable.  */
            load_bias = ELF_ET_DYN_BASE - vaddr;
            if (current->flags & PF_RANDOMIZE)
                load_bias += arch_mmap_rnd();
            load_bias = ELF_PAGESTART(load_bias);
            total_size = total_mapping_size(elf_phdata,
                            loc->elf_ex.e_phnum);
            if (!total_size) {
                retval = -EINVAL;
                goto out_free_dentry;
            }
        }

        error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                elf_prot, elf_flags, total_size);
        if (BAD_ADDR(error)) {
            retval = IS_ERR((void *)error) ?
                PTR_ERR((void*)error) : -EINVAL;
            goto out_free_dentry;
        }

        if (!load_addr_set) {
            load_addr_set = 1;
            load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
            if (loc->elf_ex.e_type == ET_DYN) {
                load_bias += error -
                             ELF_PAGESTART(load_bias + vaddr);
                load_addr += load_bias;
                reloc_func_desc = load_bias;
            }
        }
        k = elf_ppnt->p_vaddr;
        if (k < start_code)
            start_code = k;
        if (start_data < k)
            start_data = k;

        /*
         * Check to see if the section's size will overflow the
         * allowed task size. Note that p_filesz must always be
         * <= p_memsz so it is only necessary to check p_memsz.
         */
        if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
            elf_ppnt->p_memsz > TASK_SIZE ||
            TASK_SIZE - elf_ppnt->p_memsz < k) {
            /* set_brk can never work. Avoid overflows. */
            retval = -EINVAL;
            goto out_free_dentry;
        }

        k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

        if (k > elf_bss)
            elf_bss = k;
        if ((elf_ppnt->p_flags & PF_X) && end_code < k)
            end_code = k;
        if (end_data < k)
            end_data = k;
        k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
        if (k > elf_brk)
            elf_brk = k;
    }

    loc->elf_ex.e_entry += load_bias;
    elf_bss += load_bias;
    elf_brk += load_bias;
    start_code += load_bias;
    end_code += load_bias;
    start_data += load_bias;
    end_data += load_bias;

    /* Calling set_brk effectively mmaps the pages that we need
     * for the bss and break sections.  We must do this before
     * mapping in the interpreter, to make sure it doesn't wind
     * up getting placed where the bss needs to go.
     */
    retval = set_brk(elf_bss, elf_brk);
    if (retval)
        goto out_free_dentry;
    if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
        retval = -EFAULT; /* Nobody gets to see this, but.. */
        goto out_free_dentry;
    }

    if (elf_interpreter) {
        unsigned long interp_map_addr = 0;

        elf_entry = load_elf_interp(&loc->interp_elf_ex,
                        interpreter,
                        &interp_map_addr,
                        load_bias, interp_elf_phdata);
        if (!IS_ERR((void *)elf_entry)) {
            /*
             * load_elf_interp() returns relocation
             * adjustment
             */
            interp_load_addr = elf_entry;
            elf_entry += loc->interp_elf_ex.e_entry;
        }
        if (BAD_ADDR(elf_entry)) {
            retval = IS_ERR((void *)elf_entry) ?
                    (int)elf_entry : -EINVAL;
            goto out_free_dentry;
        }
        reloc_func_desc = interp_load_addr;

        allow_write_access(interpreter);
        fput(interpreter);
        kfree(elf_interpreter);
    } else {
        elf_entry = loc->elf_ex.e_entry;
        if (BAD_ADDR(elf_entry)) {
            retval = -EINVAL;
            goto out_free_dentry;
        }
    }

    kfree(interp_elf_phdata);
    kfree(elf_phdata);

    set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
    retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
    if (retval < 0)
        goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

    retval = create_elf_tables(bprm, &loc->elf_ex,
              load_addr, interp_load_addr);
    if (retval < 0)
        goto out;
    /* N.B. passed_fileno might not be initialized? */
    current->mm->end_code = end_code;
    current->mm->start_code = start_code;
    current->mm->start_data = start_data;
    current->mm->end_data = end_data;
    current->mm->start_stack = bprm->p;

    if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
        current->mm->brk = current->mm->start_brk =
            arch_randomize_brk(current->mm);
#ifdef compat_brk_randomized
        current->brk_randomized = 1;
#endif
    }

    if (current->personality & MMAP_PAGE_ZERO) {
        /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
           and some applications "depend" upon this behavior.
           Since we do not have the power to recompile these, we
           emulate the SVr4 behavior. Sigh. */
        error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                MAP_FIXED | MAP_PRIVATE, 0);
    }

#ifdef ELF_PLAT_INIT
    /*
     * The ABI may specify that certain registers be set up in special
     * ways (on i386 %edx is the address of a DT_FINI function, for
     * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
     * that the e_entry field is the address of the function descriptor
     * for the startup routine, rather than the address of the startup
     * routine itself.  This macro performs whatever initialization to
     * the regs structure is required as well as any relocations to the
     * function descriptor entries when executing dynamically linked apps.
     */
    ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

    start_thread(regs, elf_entry, bprm->p);
    retval = 0;
out:
    kfree(loc);
out_ret:
    return retval;

    /* error cleanup */
out_free_dentry:
    kfree(interp_elf_phdata);
    allow_write_access(interpreter);
    if (interpreter)
        fput(interpreter);
out_free_interp:
    kfree(elf_interpreter);
out_free_ph:
    kfree(elf_phdata);
    goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
    struct elf_phdr *elf_phdata;
    struct elf_phdr *eppnt;
    unsigned long elf_bss, bss, len;
    int retval, error, i, j;
    struct elfhdr elf_ex;

    error = -ENOEXEC;
    retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
    if (retval != sizeof(elf_ex))
        goto out;

    if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
        goto out;

    /* First of all, some simple consistency checks */
    if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
        !elf_check_arch(&elf_ex) || !file->f_op->mmap)
        goto out;

    /* Now read in all of the header information */

    j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
    /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

    error = -ENOMEM;
    elf_phdata = kmalloc(j, GFP_KERNEL);
    if (!elf_phdata)
        goto out;

    eppnt = elf_phdata;
    error = -ENOEXEC;
    retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
    if (retval != j)
        goto out_free_ph;

    for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
        if ((eppnt + i)->p_type == PT_LOAD)
            j++;
    if (j != 1)
        goto out_free_ph;

    while (eppnt->p_type != PT_LOAD)
        eppnt++;

    /* Now use mmap to map the library into memory. */
    error = vm_mmap(file,
            ELF_PAGESTART(eppnt->p_vaddr),
            (eppnt->p_filesz +
             ELF_PAGEOFFSET(eppnt->p_vaddr)),
            PROT_READ | PROT_WRITE | PROT_EXEC,
            MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
            (eppnt->p_offset -
             ELF_PAGEOFFSET(eppnt->p_vaddr)));
    if (error != ELF_PAGESTART(eppnt->p_vaddr))
        goto out_free_ph;

    elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
    if (padzero(elf_bss)) {
        error = -EFAULT;
        goto out_free_ph;
    }

    len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                ELF_MIN_ALIGN - 1);
    bss = eppnt->p_memsz + eppnt->p_vaddr;
    if (bss > len) {
        error = vm_brk(len, bss - len);
        if (error)
            goto out_free_ph;
    }
    error = 0;

out_free_ph:
    kfree(elf_phdata);
out:
    return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel
 * mappings that are useful for post-mortem analysis are included in every
 * core dump. That way we ensure that the core dump is fully interpretable
 * later without having to match up the same kernel and hardware config to
 * see what PC values meant. These special mappings include the vDSO,
 * vsyscall, and other architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
    /* Any vsyscall mappings? */
    if (vma == get_gate_vma(vma->vm_mm))
        return true;

    /*
     * Assume that all vmas with a .name op should always be dumped.
     * If this changes, a new vm_ops field can easily be added.
     */
    if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
        return true;

    /*
     * arch_vma_name() returns non-NULL for special architecture mappings,
     * such as vDSO sections.
     */
    if (arch_vma_name(vma))
        return true;

    return false;
}

/*
 * Decide how much of a segment to dump: all of it, part of it, or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

    /* always dump the vdso and vsyscall sections */
    if (always_dump_vma(vma))
        goto whole;

    if (vma->vm_flags & VM_DONTDUMP)
        return 0;

    /* support for DAX */
    if (vma_is_dax(vma)) {
        if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
            goto whole;
        if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
            goto whole;
        return 0;
    }

    /* Hugetlb memory check */
    if (vma->vm_flags & VM_HUGETLB) {
        if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
            goto whole;
        if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
            goto whole;
        return 0;
    }

    /* Do not dump I/O mapped devices or special mappings */
    if (vma->vm_flags & VM_IO)
        return 0;

    /* By default, dump shared memory if mapped from an anonymous file. */
    if (vma->vm_flags & VM_SHARED) {
        if (file_inode(vma->vm_file)->i_nlink == 0 ?
            FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
            goto whole;
        return 0;
    }

    /* Dump segments that have been written to.  */
    if (vma->anon_vma && FILTER(ANON_PRIVATE))
        goto whole;
    if (vma->vm_file == NULL)
        return 0;

    if (FILTER(MAPPED_PRIVATE))
        goto whole;

    /*
     * If this looks like the beginning of a DSO or executable mapping,
     * check for an ELF header.  If we find one, dump the first page to
     * aid in determining what was mapped here.
     */
    if (FILTER(ELF_HEADERS) &&
        vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
        u32 __user *header = (u32 __user *) vma->vm_start;
        u32 word;
        mm_segment_t fs = get_fs();
        /*
         * Doing it this way gets the constant folded by GCC.
         */
        union {
            u32 cmp;
            char elfmag[SELFMAG];
        } magic;
        BUILD_BUG_ON(SELFMAG != sizeof word);
        magic.elfmag[EI_MAG0] = ELFMAG0;
        magic.elfmag[EI_MAG1] = ELFMAG1;
        magic.elfmag[EI_MAG2] = ELFMAG2;
        magic.elfmag[EI_MAG3] = ELFMAG3;
        /*
         * Switch to the user "segment" for get_user(),
         * then put back what elf_core_dump() had in place.
         */
        set_fs(USER_DS);
        if (unlikely(get_user(word, header)))
            word = 0;
        set_fs(fs);
        if (word == magic.cmp)
            return PAGE_SIZE;
    }

#undef  FILTER

    return 0;

whole:
    return vma->vm_end - vma->vm_start;
}
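
/*
 * The MMF_DUMP_* bits tested by FILTER() above form the per-process
 * coredump filter, which userspace can set through
 * /proc/<pid>/coredump_filter (see core(5)). For example, writing 0x33
 * enables anonymous private and shared memory (bits 0 and 1), ELF
 * headers (bit 4), and private huge pages (bit 5).
 */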

/* An ELF note in memory */
struct memelfnote
{
    const char *name;
    int type;
    unsigned int datasz;
    void *data;
};

static int notesize(struct memelfnote *en)
{
    int sz;

    sz = sizeof(struct elf_note);
    sz += roundup(strlen(en->name) + 1, 4);
    sz += roundup(en->datasz, 4);

    return sz;
}
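
/*
 * Worked example: struct elf_note is three 32-bit words (n_namesz,
 * n_descsz, n_type), so for a note named "CORE" with a hypothetical
 * 336-byte payload, notesize() returns
 * 12 + roundup(5, 4) + roundup(336, 4) == 12 + 8 + 336 == 356 bytes.
 */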

static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
    struct elf_note en;
    en.n_namesz = strlen(men->name) + 1;
    en.n_descsz = men->datasz;
    en.n_type = men->type;

    return dump_emit(cprm, &en, sizeof(en)) &&
        dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
        dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
}

static void fill_elf_header(struct elfhdr *elf, int segs,
                u16 machine, u32 flags)
{
    memset(elf, 0, sizeof(*elf));

    memcpy(elf->e_ident, ELFMAG, SELFMAG);
    elf->e_ident[EI_CLASS] = ELF_CLASS;
    elf->e_ident[EI_DATA] = ELF_DATA;
    elf->e_ident[EI_VERSION] = EV_CURRENT;
    elf->e_ident[EI_OSABI] = ELF_OSABI;

    elf->e_type = ET_CORE;
    elf->e_machine = machine;
    elf->e_version = EV_CURRENT;
    elf->e_phoff = sizeof(struct elfhdr);
    elf->e_flags = flags;
    elf->e_ehsize = sizeof(struct elfhdr);
    elf->e_phentsize = sizeof(struct elf_phdr);
    elf->e_phnum = segs;

    return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
    phdr->p_type = PT_NOTE;
    phdr->p_offset = offset;
    phdr->p_vaddr = 0;
    phdr->p_paddr = 0;
    phdr->p_filesz = sz;
    phdr->p_memsz = 0;
    phdr->p_flags = 0;
    phdr->p_align = 0;
    return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
        unsigned int sz, void *data)
{
    note->name = name;
    note->type = type;
    note->datasz = sz;
    note->data = data;
    return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
        struct task_struct *p, long signr)
{
    prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
    prstatus->pr_sigpend = p->pending.signal.sig[0];
    prstatus->pr_sighold = p->blocked.sig[0];
    rcu_read_lock();
    prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
    rcu_read_unlock();
    prstatus->pr_pid = task_pid_vnr(p);
    prstatus->pr_pgrp = task_pgrp_vnr(p);
    prstatus->pr_sid = task_session_vnr(p);
    if (thread_group_leader(p)) {
        struct task_cputime cputime;

        /*
         * This is the record for the group leader.  It shows the
         * group-wide total, not its individual thread total.
         */
        thread_group_cputime(p, &cputime);
        cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
        cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
    } else {
        cputime_t utime, stime;

        task_cputime(p, &utime, &stime);
        cputime_to_timeval(utime, &prstatus->pr_utime);
        cputime_to_timeval(stime, &prstatus->pr_stime);
    }
    cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
    cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
               struct mm_struct *mm)
{
    const struct cred *cred;
    unsigned int i, len;

    /* first copy the parameters from user space */
    memset(psinfo, 0, sizeof(struct elf_prpsinfo));

    len = mm->arg_end - mm->arg_start;
    if (len >= ELF_PRARGSZ)
        len = ELF_PRARGSZ-1;
    if (copy_from_user(&psinfo->pr_psargs,
                   (const char __user *)mm->arg_start, len))
        return -EFAULT;
    for (i = 0; i < len; i++)
        if (psinfo->pr_psargs[i] == 0)
            psinfo->pr_psargs[i] = ' ';
    psinfo->pr_psargs[len] = 0;

    rcu_read_lock();
    psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
    rcu_read_unlock();
    psinfo->pr_pid = task_pid_vnr(p);
    psinfo->pr_pgrp = task_pgrp_vnr(p);
    psinfo->pr_sid = task_session_vnr(p);

    i = p->state ? ffz(~p->state) + 1 : 0;
    psinfo->pr_state = i;
    psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
    psinfo->pr_zomb = psinfo->pr_sname == 'Z';
    psinfo->pr_nice = task_nice(p);
    psinfo->pr_flag = p->flags;
    rcu_read_lock();
    cred = __task_cred(p);
    SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
    SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
    rcu_read_unlock();
    strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

    return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
    elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
    int i = 0;
    do
        i += 2;
    while (auxv[i - 2] != AT_NULL);
    fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}
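
/*
 * Worked example: if saved_auxv holds { AT_PAGESZ, 4096, AT_NULL, 0 },
 * the loop advances i to 2 (auxv[0] == AT_PAGESZ), then to 4
 * (auxv[2] == AT_NULL) and stops, so the note covers all four words,
 * including the terminating AT_NULL pair.
 */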

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
        const siginfo_t *siginfo)
{
    mm_segment_t old_fs = get_fs();
    set_fs(KERNEL_DS);
    copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
    set_fs(old_fs);
    fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
1519 static int fill_files_note(struct memelfnote *note)
1520 {
1521     struct vm_area_struct *vma;
1522     unsigned count, size, names_ofs, remaining, n;
1523     user_long_t *data;
1524     user_long_t *start_end_ofs;
1525     char *name_base, *name_curpos;
1526 
1527     /* *Estimated* file count and total data size needed */
1528     count = current->mm->map_count;
1529     size = count * 64;
1530 
1531     names_ofs = (2 + 3 * count) * sizeof(data[0]);
1532  alloc:
1533     if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1534         return -EINVAL;
1535     size = round_up(size, PAGE_SIZE);
1536     data = vmalloc(size);
1537     if (!data)
1538         return -ENOMEM;
1539 
1540     start_end_ofs = data + 2;
1541     name_base = name_curpos = ((char *)data) + names_ofs;
1542     remaining = size - names_ofs;
1543     count = 0;
1544     for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1545         struct file *file;
1546         const char *filename;
1547 
1548         file = vma->vm_file;
1549         if (!file)
1550             continue;
1551         filename = file_path(file, name_curpos, remaining);
1552         if (IS_ERR(filename)) {
1553             if (PTR_ERR(filename) == -ENAMETOOLONG) {
1554                 vfree(data);
1555                 size = size * 5 / 4;
1556                 goto alloc;
1557             }
1558             continue;
1559         }
1560 
1561         /* file_path() fills at the end, move name down */
1562         /* n = strlen(filename) + 1: */
1563         n = (name_curpos + remaining) - filename;
1564         remaining = filename - name_curpos;
1565         memmove(name_curpos, filename, n);
1566         name_curpos += n;
1567 
1568         *start_end_ofs++ = vma->vm_start;
1569         *start_end_ofs++ = vma->vm_end;
1570         *start_end_ofs++ = vma->vm_pgoff;
1571         count++;
1572     }
1573 
1574     /* Now we know exact count of files, can store it */
1575     data[0] = count;
1576     data[1] = PAGE_SIZE;
1577     /*
1578      * count is usually less than current->mm->map_count, since file-less
1579      * VMAs were skipped above; move the names down over the unused slots.
1580      */
1581     n = current->mm->map_count - count;
1582     if (n != 0) {
1583         unsigned shift_bytes = n * 3 * sizeof(data[0]);
1584         memmove(name_base - shift_bytes, name_base,
1585             name_curpos - name_base);
1586         name_curpos -= shift_bytes;
1587     }
1588 
1589     size = name_curpos - (char *)data;
1590     fill_note(note, "CORE", NT_FILE, size, data);
1591     return 0;
1592 }
1593 
1594 #ifdef CORE_DUMP_USE_REGSET
1595 #include <linux/regset.h>
1596 
1597 struct elf_thread_core_info {
1598     struct elf_thread_core_info *next;
1599     struct task_struct *task;
1600     struct elf_prstatus prstatus;
1601     struct memelfnote notes[];
1602 };
1603 
1604 struct elf_note_info {
1605     struct elf_thread_core_info *thread;
1606     struct memelfnote psinfo;
1607     struct memelfnote signote;
1608     struct memelfnote auxv;
1609     struct memelfnote files;
1610     user_siginfo_t csigdata;
1611     size_t size;
1612     int thread_notes;
1613 };
1614 
1615 /*
1616  * When a regset has a writeback hook, we call it on each thread before
1617  * dumping user memory.  On register window machines, this makes sure the
1618  * user memory backing the register data is up to date before we read it.
1619  */
1620 static void do_thread_regset_writeback(struct task_struct *task,
1621                        const struct user_regset *regset)
1622 {
1623     if (regset->writeback)
1624         regset->writeback(task, regset, 1);
1625 }
1626 
1627 #ifndef PRSTATUS_SIZE
1628 #define PRSTATUS_SIZE(S, R) sizeof(S)
1629 #endif
1630 
1631 #ifndef SET_PR_FPVALID
1632 #define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
1633 #endif
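/*
 * Architectures may override PRSTATUS_SIZE and SET_PR_FPVALID when the
 * note layout depends on the regset size (the R argument); the defaults
 * above just use the native struct layout.
 */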
1634 
1635 static int fill_thread_core_info(struct elf_thread_core_info *t,
1636                  const struct user_regset_view *view,
1637                  long signr, size_t *total)
1638 {
1639     unsigned int i;
1640     unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
1641 
1642     /*
1643      * NT_PRSTATUS is the one special case, because the regset data
1644      * goes into the pr_reg field inside the note contents, rather
1645      * than being the whole note contents.  We fill the rest in here.
1646      * We assume that regset 0 is NT_PRSTATUS.
1647      */
1648     fill_prstatus(&t->prstatus, t->task, signr);
1649     (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
1650                     &t->prstatus.pr_reg, NULL);
1651 
1652     fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1653           PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
1654     *total += notesize(&t->notes[0]);
1655 
1656     do_thread_regset_writeback(t->task, &view->regsets[0]);
1657 
1658     /*
1659      * Each other regset might generate a note too.  For each regset
1660      * that has no core_note_type or is inactive, we leave t->notes[i]
1661      * all zero and we'll know to skip writing it later.
1662      */
1663     for (i = 1; i < view->n; ++i) {
1664         const struct user_regset *regset = &view->regsets[i];
1665         do_thread_regset_writeback(t->task, regset);
1666         if (regset->core_note_type && regset->get &&
1667             (!regset->active || regset->active(t->task, regset))) {
1668             int ret;
1669             size_t size = regset->n * regset->size;
1670             void *data = kmalloc(size, GFP_KERNEL);
1671             if (unlikely(!data))
1672                 return 0;
1673             ret = regset->get(t->task, regset,
1674                       0, size, data, NULL);
1675             if (unlikely(ret))
1676                 kfree(data);
1677             else {
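                 /*
                  * NT_PRFPREG traditionally goes in a "CORE"-named note
                  * and is advertised via pr_fpvalid; every other regset
                  * gets a "LINUX"-named note of its own core_note_type.
                  */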
1678                 if (regset->core_note_type != NT_PRFPREG)
1679                     fill_note(&t->notes[i], "LINUX",
1680                           regset->core_note_type,
1681                           size, data);
1682                 else {
1683                     SET_PR_FPVALID(&t->prstatus,
1684                             1, regset_size);
1685                     fill_note(&t->notes[i], "CORE",
1686                           NT_PRFPREG, size, data);
1687                 }
1688                 *total += notesize(&t->notes[i]);
1689             }
1690         }
1691     }
1692 
1693     return 1;
1694 }
1695 
1696 static int fill_note_info(struct elfhdr *elf, int phdrs,
1697               struct elf_note_info *info,
1698               const siginfo_t *siginfo, struct pt_regs *regs)
1699 {
1700     struct task_struct *dump_task = current;
1701     const struct user_regset_view *view = task_user_regset_view(dump_task);
1702     struct elf_thread_core_info *t;
1703     struct elf_prpsinfo *psinfo;
1704     struct core_thread *ct;
1705     unsigned int i;
1706 
1707     info->size = 0;
1708     info->thread = NULL;
1709 
1710     psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1711     if (psinfo == NULL) {
1712         info->psinfo.data = NULL; /* So we don't free this wrongly */
1713         return 0;
1714     }
1715 
1716     fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1717 
1718     /*
1719      * Figure out how many notes we're going to need for each thread.
1720      */
1721     info->thread_notes = 0;
1722     for (i = 0; i < view->n; ++i)
1723         if (view->regsets[i].core_note_type != 0)
1724             ++info->thread_notes;
1725 
1726     /*
1727      * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1728      * since it is our one special case.
1729      */
1730     if (unlikely(info->thread_notes == 0) ||
1731         unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1732         WARN_ON(1);
1733         return 0;
1734     }
1735 
1736     /*
1737      * Initialize the ELF file header.
1738      */
1739     fill_elf_header(elf, phdrs,
1740             view->e_machine, view->e_flags);
1741 
1742     /*
1743      * Allocate a structure for each thread.
1744      */
1745     for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
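             /*
              * The notes[] flexible array gets one slot per regset that
              * has a core_note_type, as counted in thread_notes above.
              */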
1746         t = kzalloc(offsetof(struct elf_thread_core_info,
1747                      notes[info->thread_notes]),
1748                 GFP_KERNEL);
1749         if (unlikely(!t))
1750             return 0;
1751 
1752         t->task = ct->task;
1753         if (ct->task == dump_task || !info->thread) {
1754             t->next = info->thread;
1755             info->thread = t;
1756         } else {
1757             /*
1758              * Make sure to keep the original task at
1759              * the head of the list.
1760              */
1761             t->next = info->thread->next;
1762             info->thread->next = t;
1763         }
1764     }
1765 
1766     /*
1767      * Now fill in each thread's information.
1768      */
1769     for (t = info->thread; t != NULL; t = t->next)
1770         if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1771             return 0;
1772 
1773     /*
1774      * Fill in the two process-wide notes.
1775      */
1776     fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1777     info->size += notesize(&info->psinfo);
1778 
1779     fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1780     info->size += notesize(&info->signote);
1781 
1782     fill_auxv_note(&info->auxv, current->mm);
1783     info->size += notesize(&info->auxv);
1784 
1785     if (fill_files_note(&info->files) == 0)
1786         info->size += notesize(&info->files);
1787 
1788     return 1;
1789 }
1790 
1791 static size_t get_note_info_size(struct elf_note_info *info)
1792 {
1793     return info->size;
1794 }
1795 
1796 /*
1797  * Write all the notes for each thread.  When writing the first thread, the
1798  * process-wide notes are interleaved after the first thread-specific note.
1799  */
1800 static int write_note_info(struct elf_note_info *info,
1801                struct coredump_params *cprm)
1802 {
1803     bool first = true;
1804     struct elf_thread_core_info *t = info->thread;
1805 
1806     do {
1807         int i;
1808 
1809         if (!writenote(&t->notes[0], cprm))
1810             return 0;
1811 
1812         if (first && !writenote(&info->psinfo, cprm))
1813             return 0;
1814         if (first && !writenote(&info->signote, cprm))
1815             return 0;
1816         if (first && !writenote(&info->auxv, cprm))
1817             return 0;
1818         if (first && info->files.data &&
1819                 !writenote(&info->files, cprm))
1820             return 0;
1821 
1822         for (i = 1; i < info->thread_notes; ++i)
1823             if (t->notes[i].data &&
1824                 !writenote(&t->notes[i], cprm))
1825                 return 0;
1826 
1827         first = false;
1828         t = t->next;
1829     } while (t);
1830 
1831     return 1;
1832 }
1833 
1834 static void free_note_info(struct elf_note_info *info)
1835 {
1836     struct elf_thread_core_info *threads = info->thread;
1837     while (threads) {
1838         unsigned int i;
1839         struct elf_thread_core_info *t = threads;
1840         threads = t->next;
1841         WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1842         for (i = 1; i < info->thread_notes; ++i)
1843             kfree(t->notes[i].data);
1844         kfree(t);
1845     }
1846     kfree(info->psinfo.data);
1847     vfree(info->files.data);
1848 }
1849 
1850 #else
1851 
1852 /* Here is the structure in which status of each thread is captured. */
1853 struct elf_thread_status
1854 {
1855     struct list_head list;
1856     struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1857     elf_fpregset_t fpu;     /* NT_PRFPREG */
1858     struct task_struct *thread;
1859 #ifdef ELF_CORE_COPY_XFPREGS
1860     elf_fpxregset_t xfpu;       /* ELF_CORE_XFPREG_TYPE */
1861 #endif
1862     struct memelfnote notes[3];
1863     int num_notes;
1864 };
1865 
1866 /*
1867  * In order to add per-thread information to the ELF core file, we keep
1868  * a linked list of every thread's pr_status and then create a single
1869  * section for them in the final core file.
1870  */
1871 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1872 {
1873     int sz = 0;
1874     struct task_struct *p = t->thread;
1875     t->num_notes = 0;
1876 
1877     fill_prstatus(&t->prstatus, p, signr);
1878     elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1879
1880     fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1881           &(t->prstatus));
1882     t->num_notes++;
1883     sz += notesize(&t->notes[0]);
1884 
1885     if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1886                                 &t->fpu))) {
1887         fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1888               &(t->fpu));
1889         t->num_notes++;
1890         sz += notesize(&t->notes[1]);
1891     }
1892 
1893 #ifdef ELF_CORE_COPY_XFPREGS
1894     if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1895         fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1896               sizeof(t->xfpu), &t->xfpu);
1897         t->num_notes++;
1898         sz += notesize(&t->notes[2]);
1899     }
1900 #endif
1901     return sz;
1902 }
1903 
1904 struct elf_note_info {
1905     struct memelfnote *notes;
1906     struct memelfnote *notes_files;
1907     struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1908     struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1909     struct list_head thread_list;
1910     elf_fpregset_t *fpu;
1911 #ifdef ELF_CORE_COPY_XFPREGS
1912     elf_fpxregset_t *xfpu;
1913 #endif
1914     user_siginfo_t csigdata;
1915     int thread_status_size;
1916     int numnote;
1917 };
1918 
1919 static int elf_note_info_init(struct elf_note_info *info)
1920 {
1921     memset(info, 0, sizeof(*info));
1922     INIT_LIST_HEAD(&info->thread_list);
1923 
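         /*
          * On any allocation failure we simply return 0; the caller then
          * unwinds through free_note_info(), which copes with a partially
          * initialized (zeroed) structure.
          */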
1924     /* Allocate space for ELF notes */
1925     info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1926     if (!info->notes)
1927         return 0;
1928     info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1929     if (!info->psinfo)
1930         return 0;
1931     info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1932     if (!info->prstatus)
1933         return 0;
1934     info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1935     if (!info->fpu)
1936         return 0;
1937 #ifdef ELF_CORE_COPY_XFPREGS
1938     info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1939     if (!info->xfpu)
1940         return 0;
1941 #endif
1942     return 1;
1943 }
1944 
1945 static int fill_note_info(struct elfhdr *elf, int phdrs,
1946               struct elf_note_info *info,
1947               const siginfo_t *siginfo, struct pt_regs *regs)
1948 {
1949     struct list_head *t;
1950     struct core_thread *ct;
1951     struct elf_thread_status *ets;
1952 
1953     if (!elf_note_info_init(info))
1954         return 0;
1955 
1956     for (ct = current->mm->core_state->dumper.next;
1957                     ct; ct = ct->next) {
1958         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1959         if (!ets)
1960             return 0;
1961 
1962         ets->thread = ct->task;
1963         list_add(&ets->list, &info->thread_list);
1964     }
1965 
1966     list_for_each(t, &info->thread_list) {
1967         int sz;
1968 
1969         ets = list_entry(t, struct elf_thread_status, list);
1970         sz = elf_dump_thread_status(siginfo->si_signo, ets);
1971         info->thread_status_size += sz;
1972     }
1973     /* now collect the dump for the current task */
1974     memset(info->prstatus, 0, sizeof(*info->prstatus));
1975     fill_prstatus(info->prstatus, current, siginfo->si_signo);
1976     elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1977 
1978     /* Set up header */
1979     fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1980 
1981     /*
1982      * Set up the notes in similar form to SVR4 core dumps made
1983      * with info from their /proc.
1984      */
1985 
1986     fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1987           sizeof(*info->prstatus), info->prstatus);
1988     fill_psinfo(info->psinfo, current->group_leader, current->mm);
1989     fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1990           sizeof(*info->psinfo), info->psinfo);
1991 
1992     fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1993     fill_auxv_note(info->notes + 3, current->mm);
1994     info->numnote = 4;
1995 
1996     if (fill_files_note(info->notes + info->numnote) == 0) {
1997         info->notes_files = info->notes + info->numnote;
1998         info->numnote++;
1999     }
2000 
2001     /* Try to dump the FPU. */
2002     info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2003                                    info->fpu);
2004     if (info->prstatus->pr_fpvalid)
2005         fill_note(info->notes + info->numnote++,
2006               "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2007 #ifdef ELF_CORE_COPY_XFPREGS
2008     if (elf_core_copy_task_xfpregs(current, info->xfpu))
2009         fill_note(info->notes + info->numnote++,
2010               "LINUX", ELF_CORE_XFPREG_TYPE,
2011               sizeof(*info->xfpu), info->xfpu);
2012 #endif
2013 
2014     return 1;
2015 }
2016 
2017 static size_t get_note_info_size(struct elf_note_info *info)
2018 {
2019     int sz = 0;
2020     int i;
2021 
2022     for (i = 0; i < info->numnote; i++)
2023         sz += notesize(info->notes + i);
2024 
2025     sz += info->thread_status_size;
2026 
2027     return sz;
2028 }
2029 
2030 static int write_note_info(struct elf_note_info *info,
2031                struct coredump_params *cprm)
2032 {
2033     int i;
2034     struct list_head *t;
2035 
2036     for (i = 0; i < info->numnote; i++)
2037         if (!writenote(info->notes + i, cprm))
2038             return 0;
2039 
2040     /* write out the thread status notes section */
2041     list_for_each(t, &info->thread_list) {
2042         struct elf_thread_status *tmp =
2043                 list_entry(t, struct elf_thread_status, list);
2044 
2045         for (i = 0; i < tmp->num_notes; i++)
2046             if (!writenote(&tmp->notes[i], cprm))
2047                 return 0;
2048     }
2049 
2050     return 1;
2051 }
2052 
2053 static void free_note_info(struct elf_note_info *info)
2054 {
2055     while (!list_empty(&info->thread_list)) {
2056         struct list_head *tmp = info->thread_list.next;
2057         list_del(tmp);
2058         kfree(list_entry(tmp, struct elf_thread_status, list));
2059     }
2060 
2061     /* Free data possibly allocated by fill_files_note(): */
2062     if (info->notes_files)
2063         vfree(info->notes_files->data);
2064 
2065     kfree(info->prstatus);
2066     kfree(info->psinfo);
2067     kfree(info->notes);
2068     kfree(info->fpu);
2069 #ifdef ELF_CORE_COPY_XFPREGS
2070     kfree(info->xfpu);
2071 #endif
2072 }
2073 
2074 #endif
2075 
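/*
 * The "gate" VMA (the x86 vsyscall page, for instance) is not on the
 * mm's vma list, so the helpers below tack it on after the last
 * regular mapping has been visited.
 */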
2076 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2077                     struct vm_area_struct *gate_vma)
2078 {
2079     struct vm_area_struct *ret = tsk->mm->mmap;
2080 
2081     if (ret)
2082         return ret;
2083     return gate_vma;
2084 }
2085 /*
2086  * Helper function for iterating across a vma list.  It ensures that the caller
2087  * will visit `gate_vma' prior to terminating the search.
2088  */
2089 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2090                     struct vm_area_struct *gate_vma)
2091 {
2092     struct vm_area_struct *ret;
2093 
2094     ret = this_vma->vm_next;
2095     if (ret)
2096         return ret;
2097     if (this_vma == gate_vma)
2098         return NULL;
2099     return gate_vma;
2100 }
2101 
2102 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2103                  elf_addr_t e_shoff, int segs)
2104 {
2105     elf->e_shoff = e_shoff;
2106     elf->e_shentsize = sizeof(*shdr4extnum);
2107     elf->e_shnum = 1;
2108     elf->e_shstrndx = SHN_UNDEF;
2109 
2110     memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2111 
2112     shdr4extnum->sh_type = SHT_NULL;
2113     shdr4extnum->sh_size = elf->e_shnum;
2114     shdr4extnum->sh_link = elf->e_shstrndx;
2115     shdr4extnum->sh_info = segs;
2116 }
2117 
2118 /*
2119  * Actual dumper
2120  *
2121  * This is a two-pass process; first we find the offsets of the bits,
2122      * and then they are actually written out.  If the core file size limit
2123      * is hit, the dump is simply truncated.
2124  */
2125 static int elf_core_dump(struct coredump_params *cprm)
2126 {
2127     int has_dumped = 0;
2128     mm_segment_t fs;
2129     int segs, i;
2130     size_t vma_data_size = 0;
2131     struct vm_area_struct *vma, *gate_vma;
2132     struct elfhdr *elf = NULL;
2133     loff_t offset = 0, dataoff;
2134     struct elf_note_info info = { };
2135     struct elf_phdr *phdr4note = NULL;
2136     struct elf_shdr *shdr4extnum = NULL;
2137     Elf_Half e_phnum;
2138     elf_addr_t e_shoff;
2139     elf_addr_t *vma_filesz = NULL;
2140 
2141     /*
2142      * We no longer stop all VM operations.
2143      *
2144      * Any process that could change map_count or the mmap/vma pages is
2145      * blocked in do_exit() until current finishes this core dump, so
2146      * the address space cannot change under us.
2147      *
2148      * Only ptrace can touch these memory addresses, but it doesn't change
2149      * the map_count or the pages allocated. So no possibility of crashing
2150      * exists while dumping the mm->vm_next areas to the core file.
2151      */
2152
2153     /* alloc memory for large data structures: too large to be on stack */
2154     elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2155     if (!elf)
2156         goto out;
2157     /*
2158      * The number of segs is recorded in the ELF header as a 16-bit value.
2159      * Please check the DEFAULT_MAX_MAP_COUNT definition if you modify this.
2160      */
2161     segs = current->mm->map_count;
2162     segs += elf_core_extra_phdrs();
2163 
2164     gate_vma = get_gate_vma(current->mm);
2165     if (gate_vma != NULL)
2166         segs++;
2167 
2168     /* for notes section */
2169     segs++;
2170 
2171     /* If segs > PN_XNUM(0xffff), e_phnum would overflow.  To avoid
2172      * this, the kernel supports extended numbering.  Have a look at
2173      * include/linux/elf.h for further information. */
2174     e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2175 
2176     /*
2177      * Collect all the non-memory information about the process for the
2178      * notes.  This also sets up the file header.
2179      */
2180     if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2181         goto cleanup;
2182 
2183     has_dumped = 1;
2184 
2185     fs = get_fs();
2186     set_fs(KERNEL_DS);
2187 
2188     offset += sizeof(*elf);             /* Elf header */
2189     offset += segs * sizeof(struct elf_phdr);   /* Program headers */
2190 
2191     /* Write notes phdr entry */
2192     {
2193         size_t sz = get_note_info_size(&info);
2194 
2195         sz += elf_coredump_extra_notes_size();
2196 
2197         phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2198         if (!phdr4note)
2199             goto end_coredump;
2200 
2201         fill_elf_note_phdr(phdr4note, sz, offset);
2202         offset += sz;
2203     }
2204 
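         /*
          * Page-align the file offset at which the memory segments will
          * start; the ELF header, program headers and notes all live
          * before dataoff.
          */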
2205     dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2206 
2207     if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
2208         goto end_coredump;
2209     vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
2210     if (!vma_filesz)
2211         goto end_coredump;
2212 
2213     for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2214             vma = next_vma(vma, gate_vma)) {
2215         unsigned long dump_size;
2216 
2217         dump_size = vma_dump_size(vma, cprm->mm_flags);
2218         vma_filesz[i++] = dump_size;
2219         vma_data_size += dump_size;
2220     }
2221 
2222     offset += vma_data_size;
2223     offset += elf_core_extra_data_size();
2224     e_shoff = offset;
2225 
2226     if (e_phnum == PN_XNUM) {
2227         shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2228         if (!shdr4extnum)
2229             goto end_coredump;
2230         fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2231     }
2232 
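         /*
          * Reset offset to the start of the segment data; the PT_LOAD
          * headers written below record where each VMA's bytes will land.
          */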
2233     offset = dataoff;
2234 
2235     if (!dump_emit(cprm, elf, sizeof(*elf)))
2236         goto end_coredump;
2237 
2238     if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2239         goto end_coredump;
2240 
2241     /* Write program headers for segments dump */
2242     for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2243             vma = next_vma(vma, gate_vma)) {
2244         struct elf_phdr phdr;
2245 
2246         phdr.p_type = PT_LOAD;
2247         phdr.p_offset = offset;
2248         phdr.p_vaddr = vma->vm_start;
2249         phdr.p_paddr = 0;
2250         phdr.p_filesz = vma_filesz[i++];
2251         phdr.p_memsz = vma->vm_end - vma->vm_start;
2252         offset += phdr.p_filesz;
2253         phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2254         if (vma->vm_flags & VM_WRITE)
2255             phdr.p_flags |= PF_W;
2256         if (vma->vm_flags & VM_EXEC)
2257             phdr.p_flags |= PF_X;
2258         phdr.p_align = ELF_EXEC_PAGESIZE;
2259 
2260         if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2261             goto end_coredump;
2262     }
2263 
2264     if (!elf_core_write_extra_phdrs(cprm, offset))
2265         goto end_coredump;
2266 
2267     /* write out the notes section */
2268     if (!write_note_info(&info, cprm))
2269         goto end_coredump;
2270 
2271     if (elf_coredump_extra_notes_write(cprm))
2272         goto end_coredump;
2273 
2274     /* Align to page */
2275     if (!dump_skip(cprm, dataoff - cprm->pos))
2276         goto end_coredump;
2277 
2278     for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2279             vma = next_vma(vma, gate_vma)) {
2280         unsigned long addr;
2281         unsigned long end;
2282 
2283         end = vma->vm_start + vma_filesz[i++];
2284 
2285         for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2286             struct page *page;
2287             int stop;
2288 
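                 /*
                  * get_dump_page() returns NULL for holes and pages we
                  * may not touch; dump_skip() then leaves a zero-filled
                  * (sparse where possible) gap in the core file instead.
                  */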
2289             page = get_dump_page(addr);
2290             if (page) {
2291                 void *kaddr = kmap(page);
2292                 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2293                 kunmap(page);
2294                 put_page(page);
2295             } else
2296                 stop = !dump_skip(cprm, PAGE_SIZE);
2297             if (stop)
2298                 goto end_coredump;
2299         }
2300     }
2301     dump_truncate(cprm);
2302 
2303     if (!elf_core_write_extra_data(cprm))
2304         goto end_coredump;
2305 
2306     if (e_phnum == PN_XNUM) {
2307         if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2308             goto end_coredump;
2309     }
2310 
2311 end_coredump:
2312     set_fs(fs);
2313 
2314 cleanup:
2315     free_note_info(&info);
2316     kfree(shdr4extnum);
2317     vfree(vma_filesz);
2318     kfree(phdr4note);
2319     kfree(elf);
2320 out:
2321     return has_dumped;
2322 }
2323 
2324 #endif      /* CONFIG_ELF_CORE */
2325 
2326 static int __init init_elf_binfmt(void)
2327 {
2328     register_binfmt(&elf_format);
2329     return 0;
2330 }
2331 
2332 static void __exit exit_elf_binfmt(void)
2333 {
2334     /* Remove the ELF loader. */
2335     unregister_binfmt(&elf_format);
2336 }
2337 
2338 core_initcall(init_elf_binfmt);
2339 module_exit(exit_elf_binfmt);
2340 MODULE_LICENSE("GPL");