0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013 #include <linux/crash_core.h>
0014 #include <linux/mm.h>
0015 #include <linux/proc_fs.h>
0016 #include <linux/kcore.h>
0017 #include <linux/user.h>
0018 #include <linux/capability.h>
0019 #include <linux/elf.h>
0020 #include <linux/elfcore.h>
0021 #include <linux/notifier.h>
0022 #include <linux/vmalloc.h>
0023 #include <linux/highmem.h>
0024 #include <linux/printk.h>
0025 #include <linux/memblock.h>
0026 #include <linux/init.h>
0027 #include <linux/slab.h>
0028 #include <linux/uaccess.h>
0029 #include <asm/io.h>
0030 #include <linux/list.h>
0031 #include <linux/ioport.h>
0032 #include <linux/memory.h>
0033 #include <linux/sched/task.h>
0034 #include <linux/security.h>
0035 #include <asm/sections.h>
0036 #include "internal.h"
0037
0038 #define CORE_STR "CORE"
0039
0040 #ifndef ELF_CORE_EFLAGS
0041 #define ELF_CORE_EFLAGS 0
0042 #endif
0043
/* /proc/kcore entry; its ->size mirrors the computed ELF core-image size. */
static struct proc_dir_entry *proc_root_kcore;
0045
0046
0047 #ifndef kc_vaddr_to_offset
0048 #define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
0049 #endif
0050 #ifndef kc_offset_to_vaddr
0051 #define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
0052 #endif
0053
/* All memory ranges exported through /proc/kcore. */
static LIST_HEAD(kclist_head);
/* Protects kclist_head; readers hold it across an entire read_kcore(). */
static DECLARE_RWSEM(kclist_lock);
/* Nonzero when the RAM ranges may be stale (e.g. after memory hotplug). */
static int kcore_need_update = 1;
0057
0058
0059
0060
0061
/* Optional arch/driver hook deciding whether a pfn is backed by RAM. */
static int (*mem_pfn_is_ram)(unsigned long pfn);
0063
0064 int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn))
0065 {
0066 if (mem_pfn_is_ram)
0067 return -EBUSY;
0068 mem_pfn_is_ram = fn;
0069 return 0;
0070 }
0071
0072 static int pfn_is_ram(unsigned long pfn)
0073 {
0074 if (mem_pfn_is_ram)
0075 return mem_pfn_is_ram(pfn);
0076 else
0077 return 1;
0078 }
0079
0080
0081 void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
0082 int type)
0083 {
0084 new->addr = (unsigned long)addr;
0085 new->size = size;
0086 new->type = type;
0087
0088 list_add_tail(&new->list, &kclist_head);
0089 }
0090
0091 static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
0092 size_t *data_offset)
0093 {
0094 size_t try, size;
0095 struct kcore_list *m;
0096
0097 *nphdr = 1;
0098 size = 0;
0099
0100 list_for_each_entry(m, &kclist_head, list) {
0101 try = kc_vaddr_to_offset((size_t)m->addr + m->size);
0102 if (try > size)
0103 size = try;
0104 *nphdr = *nphdr + 1;
0105 }
0106
0107 *phdrs_len = *nphdr * sizeof(struct elf_phdr);
0108 *notes_len = (4 * sizeof(struct elf_note) +
0109 3 * ALIGN(sizeof(CORE_STR), 4) +
0110 VMCOREINFO_NOTE_NAME_BYTES +
0111 ALIGN(sizeof(struct elf_prstatus), 4) +
0112 ALIGN(sizeof(struct elf_prpsinfo), 4) +
0113 ALIGN(arch_task_struct_size, 4) +
0114 ALIGN(vmcoreinfo_size, 4));
0115 *data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len +
0116 *notes_len);
0117 return *data_offset + size;
0118 }
0119
0120 #ifdef CONFIG_HIGHMEM
0121
0122
0123
0124
0125
0126 static int kcore_ram_list(struct list_head *head)
0127 {
0128 struct kcore_list *ent;
0129
0130 ent = kmalloc(sizeof(*ent), GFP_KERNEL);
0131 if (!ent)
0132 return -ENOMEM;
0133 ent->addr = (unsigned long)__va(0);
0134 ent->size = max_low_pfn << PAGE_SHIFT;
0135 ent->type = KCORE_RAM;
0136 list_add(&ent->list, head);
0137 return 0;
0138 }
0139
0140 #else
0141
0142 #ifdef CONFIG_SPARSEMEM_VMEMMAP
0143
/*
 * Add a KCORE_VMEMMAP entry covering the struct page array that backs the
 * RAM range described by @ent, trimmed against vmemmap entries already on
 * @head so overlapping spans are not dumped twice.
 *
 * Returns 1 on success (including "nothing new to add"), 0 only when the
 * allocation for the new entry fails; the caller then discards @ent.
 */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
	unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
	unsigned long nr_pages = ent->size >> PAGE_SHIFT;
	unsigned long start, end;
	struct kcore_list *vmm, *tmp;

	/* Page-aligned [start, end) span of struct pages for this pfn range. */
	start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
	end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
	end = PAGE_ALIGN(end);
	/* Truncate against existing vmemmap entries to avoid overlap. */
	list_for_each_entry(tmp, head, list) {
		if (tmp->type != KCORE_VMEMMAP)
			continue;
		if (start < tmp->addr + tmp->size)
			if (end > tmp->addr)
				end = tmp->addr;
	}
	if (start < end) {
		vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
		if (!vmm)
			return 0;
		vmm->addr = start;
		vmm->size = end - start;
		vmm->type = KCORE_VMEMMAP;
		list_add_tail(&vmm->list, head);
	}
	return 1;

}
0176 #else
/* Without SPARSEMEM_VMEMMAP there is no vmemmap region to register: no-op. */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
	return 1;
}
0182
0183 #endif
0184
/*
 * Callback for walk_system_ram_range(): register the System RAM range
 * [pfn, pfn + nr_pages) as a KCORE_RAM entry on the list passed in @arg,
 * plus the vmemmap region backing it (if any).
 *
 * Returns 0 on success, 1 to skip this range (walk continues), or
 * -ENOMEM on allocation failure (aborts the walk).
 */
static int
kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
	struct list_head *head = (struct list_head *)arg;
	struct kcore_list *ent;
	struct page *p;

	if (!pfn_valid(pfn))
		return 1;

	p = pfn_to_page(pfn);

	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
	if (!ent)
		return -ENOMEM;
	ent->addr = (unsigned long)page_to_virt(p);
	ent->size = nr_pages << PAGE_SHIFT;

	/* Range not covered by the kernel's direct mapping: skip it. */
	if (!virt_addr_valid(ent->addr))
		goto free_out;

	/* Clamp so addr + size cannot wrap past ULONG_MAX. */
	if (ULONG_MAX - ent->addr < ent->size)
		ent->size = ULONG_MAX - ent->addr;

	/*
	 * Clamp the range so it does not run into the vmalloc area, which
	 * is exported separately as KCORE_VMALLOC.
	 * NOTE(review): assumes the direct map sits below VMALLOC_START on
	 * the architectures taking this (!CONFIG_HIGHMEM) path — confirm.
	 */
	if (VMALLOC_START > ent->addr) {
		if (VMALLOC_START - ent->addr < ent->size)
			ent->size = VMALLOC_START - ent->addr;
	}

	ent->type = KCORE_RAM;
	list_add_tail(&ent->list, head);

	/* Also register the backing vmemmap; on failure drop this entry. */
	if (!get_sparsemem_vmemmap_info(ent, head)) {
		list_del(&ent->list);
		goto free_out;
	}

	return 0;
free_out:
	kfree(ent);
	return 1;
}
0233
0234 static int kcore_ram_list(struct list_head *list)
0235 {
0236 int nid, ret;
0237 unsigned long end_pfn;
0238
0239
0240
0241 end_pfn = 0;
0242 for_each_node_state(nid, N_MEMORY) {
0243 unsigned long node_end;
0244 node_end = node_end_pfn(nid);
0245 if (end_pfn < node_end)
0246 end_pfn = node_end;
0247 }
0248
0249 ret = walk_system_ram_range(0, end_pfn, list, kclist_add_private);
0250 if (ret)
0251 return -ENOMEM;
0252 return 0;
0253 }
0254 #endif
0255
0256 static int kcore_update_ram(void)
0257 {
0258 LIST_HEAD(list);
0259 LIST_HEAD(garbage);
0260 int nphdr;
0261 size_t phdrs_len, notes_len, data_offset;
0262 struct kcore_list *tmp, *pos;
0263 int ret = 0;
0264
0265 down_write(&kclist_lock);
0266 if (!xchg(&kcore_need_update, 0))
0267 goto out;
0268
0269 ret = kcore_ram_list(&list);
0270 if (ret) {
0271
0272 WRITE_ONCE(kcore_need_update, 1);
0273 list_splice_tail(&list, &garbage);
0274 goto out;
0275 }
0276
0277 list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
0278 if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP)
0279 list_move(&pos->list, &garbage);
0280 }
0281 list_splice_tail(&list, &kclist_head);
0282
0283 proc_root_kcore->size = get_kcore_size(&nphdr, &phdrs_len, ¬es_len,
0284 &data_offset);
0285
0286 out:
0287 up_write(&kclist_lock);
0288 list_for_each_entry_safe(pos, tmp, &garbage, list) {
0289 list_del(&pos->list);
0290 kfree(pos);
0291 }
0292 return ret;
0293 }
0294
0295 static void append_kcore_note(char *notes, size_t *i, const char *name,
0296 unsigned int type, const void *desc,
0297 size_t descsz)
0298 {
0299 struct elf_note *note = (struct elf_note *)¬es[*i];
0300
0301 note->n_namesz = strlen(name) + 1;
0302 note->n_descsz = descsz;
0303 note->n_type = type;
0304 *i += sizeof(*note);
0305 memcpy(¬es[*i], name, note->n_namesz);
0306 *i = ALIGN(*i + note->n_namesz, 4);
0307 memcpy(¬es[*i], desc, descsz);
0308 *i = ALIGN(*i + descsz, 4);
0309 }
0310
0311 static ssize_t
0312 read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
0313 {
0314 char *buf = file->private_data;
0315 size_t phdrs_offset, notes_offset, data_offset;
0316 size_t page_offline_frozen = 1;
0317 size_t phdrs_len, notes_len;
0318 struct kcore_list *m;
0319 size_t tsz;
0320 int nphdr;
0321 unsigned long start;
0322 size_t orig_buflen = buflen;
0323 int ret = 0;
0324
0325 down_read(&kclist_lock);
0326
0327
0328
0329
0330 page_offline_freeze();
0331
0332 get_kcore_size(&nphdr, &phdrs_len, ¬es_len, &data_offset);
0333 phdrs_offset = sizeof(struct elfhdr);
0334 notes_offset = phdrs_offset + phdrs_len;
0335
0336
0337 if (buflen && *fpos < sizeof(struct elfhdr)) {
0338 struct elfhdr ehdr = {
0339 .e_ident = {
0340 [EI_MAG0] = ELFMAG0,
0341 [EI_MAG1] = ELFMAG1,
0342 [EI_MAG2] = ELFMAG2,
0343 [EI_MAG3] = ELFMAG3,
0344 [EI_CLASS] = ELF_CLASS,
0345 [EI_DATA] = ELF_DATA,
0346 [EI_VERSION] = EV_CURRENT,
0347 [EI_OSABI] = ELF_OSABI,
0348 },
0349 .e_type = ET_CORE,
0350 .e_machine = ELF_ARCH,
0351 .e_version = EV_CURRENT,
0352 .e_phoff = sizeof(struct elfhdr),
0353 .e_flags = ELF_CORE_EFLAGS,
0354 .e_ehsize = sizeof(struct elfhdr),
0355 .e_phentsize = sizeof(struct elf_phdr),
0356 .e_phnum = nphdr,
0357 };
0358
0359 tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos);
0360 if (copy_to_user(buffer, (char *)&ehdr + *fpos, tsz)) {
0361 ret = -EFAULT;
0362 goto out;
0363 }
0364
0365 buffer += tsz;
0366 buflen -= tsz;
0367 *fpos += tsz;
0368 }
0369
0370
0371 if (buflen && *fpos < phdrs_offset + phdrs_len) {
0372 struct elf_phdr *phdrs, *phdr;
0373
0374 phdrs = kzalloc(phdrs_len, GFP_KERNEL);
0375 if (!phdrs) {
0376 ret = -ENOMEM;
0377 goto out;
0378 }
0379
0380 phdrs[0].p_type = PT_NOTE;
0381 phdrs[0].p_offset = notes_offset;
0382 phdrs[0].p_filesz = notes_len;
0383
0384 phdr = &phdrs[1];
0385 list_for_each_entry(m, &kclist_head, list) {
0386 phdr->p_type = PT_LOAD;
0387 phdr->p_flags = PF_R | PF_W | PF_X;
0388 phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset;
0389 phdr->p_vaddr = (size_t)m->addr;
0390 if (m->type == KCORE_RAM)
0391 phdr->p_paddr = __pa(m->addr);
0392 else if (m->type == KCORE_TEXT)
0393 phdr->p_paddr = __pa_symbol(m->addr);
0394 else
0395 phdr->p_paddr = (elf_addr_t)-1;
0396 phdr->p_filesz = phdr->p_memsz = m->size;
0397 phdr->p_align = PAGE_SIZE;
0398 phdr++;
0399 }
0400
0401 tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *fpos);
0402 if (copy_to_user(buffer, (char *)phdrs + *fpos - phdrs_offset,
0403 tsz)) {
0404 kfree(phdrs);
0405 ret = -EFAULT;
0406 goto out;
0407 }
0408 kfree(phdrs);
0409
0410 buffer += tsz;
0411 buflen -= tsz;
0412 *fpos += tsz;
0413 }
0414
0415
0416 if (buflen && *fpos < notes_offset + notes_len) {
0417 struct elf_prstatus prstatus = {};
0418 struct elf_prpsinfo prpsinfo = {
0419 .pr_sname = 'R',
0420 .pr_fname = "vmlinux",
0421 };
0422 char *notes;
0423 size_t i = 0;
0424
0425 strlcpy(prpsinfo.pr_psargs, saved_command_line,
0426 sizeof(prpsinfo.pr_psargs));
0427
0428 notes = kzalloc(notes_len, GFP_KERNEL);
0429 if (!notes) {
0430 ret = -ENOMEM;
0431 goto out;
0432 }
0433
0434 append_kcore_note(notes, &i, CORE_STR, NT_PRSTATUS, &prstatus,
0435 sizeof(prstatus));
0436 append_kcore_note(notes, &i, CORE_STR, NT_PRPSINFO, &prpsinfo,
0437 sizeof(prpsinfo));
0438 append_kcore_note(notes, &i, CORE_STR, NT_TASKSTRUCT, current,
0439 arch_task_struct_size);
0440
0441
0442
0443
0444
0445
0446
0447 append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0,
0448 vmcoreinfo_data,
0449 min(vmcoreinfo_size, notes_len - i));
0450
0451 tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos);
0452 if (copy_to_user(buffer, notes + *fpos - notes_offset, tsz)) {
0453 kfree(notes);
0454 ret = -EFAULT;
0455 goto out;
0456 }
0457 kfree(notes);
0458
0459 buffer += tsz;
0460 buflen -= tsz;
0461 *fpos += tsz;
0462 }
0463
0464
0465
0466
0467
0468 start = kc_offset_to_vaddr(*fpos - data_offset);
0469 if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
0470 tsz = buflen;
0471
0472 m = NULL;
0473 while (buflen) {
0474 struct page *page;
0475 unsigned long pfn;
0476
0477
0478
0479
0480
0481 if (!m || start < m->addr || start >= m->addr + m->size) {
0482 struct kcore_list *iter;
0483
0484 m = NULL;
0485 list_for_each_entry(iter, &kclist_head, list) {
0486 if (start >= iter->addr &&
0487 start < iter->addr + iter->size) {
0488 m = iter;
0489 break;
0490 }
0491 }
0492 }
0493
0494 if (page_offline_frozen++ % MAX_ORDER_NR_PAGES == 0) {
0495 page_offline_thaw();
0496 cond_resched();
0497 page_offline_freeze();
0498 }
0499
0500 if (!m) {
0501 if (clear_user(buffer, tsz)) {
0502 ret = -EFAULT;
0503 goto out;
0504 }
0505 goto skip;
0506 }
0507
0508 switch (m->type) {
0509 case KCORE_VMALLOC:
0510 vread(buf, (char *)start, tsz);
0511
0512 if (copy_to_user(buffer, buf, tsz)) {
0513 ret = -EFAULT;
0514 goto out;
0515 }
0516 break;
0517 case KCORE_USER:
0518
0519 if (copy_to_user(buffer, (char *)start, tsz)) {
0520 ret = -EFAULT;
0521 goto out;
0522 }
0523 break;
0524 case KCORE_RAM:
0525 pfn = __pa(start) >> PAGE_SHIFT;
0526 page = pfn_to_online_page(pfn);
0527
0528
0529
0530
0531
0532
0533 if (!page || PageOffline(page) ||
0534 is_page_hwpoison(page) || !pfn_is_ram(pfn)) {
0535 if (clear_user(buffer, tsz)) {
0536 ret = -EFAULT;
0537 goto out;
0538 }
0539 break;
0540 }
0541 fallthrough;
0542 case KCORE_VMEMMAP:
0543 case KCORE_TEXT:
0544 if (kern_addr_valid(start)) {
0545
0546
0547
0548
0549 if (copy_from_kernel_nofault(buf, (void *)start,
0550 tsz)) {
0551 if (clear_user(buffer, tsz)) {
0552 ret = -EFAULT;
0553 goto out;
0554 }
0555 } else {
0556 if (copy_to_user(buffer, buf, tsz)) {
0557 ret = -EFAULT;
0558 goto out;
0559 }
0560 }
0561 } else {
0562 if (clear_user(buffer, tsz)) {
0563 ret = -EFAULT;
0564 goto out;
0565 }
0566 }
0567 break;
0568 default:
0569 pr_warn_once("Unhandled KCORE type: %d\n", m->type);
0570 if (clear_user(buffer, tsz)) {
0571 ret = -EFAULT;
0572 goto out;
0573 }
0574 }
0575 skip:
0576 buflen -= tsz;
0577 *fpos += tsz;
0578 buffer += tsz;
0579 start += tsz;
0580 tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
0581 }
0582
0583 out:
0584 page_offline_thaw();
0585 up_read(&kclist_lock);
0586 if (ret)
0587 return ret;
0588 return orig_buflen - buflen;
0589 }
0590
/*
 * open_kcore - open() handler for /proc/kcore
 *
 * Denied without CAP_SYS_RAWIO and under kernel lockdown.  Allocates the
 * one-page bounce buffer used by read_kcore() (freed in release_kcore()),
 * refreshes the RAM list if it is stale, and syncs the inode size with
 * the computed core-image size so userspace sees a consistent length.
 */
static int open_kcore(struct inode *inode, struct file *filp)
{
	int ret = security_locked_down(LOCKDOWN_KCORE);

	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	if (ret)
		return ret;

	filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!filp->private_data)
		return -ENOMEM;

	if (kcore_need_update)
		kcore_update_ram();
	if (i_size_read(inode) != proc_root_kcore->size) {
		inode_lock(inode);
		i_size_write(inode, proc_root_kcore->size);
		inode_unlock(inode);
	}
	return 0;
}
0614
/* Free the per-open bounce buffer allocated in open_kcore(). */
static int release_kcore(struct inode *inode, struct file *file)
{
	kfree(file->private_data);
	return 0;
}
0620
/* File operations for /proc/kcore. */
static const struct proc_ops kcore_proc_ops = {
	.proc_read = read_kcore,
	.proc_open = open_kcore,
	.proc_release = release_kcore,
	.proc_lseek = default_llseek,
};
0627
0628
0629 static int __meminit kcore_callback(struct notifier_block *self,
0630 unsigned long action, void *arg)
0631 {
0632 switch (action) {
0633 case MEM_ONLINE:
0634 case MEM_OFFLINE:
0635 kcore_need_update = 1;
0636 break;
0637 }
0638 return NOTIFY_OK;
0639 }
0640
/* Registered in proc_kcore_init() to track memory hotplug events. */
static struct notifier_block kcore_callback_nb __meminitdata = {
	.notifier_call = kcore_callback,
	.priority = 0,
};

/* Static entry covering the whole vmalloc area. */
static struct kcore_list kcore_vmalloc;
0647
0648 #ifdef CONFIG_ARCH_PROC_KCORE_TEXT
static struct kcore_list kcore_text;
/*
 * Register the kernel text mapping (_text.._end) as a separate KCORE_TEXT
 * entry.  Only provided when the architecture selects
 * CONFIG_ARCH_PROC_KCORE_TEXT.
 */
static void __init proc_kcore_text_init(void)
{
	kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT);
}
0658 #else
/* No separate kernel-text entry on this architecture: no-op. */
static void __init proc_kcore_text_init(void)
{
}
0662 #endif
0663
0664 #if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
0665
0666
0667
static struct kcore_list kcore_modules;
/*
 * Register the module area as KCORE_VMALLOC, but only when it does not
 * coincide with the vmalloc area (which is already registered).
 */
static void __init add_modules_range(void)
{
	if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) {
		kclist_add(&kcore_modules, (void *)MODULES_VADDR,
			   MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
	}
}
0676 #else
/* No distinct module area on this configuration: no-op. */
static void __init add_modules_range(void)
{
}
0680 #endif
0681
/*
 * Create /proc/kcore (readable by root only), register the static
 * text/vmalloc/modules ranges, build the initial RAM list and hook
 * memory hotplug so the list is refreshed after online/offline events.
 */
static int __init proc_kcore_init(void)
{
	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &kcore_proc_ops);
	if (!proc_root_kcore) {
		pr_err("couldn't create /proc/kcore\n");
		return 0;	/* not fatal: boot continues without kcore */
	}

	proc_kcore_text_init();

	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
		   VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
	add_modules_range();

	/* Store RAM-range info and register the hotplug notifier. */
	kcore_update_ram();
	register_hotmemory_notifier(&kcore_callback_nb);

	return 0;
}
0701 fs_initcall(proc_kcore_init);