#include <linux/pagewalk.h>
#include <linux/vmacache.h>
#include <linux/mm_inline.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/highmem.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/shmem_fs.h>
#include <linux/uaccess.h>
#include <linux/pkeys.h>

#include <asm/elf.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include "internal.h"

#define SEQ_PUT_DEC(str, val) \
        seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8)
void task_mem(struct seq_file *m, struct mm_struct *mm)
{
        unsigned long text, lib, swap, anon, file, shmem;
        unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;

        anon = get_mm_counter(mm, MM_ANONPAGES);
        file = get_mm_counter(mm, MM_FILEPAGES);
        shmem = get_mm_counter(mm, MM_SHMEMPAGES);

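        /*
         * mm only updates hiwater_vm and hiwater_rss when total_vm or the
         * RSS counters are about to drop, so report the maximum of the
         * current value and the recorded high-water mark.
         */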
        hiwater_vm = total_vm = mm->total_vm;
        if (hiwater_vm < mm->hiwater_vm)
                hiwater_vm = mm->hiwater_vm;
        hiwater_rss = total_rss = anon + file + shmem;
        if (hiwater_rss < mm->hiwater_rss)
                hiwater_rss = mm->hiwater_rss;

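        /* split exec_vm between the binary's text and everything else (libraries) */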
        text = PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK);
        text = min(text, mm->exec_vm << PAGE_SHIFT);
        lib = (mm->exec_vm << PAGE_SHIFT) - text;

        swap = get_mm_counter(mm, MM_SWAPENTS);
        SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
        SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
        SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
        SEQ_PUT_DEC(" kB\nVmPin:\t", atomic64_read(&mm->pinned_vm));
        SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
        SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
        SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
        SEQ_PUT_DEC(" kB\nRssFile:\t", file);
        SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem);
        SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm);
        SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm);
        seq_put_decimal_ull_width(m,
                " kB\nVmExe:\t", text >> 10, 8);
        seq_put_decimal_ull_width(m,
                " kB\nVmLib:\t", lib >> 10, 8);
        seq_put_decimal_ull_width(m,
                " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8);
        SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
        seq_puts(m, " kB\n");
        hugetlb_report_usage(m, mm);
}
#undef SEQ_PUT_DEC

unsigned long task_vsize(struct mm_struct *mm)
{
        return PAGE_SIZE * mm->total_vm;
}

unsigned long task_statm(struct mm_struct *mm,
                         unsigned long *shared, unsigned long *text,
                         unsigned long *data, unsigned long *resident)
{
        *shared = get_mm_counter(mm, MM_FILEPAGES) +
                        get_mm_counter(mm, MM_SHMEMPAGES);
        *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
                        >> PAGE_SHIFT;
        *data = mm->data_vm + mm->stack_vm;
        *resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
        return mm->total_vm;
}

#ifdef CONFIG_NUMA
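/*
 * Save the task's mempolicy (with a reference held) so that
 * show_numa_map() can use it while the VMAs are being walked.
 */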
static void hold_task_mempolicy(struct proc_maps_private *priv)
{
        struct task_struct *task = priv->task;

        task_lock(task);
        priv->task_mempolicy = get_task_policy(task);
        mpol_get(priv->task_mempolicy);
        task_unlock(task);
}
static void release_task_mempolicy(struct proc_maps_private *priv)
{
        mpol_put(priv->task_mempolicy);
}
#else
static void hold_task_mempolicy(struct proc_maps_private *priv)
{
}
static void release_task_mempolicy(struct proc_maps_private *priv)
{
}
#endif

static void *m_start(struct seq_file *m, loff_t *ppos)
{
        struct proc_maps_private *priv = m->private;
        unsigned long last_addr = *ppos;
        struct mm_struct *mm;
        struct vm_area_struct *vma;

        if (last_addr == -1UL)
                return NULL;

        priv->task = get_proc_task(priv->inode);
        if (!priv->task)
                return ERR_PTR(-ESRCH);

        mm = priv->mm;
        if (!mm || !mmget_not_zero(mm)) {
                put_task_struct(priv->task);
                priv->task = NULL;
                return NULL;
        }

        if (mmap_read_lock_killable(mm)) {
                mmput(mm);
                put_task_struct(priv->task);
                priv->task = NULL;
                return ERR_PTR(-EINTR);
        }

        hold_task_mempolicy(priv);
        priv->tail_vma = get_gate_vma(mm);

        vma = find_vma(mm, last_addr);
        if (vma)
                return vma;

        return priv->tail_vma;
}

static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
{
        struct proc_maps_private *priv = m->private;
        struct vm_area_struct *next, *vma = v;

        if (vma == priv->tail_vma)
                next = NULL;
        else if (vma->vm_next)
                next = vma->vm_next;
        else
                next = priv->tail_vma;

        *ppos = next ? next->vm_start : -1UL;

        return next;
}

static void m_stop(struct seq_file *m, void *v)
{
        struct proc_maps_private *priv = m->private;
        struct mm_struct *mm = priv->mm;

        if (!priv->task)
                return;

        release_task_mempolicy(priv);
        mmap_read_unlock(mm);
        mmput(mm);
        put_task_struct(priv->task);
        priv->task = NULL;
}

static int proc_maps_open(struct inode *inode, struct file *file,
                        const struct seq_operations *ops, int psize)
{
        struct proc_maps_private *priv = __seq_open_private(file, ops, psize);

        if (!priv)
                return -ENOMEM;

        priv->inode = inode;
        priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
        if (IS_ERR(priv->mm)) {
                int err = PTR_ERR(priv->mm);

                seq_release_private(inode, file);
                return err;
        }

        return 0;
}

static int proc_map_release(struct inode *inode, struct file *file)
{
        struct seq_file *seq = file->private_data;
        struct proc_maps_private *priv = seq->private;

        if (priv->mm)
                mmdrop(priv->mm);

        return seq_release_private(inode, file);
}

static int do_maps_open(struct inode *inode, struct file *file,
                        const struct seq_operations *ops)
{
        return proc_maps_open(inode, file, ops,
                        sizeof(struct proc_maps_private));
}

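/*
 * Report whether this VMA covers the mm's recorded stack start; for
 * /proc/PID/maps this marks only the main thread's stack, and no attempt
 * is made to identify per-thread stacks.
 */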
static int is_stack(struct vm_area_struct *vma)
{
        return vma->vm_start <= vma->vm_mm->start_stack &&
                vma->vm_end >= vma->vm_mm->start_stack;
}

static void show_vma_header_prefix(struct seq_file *m,
                                   unsigned long start, unsigned long end,
                                   vm_flags_t flags, unsigned long long pgoff,
                                   dev_t dev, unsigned long ino)
{
        seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
        seq_put_hex_ll(m, NULL, start, 8);
        seq_put_hex_ll(m, "-", end, 8);
        seq_putc(m, ' ');
        seq_putc(m, flags & VM_READ ? 'r' : '-');
        seq_putc(m, flags & VM_WRITE ? 'w' : '-');
        seq_putc(m, flags & VM_EXEC ? 'x' : '-');
        seq_putc(m, flags & VM_MAYSHARE ? 's' : 'p');
        seq_put_hex_ll(m, " ", pgoff, 8);
        seq_put_hex_ll(m, " ", MAJOR(dev), 2);
        seq_put_hex_ll(m, ":", MINOR(dev), 2);
        seq_put_decimal_ull(m, " ", ino);
        seq_putc(m, ' ');
}

static void
show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
{
        struct mm_struct *mm = vma->vm_mm;
        struct file *file = vma->vm_file;
        vm_flags_t flags = vma->vm_flags;
        unsigned long ino = 0;
        unsigned long long pgoff = 0;
        unsigned long start, end;
        dev_t dev = 0;
        const char *name = NULL;

        if (file) {
                struct inode *inode = file_inode(vma->vm_file);
                dev = inode->i_sb->s_dev;
                ino = inode->i_ino;
                pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
        }

        start = vma->vm_start;
        end = vma->vm_end;
        show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);

        if (file) {
                seq_pad(m, ' ');
                seq_file_path(m, file, "\n");
                goto done;
        }

        if (vma->vm_ops && vma->vm_ops->name) {
                name = vma->vm_ops->name(vma);
                if (name)
                        goto done;
        }

        name = arch_vma_name(vma);
        if (!name) {
                struct anon_vma_name *anon_name;

                if (!mm) {
                        name = "[vdso]";
                        goto done;
                }

                if (vma->vm_start <= mm->brk &&
                    vma->vm_end >= mm->start_brk) {
                        name = "[heap]";
                        goto done;
                }

                if (is_stack(vma)) {
                        name = "[stack]";
                        goto done;
                }

                anon_name = anon_vma_name(vma);
                if (anon_name) {
                        seq_pad(m, ' ');
                        seq_printf(m, "[anon:%s]", anon_name->name);
                }
        }

done:
        if (name) {
                seq_pad(m, ' ');
                seq_puts(m, name);
        }
        seq_putc(m, '\n');
}

static int show_map(struct seq_file *m, void *v)
{
        show_map_vma(m, v);
        return 0;
}

static const struct seq_operations proc_pid_maps_op = {
        .start = m_start,
        .next = m_next,
        .stop = m_stop,
        .show = show_map
};

static int pid_maps_open(struct inode *inode, struct file *file)
{
        return do_maps_open(inode, file, &proc_pid_maps_op);
}

const struct file_operations proc_pid_maps_operations = {
        .open = pid_maps_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = proc_map_release,
};

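/*
 * Proportional Set Size (PSS): this mm's share of the RSS.  Each resident
 * page contributes PAGE_SIZE divided by the number of processes mapping it.
 *
 * The pss fields are kept as fixed-point values with PSS_SHIFT fractional
 * bits so the per-page divisions do not accumulate rounding errors;
 * (pss >> PSS_SHIFT) is the byte count that gets reported.
 */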
#define PSS_SHIFT 12

#ifdef CONFIG_PROC_PAGE_MONITOR
struct mem_size_stats {
        unsigned long resident;
        unsigned long shared_clean;
        unsigned long shared_dirty;
        unsigned long private_clean;
        unsigned long private_dirty;
        unsigned long referenced;
        unsigned long anonymous;
        unsigned long lazyfree;
        unsigned long anonymous_thp;
        unsigned long shmem_thp;
        unsigned long file_thp;
        unsigned long swap;
        unsigned long shared_hugetlb;
        unsigned long private_hugetlb;
        u64 pss;
        u64 pss_anon;
        u64 pss_file;
        u64 pss_shmem;
        u64 pss_dirty;
        u64 pss_locked;
        u64 swap_pss;
};

static void smaps_page_accumulate(struct mem_size_stats *mss,
                struct page *page, unsigned long size, unsigned long pss,
                bool dirty, bool locked, bool private)
{
        mss->pss += pss;

        if (PageAnon(page))
                mss->pss_anon += pss;
        else if (PageSwapBacked(page))
                mss->pss_shmem += pss;
        else
                mss->pss_file += pss;

        if (locked)
                mss->pss_locked += pss;

        if (dirty || PageDirty(page)) {
                mss->pss_dirty += pss;
                if (private)
                        mss->private_dirty += size;
                else
                        mss->shared_dirty += size;
        } else {
                if (private)
                        mss->private_clean += size;
                else
                        mss->shared_clean += size;
        }
}

static void smaps_account(struct mem_size_stats *mss, struct page *page,
                bool compound, bool young, bool dirty, bool locked,
                bool migration)
{
        int i, nr = compound ? compound_nr(page) : 1;
        unsigned long size = nr * PAGE_SIZE;

        if (PageAnon(page)) {
                mss->anonymous += size;
                if (!PageSwapBacked(page) && !dirty && !PageDirty(page))
                        mss->lazyfree += size;
        }

        mss->resident += size;

        if (young || page_is_young(page) || PageReferenced(page))
                mss->referenced += size;

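        /*
         * Now account PSS page by page.  A page with page_count() == 1 is
         * known to be mapped exactly once, and regular migration entries
         * are treated the same way, so the whole range is charged as
         * private.  Otherwise each subpage is divided by its own mapcount
         * snapshot.
         */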
        if ((page_count(page) == 1) || migration) {
                smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty,
                        locked, true);
                return;
        }
        for (i = 0; i < nr; i++, page++) {
                int mapcount = page_mapcount(page);
                unsigned long pss = PAGE_SIZE << PSS_SHIFT;
                if (mapcount >= 2)
                        pss /= mapcount;
                smaps_page_accumulate(mss, page, PAGE_SIZE, pss, dirty, locked,
                        mapcount < 2);
        }
}

#ifdef CONFIG_SHMEM
static int smaps_pte_hole(unsigned long addr, unsigned long end,
                          __always_unused int depth, struct mm_walk *walk)
{
        struct mem_size_stats *mss = walk->private;
        struct vm_area_struct *vma = walk->vma;

        mss->swap += shmem_partial_swap_usage(walk->vma->vm_file->f_mapping,
                                              linear_page_index(vma, addr),
                                              linear_page_index(vma, end));

        return 0;
}
#else
#define smaps_pte_hole NULL
#endif

static void smaps_pte_hole_lookup(unsigned long addr, struct mm_walk *walk)
{
#ifdef CONFIG_SHMEM
        if (walk->ops->pte_hole) {
                smaps_pte_hole(addr, addr + PAGE_SIZE, 0, walk);
        }
#endif
}

static void smaps_pte_entry(pte_t *pte, unsigned long addr,
                struct mm_walk *walk)
{
        struct mem_size_stats *mss = walk->private;
        struct vm_area_struct *vma = walk->vma;
        bool locked = !!(vma->vm_flags & VM_LOCKED);
        struct page *page = NULL;
        bool migration = false, young = false, dirty = false;

        if (pte_present(*pte)) {
                page = vm_normal_page(vma, addr, *pte);
                young = pte_young(*pte);
                dirty = pte_dirty(*pte);
        } else if (is_swap_pte(*pte)) {
                swp_entry_t swpent = pte_to_swp_entry(*pte);

                if (!non_swap_entry(swpent)) {
                        int mapcount;

                        mss->swap += PAGE_SIZE;
                        mapcount = swp_swapcount(swpent);
                        if (mapcount >= 2) {
                                u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT;

                                do_div(pss_delta, mapcount);
                                mss->swap_pss += pss_delta;
                        } else {
                                mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
                        }
                } else if (is_pfn_swap_entry(swpent)) {
                        if (is_migration_entry(swpent))
                                migration = true;
                        page = pfn_swap_entry_to_page(swpent);
                }
        } else {
                smaps_pte_hole_lookup(addr, walk);
                return;
        }

        if (!page)
                return;

        smaps_account(mss, page, false, young, dirty, locked, migration);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
                struct mm_walk *walk)
{
        struct mem_size_stats *mss = walk->private;
        struct vm_area_struct *vma = walk->vma;
        bool locked = !!(vma->vm_flags & VM_LOCKED);
        struct page *page = NULL;
        bool migration = false;

        if (pmd_present(*pmd)) {
                page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
        } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
                swp_entry_t entry = pmd_to_swp_entry(*pmd);

                if (is_migration_entry(entry)) {
                        migration = true;
                        page = pfn_swap_entry_to_page(entry);
                }
        }
        if (IS_ERR_OR_NULL(page))
                return;
        if (PageAnon(page))
                mss->anonymous_thp += HPAGE_PMD_SIZE;
        else if (PageSwapBacked(page))
                mss->shmem_thp += HPAGE_PMD_SIZE;
        else if (is_zone_device_page(page))
                ;
        else
                mss->file_thp += HPAGE_PMD_SIZE;

        smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd),
                      locked, migration);
}
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
                struct mm_walk *walk)
{
}
#endif

static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                           struct mm_walk *walk)
{
        struct vm_area_struct *vma = walk->vma;
        pte_t *pte;
        spinlock_t *ptl;

        ptl = pmd_trans_huge_lock(pmd, vma);
        if (ptl) {
                smaps_pmd_entry(pmd, addr, walk);
                spin_unlock(ptl);
                goto out;
        }

        if (pmd_trans_unstable(pmd))
                goto out;

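        /*
         * No huge PMD here: fall back to walking the individual PTEs under
         * the page table lock.  The mmap_lock taken back in m_start() keeps
         * the VMA and its page tables from going away underneath us.
         */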
        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        for (; addr != end; pte++, addr += PAGE_SIZE)
                smaps_pte_entry(pte, addr, walk);
        pte_unmap_unlock(pte - 1, ptl);
out:
        cond_resched();
        return 0;
}

static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
{
        static const char mnemonics[BITS_PER_LONG][2] = {
                [0 ... (BITS_PER_LONG-1)] = "??",

                [ilog2(VM_READ)] = "rd",
                [ilog2(VM_WRITE)] = "wr",
                [ilog2(VM_EXEC)] = "ex",
                [ilog2(VM_SHARED)] = "sh",
                [ilog2(VM_MAYREAD)] = "mr",
                [ilog2(VM_MAYWRITE)] = "mw",
                [ilog2(VM_MAYEXEC)] = "me",
                [ilog2(VM_MAYSHARE)] = "ms",
                [ilog2(VM_GROWSDOWN)] = "gd",
                [ilog2(VM_PFNMAP)] = "pf",
                [ilog2(VM_LOCKED)] = "lo",
                [ilog2(VM_IO)] = "io",
                [ilog2(VM_SEQ_READ)] = "sr",
                [ilog2(VM_RAND_READ)] = "rr",
                [ilog2(VM_DONTCOPY)] = "dc",
                [ilog2(VM_DONTEXPAND)] = "de",
                [ilog2(VM_ACCOUNT)] = "ac",
                [ilog2(VM_NORESERVE)] = "nr",
                [ilog2(VM_HUGETLB)] = "ht",
                [ilog2(VM_SYNC)] = "sf",
                [ilog2(VM_ARCH_1)] = "ar",
                [ilog2(VM_WIPEONFORK)] = "wf",
                [ilog2(VM_DONTDUMP)] = "dd",
#ifdef CONFIG_ARM64_BTI
                [ilog2(VM_ARM64_BTI)] = "bt",
#endif
#ifdef CONFIG_MEM_SOFT_DIRTY
                [ilog2(VM_SOFTDIRTY)] = "sd",
#endif
                [ilog2(VM_MIXEDMAP)] = "mm",
                [ilog2(VM_HUGEPAGE)] = "hg",
                [ilog2(VM_NOHUGEPAGE)] = "nh",
                [ilog2(VM_MERGEABLE)] = "mg",
                [ilog2(VM_UFFD_MISSING)]= "um",
                [ilog2(VM_UFFD_WP)] = "uw",
#ifdef CONFIG_ARM64_MTE
                [ilog2(VM_MTE)] = "mt",
                [ilog2(VM_MTE_ALLOWED)] = "",
#endif
#ifdef CONFIG_ARCH_HAS_PKEYS
                [ilog2(VM_PKEY_BIT0)] = "",
                [ilog2(VM_PKEY_BIT1)] = "",
                [ilog2(VM_PKEY_BIT2)] = "",
                [ilog2(VM_PKEY_BIT3)] = "",
#if VM_PKEY_BIT4
                [ilog2(VM_PKEY_BIT4)] = "",
#endif
#endif
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
                [ilog2(VM_UFFD_MINOR)] = "ui",
#endif
        };
        size_t i;

        seq_puts(m, "VmFlags: ");
        for (i = 0; i < BITS_PER_LONG; i++) {
                if (!mnemonics[i][0])
                        continue;
                if (vma->vm_flags & (1UL << i)) {
                        seq_putc(m, mnemonics[i][0]);
                        seq_putc(m, mnemonics[i][1]);
                        seq_putc(m, ' ');
                }
        }
        seq_putc(m, '\n');
}

#ifdef CONFIG_HUGETLB_PAGE
static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
                               unsigned long addr, unsigned long end,
                               struct mm_walk *walk)
{
        struct mem_size_stats *mss = walk->private;
        struct vm_area_struct *vma = walk->vma;
        struct page *page = NULL;

        if (pte_present(*pte)) {
                page = vm_normal_page(vma, addr, *pte);
        } else if (is_swap_pte(*pte)) {
                swp_entry_t swpent = pte_to_swp_entry(*pte);

                if (is_pfn_swap_entry(swpent))
                        page = pfn_swap_entry_to_page(swpent);
        }
        if (page) {
                int mapcount = page_mapcount(page);

                if (mapcount >= 2)
                        mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
                else
                        mss->private_hugetlb += huge_page_size(hstate_vma(vma));
        }
        return 0;
}
#else
#define smaps_hugetlb_range NULL
#endif

static const struct mm_walk_ops smaps_walk_ops = {
        .pmd_entry = smaps_pte_range,
        .hugetlb_entry = smaps_hugetlb_range,
};

static const struct mm_walk_ops smaps_shmem_walk_ops = {
        .pmd_entry = smaps_pte_range,
        .hugetlb_entry = smaps_hugetlb_range,
        .pte_hole = smaps_pte_hole,
};

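/*
 * Gather memory statistics from @vma into @mss, starting at address @start.
 * A @start of zero means "account the whole VMA"; a non-zero @start is used
 * by smaps_rollup when a VMA has to be re-walked from a known offset after
 * the mmap_lock was dropped.
 */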
static void smap_gather_stats(struct vm_area_struct *vma,
                struct mem_size_stats *mss, unsigned long start)
{
        const struct mm_walk_ops *ops = &smaps_walk_ops;

        if (start >= vma->vm_end)
                return;

#ifdef CONFIG_SHMEM
        if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
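                /*
                 * For shared or read-only shmem mappings, every swapped-out
                 * page belongs to the shmem object itself, so the swap total
                 * can be read cheaply from shmem_swap_usage().  A private
                 * writable mapping may contain COW pages instead, so unless
                 * the object has no swap at all we must walk the holes and
                 * count per-index with smaps_pte_hole().
                 */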
                unsigned long shmem_swapped = shmem_swap_usage(vma);

                if (!start && (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
                                        !(vma->vm_flags & VM_WRITE))) {
                        mss->swap += shmem_swapped;
                } else {
                        ops = &smaps_shmem_walk_ops;
                }
        }
#endif

        if (!start)
                walk_page_vma(vma, ops, mss);
        else
                walk_page_range(vma->vm_mm, start, vma->vm_end, ops, mss);
}

#define SEQ_PUT_DEC(str, val) \
        seq_put_decimal_ull_width(m, str, (val) >> 10, 8)

static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
        bool rollup_mode)
{
        SEQ_PUT_DEC("Rss: ", mss->resident);
        SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT);
        SEQ_PUT_DEC(" kB\nPss_Dirty: ", mss->pss_dirty >> PSS_SHIFT);
        if (rollup_mode) {
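                /*
                 * Pss_Anon, Pss_File and Pss_Shmem are only meaningful for
                 * smaps_rollup; in the per-VMA output they would either be
                 * zero or simply repeat Pss.
                 */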
                SEQ_PUT_DEC(" kB\nPss_Anon: ",
                        mss->pss_anon >> PSS_SHIFT);
                SEQ_PUT_DEC(" kB\nPss_File: ",
                        mss->pss_file >> PSS_SHIFT);
                SEQ_PUT_DEC(" kB\nPss_Shmem: ",
                        mss->pss_shmem >> PSS_SHIFT);
        }
        SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean);
        SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty);
        SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean);
        SEQ_PUT_DEC(" kB\nPrivate_Dirty: ", mss->private_dirty);
        SEQ_PUT_DEC(" kB\nReferenced: ", mss->referenced);
        SEQ_PUT_DEC(" kB\nAnonymous: ", mss->anonymous);
        SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
        SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
        SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
        SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp);
        SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
        seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
                                  mss->private_hugetlb >> 10, 7);
        SEQ_PUT_DEC(" kB\nSwap: ", mss->swap);
        SEQ_PUT_DEC(" kB\nSwapPss: ",
                        mss->swap_pss >> PSS_SHIFT);
        SEQ_PUT_DEC(" kB\nLocked: ",
                        mss->pss_locked >> PSS_SHIFT);
        seq_puts(m, " kB\n");
}

static int show_smap(struct seq_file *m, void *v)
{
        struct vm_area_struct *vma = v;
        struct mem_size_stats mss;

        memset(&mss, 0, sizeof(mss));

        smap_gather_stats(vma, &mss, 0);

        show_map_vma(m, vma);

        SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start);
        SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
        SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma));
        seq_puts(m, " kB\n");

        __show_smap(m, &mss, false);

        seq_printf(m, "THPeligible: %d\n",
                   hugepage_vma_check(vma, vma->vm_flags, true, false));

        if (arch_pkeys_enabled())
                seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
        show_smap_vma_flags(m, vma);

        return 0;
}

static int show_smaps_rollup(struct seq_file *m, void *v)
{
        struct proc_maps_private *priv = m->private;
        struct mem_size_stats mss;
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        unsigned long last_vma_end = 0;
        int ret = 0;

        priv->task = get_proc_task(priv->inode);
        if (!priv->task)
                return -ESRCH;

        mm = priv->mm;
        if (!mm || !mmget_not_zero(mm)) {
                ret = -ESRCH;
                goto out_put_task;
        }

        memset(&mss, 0, sizeof(mss));

        ret = mmap_read_lock_killable(mm);
        if (ret)
                goto out_put_mm;

        hold_task_mempolicy(priv);

        for (vma = priv->mm->mmap; vma;) {
                smap_gather_stats(vma, &mss, 0);
                last_vma_end = vma->vm_end;

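                /*
                 * Drop and re-take the mmap_lock if a writer is waiting on
                 * it, so that a long rollup walk does not block mmap/munmap.
                 */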
                if (mmap_lock_is_contended(mm)) {
                        mmap_read_unlock(mm);
                        ret = mmap_read_lock_killable(mm);
                        if (ret) {
                                release_task_mempolicy(priv);
                                goto out_put_mm;
                        }

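                        /*
                         * The VMA list may have changed while the lock was
                         * dropped, so re-validate against last_vma_end:
                         *  - no VMA found: nothing left to walk, stop;
                         *  - the returned VMA starts at or after
                         *    last_vma_end: it is a new VMA, account it from
                         *    the top of the loop;
                         *  - otherwise last_vma_end falls inside the VMA, so
                         *    account only the part beyond last_vma_end before
                         *    moving on.
                         */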
                        vma = find_vma(mm, last_vma_end - 1);

                        if (!vma)
                                break;

                        if (vma->vm_start >= last_vma_end)
                                continue;

                        if (vma->vm_end > last_vma_end)
                                smap_gather_stats(vma, &mss, last_vma_end);
                }

                vma = vma->vm_next;
        }

        show_vma_header_prefix(m, priv->mm->mmap->vm_start,
                               last_vma_end, 0, 0, 0, 0);
        seq_pad(m, ' ');
        seq_puts(m, "[rollup]\n");

        __show_smap(m, &mss, true);

        release_task_mempolicy(priv);
        mmap_read_unlock(mm);

out_put_mm:
        mmput(mm);
out_put_task:
        put_task_struct(priv->task);
        priv->task = NULL;

        return ret;
}
#undef SEQ_PUT_DEC

static const struct seq_operations proc_pid_smaps_op = {
        .start = m_start,
        .next = m_next,
        .stop = m_stop,
        .show = show_smap
};

static int pid_smaps_open(struct inode *inode, struct file *file)
{
        return do_maps_open(inode, file, &proc_pid_smaps_op);
}

static int smaps_rollup_open(struct inode *inode, struct file *file)
{
        int ret;
        struct proc_maps_private *priv;

        priv = kzalloc(sizeof(*priv), GFP_KERNEL_ACCOUNT);
        if (!priv)
                return -ENOMEM;

        ret = single_open(file, show_smaps_rollup, priv);
        if (ret)
                goto out_free;

        priv->inode = inode;
        priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
        if (IS_ERR(priv->mm)) {
                ret = PTR_ERR(priv->mm);

                single_release(inode, file);
                goto out_free;
        }

        return 0;

out_free:
        kfree(priv);
        return ret;
}

static int smaps_rollup_release(struct inode *inode, struct file *file)
{
        struct seq_file *seq = file->private_data;
        struct proc_maps_private *priv = seq->private;

        if (priv->mm)
                mmdrop(priv->mm);

        kfree(priv);
        return single_release(inode, file);
}

const struct file_operations proc_pid_smaps_operations = {
        .open = pid_smaps_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = proc_map_release,
};

const struct file_operations proc_pid_smaps_rollup_operations = {
        .open = smaps_rollup_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = smaps_rollup_release,
};

enum clear_refs_types {
        CLEAR_REFS_ALL = 1,
        CLEAR_REFS_ANON,
        CLEAR_REFS_MAPPED,
        CLEAR_REFS_SOFT_DIRTY,
        CLEAR_REFS_MM_HIWATER_RSS,
        CLEAR_REFS_LAST,
};

struct clear_refs_private {
        enum clear_refs_types type;
};

#ifdef CONFIG_MEM_SOFT_DIRTY

static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
        struct page *page;

        if (!pte_write(pte))
                return false;
        if (!is_cow_mapping(vma->vm_flags))
                return false;
        if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)))
                return false;
        page = vm_normal_page(vma, addr, pte);
        if (!page)
                return false;
        return page_maybe_dma_pinned(page);
}

static inline void clear_soft_dirty(struct vm_area_struct *vma,
                unsigned long addr, pte_t *pte)
{
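        /*
         * Soft-dirty tracking relies on page faults: clear the soft-dirty
         * bit and write-protect the PTE so the next write re-faults and
         * marks the page dirty again.
         */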
        pte_t ptent = *pte;

        if (pte_present(ptent)) {
                pte_t old_pte;

                if (pte_is_pinned(vma, addr, ptent))
                        return;
                old_pte = ptep_modify_prot_start(vma, addr, pte);
                ptent = pte_wrprotect(old_pte);
                ptent = pte_clear_soft_dirty(ptent);
                ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
        } else if (is_swap_pte(ptent)) {
                ptent = pte_swp_clear_soft_dirty(ptent);
                set_pte_at(vma->vm_mm, addr, pte, ptent);
        }
}
#else
static inline void clear_soft_dirty(struct vm_area_struct *vma,
                unsigned long addr, pte_t *pte)
{
}
#endif

#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
                unsigned long addr, pmd_t *pmdp)
{
        pmd_t old, pmd = *pmdp;

        if (pmd_present(pmd)) {
                old = pmdp_invalidate(vma, addr, pmdp);
                if (pmd_dirty(old))
                        pmd = pmd_mkdirty(pmd);
                if (pmd_young(old))
                        pmd = pmd_mkyoung(pmd);

                pmd = pmd_wrprotect(pmd);
                pmd = pmd_clear_soft_dirty(pmd);

                set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
        } else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
                pmd = pmd_swp_clear_soft_dirty(pmd);
                set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
        }
}
#else
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
                unsigned long addr, pmd_t *pmdp)
{
}
#endif

static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
                                unsigned long end, struct mm_walk *walk)
{
        struct clear_refs_private *cp = walk->private;
        struct vm_area_struct *vma = walk->vma;
        pte_t *pte, ptent;
        spinlock_t *ptl;
        struct page *page;

        ptl = pmd_trans_huge_lock(pmd, vma);
        if (ptl) {
                if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
                        clear_soft_dirty_pmd(vma, addr, pmd);
                        goto out;
                }

                if (!pmd_present(*pmd))
                        goto out;

                page = pmd_page(*pmd);

                pmdp_test_and_clear_young(vma, addr, pmd);
                test_and_clear_page_young(page);
                ClearPageReferenced(page);
out:
                spin_unlock(ptl);
                return 0;
        }

        if (pmd_trans_unstable(pmd))
                return 0;

        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        for (; addr != end; pte++, addr += PAGE_SIZE) {
                ptent = *pte;

                if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
                        clear_soft_dirty(vma, addr, pte);
                        continue;
                }

                if (!pte_present(ptent))
                        continue;

                page = vm_normal_page(vma, addr, ptent);
                if (!page)
                        continue;

                ptep_test_and_clear_young(vma, addr, pte);
                test_and_clear_page_young(page);
                ClearPageReferenced(page);
        }
        pte_unmap_unlock(pte - 1, ptl);
        cond_resched();
        return 0;
}

static int clear_refs_test_walk(unsigned long start, unsigned long end,
                                struct mm_walk *walk)
{
        struct clear_refs_private *cp = walk->private;
        struct vm_area_struct *vma = walk->vma;

        if (vma->vm_flags & VM_PFNMAP)
                return 1;

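        /*
         * Skip VMAs that the requested operation does not apply to:
         * CLEAR_REFS_ANON only touches anonymous mappings and
         * CLEAR_REFS_MAPPED only touches file-backed ones.
         */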
        if (cp->type == CLEAR_REFS_ANON && vma->vm_file)
                return 1;
        if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file)
                return 1;
        return 0;
}

static const struct mm_walk_ops clear_refs_walk_ops = {
        .pmd_entry = clear_refs_pte_range,
        .test_walk = clear_refs_test_walk,
};

static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                                size_t count, loff_t *ppos)
{
        struct task_struct *task;
        char buffer[PROC_NUMBUF];
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        enum clear_refs_types type;
        int itype;
        int rv;

        memset(buffer, 0, sizeof(buffer));
        if (count > sizeof(buffer) - 1)
                count = sizeof(buffer) - 1;
        if (copy_from_user(buffer, buf, count))
                return -EFAULT;
        rv = kstrtoint(strstrip(buffer), 10, &itype);
        if (rv < 0)
                return rv;
        type = (enum clear_refs_types)itype;
        if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
                return -EINVAL;

        task = get_proc_task(file_inode(file));
        if (!task)
                return -ESRCH;
        mm = get_task_mm(task);
        if (mm) {
                struct mmu_notifier_range range;
                struct clear_refs_private cp = {
                        .type = type,
                };

                if (mmap_write_lock_killable(mm)) {
                        count = -EINTR;
                        goto out_mm;
                }
                if (type == CLEAR_REFS_MM_HIWATER_RSS) {
                        reset_mm_hiwater_rss(mm);
                        goto out_unlock;
                }

                if (type == CLEAR_REFS_SOFT_DIRTY) {
                        for (vma = mm->mmap; vma; vma = vma->vm_next) {
                                if (!(vma->vm_flags & VM_SOFTDIRTY))
                                        continue;
                                vma->vm_flags &= ~VM_SOFTDIRTY;
                                vma_set_page_prot(vma);
                        }

                        inc_tlb_flush_pending(mm);
                        mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
                                                0, NULL, mm, 0, -1UL);
                        mmu_notifier_invalidate_range_start(&range);
                }
                walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops,
                                &cp);
                if (type == CLEAR_REFS_SOFT_DIRTY) {
                        mmu_notifier_invalidate_range_end(&range);
                        flush_tlb_mm(mm);
                        dec_tlb_flush_pending(mm);
                }
out_unlock:
                mmap_write_unlock(mm);
out_mm:
                mmput(mm);
        }
        put_task_struct(task);

        return count;
}

const struct file_operations proc_clear_refs_operations = {
        .write = clear_refs_write,
        .llseek = noop_llseek,
};

typedef struct {
        u64 pme;
} pagemap_entry_t;

struct pagemapread {
        int pos, len;
        pagemap_entry_t *buffer;
        bool show_pfn;
};

#define PAGEMAP_WALK_SIZE (PMD_SIZE)
#define PAGEMAP_WALK_MASK (PMD_MASK)

#define PM_ENTRY_BYTES sizeof(pagemap_entry_t)
#define PM_PFRAME_BITS 55
#define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
#define PM_SOFT_DIRTY BIT_ULL(55)
#define PM_MMAP_EXCLUSIVE BIT_ULL(56)
#define PM_UFFD_WP BIT_ULL(57)
#define PM_FILE BIT_ULL(61)
#define PM_SWAP BIT_ULL(62)
#define PM_PRESENT BIT_ULL(63)

#define PM_END_OF_BUFFER 1

static inline pagemap_entry_t make_pme(u64 frame, u64 flags)
{
        return (pagemap_entry_t) { .pme = (frame & PM_PFRAME_MASK) | flags };
}

static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
                          struct pagemapread *pm)
{
        pm->buffer[pm->pos++] = *pme;
        if (pm->pos >= pm->len)
                return PM_END_OF_BUFFER;
        return 0;
}

static int pagemap_pte_hole(unsigned long start, unsigned long end,
                            __always_unused int depth, struct mm_walk *walk)
{
        struct pagemapread *pm = walk->private;
        unsigned long addr = start;
        int err = 0;

        while (addr < end) {
                struct vm_area_struct *vma = find_vma(walk->mm, addr);
                pagemap_entry_t pme = make_pme(0, 0);
                unsigned long hole_end;

                if (vma)
                        hole_end = min(end, vma->vm_start);
                else
                        hole_end = end;

                for (; addr < hole_end; addr += PAGE_SIZE) {
                        err = add_to_pagemap(addr, &pme, pm);
                        if (err)
                                goto out;
                }

                if (!vma)
                        break;

                if (vma->vm_flags & VM_SOFTDIRTY)
                        pme = make_pme(0, PM_SOFT_DIRTY);
                for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
                        err = add_to_pagemap(addr, &pme, pm);
                        if (err)
                                goto out;
                }
        }
out:
        return err;
}

static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
                struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
        u64 frame = 0, flags = 0;
        struct page *page = NULL;
        bool migration = false;

        if (pte_present(pte)) {
                if (pm->show_pfn)
                        frame = pte_pfn(pte);
                flags |= PM_PRESENT;
                page = vm_normal_page(vma, addr, pte);
                if (pte_soft_dirty(pte))
                        flags |= PM_SOFT_DIRTY;
                if (pte_uffd_wp(pte))
                        flags |= PM_UFFD_WP;
        } else if (is_swap_pte(pte)) {
                swp_entry_t entry;
                if (pte_swp_soft_dirty(pte))
                        flags |= PM_SOFT_DIRTY;
                if (pte_swp_uffd_wp(pte))
                        flags |= PM_UFFD_WP;
                entry = pte_to_swp_entry(pte);
                if (pm->show_pfn)
                        frame = swp_type(entry) |
                                (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
                flags |= PM_SWAP;
                migration = is_migration_entry(entry);
                if (is_pfn_swap_entry(entry))
                        page = pfn_swap_entry_to_page(entry);
                if (pte_marker_entry_uffd_wp(entry))
                        flags |= PM_UFFD_WP;
        }

        if (page && !PageAnon(page))
                flags |= PM_FILE;
        if (page && !migration && page_mapcount(page) == 1)
                flags |= PM_MMAP_EXCLUSIVE;
        if (vma->vm_flags & VM_SOFTDIRTY)
                flags |= PM_SOFT_DIRTY;

        return make_pme(frame, flags);
}

static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
                             struct mm_walk *walk)
{
        struct vm_area_struct *vma = walk->vma;
        struct pagemapread *pm = walk->private;
        spinlock_t *ptl;
        pte_t *pte, *orig_pte;
        int err = 0;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        bool migration = false;

        ptl = pmd_trans_huge_lock(pmdp, vma);
        if (ptl) {
                u64 flags = 0, frame = 0;
                pmd_t pmd = *pmdp;
                struct page *page = NULL;

                if (vma->vm_flags & VM_SOFTDIRTY)
                        flags |= PM_SOFT_DIRTY;

                if (pmd_present(pmd)) {
                        page = pmd_page(pmd);

                        flags |= PM_PRESENT;
                        if (pmd_soft_dirty(pmd))
                                flags |= PM_SOFT_DIRTY;
                        if (pmd_uffd_wp(pmd))
                                flags |= PM_UFFD_WP;
                        if (pm->show_pfn)
                                frame = pmd_pfn(pmd) +
                                        ((addr & ~PMD_MASK) >> PAGE_SHIFT);
                }
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
                else if (is_swap_pmd(pmd)) {
                        swp_entry_t entry = pmd_to_swp_entry(pmd);
                        unsigned long offset;

                        if (pm->show_pfn) {
                                offset = swp_offset(entry) +
                                        ((addr & ~PMD_MASK) >> PAGE_SHIFT);
                                frame = swp_type(entry) |
                                        (offset << MAX_SWAPFILES_SHIFT);
                        }
                        flags |= PM_SWAP;
                        if (pmd_swp_soft_dirty(pmd))
                                flags |= PM_SOFT_DIRTY;
                        if (pmd_swp_uffd_wp(pmd))
                                flags |= PM_UFFD_WP;
                        VM_BUG_ON(!is_pmd_migration_entry(pmd));
                        migration = is_migration_entry(entry);
                        page = pfn_swap_entry_to_page(entry);
                }
#endif

                if (page && !migration && page_mapcount(page) == 1)
                        flags |= PM_MMAP_EXCLUSIVE;

                for (; addr != end; addr += PAGE_SIZE) {
                        pagemap_entry_t pme = make_pme(frame, flags);

                        err = add_to_pagemap(addr, &pme, pm);
                        if (err)
                                break;
                        if (pm->show_pfn) {
                                if (flags & PM_PRESENT)
                                        frame++;
                                else if (flags & PM_SWAP)
                                        frame += (1 << MAX_SWAPFILES_SHIFT);
                        }
                }
                spin_unlock(ptl);
                return err;
        }

        if (pmd_trans_unstable(pmdp))
                return 0;
#endif

        orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
        for (; addr < end; pte++, addr += PAGE_SIZE) {
                pagemap_entry_t pme;

                pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
                err = add_to_pagemap(addr, &pme, pm);
                if (err)
                        break;
        }
        pte_unmap_unlock(orig_pte, ptl);

        cond_resched();

        return err;
}

#ifdef CONFIG_HUGETLB_PAGE
static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
                                 unsigned long addr, unsigned long end,
                                 struct mm_walk *walk)
{
        struct pagemapread *pm = walk->private;
        struct vm_area_struct *vma = walk->vma;
        u64 flags = 0, frame = 0;
        int err = 0;
        pte_t pte;

        if (vma->vm_flags & VM_SOFTDIRTY)
                flags |= PM_SOFT_DIRTY;

        pte = huge_ptep_get(ptep);
        if (pte_present(pte)) {
                struct page *page = pte_page(pte);

                if (!PageAnon(page))
                        flags |= PM_FILE;

                if (page_mapcount(page) == 1)
                        flags |= PM_MMAP_EXCLUSIVE;

                if (huge_pte_uffd_wp(pte))
                        flags |= PM_UFFD_WP;

                flags |= PM_PRESENT;
                if (pm->show_pfn)
                        frame = pte_pfn(pte) +
                                ((addr & ~hmask) >> PAGE_SHIFT);
        } else if (pte_swp_uffd_wp_any(pte)) {
                flags |= PM_UFFD_WP;
        }

        for (; addr != end; addr += PAGE_SIZE) {
                pagemap_entry_t pme = make_pme(frame, flags);

                err = add_to_pagemap(addr, &pme, pm);
                if (err)
                        return err;
                if (pm->show_pfn && (flags & PM_PRESENT))
                        frame++;
        }

        cond_resched();

        return err;
}
#else
#define pagemap_hugetlb_range NULL
#endif

static const struct mm_walk_ops pagemap_ops = {
        .pmd_entry = pagemap_pmd_range,
        .pte_hole = pagemap_pte_hole,
        .hugetlb_entry = pagemap_hugetlb_range,
};

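/*
 * /proc/pid/pagemap - one 64-bit entry per virtual page:
 *
 * Bits 0-54  page frame number (PFN) if present, or the swap type and
 *            offset if the page is swapped out
 * Bit  55    pte is soft-dirty
 * Bit  56    page exclusively mapped
 * Bit  57    pte is uffd-wp write-protected
 * Bit  61    page is file-backed or shared-anon
 * Bit  62    page is swapped
 * Bit  63    page is present
 *
 * Efficient users consult /proc/pid/maps to find the mapped areas and
 * lseek() past the holes.
 */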
static ssize_t pagemap_read(struct file *file, char __user *buf,
                            size_t count, loff_t *ppos)
{
        struct mm_struct *mm = file->private_data;
        struct pagemapread pm;
        unsigned long src;
        unsigned long svpfn;
        unsigned long start_vaddr;
        unsigned long end_vaddr;
        int ret = 0, copied = 0;

        if (!mm || !mmget_not_zero(mm))
                goto out;

        ret = -EINVAL;
        if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
                goto out_mm;

        ret = 0;
        if (!count)
                goto out_mm;

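        /* only CAP_SYS_ADMIN may see physical frame numbers */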
        pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);

        pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
        pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL);
        ret = -ENOMEM;
        if (!pm.buffer)
                goto out_mm;

        src = *ppos;
        svpfn = src / PM_ENTRY_BYTES;
        end_vaddr = mm->task_size;

        start_vaddr = end_vaddr;
        if (svpfn <= (ULONG_MAX >> PAGE_SHIFT))
                start_vaddr = untagged_addr(svpfn << PAGE_SHIFT);

        if (start_vaddr > mm->task_size)
                start_vaddr = end_vaddr;

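        /*
         * The walk is done in PAGEMAP_WALK_SIZE chunks and stops as soon as
         * the user buffer (tracked via pm.pos/pm.len) is full, so it will
         * usually finish well before end_vaddr.
         */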
        ret = 0;
        while (count && (start_vaddr < end_vaddr)) {
                int len;
                unsigned long end;

                pm.pos = 0;
                end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;

                if (end < start_vaddr || end > end_vaddr)
                        end = end_vaddr;
                ret = mmap_read_lock_killable(mm);
                if (ret)
                        goto out_free;
                ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
                mmap_read_unlock(mm);
                start_vaddr = end;

                len = min(count, PM_ENTRY_BYTES * pm.pos);
                if (copy_to_user(buf, pm.buffer, len)) {
                        ret = -EFAULT;
                        goto out_free;
                }
                copied += len;
                buf += len;
                count -= len;
        }
        *ppos += copied;
        if (!ret || ret == PM_END_OF_BUFFER)
                ret = copied;

out_free:
        kfree(pm.buffer);
out_mm:
        mmput(mm);
out:
        return ret;
}

static int pagemap_open(struct inode *inode, struct file *file)
{
        struct mm_struct *mm;

        mm = proc_mem_open(inode, PTRACE_MODE_READ);
        if (IS_ERR(mm))
                return PTR_ERR(mm);
        file->private_data = mm;
        return 0;
}

static int pagemap_release(struct inode *inode, struct file *file)
{
        struct mm_struct *mm = file->private_data;

        if (mm)
                mmdrop(mm);
        return 0;
}

const struct file_operations proc_pagemap_operations = {
        .llseek = mem_lseek,
        .read = pagemap_read,
        .open = pagemap_open,
        .release = pagemap_release,
};
#endif

#ifdef CONFIG_NUMA

struct numa_maps {
        unsigned long pages;
        unsigned long anon;
        unsigned long active;
        unsigned long writeback;
        unsigned long mapcount_max;
        unsigned long dirty;
        unsigned long swapcache;
        unsigned long node[MAX_NUMNODES];
};

struct numa_maps_private {
        struct proc_maps_private proc_maps;
        struct numa_maps md;
};

static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
                        unsigned long nr_pages)
{
        int count = page_mapcount(page);

        md->pages += nr_pages;
        if (pte_dirty || PageDirty(page))
                md->dirty += nr_pages;

        if (PageSwapCache(page))
                md->swapcache += nr_pages;

        if (PageActive(page) || PageUnevictable(page))
                md->active += nr_pages;

        if (PageWriteback(page))
                md->writeback += nr_pages;

        if (PageAnon(page))
                md->anon += nr_pages;

        if (count > md->mapcount_max)
                md->mapcount_max = count;

        md->node[page_to_nid(page)] += nr_pages;
}

static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
                unsigned long addr)
{
        struct page *page;
        int nid;

        if (!pte_present(pte))
                return NULL;

        page = vm_normal_page(vma, addr, pte);
        if (!page || is_zone_device_page(page))
                return NULL;

        if (PageReserved(page))
                return NULL;

        nid = page_to_nid(page);
        if (!node_isset(nid, node_states[N_MEMORY]))
                return NULL;

        return page;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static struct page *can_gather_numa_stats_pmd(pmd_t pmd,
                                              struct vm_area_struct *vma,
                                              unsigned long addr)
{
        struct page *page;
        int nid;

        if (!pmd_present(pmd))
                return NULL;

        page = vm_normal_page_pmd(vma, addr, pmd);
        if (!page)
                return NULL;

        if (PageReserved(page))
                return NULL;

        nid = page_to_nid(page);
        if (!node_isset(nid, node_states[N_MEMORY]))
                return NULL;

        return page;
}
#endif

static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
                unsigned long end, struct mm_walk *walk)
{
        struct numa_maps *md = walk->private;
        struct vm_area_struct *vma = walk->vma;
        spinlock_t *ptl;
        pte_t *orig_pte;
        pte_t *pte;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        ptl = pmd_trans_huge_lock(pmd, vma);
        if (ptl) {
                struct page *page;

                page = can_gather_numa_stats_pmd(*pmd, vma, addr);
                if (page)
                        gather_stats(page, md, pmd_dirty(*pmd),
                                     HPAGE_PMD_SIZE/PAGE_SIZE);
                spin_unlock(ptl);
                return 0;
        }

        if (pmd_trans_unstable(pmd))
                return 0;
#endif
        orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
        do {
                struct page *page = can_gather_numa_stats(*pte, vma, addr);
                if (!page)
                        continue;
                gather_stats(page, md, pte_dirty(*pte), 1);

        } while (pte++, addr += PAGE_SIZE, addr != end);
        pte_unmap_unlock(orig_pte, ptl);
        cond_resched();
        return 0;
}
#ifdef CONFIG_HUGETLB_PAGE
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
                unsigned long addr, unsigned long end, struct mm_walk *walk)
{
        pte_t huge_pte = huge_ptep_get(pte);
        struct numa_maps *md;
        struct page *page;

        if (!pte_present(huge_pte))
                return 0;

        page = pte_page(huge_pte);

        md = walk->private;
        gather_stats(page, md, pte_dirty(huge_pte), 1);
        return 0;
}

#else
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
                unsigned long addr, unsigned long end, struct mm_walk *walk)
{
        return 0;
}
#endif

static const struct mm_walk_ops show_numa_ops = {
        .hugetlb_entry = gather_hugetlb_stats,
        .pmd_entry = gather_pte_stats,
};

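/*
 * Display pages allocated per NUMA node and the memory policy in effect
 * for each VMA via /proc/PID/numa_maps.
 */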
static int show_numa_map(struct seq_file *m, void *v)
{
        struct numa_maps_private *numa_priv = m->private;
        struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
        struct vm_area_struct *vma = v;
        struct numa_maps *md = &numa_priv->md;
        struct file *file = vma->vm_file;
        struct mm_struct *mm = vma->vm_mm;
        struct mempolicy *pol;
        char buffer[64];
        int nid;

        if (!mm)
                return 0;

        memset(md, 0, sizeof(*md));

        pol = __get_vma_policy(vma, vma->vm_start);
        if (pol) {
                mpol_to_str(buffer, sizeof(buffer), pol);
                mpol_cond_put(pol);
        } else {
                mpol_to_str(buffer, sizeof(buffer), proc_priv->task_mempolicy);
        }

        seq_printf(m, "%08lx %s", vma->vm_start, buffer);

        if (file) {
                seq_puts(m, " file=");
                seq_file_path(m, file, "\n\t= ");
        } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
                seq_puts(m, " heap");
        } else if (is_stack(vma)) {
                seq_puts(m, " stack");
        }

        if (is_vm_hugetlb_page(vma))
                seq_puts(m, " huge");

        walk_page_vma(vma, &show_numa_ops, md);

        if (!md->pages)
                goto out;

        if (md->anon)
                seq_printf(m, " anon=%lu", md->anon);

        if (md->dirty)
                seq_printf(m, " dirty=%lu", md->dirty);

        if (md->pages != md->anon && md->pages != md->dirty)
                seq_printf(m, " mapped=%lu", md->pages);

        if (md->mapcount_max > 1)
                seq_printf(m, " mapmax=%lu", md->mapcount_max);

        if (md->swapcache)
                seq_printf(m, " swapcache=%lu", md->swapcache);

        if (md->active < md->pages && !is_vm_hugetlb_page(vma))
                seq_printf(m, " active=%lu", md->active);

        if (md->writeback)
                seq_printf(m, " writeback=%lu", md->writeback);

        for_each_node_state(nid, N_MEMORY)
                if (md->node[nid])
                        seq_printf(m, " N%d=%lu", nid, md->node[nid]);

        seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10);
out:
        seq_putc(m, '\n');
        return 0;
}

static const struct seq_operations proc_pid_numa_maps_op = {
        .start = m_start,
        .next = m_next,
        .stop = m_stop,
        .show = show_numa_map,
};

static int pid_numa_maps_open(struct inode *inode, struct file *file)
{
        return proc_maps_open(inode, file, &proc_pid_numa_maps_op,
                        sizeof(struct numa_maps_private));
}

const struct file_operations proc_pid_numa_maps_operations = {
        .open = pid_numa_maps_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = proc_map_release,
};

#endif