// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/memblock.h>
#include <linux/stacktrace.h>
#include <linux/page_owner.h>
#include <linux/jump_label.h>
#include <linux/migrate.h>
#include <linux/stackdepot.h>
#include <linux/seq_file.h>
#include <linux/memcontrol.h>
#include <linux/sched/clock.h>

#include "internal.h"

/*
 * TODO: teach PAGE_OWNER_STACK_DEPTH (__dump_page_owner and save_stack)
 * to use off-stack temporary storage instead of the on-stack arrays.
 */
#define PAGE_OWNER_STACK_DEPTH (16)

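/*
 * Per-page allocation metadata, stored in the page_ext area for each
 * base page of a tracked allocation.
 */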
struct page_owner {
	unsigned short order;
	short last_migrate_reason;
	gfp_t gfp_mask;
	depot_stack_handle_t handle;
	depot_stack_handle_t free_handle;
	u64 ts_nsec;
	u64 free_ts_nsec;
	char comm[TASK_COMM_LEN];
	pid_t pid;
	pid_t tgid;
};

static bool page_owner_enabled __initdata;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);

static depot_stack_handle_t dummy_handle;
static depot_stack_handle_t failure_handle;
static depot_stack_handle_t early_handle;

static void init_early_allocated_pages(void);

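/* Parse the "page_owner=on"/"page_owner=off" kernel command-line option. */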
static int __init early_page_owner_param(char *buf)
{
	int ret = kstrtobool(buf, &page_owner_enabled);

	if (page_owner_enabled)
		stack_depot_want_early_init();

	return ret;
}
early_param("page_owner", early_page_owner_param);

static __init bool need_page_owner(void)
{
	return page_owner_enabled;
}

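/*
 * Pre-registered fallback handles: dummy_handle is returned on reentrant
 * calls into save_stack(), failure_handle when stack_depot_save() fails,
 * and early_handle marks pages allocated before page_owner was ready.
 */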
static __always_inline depot_stack_handle_t create_dummy_stack(void)
{
	unsigned long entries[4];
	unsigned int nr_entries;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
	return stack_depot_save(entries, nr_entries, GFP_KERNEL);
}

static noinline void register_dummy_stack(void)
{
	dummy_handle = create_dummy_stack();
}

static noinline void register_failure_stack(void)
{
	failure_handle = create_dummy_stack();
}

static noinline void register_early_stack(void)
{
	early_handle = create_dummy_stack();
}

static __init void init_page_owner(void)
{
	if (!page_owner_enabled)
		return;

	register_dummy_stack();
	register_failure_stack();
	register_early_stack();
	static_branch_enable(&page_owner_inited);
	init_early_allocated_pages();
}

struct page_ext_operations page_owner_ops = {
	.size = sizeof(struct page_owner),
	.need = need_page_owner,
	.init = init_page_owner,
};

static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
{
	return (void *)page_ext + page_owner_ops.offset;
}

static noinline depot_stack_handle_t save_stack(gfp_t flags)
{
	unsigned long entries[PAGE_OWNER_STACK_DEPTH];
	depot_stack_handle_t handle;
	unsigned int nr_entries;

	/*
	 * Avoid recursion.
	 *
	 * Sometimes page metadata allocation tracking requires more
	 * memory to be allocated:
	 * - when a new stack trace is saved to the stack depot
	 * - when the backtrace itself is calculated (ia64)
	 */
	if (current->in_page_owner)
		return dummy_handle;
	current->in_page_owner = 1;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
	handle = stack_depot_save(entries, nr_entries, flags);
	if (!handle)
		handle = failure_handle;

	current->in_page_owner = 0;
	return handle;
}

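/*
 * Called on page free: save the freeing stack and timestamp and clear
 * the "allocated" bit on every base page of the order-sized block.
 */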
void __reset_page_owner(struct page *page, unsigned short order)
{
	int i;
	struct page_ext *page_ext;
	depot_stack_handle_t handle;
	struct page_owner *page_owner;
	u64 free_ts_nsec = local_clock();

	page_ext = lookup_page_ext(page);
	if (unlikely(!page_ext))
		return;

	handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
	for (i = 0; i < (1 << order); i++) {
		__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
		page_owner = get_page_owner(page_ext);
		page_owner->free_handle = handle;
		page_owner->free_ts_nsec = free_ts_nsec;
		page_ext = page_ext_next(page_ext);
	}
}

static inline void __set_page_owner_handle(struct page_ext *page_ext,
					depot_stack_handle_t handle,
					unsigned short order, gfp_t gfp_mask)
{
	struct page_owner *page_owner;
	int i;

	for (i = 0; i < (1 << order); i++) {
		page_owner = get_page_owner(page_ext);
		page_owner->handle = handle;
		page_owner->order = order;
		page_owner->gfp_mask = gfp_mask;
		page_owner->last_migrate_reason = -1;
		page_owner->pid = current->pid;
		page_owner->tgid = current->tgid;
		page_owner->ts_nsec = local_clock();
		strscpy(page_owner->comm, current->comm,
			sizeof(page_owner->comm));
		__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
		__set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);

		page_ext = page_ext_next(page_ext);
	}
}

noinline void __set_page_owner(struct page *page, unsigned short order,
					gfp_t gfp_mask)
{
	struct page_ext *page_ext = lookup_page_ext(page);
	depot_stack_handle_t handle;

	if (unlikely(!page_ext))
		return;

	handle = save_stack(gfp_mask);
	__set_page_owner_handle(page_ext, handle, order, gfp_mask);
}

void __set_page_owner_migrate_reason(struct page *page, int reason)
{
	struct page_ext *page_ext = lookup_page_ext(page);
	struct page_owner *page_owner;

	if (unlikely(!page_ext))
		return;

	page_owner = get_page_owner(page_ext);
	page_owner->last_migrate_reason = reason;
}

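/*
 * When a high-order page is split into order-0 pages, reset the recorded
 * order on each sub-page so per-page accounting stays consistent.
 */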
void __split_page_owner(struct page *page, unsigned int nr)
{
	int i;
	struct page_ext *page_ext = lookup_page_ext(page);
	struct page_owner *page_owner;

	if (unlikely(!page_ext))
		return;

	for (i = 0; i < nr; i++) {
		page_owner = get_page_owner(page_ext);
		page_owner->order = 0;
		page_ext = page_ext_next(page_ext);
	}
}

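/*
 * Copy the owner information from @old to @newfolio on migration so the
 * new folio reports the original allocation context.
 */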
void __folio_copy_owner(struct folio *newfolio, struct folio *old)
{
	struct page_ext *old_ext = lookup_page_ext(&old->page);
	struct page_ext *new_ext = lookup_page_ext(&newfolio->page);
	struct page_owner *old_page_owner, *new_page_owner;

	if (unlikely(!old_ext || !new_ext))
		return;

	old_page_owner = get_page_owner(old_ext);
	new_page_owner = get_page_owner(new_ext);
	new_page_owner->order = old_page_owner->order;
	new_page_owner->gfp_mask = old_page_owner->gfp_mask;
	new_page_owner->last_migrate_reason =
		old_page_owner->last_migrate_reason;
	new_page_owner->handle = old_page_owner->handle;
	new_page_owner->pid = old_page_owner->pid;
	new_page_owner->tgid = old_page_owner->tgid;
	new_page_owner->ts_nsec = old_page_owner->ts_nsec;
	new_page_owner->free_ts_nsec = old_page_owner->free_ts_nsec;
	strcpy(new_page_owner->comm, old_page_owner->comm);

	/*
	 * We don't clear the bit on the old folio as it's going to be freed
	 * after migration. Until then, the info can be useful in case of
	 * a bug, and the overall stats will be off a bit only temporarily.
	 * Also, migrate_misplaced_transhuge_page() can still fail the
	 * migration and then we want the old folio to retain the info. But
	 * in that case we also don't need to explicitly clear the info from
	 * the new page, which will be freed.
	 */
	__set_bit(PAGE_EXT_OWNER, &new_ext->flags);
	__set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
}

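/*
 * Count, per migratetype, the pageblocks that contain at least one page
 * whose allocation migratetype differs from the block's ("mixed" blocks),
 * for /proc/pagetypeinfo.
 */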
void pagetypeinfo_showmixedcount_print(struct seq_file *m,
				       pg_data_t *pgdat, struct zone *zone)
{
	struct page *page;
	struct page_ext *page_ext;
	struct page_owner *page_owner;
	unsigned long pfn, block_end_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count[MIGRATE_TYPES] = { 0, };
	int pageblock_mt, page_mt;
	int i;

	/* Scan block by block. First and last block may be incomplete */
	pfn = zone->zone_start_pfn;

	/*
	 * Walk the zone in pageblock_nr_pages steps. If a page block spans
	 * a zone boundary, it will be double counted between zones. This does
	 * not matter as the mixed block count will still be correct
	 */
	for (; pfn < end_pfn; ) {
		page = pfn_to_online_page(pfn);
		if (!page) {
			pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
			continue;
		}

		block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
		block_end_pfn = min(block_end_pfn, end_pfn);

		pageblock_mt = get_pageblock_migratetype(page);

		for (; pfn < block_end_pfn; pfn++) {
			/* The pageblock is online, no need to recheck. */
			page = pfn_to_page(pfn);

			if (page_zone(page) != zone)
				continue;

			if (PageBuddy(page)) {
				unsigned long freepage_order;

				freepage_order = buddy_order_unsafe(page);
				if (freepage_order < MAX_ORDER)
					pfn += (1UL << freepage_order) - 1;
				continue;
			}

			if (PageReserved(page))
				continue;

			page_ext = lookup_page_ext(page);
			if (unlikely(!page_ext))
				continue;

			if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
				continue;

			page_owner = get_page_owner(page_ext);
			page_mt = gfp_migratetype(page_owner->gfp_mask);
			if (pageblock_mt != page_mt) {
				if (is_migrate_cma(pageblock_mt))
					count[MIGRATE_MOVABLE]++;
				else
					count[pageblock_mt]++;

				pfn = block_end_pfn;
				break;
			}
			pfn += (1UL << page_owner->order) - 1;
		}
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (i = 0; i < MIGRATE_TYPES; i++)
		seq_printf(m, "%12lu ", count[i]);
	seq_putc(m, '\n');
}

/*
 * Look up memcg information for the page and print it out.
 */
static inline int print_page_owner_memcg(char *kbuf, size_t count, int ret,
					 struct page *page)
{
#ifdef CONFIG_MEMCG
	unsigned long memcg_data;
	struct mem_cgroup *memcg;
	bool online;
	char name[80];

	rcu_read_lock();
	memcg_data = READ_ONCE(page->memcg_data);
	if (!memcg_data)
		goto out_unlock;

	if (memcg_data & MEMCG_DATA_OBJCGS)
		ret += scnprintf(kbuf + ret, count - ret,
				"Slab cache page\n");

	memcg = page_memcg_check(page);
	if (!memcg)
		goto out_unlock;

	online = (memcg->css.flags & CSS_ONLINE);
	cgroup_name(memcg->css.cgroup, name, sizeof(name));
	ret += scnprintf(kbuf + ret, count - ret,
			"Charged %sto %smemcg %s\n",
			PageMemcgKmem(page) ? "(via objcg) " : "",
			online ? "" : "offline ",
			name);
out_unlock:
	rcu_read_unlock();
#endif /* CONFIG_MEMCG */

	return ret;
}

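/*
 * Format one page_owner record into a kernel buffer and copy it to the
 * user buffer supplied by read_page_owner().
 */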
static ssize_t
print_page_owner(char __user *buf, size_t count, unsigned long pfn,
		struct page *page, struct page_owner *page_owner,
		depot_stack_handle_t handle)
{
	int ret, pageblock_mt, page_mt;
	char *kbuf;

	count = min_t(size_t, count, PAGE_SIZE);
	kbuf = kmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	ret = scnprintf(kbuf, count,
			"Page allocated via order %u, mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu ns, free_ts %llu ns\n",
			page_owner->order, page_owner->gfp_mask,
			&page_owner->gfp_mask, page_owner->pid,
			page_owner->tgid, page_owner->comm,
			page_owner->ts_nsec, page_owner->free_ts_nsec);

	/* Print information relevant to grouping pages by mobility */
	pageblock_mt = get_pageblock_migratetype(page);
	page_mt = gfp_migratetype(page_owner->gfp_mask);
	ret += scnprintf(kbuf + ret, count - ret,
			"PFN %lu type %s Block %lu type %s Flags %pGp\n",
			pfn,
			migratetype_names[page_mt],
			pfn >> pageblock_order,
			migratetype_names[pageblock_mt],
			&page->flags);

	ret += stack_depot_snprint(handle, kbuf + ret, count - ret, 0);
	if (ret >= count)
		goto err;

	if (page_owner->last_migrate_reason != -1) {
		ret += scnprintf(kbuf + ret, count - ret,
			"Page has been migrated, last migrate reason: %s\n",
			migrate_reason_names[page_owner->last_migrate_reason]);
	}

	ret = print_page_owner_memcg(kbuf, count, ret, page);

	ret += snprintf(kbuf + ret, count - ret, "\n");
	if (ret >= count)
		goto err;

	if (copy_to_user(buf, kbuf, ret))
		ret = -EFAULT;

	kfree(kbuf);
	return ret;

err:
	kfree(kbuf);
	return -ENOMEM;
}

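/*
 * Dump the page_owner record for @page to the kernel log; reached via
 * dump_page() when debugging page state.
 */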
void __dump_page_owner(const struct page *page)
{
	struct page_ext *page_ext = lookup_page_ext(page);
	struct page_owner *page_owner;
	depot_stack_handle_t handle;
	gfp_t gfp_mask;
	int mt;

	if (unlikely(!page_ext)) {
		pr_alert("There is no page extension available.\n");
		return;
	}

	page_owner = get_page_owner(page_ext);
	gfp_mask = page_owner->gfp_mask;
	mt = gfp_migratetype(gfp_mask);

	if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
		pr_alert("page_owner info is not present (never set?)\n");
		return;
	}

	if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
		pr_alert("page_owner tracks the page as allocated\n");
	else
		pr_alert("page_owner tracks the page as freed\n");

	pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu, free_ts %llu\n",
		 page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask,
		 page_owner->pid, page_owner->tgid, page_owner->comm,
		 page_owner->ts_nsec, page_owner->free_ts_nsec);

	handle = READ_ONCE(page_owner->handle);
	if (!handle)
		pr_alert("page_owner allocation stack trace missing\n");
	else
		stack_depot_print(handle);

	handle = READ_ONCE(page_owner->free_handle);
	if (!handle) {
		pr_alert("page_owner free stack trace missing\n");
	} else {
		pr_alert("page last free stack trace:\n");
		stack_depot_print(handle);
	}

	if (page_owner->last_migrate_reason != -1)
		pr_alert("page has been migrated, last migrate reason: %s\n",
			migrate_reason_names[page_owner->last_migrate_reason]);
}

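/*
 * debugfs read handler: walk all PFNs from the file offset onwards and
 * emit the record of the next allocated page that has an owner.
 */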
static ssize_t
read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	unsigned long pfn;
	struct page *page;
	struct page_ext *page_ext;
	struct page_owner *page_owner;
	depot_stack_handle_t handle;

	if (!static_branch_unlikely(&page_owner_inited))
		return -EINVAL;

	page = NULL;
	pfn = min_low_pfn + *ppos;

	/* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */
	while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0)
		pfn++;

	drain_all_pages(NULL);

	/* Find an allocated page */
	for (; pfn < max_pfn; pfn++) {
		/*
		 * If the new page is in a new MAX_ORDER_NR_PAGES area,
		 * validate the area as existing, skip it if not
		 */
		if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) {
			pfn += MAX_ORDER_NR_PAGES - 1;
			continue;
		}

		page = pfn_to_page(pfn);
		if (PageBuddy(page)) {
			unsigned long freepage_order = buddy_order_unsafe(page);

			if (freepage_order < MAX_ORDER)
				pfn += (1UL << freepage_order) - 1;
			continue;
		}

		page_ext = lookup_page_ext(page);
		if (unlikely(!page_ext))
			continue;

		/*
		 * Some pages could be missed by concurrent allocation or free,
		 * because we don't hold the zone lock.
		 */
		if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
			continue;

		/*
		 * Although we do have the info about past allocation of free
		 * pages, it's not relevant for current memory usage.
		 */
		if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
			continue;

		page_owner = get_page_owner(page_ext);

		/*
		 * Don't print "tail" pages of high-order allocations as that
		 * would inflate the stats.
		 */
		if (!IS_ALIGNED(pfn, 1 << page_owner->order))
			continue;

		/*
		 * Access to page_ext->handle isn't synchronous so we should
		 * be careful to access it.
		 */
		handle = READ_ONCE(page_owner->handle);
		if (!handle)
			continue;

		/* Record the next PFN to read in the file offset */
		*ppos = (pfn - min_low_pfn) + 1;

		return print_page_owner(buf, count, pfn, page,
				page_owner, handle);
	}

	return 0;
}

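/*
 * Mark pages that were already allocated before page_owner was
 * initialized with the early_handle, so they show up in the statistics.
 */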
static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
{
	unsigned long pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count = 0;

	/*
	 * Walk the zone in pageblock_nr_pages steps. If a page block spans
	 * a zone boundary, it will be double counted between zones. This does
	 * not matter as the mixed block count will still be correct
	 */
	for (; pfn < end_pfn; ) {
		unsigned long block_end_pfn;

		if (!pfn_valid(pfn)) {
			pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
			continue;
		}

		block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
		block_end_pfn = min(block_end_pfn, end_pfn);

		for (; pfn < block_end_pfn; pfn++) {
			struct page *page = pfn_to_page(pfn);
			struct page_ext *page_ext;

			if (page_zone(page) != zone)
				continue;

			/*
			 * To avoid having to grab zone->lock, be a little
			 * careful when reading buddy page order. The only
			 * danger is that we skip too much and potentially miss
			 * some early allocated pages, which is better than
			 * heavy lock contention.
			 */
			if (PageBuddy(page)) {
				unsigned long order = buddy_order_unsafe(page);

				if (order > 0 && order < MAX_ORDER)
					pfn += (1UL << order) - 1;
				continue;
			}

			if (PageReserved(page))
				continue;

			page_ext = lookup_page_ext(page);
			if (unlikely(!page_ext))
				continue;

			/* Maybe overlapping zone */
			if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
				continue;

			/* Found early allocated page */
			__set_page_owner_handle(page_ext, early_handle,
						0, 0);
			count++;
		}
		cond_resched();
	}

	pr_info("Node %d, zone %8s: page owner found early allocated %lu pages\n",
		pgdat->node_id, zone->name, count);
}

static void init_zones_in_node(pg_data_t *pgdat)
{
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (!populated_zone(zone))
			continue;

		init_pages_in_zone(pgdat, zone);
	}
}

static void init_early_allocated_pages(void)
{
	pg_data_t *pgdat;

	for_each_online_pgdat(pgdat)
		init_zones_in_node(pgdat);
}

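/*
 * With "page_owner=on" on the command line, the records are exposed at
 * /sys/kernel/debug/page_owner, readable by root only (mode 0400).
 */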
static const struct file_operations proc_page_owner_operations = {
	.read = read_page_owner,
};

static int __init pageowner_init(void)
{
	if (!static_branch_unlikely(&page_owner_inited)) {
		pr_info("page_owner is disabled\n");
		return 0;
	}

	debugfs_create_file("page_owner", 0400, NULL, NULL,
			    &proc_page_owner_operations);

	return 0;
}
late_initcall(pageowner_init)