#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/bootmem.h>
#include <linux/stacktrace.h>
#include <linux/page_owner.h>
#include <linux/jump_label.h>
#include <linux/migrate.h>
#include <linux/stackdepot.h>
#include <linux/seq_file.h>

#include "internal.h"

/*
 * TODO: teach PAGE_OWNER_STACK_DEPTH (__dump_page_owner and save_stack)
 * to use off-stack temporary storage
 */
#define PAGE_OWNER_STACK_DEPTH (16)

struct page_owner {
    unsigned int order;
    gfp_t gfp_mask;
    int last_migrate_reason;
    depot_stack_handle_t handle;
};

static bool page_owner_disabled = true;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);

static depot_stack_handle_t dummy_handle;
static depot_stack_handle_t failure_handle;

static void init_early_allocated_pages(void);

static int early_page_owner_param(char *buf)
{
    if (!buf)
        return -EINVAL;

    if (strcmp(buf, "on") == 0)
        page_owner_disabled = false;

    return 0;
}
early_param("page_owner", early_page_owner_param);
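
/*
 * Usage sketch: page owner tracking is built when CONFIG_PAGE_OWNER=y
 * and is only activated by booting with "page_owner=on" on the kernel
 * command line, which clears page_owner_disabled above. The recorded
 * allocation stacks are read back later through the debugfs file
 * registered in pageowner_init() at the bottom of this file, typically
 * exposed as /sys/kernel/debug/page_owner.
 */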

static bool need_page_owner(void)
{
    if (page_owner_disabled)
        return false;

    return true;
}

static noinline void register_dummy_stack(void)
{
    unsigned long entries[4];
    struct stack_trace dummy;

    dummy.nr_entries = 0;
    dummy.max_entries = ARRAY_SIZE(entries);
    dummy.entries = &entries[0];
    dummy.skip = 0;

    save_stack_trace(&dummy);
    dummy_handle = depot_save_stack(&dummy, GFP_KERNEL);
}

static noinline void register_failure_stack(void)
{
    unsigned long entries[4];
    struct stack_trace failure;

    failure.nr_entries = 0;
    failure.max_entries = ARRAY_SIZE(entries);
    failure.entries = &entries[0];
    failure.skip = 0;

    save_stack_trace(&failure);
    failure_handle = depot_save_stack(&failure, GFP_KERNEL);
}

static void init_page_owner(void)
{
    if (page_owner_disabled)
        return;

    register_dummy_stack();
    register_failure_stack();
    static_branch_enable(&page_owner_inited);
    init_early_allocated_pages();
}

struct page_ext_operations page_owner_ops = {
    .size = sizeof(struct page_owner),
    .need = need_page_owner,
    .init = init_page_owner,
};

static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
{
    return (void *)page_ext + page_owner_ops.offset;
}
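
/*
 * Layout note (sketch of how the offset is expected to work): each
 * struct page has an associated struct page_ext, and clients such as
 * page_owner reserve extra space behind it via the ->size field of
 * their page_ext_operations. The page_ext core fills in ->offset for
 * each registered client, so the per-page data lives at
 * (void *)page_ext + page_owner_ops.offset, which is what
 * get_page_owner() returns above.
 */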

void __reset_page_owner(struct page *page, unsigned int order)
{
    int i;
    struct page_ext *page_ext;

    for (i = 0; i < (1 << order); i++) {
        page_ext = lookup_page_ext(page + i);
        if (unlikely(!page_ext))
            continue;
        __clear_bit(PAGE_EXT_OWNER, &page_ext->flags);
    }
}

static inline bool check_recursive_alloc(struct stack_trace *trace,
                    unsigned long ip)
{
    int i, count;

    if (!trace->nr_entries)
        return false;

    for (i = 0, count = 0; i < trace->nr_entries; i++) {
        if (trace->entries[i] == ip && ++count == 2)
            return true;
    }

    return false;
}

static noinline depot_stack_handle_t save_stack(gfp_t flags)
{
    unsigned long entries[PAGE_OWNER_STACK_DEPTH];
    struct stack_trace trace = {
        .nr_entries = 0,
        .entries = entries,
        .max_entries = PAGE_OWNER_STACK_DEPTH,
        .skip = 0
    };
    depot_stack_handle_t handle;

    save_stack_trace(&trace);
    if (trace.nr_entries != 0 &&
        trace.entries[trace.nr_entries-1] == ULONG_MAX)
        trace.nr_entries--;

    /*
     * We need to check for recursion here because our request to
     * stackdepot could trigger a memory allocation to save the new
     * entry. That allocation would reach this point and call
     * depot_save_stack() again if we didn't catch it. Since stackdepot
     * would still be short of memory, it would try to allocate again
     * and loop forever.
     */
    if (check_recursive_alloc(&trace, _RET_IP_))
        return dummy_handle;

    handle = depot_save_stack(&trace, flags);
    if (!handle)
        handle = failure_handle;

    return handle;
}

noinline void __set_page_owner(struct page *page, unsigned int order,
                    gfp_t gfp_mask)
{
    struct page_ext *page_ext = lookup_page_ext(page);
    struct page_owner *page_owner;

    if (unlikely(!page_ext))
        return;

    page_owner = get_page_owner(page_ext);
    page_owner->handle = save_stack(gfp_mask);
    page_owner->order = order;
    page_owner->gfp_mask = gfp_mask;
    page_owner->last_migrate_reason = -1;

    __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
}

void __set_page_owner_migrate_reason(struct page *page, int reason)
{
    struct page_ext *page_ext = lookup_page_ext(page);
    struct page_owner *page_owner;

    if (unlikely(!page_ext))
        return;

    page_owner = get_page_owner(page_ext);
    page_owner->last_migrate_reason = reason;
}

void __split_page_owner(struct page *page, unsigned int order)
{
    int i;
    struct page_ext *page_ext = lookup_page_ext(page);
    struct page_owner *page_owner;

    if (unlikely(!page_ext))
        return;

    page_owner = get_page_owner(page_ext);
    page_owner->order = 0;
    for (i = 1; i < (1 << order); i++)
        __copy_page_owner(page, page + i);
}

void __copy_page_owner(struct page *oldpage, struct page *newpage)
{
    struct page_ext *old_ext = lookup_page_ext(oldpage);
    struct page_ext *new_ext = lookup_page_ext(newpage);
    struct page_owner *old_page_owner, *new_page_owner;

    if (unlikely(!old_ext || !new_ext))
        return;

    old_page_owner = get_page_owner(old_ext);
    new_page_owner = get_page_owner(new_ext);
    new_page_owner->order = old_page_owner->order;
    new_page_owner->gfp_mask = old_page_owner->gfp_mask;
    new_page_owner->last_migrate_reason =
        old_page_owner->last_migrate_reason;
    new_page_owner->handle = old_page_owner->handle;

    /*
     * We don't clear the bit on the oldpage as it's going to be freed
     * after migration. Until then, the info can be useful in case of
     * a bug, and the overall stats will be off a bit only temporarily.
     * Also, migrate_misplaced_transhuge_page() can still fail the
     * migration and then we want the oldpage to retain the info. But
     * in that case we also don't need to explicitly clear the info from
     * the new page, which will be freed.
     */
    __set_bit(PAGE_EXT_OWNER, &new_ext->flags);
}

void pagetypeinfo_showmixedcount_print(struct seq_file *m,
                       pg_data_t *pgdat, struct zone *zone)
{
    struct page *page;
    struct page_ext *page_ext;
    struct page_owner *page_owner;
    unsigned long pfn = zone->zone_start_pfn, block_end_pfn;
    unsigned long end_pfn = pfn + zone->spanned_pages;
    unsigned long count[MIGRATE_TYPES] = { 0, };
    int pageblock_mt, page_mt;
    int i;

    /* Scan block by block. First and last block may be incomplete */
    pfn = zone->zone_start_pfn;

    /*
     * Walk the zone in pageblock_nr_pages steps. If a page block spans
     * a zone boundary, it will be double counted between zones. This does
     * not matter as the mixed block count will still be correct
     */
    for (; pfn < end_pfn; ) {
        if (!pfn_valid(pfn)) {
            pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
            continue;
        }

        block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
        block_end_pfn = min(block_end_pfn, end_pfn);

        page = pfn_to_page(pfn);
        pageblock_mt = get_pageblock_migratetype(page);

        for (; pfn < block_end_pfn; pfn++) {
            if (!pfn_valid_within(pfn))
                continue;

            page = pfn_to_page(pfn);

            if (page_zone(page) != zone)
                continue;

            if (PageBuddy(page)) {
                pfn += (1UL << page_order(page)) - 1;
                continue;
            }

            if (PageReserved(page))
                continue;

            page_ext = lookup_page_ext(page);
            if (unlikely(!page_ext))
                continue;

            if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
                continue;

            page_owner = get_page_owner(page_ext);
            page_mt = gfpflags_to_migratetype(
                    page_owner->gfp_mask);
            if (pageblock_mt != page_mt) {
                if (is_migrate_cma(pageblock_mt))
                    count[MIGRATE_MOVABLE]++;
                else
                    count[pageblock_mt]++;

                pfn = block_end_pfn;
                break;
            }
            pfn += (1UL << page_owner->order) - 1;
        }
    }

    /* Print counts */
    seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
    for (i = 0; i < MIGRATE_TYPES; i++)
        seq_printf(m, "%12lu ", count[i]);
    seq_putc(m, '\n');
}
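
/*
 * The per-zone counts printed above feed the mixed-block statistics of
 * /proc/pagetypeinfo when page_owner is enabled: one column per migrate
 * type, counting pageblocks that contain at least one page whose
 * allocation migratetype differs from the pageblock's own type. A line
 * of that output might look roughly like (illustrative values only):
 *
 *   Node 0, zone   Normal            1            0            2 ...
 */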

static ssize_t
print_page_owner(char __user *buf, size_t count, unsigned long pfn,
        struct page *page, struct page_owner *page_owner,
        depot_stack_handle_t handle)
{
    int ret;
    int pageblock_mt, page_mt;
    char *kbuf;
    unsigned long entries[PAGE_OWNER_STACK_DEPTH];
    struct stack_trace trace = {
        .nr_entries = 0,
        .entries = entries,
        .max_entries = PAGE_OWNER_STACK_DEPTH,
        .skip = 0
    };

    kbuf = kmalloc(count, GFP_KERNEL);
    if (!kbuf)
        return -ENOMEM;

    ret = snprintf(kbuf, count,
            "Page allocated via order %u, mask %#x(%pGg)\n",
            page_owner->order, page_owner->gfp_mask,
            &page_owner->gfp_mask);

    if (ret >= count)
        goto err;

    /* Print information relevant to grouping pages by mobility */
    pageblock_mt = get_pageblock_migratetype(page);
    page_mt = gfpflags_to_migratetype(page_owner->gfp_mask);
    ret += snprintf(kbuf + ret, count - ret,
            "PFN %lu type %s Block %lu type %s Flags %#lx(%pGp)\n",
            pfn,
            migratetype_names[page_mt],
            pfn >> pageblock_order,
            migratetype_names[pageblock_mt],
            page->flags, &page->flags);

    if (ret >= count)
        goto err;

    depot_fetch_stack(handle, &trace);
    ret += snprint_stack_trace(kbuf + ret, count - ret, &trace, 0);
    if (ret >= count)
        goto err;

    if (page_owner->last_migrate_reason != -1) {
        ret += snprintf(kbuf + ret, count - ret,
            "Page has been migrated, last migrate reason: %s\n",
            migrate_reason_names[page_owner->last_migrate_reason]);
        if (ret >= count)
            goto err;
    }

    ret += snprintf(kbuf + ret, count - ret, "\n");
    if (ret >= count)
        goto err;

    if (copy_to_user(buf, kbuf, ret))
        ret = -EFAULT;

    kfree(kbuf);
    return ret;

err:
    kfree(kbuf);
    return -ENOMEM;
}
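
/*
 * One record produced by the snprintf() calls above looks roughly like
 * the following (illustrative values only; the migration line appears
 * only when last_migrate_reason has been set):
 *
 *   Page allocated via order 0, mask 0x...(GFP_KERNEL|...)
 *   PFN 123456 type Movable Block 241 type Movable Flags 0x...(...)
 *    <allocation stack trace fetched from stackdepot>
 *   Page has been migrated, last migrate reason: compaction
 *
 * and is terminated by a blank line before the next record.
 */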

void __dump_page_owner(struct page *page)
{
    struct page_ext *page_ext = lookup_page_ext(page);
    struct page_owner *page_owner;
    unsigned long entries[PAGE_OWNER_STACK_DEPTH];
    struct stack_trace trace = {
        .nr_entries = 0,
        .entries = entries,
        .max_entries = PAGE_OWNER_STACK_DEPTH,
        .skip = 0
    };
    depot_stack_handle_t handle;
    gfp_t gfp_mask;
    int mt;

    if (unlikely(!page_ext)) {
        pr_alert("There is no page extension available.\n");
        return;
    }

    page_owner = get_page_owner(page_ext);
    gfp_mask = page_owner->gfp_mask;
    mt = gfpflags_to_migratetype(gfp_mask);

    if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
        pr_alert("page_owner info is not active (free page?)\n");
        return;
    }

    handle = READ_ONCE(page_owner->handle);
    if (!handle) {
        pr_alert("page_owner info is not active (free page?)\n");
        return;
    }

    depot_fetch_stack(handle, &trace);
    pr_alert("page allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
         page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask);
    print_stack_trace(&trace, 0);

    if (page_owner->last_migrate_reason != -1)
        pr_alert("page has been migrated, last migrate reason: %s\n",
            migrate_reason_names[page_owner->last_migrate_reason]);
}

static ssize_t
read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
    unsigned long pfn;
    struct page *page;
    struct page_ext *page_ext;
    struct page_owner *page_owner;
    depot_stack_handle_t handle;

    if (!static_branch_unlikely(&page_owner_inited))
        return -EINVAL;

    page = NULL;
    pfn = min_low_pfn + *ppos;

    /* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */
    while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0)
        pfn++;

    drain_all_pages(NULL);

    /* Find an allocated page */
    for (; pfn < max_pfn; pfn++) {
        /*
         * If the new page is in a new MAX_ORDER_NR_PAGES area,
         * validate the area as existing, skip it if not
         */
        if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) {
            pfn += MAX_ORDER_NR_PAGES - 1;
            continue;
        }

        /* Check for holes within a MAX_ORDER area */
        if (!pfn_valid_within(pfn))
            continue;

        page = pfn_to_page(pfn);
        if (PageBuddy(page)) {
            unsigned long freepage_order = page_order_unsafe(page);

            if (freepage_order < MAX_ORDER)
                pfn += (1UL << freepage_order) - 1;
            continue;
        }

        page_ext = lookup_page_ext(page);
        if (unlikely(!page_ext))
            continue;

        /*
         * Some pages could be missed by concurrent allocation or free,
         * because we don't hold the zone lock.
         */
        if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
            continue;

        page_owner = get_page_owner(page_ext);

        /*
         * Access to page_owner->handle isn't synchronized, so be
         * careful when reading it.
         */
        handle = READ_ONCE(page_owner->handle);
        if (!handle)
            continue;

        /* Record the next PFN to read in the file offset */
        *ppos = (pfn - min_low_pfn) + 1;

        return print_page_owner(buf, count, pfn, page,
                page_owner, handle);
    }

    return 0;
}
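
/*
 * Typical userspace usage of the read() handler above is to dump every
 * record from debugfs for post-processing, e.g.
 *
 *   cat /sys/kernel/debug/page_owner > page_owner_full.txt
 *
 * The file offset encodes the next PFN to scan, so sequential reads
 * walk all tracked pages from min_low_pfn towards max_pfn.
 */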

static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
{
    struct page *page;
    struct page_ext *page_ext;
    unsigned long pfn = zone->zone_start_pfn, block_end_pfn;
    unsigned long end_pfn = pfn + zone->spanned_pages;
    unsigned long count = 0;

    /* Scan block by block. First and last block may be incomplete */
    pfn = zone->zone_start_pfn;

    /*
     * Walk the zone in pageblock_nr_pages steps. If a page block spans
     * a zone boundary, it will be double counted between zones. This does
     * not matter as the mixed block count will still be correct
     */
    for (; pfn < end_pfn; ) {
        if (!pfn_valid(pfn)) {
            pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
            continue;
        }

        block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
        block_end_pfn = min(block_end_pfn, end_pfn);

        page = pfn_to_page(pfn);

        for (; pfn < block_end_pfn; pfn++) {
            if (!pfn_valid_within(pfn))
                continue;

            page = pfn_to_page(pfn);

            if (page_zone(page) != zone)
                continue;

            /*
             * It is safe to check the buddy flag and order here
             * because this is the init stage and only a single
             * thread is running.
             */
            if (PageBuddy(page)) {
                pfn += (1UL << page_order(page)) - 1;
                continue;
            }

            if (PageReserved(page))
                continue;

            page_ext = lookup_page_ext(page);
            if (unlikely(!page_ext))
                continue;

            /* Maybe overlapping zone */
            if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
                continue;

            /* Found early allocated page */
            set_page_owner(page, 0, 0);
            count++;
        }
    }

    pr_info("Node %d, zone %8s: page owner found early allocated %lu pages\n",
        pgdat->node_id, zone->name, count);
}

static void init_zones_in_node(pg_data_t *pgdat)
{
    struct zone *zone;
    struct zone *node_zones = pgdat->node_zones;
    unsigned long flags;

    for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
        if (!populated_zone(zone))
            continue;

        spin_lock_irqsave(&zone->lock, flags);
        init_pages_in_zone(pgdat, zone);
        spin_unlock_irqrestore(&zone->lock, flags);
    }
}

static void init_early_allocated_pages(void)
{
    pg_data_t *pgdat;

    drain_all_pages(NULL);
    for_each_online_pgdat(pgdat)
        init_zones_in_node(pgdat);
}

static const struct file_operations proc_page_owner_operations = {
    .read       = read_page_owner,
};

static int __init pageowner_init(void)
{
    struct dentry *dentry;

    if (!static_branch_unlikely(&page_owner_inited)) {
        pr_info("page_owner is disabled\n");
        return 0;
    }

    dentry = debugfs_create_file("page_owner", S_IRUSR, NULL,
            NULL, &proc_page_owner_operations);
    if (IS_ERR(dentry))
        return PTR_ERR(dentry);

    return 0;
}
late_initcall(pageowner_init)