// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/mm/vmstat.c
 *
 *  Manages zoned and per-node VM statistics and exports them through
 *  /proc/vmstat, /proc/zoneinfo and related interfaces.
 */
0013 #include <linux/fs.h>
0014 #include <linux/mm.h>
0015 #include <linux/err.h>
0016 #include <linux/module.h>
0017 #include <linux/slab.h>
0018 #include <linux/cpu.h>
0019 #include <linux/cpumask.h>
0020 #include <linux/vmstat.h>
0021 #include <linux/proc_fs.h>
0022 #include <linux/seq_file.h>
0023 #include <linux/debugfs.h>
0024 #include <linux/sched.h>
0025 #include <linux/math64.h>
0026 #include <linux/writeback.h>
0027 #include <linux/compaction.h>
0028 #include <linux/mm_inline.h>
0029 #include <linux/page_ext.h>
0030 #include <linux/page_owner.h>
0031 #include <linux/migrate.h>
0032
0033 #include "internal.h"
0034
0035 #ifdef CONFIG_NUMA
0036 int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
0037
/* zero numa counters within a zone */
0039 static void zero_zone_numa_counters(struct zone *zone)
0040 {
0041 int item, cpu;
0042
0043 for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) {
0044 atomic_long_set(&zone->vm_numa_event[item], 0);
0045 for_each_online_cpu(cpu) {
0046 per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_event[item]
0047 = 0;
0048 }
0049 }
0050 }
0051
/* zero numa counters of all the populated zones */
0053 static void zero_zones_numa_counters(void)
0054 {
0055 struct zone *zone;
0056
0057 for_each_populated_zone(zone)
0058 zero_zone_numa_counters(zone);
0059 }
0060
/* zero global numa counters */
0062 static void zero_global_numa_counters(void)
0063 {
0064 int item;
0065
0066 for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
0067 atomic_long_set(&vm_numa_event[item], 0);
0068 }
0069
0070 static void invalid_numa_statistics(void)
0071 {
0072 zero_zones_numa_counters();
0073 zero_global_numa_counters();
0074 }
0075
0076 static DEFINE_MUTEX(vm_numa_stat_lock);
0077
0078 int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
0079 void *buffer, size_t *length, loff_t *ppos)
0080 {
0081 int ret, oldval;
0082
0083 mutex_lock(&vm_numa_stat_lock);
0084 if (write)
0085 oldval = sysctl_vm_numa_stat;
0086 ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
0087 if (ret || !write)
0088 goto out;
0089
0090 if (oldval == sysctl_vm_numa_stat)
0091 goto out;
0092 else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
0093 static_branch_enable(&vm_numa_stat_key);
0094 pr_info("enable numa statistics\n");
0095 } else {
0096 static_branch_disable(&vm_numa_stat_key);
0097 invalid_numa_statistics();
0098 pr_info("disable numa statistics, and clear numa counters\n");
0099 }
0100
0101 out:
0102 mutex_unlock(&vm_numa_stat_lock);
0103 return ret;
0104 }
0105 #endif
0106
0107 #ifdef CONFIG_VM_EVENT_COUNTERS
0108 DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
0109 EXPORT_PER_CPU_SYMBOL(vm_event_states);
0110
0111 static void sum_vm_events(unsigned long *ret)
0112 {
0113 int cpu;
0114 int i;
0115
0116 memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
0117
0118 for_each_online_cpu(cpu) {
0119 struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
0120
0121 for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
0122 ret[i] += this->event[i];
0123 }
0124 }
0125
/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
0131 void all_vm_events(unsigned long *ret)
0132 {
0133 cpus_read_lock();
0134 sum_vm_events(ret);
0135 cpus_read_unlock();
0136 }
0137 EXPORT_SYMBOL_GPL(all_vm_events);
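/*
 * Usage sketch (illustrative only, not from this file): a caller snapshots
 * all event counters into an NR_VM_EVENT_ITEMS-sized array and indexes it
 * with the vm_event_item enum, e.g.:
 *
 *	unsigned long events[NR_VM_EVENT_ITEMS];
 *
 *	all_vm_events(events);
 *	pr_info("pgfault=%lu pgmajfault=%lu\n",
 *		events[PGFAULT], events[PGMAJFAULT]);
 */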
0138
/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * a folding operation on all.
 */
0145 void vm_events_fold_cpu(int cpu)
0146 {
0147 struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
0148 int i;
0149
0150 for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
0151 count_vm_events(i, fold_state->event[i]);
0152 fold_state->event[i] = 0;
0153 }
0154 }
0155
0156 #endif
0157
/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
0163 atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
0164 atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
0165 atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp;
0166 EXPORT_SYMBOL(vm_zone_stat);
0167 EXPORT_SYMBOL(vm_node_stat);
0168
0169 #ifdef CONFIG_NUMA
0170 static void fold_vm_zone_numa_events(struct zone *zone)
0171 {
0172 unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, };
0173 int cpu;
0174 enum numa_stat_item item;
0175
0176 for_each_online_cpu(cpu) {
0177 struct per_cpu_zonestat *pzstats;
0178
0179 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
0180 for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
0181 zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0);
0182 }
0183
0184 for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
0185 zone_numa_event_add(zone_numa_events[item], zone, item);
0186 }
0187
0188 void fold_vm_numa_events(void)
0189 {
0190 struct zone *zone;
0191
0192 for_each_populated_zone(zone)
0193 fold_vm_zone_numa_events(zone);
0194 }
0195 #endif
0196
0197 #ifdef CONFIG_SMP
0198
0199 int calculate_pressure_threshold(struct zone *zone)
0200 {
0201 int threshold;
0202 int watermark_distance;
0203
	/*
	 * As vmstats are not up to date, there is drift between the estimated
	 * and real values. For high thresholds and a high number of CPUs, it
	 * is possible for the min watermark to be breached while the estimated
	 * value looks fine. The pressure threshold is a reduced value such
	 * that even the maximum amount of drift will not accidentally breach
	 * the min watermark.
	 */
0212 watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
0213 threshold = max(1, (int)(watermark_distance / num_online_cpus()));
0214
	/*
	 * Maximum threshold is 125
	 */
0218 threshold = min(125, threshold);
0219
0220 return threshold;
0221 }
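/*
 * Worked example (assumed configuration, for illustration): with a low/min
 * watermark gap of 1024 pages and 16 online CPUs, the pressure threshold is
 * max(1, 1024 / 16) = 64, below the 125 cap, so each CPU may accumulate at
 * most 64 pages of counter drift while memory is tight.
 */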
0222
0223 int calculate_normal_threshold(struct zone *zone)
0224 {
0225 int threshold;
0226 int mem;
0227
	/*
	 * The threshold scales with the number of processors and the amount
	 * of memory per zone. More memory means that we can defer updates for
	 * longer, more processors could lead to more contention.
	 * fls() is used to have a cheap way of logarithmic scaling.
	 */
0258 mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);
0259
0260 threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
0261
	/*
	 * Maximum threshold is 125
	 */
0265 threshold = min(125, threshold);
0266
0267 return threshold;
0268 }
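/*
 * Worked example (assumed configuration, for illustration): with 4KB pages,
 * zone_managed_pages() >> (27 - PAGE_SHIFT) expresses the zone size in 128MB
 * units. For a 4GB zone (1048576 pages) that gives mem = 32; with 8 online
 * CPUs, threshold = 2 * fls(8) * (1 + fls(32)) = 2 * 4 * 7 = 56, which stays
 * below the 125 cap.
 */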
0269
/*
 * Refresh the thresholds for each zone.
 */
0273 void refresh_zone_stat_thresholds(void)
0274 {
0275 struct pglist_data *pgdat;
0276 struct zone *zone;
0277 int cpu;
0278 int threshold;
0279
	/* Zero current pgdat thresholds */
0281 for_each_online_pgdat(pgdat) {
0282 for_each_online_cpu(cpu) {
0283 per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
0284 }
0285 }
0286
0287 for_each_populated_zone(zone) {
0288 struct pglist_data *pgdat = zone->zone_pgdat;
0289 unsigned long max_drift, tolerate_drift;
0290
0291 threshold = calculate_normal_threshold(zone);
0292
0293 for_each_online_cpu(cpu) {
0294 int pgdat_threshold;
0295
0296 per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
0297 = threshold;
0298
0299
0300 pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
0301 per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
0302 = max(threshold, pgdat_threshold);
0303 }
0304
		/*
		 * Only set percpu_drift_mark if there is a danger that
		 * NR_FREE_PAGES reports the low watermark is ok when in fact
		 * the min watermark could be breached by an allocation.
		 */
0310 tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
0311 max_drift = num_online_cpus() * threshold;
0312 if (max_drift > tolerate_drift)
0313 zone->percpu_drift_mark = high_wmark_pages(zone) +
0314 max_drift;
0315 }
0316 }
0317
0318 void set_pgdat_percpu_threshold(pg_data_t *pgdat,
0319 int (*calculate_pressure)(struct zone *))
0320 {
0321 struct zone *zone;
0322 int cpu;
0323 int threshold;
0324 int i;
0325
0326 for (i = 0; i < pgdat->nr_zones; i++) {
0327 zone = &pgdat->node_zones[i];
0328 if (!zone->percpu_drift_mark)
0329 continue;
0330
0331 threshold = (*calculate_pressure)(zone);
0332 for_each_online_cpu(cpu)
0333 per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
0334 = threshold;
0335 }
0336 }
0337
/*
 * For use when we know that interrupts are disabled,
 * or when we know that preemption is disabled and that
 * particular counter cannot be updated from interrupt context.
 */
0343 void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
0344 long delta)
0345 {
0346 struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
0347 s8 __percpu *p = pcp->vm_stat_diff + item;
0348 long x;
0349 long t;
0350
	/*
	 * Accurate vmstat updates require a RMW. On !PREEMPT_RT kernels,
	 * atomicity is provided by IRQs being disabled -- either explicitly
	 * or via local_lock_irq. On PREEMPT_RT, local_lock_irq only disables
	 * CPU migrations and preemption potentially corrupts a counter so
	 * disable preemption.
	 */
0358 if (IS_ENABLED(CONFIG_PREEMPT_RT))
0359 preempt_disable();
0360
0361 x = delta + __this_cpu_read(*p);
0362
0363 t = __this_cpu_read(pcp->stat_threshold);
0364
0365 if (unlikely(abs(x) > t)) {
0366 zone_page_state_add(x, zone, item);
0367 x = 0;
0368 }
0369 __this_cpu_write(*p, x);
0370
0371 if (IS_ENABLED(CONFIG_PREEMPT_RT))
0372 preempt_enable();
0373 }
0374 EXPORT_SYMBOL(__mod_zone_page_state);
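/*
 * Usage sketch (illustrative only): the __-prefixed modifiers assume the
 * caller already prevents concurrent updates of the per-cpu counter, e.g.
 * by running with interrupts disabled:
 *
 *	local_irq_save(flags);
 *	__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
 *	local_irq_restore(flags);
 *
 * The variants without the underscores further down handle serialization
 * themselves.
 */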
0375
0376 void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
0377 long delta)
0378 {
0379 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
0380 s8 __percpu *p = pcp->vm_node_stat_diff + item;
0381 long x;
0382 long t;
0383
0384 if (vmstat_item_in_bytes(item)) {
		/*
		 * Only cgroups use subpage accounting right now; at
		 * the global level, these items still change in
		 * multiples of whole pages. Store them as pages
		 * internally to keep the per-cpu counters compact.
		 */
0391 VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
0392 delta >>= PAGE_SHIFT;
0393 }
0394
	/* See __mod_zone_page_state */
0396 if (IS_ENABLED(CONFIG_PREEMPT_RT))
0397 preempt_disable();
0398
0399 x = delta + __this_cpu_read(*p);
0400
0401 t = __this_cpu_read(pcp->stat_threshold);
0402
0403 if (unlikely(abs(x) > t)) {
0404 node_page_state_add(x, pgdat, item);
0405 x = 0;
0406 }
0407 __this_cpu_write(*p, x);
0408
0409 if (IS_ENABLED(CONFIG_PREEMPT_RT))
0410 preempt_enable();
0411 }
0412 EXPORT_SYMBOL(__mod_node_page_state);
0413
/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place, which may allow the compilers to
 * generate better code. The increment or decrement is known, so one
 * boundary check can be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 */
0437 void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
0438 {
0439 struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
0440 s8 __percpu *p = pcp->vm_stat_diff + item;
0441 s8 v, t;
0442
	/* See __mod_zone_page_state */
0444 if (IS_ENABLED(CONFIG_PREEMPT_RT))
0445 preempt_disable();
0446
0447 v = __this_cpu_inc_return(*p);
0448 t = __this_cpu_read(pcp->stat_threshold);
0449 if (unlikely(v > t)) {
0450 s8 overstep = t >> 1;
0451
0452 zone_page_state_add(v + overstep, zone, item);
0453 __this_cpu_write(*p, -overstep);
0454 }
0455
0456 if (IS_ENABLED(CONFIG_PREEMPT_RT))
0457 preempt_enable();
0458 }
0459
0460 void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
0461 {
0462 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
0463 s8 __percpu *p = pcp->vm_node_stat_diff + item;
0464 s8 v, t;
0465
0466 VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
0467
	/* See __mod_zone_page_state */
0469 if (IS_ENABLED(CONFIG_PREEMPT_RT))
0470 preempt_disable();
0471
0472 v = __this_cpu_inc_return(*p);
0473 t = __this_cpu_read(pcp->stat_threshold);
0474 if (unlikely(v > t)) {
0475 s8 overstep = t >> 1;
0476
0477 node_page_state_add(v + overstep, pgdat, item);
0478 __this_cpu_write(*p, -overstep);
0479 }
0480
0481 if (IS_ENABLED(CONFIG_PREEMPT_RT))
0482 preempt_enable();
0483 }
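/*
 * Numeric illustration of the overstep heuristic above (assumed threshold,
 * for illustration): with stat_threshold t = 32, the 33rd consecutive
 * increment pushes 33 + 16 = 49 into the global counter and resets the
 * per-cpu diff to -16, so roughly 1.5 * t further increments can again be
 * absorbed locally before the next global update.
 */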
0484
0485 void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
0486 {
0487 __inc_zone_state(page_zone(page), item);
0488 }
0489 EXPORT_SYMBOL(__inc_zone_page_state);
0490
0491 void __inc_node_page_state(struct page *page, enum node_stat_item item)
0492 {
0493 __inc_node_state(page_pgdat(page), item);
0494 }
0495 EXPORT_SYMBOL(__inc_node_page_state);
0496
0497 void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
0498 {
0499 struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
0500 s8 __percpu *p = pcp->vm_stat_diff + item;
0501 s8 v, t;
0502
	/* See __mod_zone_page_state */
0504 if (IS_ENABLED(CONFIG_PREEMPT_RT))
0505 preempt_disable();
0506
0507 v = __this_cpu_dec_return(*p);
0508 t = __this_cpu_read(pcp->stat_threshold);
0509 if (unlikely(v < - t)) {
0510 s8 overstep = t >> 1;
0511
0512 zone_page_state_add(v - overstep, zone, item);
0513 __this_cpu_write(*p, overstep);
0514 }
0515
0516 if (IS_ENABLED(CONFIG_PREEMPT_RT))
0517 preempt_enable();
0518 }
0519
0520 void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
0521 {
0522 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
0523 s8 __percpu *p = pcp->vm_node_stat_diff + item;
0524 s8 v, t;
0525
0526 VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
0527
	/* See __mod_zone_page_state */
0529 if (IS_ENABLED(CONFIG_PREEMPT_RT))
0530 preempt_disable();
0531
0532 v = __this_cpu_dec_return(*p);
0533 t = __this_cpu_read(pcp->stat_threshold);
0534 if (unlikely(v < - t)) {
0535 s8 overstep = t >> 1;
0536
0537 node_page_state_add(v - overstep, pgdat, item);
0538 __this_cpu_write(*p, overstep);
0539 }
0540
0541 if (IS_ENABLED(CONFIG_PREEMPT_RT))
0542 preempt_enable();
0543 }
0544
0545 void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
0546 {
0547 __dec_zone_state(page_zone(page), item);
0548 }
0549 EXPORT_SYMBOL(__dec_zone_page_state);
0550
0551 void __dec_node_page_state(struct page *page, enum node_stat_item item)
0552 {
0553 __dec_node_state(page_pgdat(page), item);
0554 }
0555 EXPORT_SYMBOL(__dec_node_page_state);
0556
0557 #ifdef CONFIG_HAVE_CMPXCHG_LOCAL
/*
 * If we have cmpxchg_local support then we do not need to incur the overhead
 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
 *
 * mod_zone_state() and mod_node_state() modify the counter state through
 * atomic per-cpu operations.
 *
 * Overstep mode specifies how overstep should be handled:
 *	 0	No overstepping
 *	 1	Overstepping half of threshold
 *	-1	Overstepping minus half of threshold
 */
0570 static inline void mod_zone_state(struct zone *zone,
0571 enum zone_stat_item item, long delta, int overstep_mode)
0572 {
0573 struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
0574 s8 __percpu *p = pcp->vm_stat_diff + item;
0575 long o, n, t, z;
0576
0577 do {
0578 z = 0;
0579
		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a zone.
		 */
0590 t = this_cpu_read(pcp->stat_threshold);
0591
0592 o = this_cpu_read(*p);
0593 n = delta + o;
0594
0595 if (abs(n) > t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to the zone counters */
0599 z = n + os;
0600 n = -os;
0601 }
0602 } while (this_cpu_cmpxchg(*p, o, n) != o);
0603
0604 if (z)
0605 zone_page_state_add(z, zone, item);
0606 }
0607
0608 void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
0609 long delta)
0610 {
0611 mod_zone_state(zone, item, delta, 0);
0612 }
0613 EXPORT_SYMBOL(mod_zone_page_state);
0614
0615 void inc_zone_page_state(struct page *page, enum zone_stat_item item)
0616 {
0617 mod_zone_state(page_zone(page), item, 1, 1);
0618 }
0619 EXPORT_SYMBOL(inc_zone_page_state);
0620
0621 void dec_zone_page_state(struct page *page, enum zone_stat_item item)
0622 {
0623 mod_zone_state(page_zone(page), item, -1, -1);
0624 }
0625 EXPORT_SYMBOL(dec_zone_page_state);
0626
0627 static inline void mod_node_state(struct pglist_data *pgdat,
0628 enum node_stat_item item, int delta, int overstep_mode)
0629 {
0630 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
0631 s8 __percpu *p = pcp->vm_node_stat_diff + item;
0632 long o, n, t, z;
0633
0634 if (vmstat_item_in_bytes(item)) {
		/*
		 * Only cgroups use subpage accounting right now; at
		 * the global level, these items still change in
		 * multiples of whole pages. Store them as pages
		 * internally to keep the per-cpu counters compact.
		 */
0641 VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
0642 delta >>= PAGE_SHIFT;
0643 }
0644
0645 do {
0646 z = 0;
0647
		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a zone.
		 */
0658 t = this_cpu_read(pcp->stat_threshold);
0659
0660 o = this_cpu_read(*p);
0661 n = delta + o;
0662
0663 if (abs(n) > t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to the node counters */
0667 z = n + os;
0668 n = -os;
0669 }
0670 } while (this_cpu_cmpxchg(*p, o, n) != o);
0671
0672 if (z)
0673 node_page_state_add(z, pgdat, item);
0674 }
0675
0676 void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
0677 long delta)
0678 {
0679 mod_node_state(pgdat, item, delta, 0);
0680 }
0681 EXPORT_SYMBOL(mod_node_page_state);
0682
0683 void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
0684 {
0685 mod_node_state(pgdat, item, 1, 1);
0686 }
0687
0688 void inc_node_page_state(struct page *page, enum node_stat_item item)
0689 {
0690 mod_node_state(page_pgdat(page), item, 1, 1);
0691 }
0692 EXPORT_SYMBOL(inc_node_page_state);
0693
0694 void dec_node_page_state(struct page *page, enum node_stat_item item)
0695 {
0696 mod_node_state(page_pgdat(page), item, -1, -1);
0697 }
0698 EXPORT_SYMBOL(dec_node_page_state);
0699 #else
/*
 * Use interrupt disable to serialize counter updates
 */
0703 void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
0704 long delta)
0705 {
0706 unsigned long flags;
0707
0708 local_irq_save(flags);
0709 __mod_zone_page_state(zone, item, delta);
0710 local_irq_restore(flags);
0711 }
0712 EXPORT_SYMBOL(mod_zone_page_state);
0713
0714 void inc_zone_page_state(struct page *page, enum zone_stat_item item)
0715 {
0716 unsigned long flags;
0717 struct zone *zone;
0718
0719 zone = page_zone(page);
0720 local_irq_save(flags);
0721 __inc_zone_state(zone, item);
0722 local_irq_restore(flags);
0723 }
0724 EXPORT_SYMBOL(inc_zone_page_state);
0725
0726 void dec_zone_page_state(struct page *page, enum zone_stat_item item)
0727 {
0728 unsigned long flags;
0729
0730 local_irq_save(flags);
0731 __dec_zone_page_state(page, item);
0732 local_irq_restore(flags);
0733 }
0734 EXPORT_SYMBOL(dec_zone_page_state);
0735
0736 void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
0737 {
0738 unsigned long flags;
0739
0740 local_irq_save(flags);
0741 __inc_node_state(pgdat, item);
0742 local_irq_restore(flags);
0743 }
0744 EXPORT_SYMBOL(inc_node_state);
0745
0746 void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
0747 long delta)
0748 {
0749 unsigned long flags;
0750
0751 local_irq_save(flags);
0752 __mod_node_page_state(pgdat, item, delta);
0753 local_irq_restore(flags);
0754 }
0755 EXPORT_SYMBOL(mod_node_page_state);
0756
0757 void inc_node_page_state(struct page *page, enum node_stat_item item)
0758 {
0759 unsigned long flags;
0760 struct pglist_data *pgdat;
0761
0762 pgdat = page_pgdat(page);
0763 local_irq_save(flags);
0764 __inc_node_state(pgdat, item);
0765 local_irq_restore(flags);
0766 }
0767 EXPORT_SYMBOL(inc_node_page_state);
0768
0769 void dec_node_page_state(struct page *page, enum node_stat_item item)
0770 {
0771 unsigned long flags;
0772
0773 local_irq_save(flags);
0774 __dec_node_page_state(page, item);
0775 local_irq_restore(flags);
0776 }
0777 EXPORT_SYMBOL(dec_node_page_state);
0778 #endif
0779
/*
 * Fold a differential into the global counters.
 * Returns the number of counters updated.
 */
0784 static int fold_diff(int *zone_diff, int *node_diff)
0785 {
0786 int i;
0787 int changes = 0;
0788
0789 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
0790 if (zone_diff[i]) {
0791 atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
0792 changes++;
0793 }
0794
0795 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
0796 if (node_diff[i]) {
0797 atomic_long_add(node_diff[i], &vm_node_stat[i]);
0798 changes++;
0799 }
0800 return changes;
0801 }
0802
/*
 * Update the zone counters for the current cpu.
 *
 * Note that refresh_cpu_vm_stats() strives to only access node-local memory:
 * the per-cpu pagesets of remote zones are placed in memory local to the
 * processor using them, so the loop over all zones touches cachelines local
 * to this processor. The calls that add into the zone and global counters
 * can cause remote cacheline bouncing and are therefore only done when a
 * non-zero differential has accumulated.
 *
 * The function returns the number of global counters updated.
 */
0819 static int refresh_cpu_vm_stats(bool do_pagesets)
0820 {
0821 struct pglist_data *pgdat;
0822 struct zone *zone;
0823 int i;
0824 int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
0825 int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
0826 int changes = 0;
0827
0828 for_each_populated_zone(zone) {
0829 struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
0830 #ifdef CONFIG_NUMA
0831 struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset;
0832 #endif
0833
0834 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
0835 int v;
0836
0837 v = this_cpu_xchg(pzstats->vm_stat_diff[i], 0);
0838 if (v) {
0839
0840 atomic_long_add(v, &zone->vm_stat[i]);
0841 global_zone_diff[i] += v;
0842 #ifdef CONFIG_NUMA
				/* 3 seconds idle till flush */
0844 __this_cpu_write(pcp->expire, 3);
0845 #endif
0846 }
0847 }
0848 #ifdef CONFIG_NUMA
0849
0850 if (do_pagesets) {
0851 cond_resched();
			/*
			 * Deal with draining the remote pageset of this
			 * processor.
			 *
			 * Check if there are pages remaining in this pageset;
			 * if not then there is nothing to expire.
			 */
0859 if (!__this_cpu_read(pcp->expire) ||
0860 !__this_cpu_read(pcp->count))
0861 continue;
0862
			/*
			 * We never drain zones local to this processor.
			 */
0866 if (zone_to_nid(zone) == numa_node_id()) {
0867 __this_cpu_write(pcp->expire, 0);
0868 continue;
0869 }
0870
0871 if (__this_cpu_dec_return(pcp->expire))
0872 continue;
0873
0874 if (__this_cpu_read(pcp->count)) {
0875 drain_zone_pages(zone, this_cpu_ptr(pcp));
0876 changes++;
0877 }
0878 }
0879 #endif
0880 }
0881
0882 for_each_online_pgdat(pgdat) {
0883 struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;
0884
0885 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
0886 int v;
0887
0888 v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
0889 if (v) {
0890 atomic_long_add(v, &pgdat->vm_stat[i]);
0891 global_node_diff[i] += v;
0892 }
0893 }
0894 }
0895
0896 changes += fold_diff(global_zone_diff, global_node_diff);
0897 return changes;
0898 }
0899
/*
 * Fold the data for an offline cpu into the global array.
 * There cannot be any access by the offline cpu and therefore
 * synchronization is simplified.
 */
0905 void cpu_vm_stats_fold(int cpu)
0906 {
0907 struct pglist_data *pgdat;
0908 struct zone *zone;
0909 int i;
0910 int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
0911 int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
0912
0913 for_each_populated_zone(zone) {
0914 struct per_cpu_zonestat *pzstats;
0915
0916 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
0917
0918 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
0919 if (pzstats->vm_stat_diff[i]) {
0920 int v;
0921
0922 v = pzstats->vm_stat_diff[i];
0923 pzstats->vm_stat_diff[i] = 0;
0924 atomic_long_add(v, &zone->vm_stat[i]);
0925 global_zone_diff[i] += v;
0926 }
0927 }
0928 #ifdef CONFIG_NUMA
0929 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
0930 if (pzstats->vm_numa_event[i]) {
0931 unsigned long v;
0932
0933 v = pzstats->vm_numa_event[i];
0934 pzstats->vm_numa_event[i] = 0;
0935 zone_numa_event_add(v, zone, i);
0936 }
0937 }
0938 #endif
0939 }
0940
0941 for_each_online_pgdat(pgdat) {
0942 struct per_cpu_nodestat *p;
0943
0944 p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
0945
0946 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
0947 if (p->vm_node_stat_diff[i]) {
0948 int v;
0949
0950 v = p->vm_node_stat_diff[i];
0951 p->vm_node_stat_diff[i] = 0;
0952 atomic_long_add(v, &pgdat->vm_stat[i]);
0953 global_node_diff[i] += v;
0954 }
0955 }
0956
0957 fold_diff(global_zone_diff, global_node_diff);
0958 }
0959

/*
 * This is only called if !populated_zone(zone), which implies no other
 * users of pzstats->vm_stat_diff[] exist.
 */
0964 void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
0965 {
0966 unsigned long v;
0967 int i;
0968
0969 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
0970 if (pzstats->vm_stat_diff[i]) {
0971 v = pzstats->vm_stat_diff[i];
0972 pzstats->vm_stat_diff[i] = 0;
0973 zone_page_state_add(v, zone, i);
0974 }
0975 }
0976
0977 #ifdef CONFIG_NUMA
0978 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
0979 if (pzstats->vm_numa_event[i]) {
0980 v = pzstats->vm_numa_event[i];
0981 pzstats->vm_numa_event[i] = 0;
0982 zone_numa_event_add(v, zone, i);
0983 }
0984 }
0985 #endif
0986 }
0987 #endif
0988
0989 #ifdef CONFIG_NUMA
/*
 * Determine the per node value of a stat item. This function
 * is called frequently in a NUMA machine, so try to be as
 * frugal as possible.
 */
0995 unsigned long sum_zone_node_page_state(int node,
0996 enum zone_stat_item item)
0997 {
0998 struct zone *zones = NODE_DATA(node)->node_zones;
0999 int i;
1000 unsigned long count = 0;
1001
1002 for (i = 0; i < MAX_NR_ZONES; i++)
1003 count += zone_page_state(zones + i, item);
1004
1005 return count;
1006 }
1007
/* Determine the per node value of a numa stat item. */
1009 unsigned long sum_zone_numa_event_state(int node,
1010 enum numa_stat_item item)
1011 {
1012 struct zone *zones = NODE_DATA(node)->node_zones;
1013 unsigned long count = 0;
1014 int i;
1015
1016 for (i = 0; i < MAX_NR_ZONES; i++)
1017 count += zone_numa_event_state(zones + i, item);
1018
1019 return count;
1020 }
1021
/*
 * Determine the per node value of a stat item.
 */
1025 unsigned long node_page_state_pages(struct pglist_data *pgdat,
1026 enum node_stat_item item)
1027 {
1028 long x = atomic_long_read(&pgdat->vm_stat[item]);
1029 #ifdef CONFIG_SMP
1030 if (x < 0)
1031 x = 0;
1032 #endif
1033 return x;
1034 }
1035
1036 unsigned long node_page_state(struct pglist_data *pgdat,
1037 enum node_stat_item item)
1038 {
1039 VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
1040
1041 return node_page_state_pages(pgdat, item);
1042 }
1043 #endif
1044
1045 #ifdef CONFIG_COMPACTION
1046
1047 struct contig_page_info {
1048 unsigned long free_pages;
1049 unsigned long free_blocks_total;
1050 unsigned long free_blocks_suitable;
1051 };
1052
/*
 * Calculate the number of free pages in a zone, how many contiguous
 * pages are free and how many are large enough to satisfy an allocation of
 * the target size. Note that this function makes no attempt to estimate
 * how many suitable free blocks there *might* be if MOVABLE pages were
 * migrated. Calculating that is possible, but expensive and can be
 * done in a separate fragmentation index.
 */
1061 static void fill_contig_page_info(struct zone *zone,
1062 unsigned int suitable_order,
1063 struct contig_page_info *info)
1064 {
1065 unsigned int order;
1066
1067 info->free_pages = 0;
1068 info->free_blocks_total = 0;
1069 info->free_blocks_suitable = 0;
1070
1071 for (order = 0; order < MAX_ORDER; order++) {
1072 unsigned long blocks;
1073
		/*
		 * Count number of free blocks.
		 *
		 * Access to nr_free is lockless as nr_free is used only for
		 * diagnostic purposes. Use data_race to avoid KCSAN warning.
		 */
1080 blocks = data_race(zone->free_area[order].nr_free);
1081 info->free_blocks_total += blocks;

		/* Count free base pages */
1084 info->free_pages += blocks << order;
1085
		/* Count the suitable free blocks */
1087 if (order >= suitable_order)
1088 info->free_blocks_suitable += blocks <<
1089 (order - suitable_order);
1090 }
1091 }
1092
/*
 * A fragmentation index only makes sense if an allocation of a requested
 * size would fail. If that is true, the fragmentation index indicates
 * whether external fragmentation or a lack of memory was the problem.
 * The value can be used to determine if page reclaim or compaction
 * should be used.
 */
1100 static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
1101 {
1102 unsigned long requested = 1UL << order;
1103
1104 if (WARN_ON_ONCE(order >= MAX_ORDER))
1105 return 0;
1106
1107 if (!info->free_blocks_total)
1108 return 0;
1109
	/* Fragmentation index only makes sense when a request would fail */
1111 if (info->free_blocks_suitable)
1112 return -1000;
1113
	/*
	 * Index is between 0 and 1 so return within 3 decimal places.
	 *
	 * 0 => allocation would fail due to lack of memory
	 * 1 => allocation would fail due to fragmentation
	 */
1120 return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
1121 }
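/*
 * Worked example (made-up numbers, for illustration): for order = 4
 * (requested = 16 pages), free_pages = 1000, free_blocks_total = 250 and
 * free_blocks_suitable = 0, the index is
 * 1000 - (1000 + 1000*1000/16) / 250 = 1000 - 63500/250 = 746, i.e. 0.746,
 * suggesting the failure is mostly due to external fragmentation rather
 * than a lack of memory.
 */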
1122
/*
 * Calculates external fragmentation within a zone wrt the given order.
 * It is defined as the percentage of pages found in blocks of size
 * less than 1 << order. It returns values in range [0, 100].
 */
1128 unsigned int extfrag_for_order(struct zone *zone, unsigned int order)
1129 {
1130 struct contig_page_info info;
1131
1132 fill_contig_page_info(zone, order, &info);
1133 if (info.free_pages == 0)
1134 return 0;
1135
1136 return div_u64((info.free_pages -
1137 (info.free_blocks_suitable << order)) * 100,
1138 info.free_pages);
1139 }
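/*
 * Worked example (made-up numbers, for illustration): with free_pages = 1000
 * and free_blocks_suitable = 40 at order = 4, the pages usable for the
 * request are 40 << 4 = 640, so extfrag = (1000 - 640) * 100 / 1000 = 36,
 * i.e. 36% of free memory sits in blocks smaller than the requested order.
 */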
1140
/* Same as __fragmentation_index() but allocates contig_page_info on the stack */
1142 int fragmentation_index(struct zone *zone, unsigned int order)
1143 {
1144 struct contig_page_info info;
1145
1146 fill_contig_page_info(zone, order, &info);
1147 return __fragmentation_index(order, &info);
1148 }
1149 #endif
1150
1151 #if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \
1152 defined(CONFIG_NUMA) || defined(CONFIG_MEMCG)
1153 #ifdef CONFIG_ZONE_DMA
1154 #define TEXT_FOR_DMA(xx) xx "_dma",
1155 #else
1156 #define TEXT_FOR_DMA(xx)
1157 #endif
1158
1159 #ifdef CONFIG_ZONE_DMA32
1160 #define TEXT_FOR_DMA32(xx) xx "_dma32",
1161 #else
1162 #define TEXT_FOR_DMA32(xx)
1163 #endif
1164
1165 #ifdef CONFIG_HIGHMEM
1166 #define TEXT_FOR_HIGHMEM(xx) xx "_high",
1167 #else
1168 #define TEXT_FOR_HIGHMEM(xx)
1169 #endif
1170
1171 #ifdef CONFIG_ZONE_DEVICE
1172 #define TEXT_FOR_DEVICE(xx) xx "_device",
1173 #else
1174 #define TEXT_FOR_DEVICE(xx)
1175 #endif
1176
1177 #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
1178 TEXT_FOR_HIGHMEM(xx) xx "_movable", \
1179 TEXT_FOR_DEVICE(xx)
1180
1181 const char * const vmstat_text[] = {
	/* enum zone_stat_item counters */
1183 "nr_free_pages",
1184 "nr_zone_inactive_anon",
1185 "nr_zone_active_anon",
1186 "nr_zone_inactive_file",
1187 "nr_zone_active_file",
1188 "nr_zone_unevictable",
1189 "nr_zone_write_pending",
1190 "nr_mlock",
1191 "nr_bounce",
1192 #if IS_ENABLED(CONFIG_ZSMALLOC)
1193 "nr_zspages",
1194 #endif
1195 "nr_free_cma",
1196
	/* enum numa_stat_item counters */
1198 #ifdef CONFIG_NUMA
1199 "numa_hit",
1200 "numa_miss",
1201 "numa_foreign",
1202 "numa_interleave",
1203 "numa_local",
1204 "numa_other",
1205 #endif
1206
	/* enum node_stat_item counters */
1208 "nr_inactive_anon",
1209 "nr_active_anon",
1210 "nr_inactive_file",
1211 "nr_active_file",
1212 "nr_unevictable",
1213 "nr_slab_reclaimable",
1214 "nr_slab_unreclaimable",
1215 "nr_isolated_anon",
1216 "nr_isolated_file",
1217 "workingset_nodes",
1218 "workingset_refault_anon",
1219 "workingset_refault_file",
1220 "workingset_activate_anon",
1221 "workingset_activate_file",
1222 "workingset_restore_anon",
1223 "workingset_restore_file",
1224 "workingset_nodereclaim",
1225 "nr_anon_pages",
1226 "nr_mapped",
1227 "nr_file_pages",
1228 "nr_dirty",
1229 "nr_writeback",
1230 "nr_writeback_temp",
1231 "nr_shmem",
1232 "nr_shmem_hugepages",
1233 "nr_shmem_pmdmapped",
1234 "nr_file_hugepages",
1235 "nr_file_pmdmapped",
1236 "nr_anon_transparent_hugepages",
1237 "nr_vmscan_write",
1238 "nr_vmscan_immediate_reclaim",
1239 "nr_dirtied",
1240 "nr_written",
1241 "nr_throttled_written",
1242 "nr_kernel_misc_reclaimable",
1243 "nr_foll_pin_acquired",
1244 "nr_foll_pin_released",
1245 "nr_kernel_stack",
1246 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
1247 "nr_shadow_call_stack",
1248 #endif
1249 "nr_page_table_pages",
1250 #ifdef CONFIG_SWAP
1251 "nr_swapcached",
1252 #endif
1253 #ifdef CONFIG_NUMA_BALANCING
1254 "pgpromote_success",
1255 #endif
1256
	/* enum writeback_stat_item counters */
1258 "nr_dirty_threshold",
1259 "nr_dirty_background_threshold",
1260
1261 #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
	/* enum vm_event_item counters */
1263 "pgpgin",
1264 "pgpgout",
1265 "pswpin",
1266 "pswpout",
1267
1268 TEXTS_FOR_ZONES("pgalloc")
1269 TEXTS_FOR_ZONES("allocstall")
1270 TEXTS_FOR_ZONES("pgskip")
1271
1272 "pgfree",
1273 "pgactivate",
1274 "pgdeactivate",
1275 "pglazyfree",
1276
1277 "pgfault",
1278 "pgmajfault",
1279 "pglazyfreed",
1280
1281 "pgrefill",
1282 "pgreuse",
1283 "pgsteal_kswapd",
1284 "pgsteal_direct",
1285 "pgdemote_kswapd",
1286 "pgdemote_direct",
1287 "pgscan_kswapd",
1288 "pgscan_direct",
1289 "pgscan_direct_throttle",
1290 "pgscan_anon",
1291 "pgscan_file",
1292 "pgsteal_anon",
1293 "pgsteal_file",
1294
1295 #ifdef CONFIG_NUMA
1296 "zone_reclaim_failed",
1297 #endif
1298 "pginodesteal",
1299 "slabs_scanned",
1300 "kswapd_inodesteal",
1301 "kswapd_low_wmark_hit_quickly",
1302 "kswapd_high_wmark_hit_quickly",
1303 "pageoutrun",
1304
1305 "pgrotated",
1306
1307 "drop_pagecache",
1308 "drop_slab",
1309 "oom_kill",
1310
1311 #ifdef CONFIG_NUMA_BALANCING
1312 "numa_pte_updates",
1313 "numa_huge_pte_updates",
1314 "numa_hint_faults",
1315 "numa_hint_faults_local",
1316 "numa_pages_migrated",
1317 #endif
1318 #ifdef CONFIG_MIGRATION
1319 "pgmigrate_success",
1320 "pgmigrate_fail",
1321 "thp_migration_success",
1322 "thp_migration_fail",
1323 "thp_migration_split",
1324 #endif
1325 #ifdef CONFIG_COMPACTION
1326 "compact_migrate_scanned",
1327 "compact_free_scanned",
1328 "compact_isolated",
1329 "compact_stall",
1330 "compact_fail",
1331 "compact_success",
1332 "compact_daemon_wake",
1333 "compact_daemon_migrate_scanned",
1334 "compact_daemon_free_scanned",
1335 #endif
1336
1337 #ifdef CONFIG_HUGETLB_PAGE
1338 "htlb_buddy_alloc_success",
1339 "htlb_buddy_alloc_fail",
1340 #endif
1341 #ifdef CONFIG_CMA
1342 "cma_alloc_success",
1343 "cma_alloc_fail",
1344 #endif
1345 "unevictable_pgs_culled",
1346 "unevictable_pgs_scanned",
1347 "unevictable_pgs_rescued",
1348 "unevictable_pgs_mlocked",
1349 "unevictable_pgs_munlocked",
1350 "unevictable_pgs_cleared",
1351 "unevictable_pgs_stranded",
1352
1353 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1354 "thp_fault_alloc",
1355 "thp_fault_fallback",
1356 "thp_fault_fallback_charge",
1357 "thp_collapse_alloc",
1358 "thp_collapse_alloc_failed",
1359 "thp_file_alloc",
1360 "thp_file_fallback",
1361 "thp_file_fallback_charge",
1362 "thp_file_mapped",
1363 "thp_split_page",
1364 "thp_split_page_failed",
1365 "thp_deferred_split_page",
1366 "thp_split_pmd",
1367 "thp_scan_exceed_none_pte",
1368 "thp_scan_exceed_swap_pte",
1369 "thp_scan_exceed_share_pte",
1370 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
1371 "thp_split_pud",
1372 #endif
1373 "thp_zero_page_alloc",
1374 "thp_zero_page_alloc_failed",
1375 "thp_swpout",
1376 "thp_swpout_fallback",
1377 #endif
1378 #ifdef CONFIG_MEMORY_BALLOON
1379 "balloon_inflate",
1380 "balloon_deflate",
1381 #ifdef CONFIG_BALLOON_COMPACTION
1382 "balloon_migrate",
1383 #endif
1384 #endif
1385 #ifdef CONFIG_DEBUG_TLBFLUSH
1386 "nr_tlb_remote_flush",
1387 "nr_tlb_remote_flush_received",
1388 "nr_tlb_local_flush_all",
1389 "nr_tlb_local_flush_one",
1390 #endif
1391
1392 #ifdef CONFIG_DEBUG_VM_VMACACHE
1393 "vmacache_find_calls",
1394 "vmacache_find_hits",
1395 #endif
1396 #ifdef CONFIG_SWAP
1397 "swap_ra",
1398 "swap_ra_hit",
1399 #ifdef CONFIG_KSM
1400 "ksm_swpin_copy",
1401 #endif
1402 #endif
1403 #ifdef CONFIG_KSM
1404 "cow_ksm",
1405 #endif
1406 #ifdef CONFIG_ZSWAP
1407 "zswpin",
1408 "zswpout",
1409 #endif
1410 #ifdef CONFIG_X86
1411 "direct_map_level2_splits",
1412 "direct_map_level3_splits",
1413 #endif
1414 #endif
1415 };
1416 #endif
1417
1418 #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
1419 defined(CONFIG_PROC_FS)
1420 static void *frag_start(struct seq_file *m, loff_t *pos)
1421 {
1422 pg_data_t *pgdat;
1423 loff_t node = *pos;
1424
1425 for (pgdat = first_online_pgdat();
1426 pgdat && node;
1427 pgdat = next_online_pgdat(pgdat))
1428 --node;
1429
1430 return pgdat;
1431 }
1432
1433 static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
1434 {
1435 pg_data_t *pgdat = (pg_data_t *)arg;
1436
1437 (*pos)++;
1438 return next_online_pgdat(pgdat);
1439 }
1440
1441 static void frag_stop(struct seq_file *m, void *arg)
1442 {
1443 }
1444
/*
 * Walk zones in a node and print using a callback.
 * If @assert_populated is true, only use the callback for zones that are
 * populated.
 */
1449 static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
1450 bool assert_populated, bool nolock,
1451 void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
1452 {
1453 struct zone *zone;
1454 struct zone *node_zones = pgdat->node_zones;
1455 unsigned long flags;
1456
1457 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
1458 if (assert_populated && !populated_zone(zone))
1459 continue;
1460
1461 if (!nolock)
1462 spin_lock_irqsave(&zone->lock, flags);
1463 print(m, pgdat, zone);
1464 if (!nolock)
1465 spin_unlock_irqrestore(&zone->lock, flags);
1466 }
1467 }
1468 #endif
1469
1470 #ifdef CONFIG_PROC_FS
1471 static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
1472 struct zone *zone)
1473 {
1474 int order;
1475
1476 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1477 for (order = 0; order < MAX_ORDER; ++order)
		/*
		 * Access to nr_free is lockless as nr_free is used only for
		 * diagnostic purposes. Use data_race to avoid KCSAN warning.
		 */
1482 seq_printf(m, "%6lu ", data_race(zone->free_area[order].nr_free));
1483 seq_putc(m, '\n');
1484 }
1485
/*
 * This walks the free areas for each zone.
 */
1489 static int frag_show(struct seq_file *m, void *arg)
1490 {
1491 pg_data_t *pgdat = (pg_data_t *)arg;
1492 walk_zones_in_node(m, pgdat, true, false, frag_show_print);
1493 return 0;
1494 }
1495
1496 static void pagetypeinfo_showfree_print(struct seq_file *m,
1497 pg_data_t *pgdat, struct zone *zone)
1498 {
1499 int order, mtype;
1500
1501 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
1502 seq_printf(m, "Node %4d, zone %8s, type %12s ",
1503 pgdat->node_id,
1504 zone->name,
1505 migratetype_names[mtype]);
1506 for (order = 0; order < MAX_ORDER; ++order) {
1507 unsigned long freecount = 0;
1508 struct free_area *area;
1509 struct list_head *curr;
1510 bool overflow = false;
1511
1512 area = &(zone->free_area[order]);
1513
1514 list_for_each(curr, &area->free_list[mtype]) {
			/*
			 * Cap the free_list iteration because it might
			 * be really large and we are under a spinlock
			 * so a long time spent there could trigger a
			 * hard lockup detector. Anyway this is a
			 * debugging tool so knowing there is a handful
			 * of pages of this order should be more than
			 * sufficient.
			 */
1524 if (++freecount >= 100000) {
1525 overflow = true;
1526 break;
1527 }
1528 }
1529 seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount);
1530 spin_unlock_irq(&zone->lock);
1531 cond_resched();
1532 spin_lock_irq(&zone->lock);
1533 }
1534 seq_putc(m, '\n');
1535 }
1536 }
1537
/* Print out the free pages at each order for each migratetype */
1539 static void pagetypeinfo_showfree(struct seq_file *m, void *arg)
1540 {
1541 int order;
1542 pg_data_t *pgdat = (pg_data_t *)arg;
1543
	/* Print header */
1545 seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
1546 for (order = 0; order < MAX_ORDER; ++order)
1547 seq_printf(m, "%6d ", order);
1548 seq_putc(m, '\n');
1549
1550 walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
1551 }
1552
1553 static void pagetypeinfo_showblockcount_print(struct seq_file *m,
1554 pg_data_t *pgdat, struct zone *zone)
1555 {
1556 int mtype;
1557 unsigned long pfn;
1558 unsigned long start_pfn = zone->zone_start_pfn;
1559 unsigned long end_pfn = zone_end_pfn(zone);
1560 unsigned long count[MIGRATE_TYPES] = { 0, };
1561
1562 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
1563 struct page *page;
1564
1565 page = pfn_to_online_page(pfn);
1566 if (!page)
1567 continue;
1568
1569 if (page_zone(page) != zone)
1570 continue;
1571
1572 mtype = get_pageblock_migratetype(page);
1573
1574 if (mtype < MIGRATE_TYPES)
1575 count[mtype]++;
1576 }
1577
	/* Print counts */
1579 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1580 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1581 seq_printf(m, "%12lu ", count[mtype]);
1582 seq_putc(m, '\n');
1583 }
1584
/* Print out the number of pageblocks for each migratetype */
1586 static void pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
1587 {
1588 int mtype;
1589 pg_data_t *pgdat = (pg_data_t *)arg;
1590
1591 seq_printf(m, "\n%-23s", "Number of blocks type ");
1592 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1593 seq_printf(m, "%12s ", migratetype_names[mtype]);
1594 seq_putc(m, '\n');
1595 walk_zones_in_node(m, pgdat, true, false,
1596 pagetypeinfo_showblockcount_print);
1597 }
1598
/*
 * Print out the number of pageblocks for each migratetype that contain pages
 * of other types. This gives an indication of how well fallbacks are being
 * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
 * to determine what is going on.
 */
1605 static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
1606 {
1607 #ifdef CONFIG_PAGE_OWNER
1608 int mtype;
1609
1610 if (!static_branch_unlikely(&page_owner_inited))
1611 return;
1612
1613 drain_all_pages(NULL);
1614
1615 seq_printf(m, "\n%-23s", "Number of mixed blocks ");
1616 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1617 seq_printf(m, "%12s ", migratetype_names[mtype]);
1618 seq_putc(m, '\n');
1619
1620 walk_zones_in_node(m, pgdat, true, true,
1621 pagetypeinfo_showmixedcount_print);
1622 #endif
1623 }
1624
/*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
 */
1629 static int pagetypeinfo_show(struct seq_file *m, void *arg)
1630 {
1631 pg_data_t *pgdat = (pg_data_t *)arg;
1632
	/* check memoryless node */
1634 if (!node_state(pgdat->node_id, N_MEMORY))
1635 return 0;
1636
1637 seq_printf(m, "Page block order: %d\n", pageblock_order);
1638 seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages);
1639 seq_putc(m, '\n');
1640 pagetypeinfo_showfree(m, pgdat);
1641 pagetypeinfo_showblockcount(m, pgdat);
1642 pagetypeinfo_showmixedcount(m, pgdat);
1643
1644 return 0;
1645 }
1646
1647 static const struct seq_operations fragmentation_op = {
1648 .start = frag_start,
1649 .next = frag_next,
1650 .stop = frag_stop,
1651 .show = frag_show,
1652 };
1653
1654 static const struct seq_operations pagetypeinfo_op = {
1655 .start = frag_start,
1656 .next = frag_next,
1657 .stop = frag_stop,
1658 .show = pagetypeinfo_show,
1659 };
1660
1661 static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
1662 {
1663 int zid;
1664
1665 for (zid = 0; zid < MAX_NR_ZONES; zid++) {
1666 struct zone *compare = &pgdat->node_zones[zid];
1667
1668 if (populated_zone(compare))
1669 return zone == compare;
1670 }
1671
1672 return false;
1673 }
1674
1675 static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1676 struct zone *zone)
1677 {
1678 int i;
1679 seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
1680 if (is_zone_first_populated(pgdat, zone)) {
1681 seq_printf(m, "\n per-node stats");
1682 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
1683 unsigned long pages = node_page_state_pages(pgdat, i);
1684
1685 if (vmstat_item_print_in_thp(i))
1686 pages /= HPAGE_PMD_NR;
1687 seq_printf(m, "\n %-12s %lu", node_stat_name(i),
1688 pages);
1689 }
1690 }
1691 seq_printf(m,
1692 "\n pages free %lu"
1693 "\n boost %lu"
1694 "\n min %lu"
1695 "\n low %lu"
1696 "\n high %lu"
1697 "\n spanned %lu"
1698 "\n present %lu"
1699 "\n managed %lu"
1700 "\n cma %lu",
1701 zone_page_state(zone, NR_FREE_PAGES),
1702 zone->watermark_boost,
1703 min_wmark_pages(zone),
1704 low_wmark_pages(zone),
1705 high_wmark_pages(zone),
1706 zone->spanned_pages,
1707 zone->present_pages,
1708 zone_managed_pages(zone),
1709 zone_cma_pages(zone));
1710
1711 seq_printf(m,
1712 "\n protection: (%ld",
1713 zone->lowmem_reserve[0]);
1714 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
1715 seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
1716 seq_putc(m, ')');
1717
	/* If unpopulated, no other information is useful */
1719 if (!populated_zone(zone)) {
1720 seq_putc(m, '\n');
1721 return;
1722 }
1723
1724 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1725 seq_printf(m, "\n %-12s %lu", zone_stat_name(i),
1726 zone_page_state(zone, i));
1727
1728 #ifdef CONFIG_NUMA
1729 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
1730 seq_printf(m, "\n %-12s %lu", numa_stat_name(i),
1731 zone_numa_event_state(zone, i));
1732 #endif
1733
1734 seq_printf(m, "\n pagesets");
1735 for_each_online_cpu(i) {
1736 struct per_cpu_pages *pcp;
1737 struct per_cpu_zonestat __maybe_unused *pzstats;
1738
1739 pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
1740 seq_printf(m,
1741 "\n cpu: %i"
1742 "\n count: %i"
1743 "\n high: %i"
1744 "\n batch: %i",
1745 i,
1746 pcp->count,
1747 pcp->high,
1748 pcp->batch);
1749 #ifdef CONFIG_SMP
1750 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
1751 seq_printf(m, "\n vm stats threshold: %d",
1752 pzstats->stat_threshold);
1753 #endif
1754 }
1755 seq_printf(m,
1756 "\n node_unreclaimable: %u"
1757 "\n start_pfn: %lu",
1758 pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
1759 zone->zone_start_pfn);
1760 seq_putc(m, '\n');
1761 }
1762
1763
/*
 * Output information about zones in @pgdat.  All zones are printed regardless
 * of whether they are populated or not: lowmem_reserve_ratio operates on the
 * range of all zones, so a zone that isn't populated is not a problem.
 */
1769 static int zoneinfo_show(struct seq_file *m, void *arg)
1770 {
1771 pg_data_t *pgdat = (pg_data_t *)arg;
1772 walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
1773 return 0;
1774 }
1775
1776 static const struct seq_operations zoneinfo_op = {
1777 .start = frag_start,
1779 .next = frag_next,
1780 .stop = frag_stop,
1781 .show = zoneinfo_show,
1782 };
1783
1784 #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
1785 NR_VM_NUMA_EVENT_ITEMS + \
1786 NR_VM_NODE_STAT_ITEMS + \
1787 NR_VM_WRITEBACK_STAT_ITEMS + \
1788 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
1789 NR_VM_EVENT_ITEMS : 0))
1790
1791 static void *vmstat_start(struct seq_file *m, loff_t *pos)
1792 {
1793 unsigned long *v;
1794 int i;
1795
1796 if (*pos >= NR_VMSTAT_ITEMS)
1797 return NULL;
1798
1799 BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
1800 fold_vm_numa_events();
1801 v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
1802 m->private = v;
1803 if (!v)
1804 return ERR_PTR(-ENOMEM);
1805 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1806 v[i] = global_zone_page_state(i);
1807 v += NR_VM_ZONE_STAT_ITEMS;
1808
1809 #ifdef CONFIG_NUMA
1810 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
1811 v[i] = global_numa_event_state(i);
1812 v += NR_VM_NUMA_EVENT_ITEMS;
1813 #endif
1814
1815 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
1816 v[i] = global_node_page_state_pages(i);
1817 if (vmstat_item_print_in_thp(i))
1818 v[i] /= HPAGE_PMD_NR;
1819 }
1820 v += NR_VM_NODE_STAT_ITEMS;
1821
1822 global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
1823 v + NR_DIRTY_THRESHOLD);
1824 v += NR_VM_WRITEBACK_STAT_ITEMS;
1825
1826 #ifdef CONFIG_VM_EVENT_COUNTERS
1827 all_vm_events(v);
1828 v[PGPGIN] /= 2;
1829 v[PGPGOUT] /= 2;
1830 #endif
1831 return (unsigned long *)m->private + *pos;
1832 }
1833
1834 static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1835 {
1836 (*pos)++;
1837 if (*pos >= NR_VMSTAT_ITEMS)
1838 return NULL;
1839 return (unsigned long *)m->private + *pos;
1840 }
1841
1842 static int vmstat_show(struct seq_file *m, void *arg)
1843 {
1844 unsigned long *l = arg;
1845 unsigned long off = l - (unsigned long *)m->private;
1846
1847 seq_puts(m, vmstat_text[off]);
1848 seq_put_decimal_ull(m, " ", *l);
1849 seq_putc(m, '\n');
1850
1851 if (off == NR_VMSTAT_ITEMS - 1) {
		/*
		 * We've come to the end - add any deprecated counters to avoid
		 * breaking userspace which might depend on them being present.
		 */
1856 seq_puts(m, "nr_unstable 0\n");
1857 }
1858 return 0;
1859 }
1860
1861 static void vmstat_stop(struct seq_file *m, void *arg)
1862 {
1863 kfree(m->private);
1864 m->private = NULL;
1865 }
1866
1867 static const struct seq_operations vmstat_op = {
1868 .start = vmstat_start,
1869 .next = vmstat_next,
1870 .stop = vmstat_stop,
1871 .show = vmstat_show,
1872 };
1873 #endif
1874
1875 #ifdef CONFIG_SMP
1876 static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
1877 int sysctl_stat_interval __read_mostly = HZ;
1878
1879 #ifdef CONFIG_PROC_FS
1880 static void refresh_vm_stats(struct work_struct *work)
1881 {
1882 refresh_cpu_vm_stats(true);
1883 }
1884
1885 int vmstat_refresh(struct ctl_table *table, int write,
1886 void *buffer, size_t *lenp, loff_t *ppos)
1887 {
1888 long val;
1889 int err;
1890 int i;
1891
	/*
	 * The regular update, every sysctl_stat_interval, may come later
	 * than expected: leaving a significant amount in per_cpu buckets.
	 * This is particularly misleading when checking a quantity of HUGE
	 * pages, immediately after running a test.  /proc/sys/vm/stat_refresh,
	 * which can equally be echo'ed to or cat'ted from (by root),
	 * can be used to update the stats just before reading them.
	 *
	 * Oh, and since global_zone_page_state() etc. are so careful to hide
	 * transiently negative values, warn here if any of the stats is
	 * negative, so we know to go looking for imbalance.
	 */
1904 err = schedule_on_each_cpu(refresh_vm_stats);
1905 if (err)
1906 return err;
1907 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
		/*
		 * Skip checking stats known to go negative occasionally.
		 */
1911 switch (i) {
1912 case NR_ZONE_WRITE_PENDING:
1913 case NR_FREE_CMA_PAGES:
1914 continue;
1915 }
1916 val = atomic_long_read(&vm_zone_stat[i]);
1917 if (val < 0) {
1918 pr_warn("%s: %s %ld\n",
1919 __func__, zone_stat_name(i), val);
1920 }
1921 }
1922 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
		/*
		 * Skip checking stats known to go negative occasionally.
		 */
1926 switch (i) {
1927 case NR_WRITEBACK:
1928 continue;
1929 }
1930 val = atomic_long_read(&vm_node_stat[i]);
1931 if (val < 0) {
1932 pr_warn("%s: %s %ld\n",
1933 __func__, node_stat_name(i), val);
1934 }
1935 }
1936 if (write)
1937 *ppos += *lenp;
1938 else
1939 *lenp = 0;
1940 return 0;
1941 }
1942 #endif
1943
1944 static void vmstat_update(struct work_struct *w)
1945 {
1946 if (refresh_cpu_vm_stats(true)) {
		/*
		 * Counters were updated so we expect more updates
		 * to occur in the future. Keep on running the
		 * update worker thread.
		 */
1952 queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
1953 this_cpu_ptr(&vmstat_work),
1954 round_jiffies_relative(sysctl_stat_interval));
1955 }
1956 }
1957
/*
 * Check if the diffs for a certain cpu indicate that
 * an update is needed.
 */
1962 static bool need_update(int cpu)
1963 {
1964 pg_data_t *last_pgdat = NULL;
1965 struct zone *zone;
1966
1967 for_each_populated_zone(zone) {
1968 struct per_cpu_zonestat *pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
1969 struct per_cpu_nodestat *n;
1970
		/*
		 * The fast way of checking if there are any vmstat diffs.
		 */
1974 if (memchr_inv(pzstats->vm_stat_diff, 0, sizeof(pzstats->vm_stat_diff)))
1975 return true;
1976
1977 if (last_pgdat == zone->zone_pgdat)
1978 continue;
1979 last_pgdat = zone->zone_pgdat;
1980 n = per_cpu_ptr(zone->zone_pgdat->per_cpu_nodestats, cpu);
1981 if (memchr_inv(n->vm_node_stat_diff, 0, sizeof(n->vm_node_stat_diff)))
1982 return true;
1983 }
1984 return false;
1985 }
1986
/*
 * Switch off vmstat processing and then fold all the remaining differentials
 * until the diffs stay at zero. The function is used by NOHZ and can only be
 * invoked when tick processing is not active.
 */
1992 void quiet_vmstat(void)
1993 {
1994 if (system_state != SYSTEM_RUNNING)
1995 return;
1996
1997 if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
1998 return;
1999
2000 if (!need_update(smp_processor_id()))
2001 return;
2002
	/*
	 * Just refresh counters and do not care about the pending delayed
	 * vmstat_update. It doesn't fire that often to matter and canceling
	 * it would be too expensive from this path.
	 * vmstat_shepherd will take care about that for us.
	 */
2009 refresh_cpu_vm_stats(false);
2010 }
2011
/*
 * Shepherd worker thread that checks the
 * differentials of processors that have their worker
 * threads for vm statistics updates disabled because of
 * inactivity.
 */
2018 static void vmstat_shepherd(struct work_struct *w);
2019
2020 static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
2021
2022 static void vmstat_shepherd(struct work_struct *w)
2023 {
2024 int cpu;
2025
2026 cpus_read_lock();
	/* Check processors whose vmstat worker threads have been disabled */
2028 for_each_online_cpu(cpu) {
2029 struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
2030
2031 if (!delayed_work_pending(dw) && need_update(cpu))
2032 queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
2033
2034 cond_resched();
2035 }
2036 cpus_read_unlock();
2037
2038 schedule_delayed_work(&shepherd,
2039 round_jiffies_relative(sysctl_stat_interval));
2040 }
2041
2042 static void __init start_shepherd_timer(void)
2043 {
2044 int cpu;
2045
2046 for_each_possible_cpu(cpu)
2047 INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
2048 vmstat_update);
2049
2050 schedule_delayed_work(&shepherd,
2051 round_jiffies_relative(sysctl_stat_interval));
2052 }
2053
2054 static void __init init_cpu_node_state(void)
2055 {
2056 int node;
2057
2058 for_each_online_node(node) {
2059 if (!cpumask_empty(cpumask_of_node(node)))
2060 node_set_state(node, N_CPU);
2061 }
2062 }
2063
2064 static int vmstat_cpu_online(unsigned int cpu)
2065 {
2066 refresh_zone_stat_thresholds();
2067
2068 if (!node_state(cpu_to_node(cpu), N_CPU)) {
2069 node_set_state(cpu_to_node(cpu), N_CPU);
2070 set_migration_target_nodes();
2071 }
2072
2073 return 0;
2074 }
2075
2076 static int vmstat_cpu_down_prep(unsigned int cpu)
2077 {
2078 cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
2079 return 0;
2080 }
2081
2082 static int vmstat_cpu_dead(unsigned int cpu)
2083 {
2084 const struct cpumask *node_cpus;
2085 int node;
2086
2087 node = cpu_to_node(cpu);
2088
2089 refresh_zone_stat_thresholds();
2090 node_cpus = cpumask_of_node(node);
2091 if (!cpumask_empty(node_cpus))
2092 return 0;
2093
2094 node_clear_state(node, N_CPU);
2095 set_migration_target_nodes();
2096
2097 return 0;
2098 }
2099
2100 #endif
2101
2102 struct workqueue_struct *mm_percpu_wq;
2103
2104 void __init init_mm_internals(void)
2105 {
2106 int ret __maybe_unused;
2107
2108 mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
2109
2110 #ifdef CONFIG_SMP
2111 ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
2112 NULL, vmstat_cpu_dead);
2113 if (ret < 0)
2114 pr_err("vmstat: failed to register 'dead' hotplug state\n");
2115
2116 ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
2117 vmstat_cpu_online,
2118 vmstat_cpu_down_prep);
2119 if (ret < 0)
2120 pr_err("vmstat: failed to register 'online' hotplug state\n");
2121
2122 cpus_read_lock();
2123 init_cpu_node_state();
2124 cpus_read_unlock();
2125
2126 start_shepherd_timer();
2127 #endif
2128 migrate_on_reclaim_init();
2129 #ifdef CONFIG_PROC_FS
2130 proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
2131 proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
2132 proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
2133 proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
2134 #endif
2135 }
2136
2137 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
2138
/*
 * Return an index indicating how much of the available free memory is
 * unusable for an allocation of the requested size.
 */
2143 static int unusable_free_index(unsigned int order,
2144 struct contig_page_info *info)
2145 {
	/* No free memory is interpreted as all free memory is unusable */
2147 if (info->free_pages == 0)
2148 return 1000;
2149
	/*
	 * Index should be a value between 0 and 1. Return a value to 3
	 * decimal places.
	 *
	 * 0 => no fragmentation
	 * 1 => high fragmentation
	 */
2157 return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
2158
2159 }
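/*
 * Worked example (made-up numbers, for illustration): with free_pages = 1000
 * and free_blocks_suitable = 40 at order = 4, the unusable free index is
 * (1000 - (40 << 4)) * 1000 / 1000 = 360, printed as 0.360 by
 * unusable_show_print() below.
 */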
2160
2161 static void unusable_show_print(struct seq_file *m,
2162 pg_data_t *pgdat, struct zone *zone)
2163 {
2164 unsigned int order;
2165 int index;
2166 struct contig_page_info info;
2167
2168 seq_printf(m, "Node %d, zone %8s ",
2169 pgdat->node_id,
2170 zone->name);
2171 for (order = 0; order < MAX_ORDER; ++order) {
2172 fill_contig_page_info(zone, order, &info);
2173 index = unusable_free_index(order, &info);
2174 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
2175 }
2176
2177 seq_putc(m, '\n');
2178 }
2179
/*
 * Display unusable free space index
 *
 * The unusable free space index measures how much of the available free
 * memory cannot be used to satisfy an allocation of a given size and is a
 * value between 0 and 1. The higher the value, the more of free memory is
 * unusable and by implication, the worse the external fragmentation is. This
 * can be expressed as a percentage by multiplying by 100.
 */
2189 static int unusable_show(struct seq_file *m, void *arg)
2190 {
2191 pg_data_t *pgdat = (pg_data_t *)arg;
2192
	/* check memoryless node */
2194 if (!node_state(pgdat->node_id, N_MEMORY))
2195 return 0;
2196
2197 walk_zones_in_node(m, pgdat, true, false, unusable_show_print);
2198
2199 return 0;
2200 }
2201
2202 static const struct seq_operations unusable_sops = {
2203 .start = frag_start,
2204 .next = frag_next,
2205 .stop = frag_stop,
2206 .show = unusable_show,
2207 };
2208
2209 DEFINE_SEQ_ATTRIBUTE(unusable);
2210
2211 static void extfrag_show_print(struct seq_file *m,
2212 pg_data_t *pgdat, struct zone *zone)
2213 {
2214 unsigned int order;
2215 int index;
2216
	/* Alloc on stack as interrupts are disabled for zone walk */
2218 struct contig_page_info info;
2219
2220 seq_printf(m, "Node %d, zone %8s ",
2221 pgdat->node_id,
2222 zone->name);
2223 for (order = 0; order < MAX_ORDER; ++order) {
2224 fill_contig_page_info(zone, order, &info);
2225 index = __fragmentation_index(order, &info);
2226 seq_printf(m, "%2d.%03d ", index / 1000, index % 1000);
2227 }
2228
2229 seq_putc(m, '\n');
2230 }
2231
/*
 * Display fragmentation index for orders that allocations would fail for
 */
2235 static int extfrag_show(struct seq_file *m, void *arg)
2236 {
2237 pg_data_t *pgdat = (pg_data_t *)arg;
2238
2239 walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);
2240
2241 return 0;
2242 }
2243
2244 static const struct seq_operations extfrag_sops = {
2245 .start = frag_start,
2246 .next = frag_next,
2247 .stop = frag_stop,
2248 .show = extfrag_show,
2249 };
2250
2251 DEFINE_SEQ_ATTRIBUTE(extfrag);
2252
2253 static int __init extfrag_debug_init(void)
2254 {
2255 struct dentry *extfrag_debug_root;
2256
2257 extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
2258
2259 debugfs_create_file("unusable_index", 0444, extfrag_debug_root, NULL,
2260 &unusable_fops);
2261
2262 debugfs_create_file("extfrag_index", 0444, extfrag_debug_root, NULL,
2263 &extfrag_fops);
2264
2265 return 0;
2266 }
2267
2268 module_init(extfrag_debug_init);
2269 #endif