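// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/page-writeback.c
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level: dirty accounting, the global and per-writeback
 * dirty limits, balance_dirty_pages() throttling, and the folio
 * dirty/writeback flag helpers.
 */
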
0015 #include <linux/kernel.h>
0016 #include <linux/export.h>
0017 #include <linux/spinlock.h>
0018 #include <linux/fs.h>
0019 #include <linux/mm.h>
0020 #include <linux/swap.h>
0021 #include <linux/slab.h>
0022 #include <linux/pagemap.h>
0023 #include <linux/writeback.h>
0024 #include <linux/init.h>
0025 #include <linux/backing-dev.h>
0026 #include <linux/task_io_accounting_ops.h>
0027 #include <linux/blkdev.h>
0028 #include <linux/mpage.h>
0029 #include <linux/rmap.h>
0030 #include <linux/percpu.h>
0031 #include <linux/smp.h>
0032 #include <linux/sysctl.h>
0033 #include <linux/cpu.h>
0034 #include <linux/syscalls.h>
0035 #include <linux/pagevec.h>
0036 #include <linux/timer.h>
0037 #include <linux/sched/rt.h>
0038 #include <linux/sched/signal.h>
0039 #include <linux/mm_inline.h>
0040 #include <trace/events/writeback.h>
0041
0042 #include "internal.h"
0043
0044
0045
0046
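/* Sleep at most 200ms at a time in balance_dirty_pages(). */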
0047 #define MAX_PAUSE max(HZ/5, 1)
0048
0049
0050
0051
0052
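/*
 * Try to keep balance_dirty_pages() call intervals higher than this many
 * pages by raising the pause time to max_pause when it falls below this.
 */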
0053 #define DIRTY_POLL_THRESH (128 >> (PAGE_SHIFT - 10))
0054
0055
0056
0057
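/* Estimate write bandwidth (and refresh the dirty limit) at 200ms intervals. */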
0058 #define BANDWIDTH_INTERVAL max(HZ/5, 1)
0059
0060 #define RATELIMIT_CALC_SHIFT 10
0061
0062
0063
0064
0065
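/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited()
 * drops into the expensive balance_dirty_pages() path even if the calling
 * task's own quota is not yet used up.  Recomputed by writeback_set_ratelimit().
 */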
0066 static long ratelimit_pages = 32;
0067
0068
0069
0070
0071
0072
0073 static int dirty_background_ratio = 10;
0074
0075
0076
0077
0078
0079 static unsigned long dirty_background_bytes;
0080
0081
0082
0083
0084
0085 static int vm_highmem_is_dirtyable;
0086
0087
0088
0089
0090 static int vm_dirty_ratio = 20;
0091
0092
0093
0094
0095
0096 static unsigned long vm_dirty_bytes;
0097
0098
0099
0100
0101 unsigned int dirty_writeback_interval = 5 * 100;
0102
0103 EXPORT_SYMBOL_GPL(dirty_writeback_interval);
0104
0105
0106
0107
0108 unsigned int dirty_expire_interval = 30 * 100;
0109
0110
0111
0112
0113
0114 int laptop_mode;
0115
0116 EXPORT_SYMBOL(laptop_mode);
0117
0118
0119
0120 struct wb_domain global_wb_domain;
0121
0122
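/* consolidated parameters for balance_dirty_pages() and its subroutines */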
0123 struct dirty_throttle_control {
0124 #ifdef CONFIG_CGROUP_WRITEBACK
0125 struct wb_domain *dom;
0126 struct dirty_throttle_control *gdtc;
0127 #endif
0128 struct bdi_writeback *wb;
0129 struct fprop_local_percpu *wb_completions;
0130
0131 unsigned long avail;
0132 unsigned long dirty;
0133 unsigned long thresh;
0134 unsigned long bg_thresh;
0135
0136 unsigned long wb_dirty;
0137 unsigned long wb_thresh;
0138 unsigned long wb_bg_thresh;
0139
0140 unsigned long pos_ratio;
0141 };
0142
0143
0144
0145
0146
0147
0148 #define VM_COMPLETIONS_PERIOD_LEN (3*HZ)
0149
0150 #ifdef CONFIG_CGROUP_WRITEBACK
0151
0152 #define GDTC_INIT(__wb) .wb = (__wb), \
0153 .dom = &global_wb_domain, \
0154 .wb_completions = &(__wb)->completions
0155
0156 #define GDTC_INIT_NO_WB .dom = &global_wb_domain
0157
0158 #define MDTC_INIT(__wb, __gdtc) .wb = (__wb), \
0159 .dom = mem_cgroup_wb_domain(__wb), \
0160 .wb_completions = &(__wb)->memcg_completions, \
0161 .gdtc = __gdtc
0162
0163 static bool mdtc_valid(struct dirty_throttle_control *dtc)
0164 {
0165 return dtc->dom;
0166 }
0167
0168 static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
0169 {
0170 return dtc->dom;
0171 }
0172
0173 static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
0174 {
0175 return mdtc->gdtc;
0176 }
0177
0178 static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
0179 {
0180 return &wb->memcg_completions;
0181 }
0182
0183 static void wb_min_max_ratio(struct bdi_writeback *wb,
0184 unsigned long *minp, unsigned long *maxp)
0185 {
0186 unsigned long this_bw = READ_ONCE(wb->avg_write_bandwidth);
0187 unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
0188 unsigned long long min = wb->bdi->min_ratio;
0189 unsigned long long max = wb->bdi->max_ratio;
0190
0191
0192
0193
0194
0195 if (this_bw < tot_bw) {
0196 if (min) {
0197 min *= this_bw;
0198 min = div64_ul(min, tot_bw);
0199 }
0200 if (max < 100) {
0201 max *= this_bw;
0202 max = div64_ul(max, tot_bw);
0203 }
0204 }
0205
0206 *minp = min;
0207 *maxp = max;
0208 }
0209
0210 #else
0211
0212 #define GDTC_INIT(__wb) .wb = (__wb), \
0213 .wb_completions = &(__wb)->completions
0214 #define GDTC_INIT_NO_WB
0215 #define MDTC_INIT(__wb, __gdtc)
0216
0217 static bool mdtc_valid(struct dirty_throttle_control *dtc)
0218 {
0219 return false;
0220 }
0221
0222 static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
0223 {
0224 return &global_wb_domain;
0225 }
0226
0227 static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
0228 {
0229 return NULL;
0230 }
0231
0232 static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
0233 {
0234 return NULL;
0235 }
0236
0237 static void wb_min_max_ratio(struct bdi_writeback *wb,
0238 unsigned long *minp, unsigned long *maxp)
0239 {
0240 *minp = wb->bdi->min_ratio;
0241 *maxp = wb->bdi->max_ratio;
0242 }
0243
0244 #endif
0245
0246
0247
0248
0249
0250
0251
0252
0253
0254
0255
0256
0257
0258
0259
0260
0261
0262
0263
0264
0265
0266
0267
0268
0269
0270
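/**
 * node_dirtyable_memory - number of dirtyable pages in a node
 * @pgdat: the node
 *
 * Return: the node's number of pages potentially available for dirty
 * page cache.  This is the base value for the per-node dirty limits.
 */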
0271 static unsigned long node_dirtyable_memory(struct pglist_data *pgdat)
0272 {
0273 unsigned long nr_pages = 0;
0274 int z;
0275
0276 for (z = 0; z < MAX_NR_ZONES; z++) {
0277 struct zone *zone = pgdat->node_zones + z;
0278
0279 if (!populated_zone(zone))
0280 continue;
0281
0282 nr_pages += zone_page_state(zone, NR_FREE_PAGES);
0283 }
0284
0285
0286
0287
0288
0289
0290 nr_pages -= min(nr_pages, pgdat->totalreserve_pages);
0291
0292 nr_pages += node_page_state(pgdat, NR_INACTIVE_FILE);
0293 nr_pages += node_page_state(pgdat, NR_ACTIVE_FILE);
0294
0295 return nr_pages;
0296 }
0297
0298 static unsigned long highmem_dirtyable_memory(unsigned long total)
0299 {
0300 #ifdef CONFIG_HIGHMEM
0301 int node;
0302 unsigned long x = 0;
0303 int i;
0304
0305 for_each_node_state(node, N_HIGH_MEMORY) {
0306 for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) {
0307 struct zone *z;
0308 unsigned long nr_pages;
0309
0310 if (!is_highmem_idx(i))
0311 continue;
0312
0313 z = &NODE_DATA(node)->node_zones[i];
0314 if (!populated_zone(z))
0315 continue;
0316
0317 nr_pages = zone_page_state(z, NR_FREE_PAGES);
0318
0319 nr_pages -= min(nr_pages, high_wmark_pages(z));
0320 nr_pages += zone_page_state(z, NR_ZONE_INACTIVE_FILE);
0321 nr_pages += zone_page_state(z, NR_ZONE_ACTIVE_FILE);
0322 x += nr_pages;
0323 }
0324 }
0325
0326
0327
0328
0329
0330
0331
0332 return min(x, total);
0333 #else
0334 return 0;
0335 #endif
0336 }
0337
0338
0339
0340
0341
0342
0343
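/**
 * global_dirtyable_memory - number of globally dirtyable pages
 *
 * Return: the global number of pages potentially available for dirty
 * page cache.  This is the base value for the global dirty limits.
 */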
0344 static unsigned long global_dirtyable_memory(void)
0345 {
0346 unsigned long x;
0347
0348 x = global_zone_page_state(NR_FREE_PAGES);
0349
0350
0351
0352
0353
0354 x -= min(x, totalreserve_pages);
0355
0356 x += global_node_page_state(NR_INACTIVE_FILE);
0357 x += global_node_page_state(NR_ACTIVE_FILE);
0358
0359 if (!vm_highmem_is_dirtyable)
0360 x -= highmem_dirtyable_memory(x);
0361
0362 return x + 1;
0363 }
0364
0365
0366
0367
0368
0369
0370
0371
0372
0373
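/**
 * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain
 * @dtc: dirty_throttle_control of interest
 *
 * Calculate @dtc->thresh and ->bg_thresh considering
 * vm_dirty_{bytes|ratio} and dirty_background_{bytes|ratio}.  The caller
 * must ensure that @dtc->avail is set before calling this function.  The
 * dirty limits are lifted by roughly a quarter for real-time tasks.
 */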
0374 static void domain_dirty_limits(struct dirty_throttle_control *dtc)
0375 {
0376 const unsigned long available_memory = dtc->avail;
0377 struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
0378 unsigned long bytes = vm_dirty_bytes;
0379 unsigned long bg_bytes = dirty_background_bytes;
0380
0381 unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100;
0382 unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100;
0383 unsigned long thresh;
0384 unsigned long bg_thresh;
0385 struct task_struct *tsk;
0386
0387
0388 if (gdtc) {
0389 unsigned long global_avail = gdtc->avail;
0390
0391
0392
0393
0394
0395
0396
0397
0398 if (bytes)
0399 ratio = min(DIV_ROUND_UP(bytes, global_avail),
0400 PAGE_SIZE);
0401 if (bg_bytes)
0402 bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail),
0403 PAGE_SIZE);
0404 bytes = bg_bytes = 0;
0405 }
0406
0407 if (bytes)
0408 thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
0409 else
0410 thresh = (ratio * available_memory) / PAGE_SIZE;
0411
0412 if (bg_bytes)
0413 bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
0414 else
0415 bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;
0416
0417 if (bg_thresh >= thresh)
0418 bg_thresh = thresh / 2;
0419 tsk = current;
0420 if (rt_task(tsk)) {
0421 bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
0422 thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
0423 }
0424 dtc->thresh = thresh;
0425 dtc->bg_thresh = bg_thresh;
0426
0427
0428 if (!gdtc)
0429 trace_global_dirty_state(bg_thresh, thresh);
0430 }
0431
0432
0433
0434
0435
0436
0437
0438
0439
0440 void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
0441 {
0442 struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
0443
0444 gdtc.avail = global_dirtyable_memory();
0445 domain_dirty_limits(&gdtc);
0446
0447 *pbackground = gdtc.bg_thresh;
0448 *pdirty = gdtc.thresh;
0449 }
0450
0451
0452
0453
0454
0455
0456
0457
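/**
 * node_dirty_limit - maximum number of dirty pages allowed in a node
 * @pgdat: the node
 *
 * Return: the maximum number of dirty pages allowed in a node, based
 * on the node's dirtyable memory.
 */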
0458 static unsigned long node_dirty_limit(struct pglist_data *pgdat)
0459 {
0460 unsigned long node_memory = node_dirtyable_memory(pgdat);
0461 struct task_struct *tsk = current;
0462 unsigned long dirty;
0463
0464 if (vm_dirty_bytes)
0465 dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
0466 node_memory / global_dirtyable_memory();
0467 else
0468 dirty = vm_dirty_ratio * node_memory / 100;
0469
0470 if (rt_task(tsk))
0471 dirty += dirty / 4;
0472
0473 return dirty;
0474 }
0475
0476
0477
0478
0479
0480
0481
0482
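/**
 * node_dirty_ok - tells whether a node is within its dirty limits
 * @pgdat: the node to check
 *
 * Return: %true when the dirty pages in @pgdat are within the node's
 * dirty limit, %false if the limit is exceeded.
 */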
0483 bool node_dirty_ok(struct pglist_data *pgdat)
0484 {
0485 unsigned long limit = node_dirty_limit(pgdat);
0486 unsigned long nr_pages = 0;
0487
0488 nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
0489 nr_pages += node_page_state(pgdat, NR_WRITEBACK);
0490
0491 return nr_pages <= limit;
0492 }
0493
0494 #ifdef CONFIG_SYSCTL
0495 static int dirty_background_ratio_handler(struct ctl_table *table, int write,
0496 void *buffer, size_t *lenp, loff_t *ppos)
0497 {
0498 int ret;
0499
0500 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
0501 if (ret == 0 && write)
0502 dirty_background_bytes = 0;
0503 return ret;
0504 }
0505
0506 static int dirty_background_bytes_handler(struct ctl_table *table, int write,
0507 void *buffer, size_t *lenp, loff_t *ppos)
0508 {
0509 int ret;
0510
0511 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
0512 if (ret == 0 && write)
0513 dirty_background_ratio = 0;
0514 return ret;
0515 }
0516
0517 static int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer,
0518 size_t *lenp, loff_t *ppos)
0519 {
0520 int old_ratio = vm_dirty_ratio;
0521 int ret;
0522
0523 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
0524 if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
0525 writeback_set_ratelimit();
0526 vm_dirty_bytes = 0;
0527 }
0528 return ret;
0529 }
0530
0531 static int dirty_bytes_handler(struct ctl_table *table, int write,
0532 void *buffer, size_t *lenp, loff_t *ppos)
0533 {
0534 unsigned long old_bytes = vm_dirty_bytes;
0535 int ret;
0536
0537 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
0538 if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
0539 writeback_set_ratelimit();
0540 vm_dirty_ratio = 0;
0541 }
0542 return ret;
0543 }
0544 #endif
0545
0546 static unsigned long wp_next_time(unsigned long cur_time)
0547 {
0548 cur_time += VM_COMPLETIONS_PERIOD_LEN;
0549
0550 if (!cur_time)
0551 return 1;
0552 return cur_time;
0553 }
0554
0555 static void wb_domain_writeout_add(struct wb_domain *dom,
0556 struct fprop_local_percpu *completions,
0557 unsigned int max_prop_frac, long nr)
0558 {
0559 __fprop_add_percpu_max(&dom->completions, completions,
0560 max_prop_frac, nr);
0561
0562 if (unlikely(!dom->period_time)) {
0563
0564
0565
0566
0567
0568
0569 dom->period_time = wp_next_time(jiffies);
0570 mod_timer(&dom->period_timer, dom->period_time);
0571 }
0572 }
0573
0574
0575
0576
0577
0578 static inline void __wb_writeout_add(struct bdi_writeback *wb, long nr)
0579 {
0580 struct wb_domain *cgdom;
0581
0582 wb_stat_mod(wb, WB_WRITTEN, nr);
0583 wb_domain_writeout_add(&global_wb_domain, &wb->completions,
0584 wb->bdi->max_prop_frac, nr);
0585
0586 cgdom = mem_cgroup_wb_domain(wb);
0587 if (cgdom)
0588 wb_domain_writeout_add(cgdom, wb_memcg_completions(wb),
0589 wb->bdi->max_prop_frac, nr);
0590 }
0591
0592 void wb_writeout_inc(struct bdi_writeback *wb)
0593 {
0594 unsigned long flags;
0595
0596 local_irq_save(flags);
0597 __wb_writeout_add(wb, 1);
0598 local_irq_restore(flags);
0599 }
0600 EXPORT_SYMBOL_GPL(wb_writeout_inc);
0601
0602
0603
0604
0605
0606 static void writeout_period(struct timer_list *t)
0607 {
0608 struct wb_domain *dom = from_timer(dom, t, period_timer);
0609 int miss_periods = (jiffies - dom->period_time) /
0610 VM_COMPLETIONS_PERIOD_LEN;
0611
0612 if (fprop_new_period(&dom->completions, miss_periods + 1)) {
0613 dom->period_time = wp_next_time(dom->period_time +
0614 miss_periods * VM_COMPLETIONS_PERIOD_LEN);
0615 mod_timer(&dom->period_timer, dom->period_time);
0616 } else {
0617
0618
0619
0620
0621 dom->period_time = 0;
0622 }
0623 }
0624
0625 int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
0626 {
0627 memset(dom, 0, sizeof(*dom));
0628
0629 spin_lock_init(&dom->lock);
0630
0631 timer_setup(&dom->period_timer, writeout_period, TIMER_DEFERRABLE);
0632
0633 dom->dirty_limit_tstamp = jiffies;
0634
0635 return fprop_global_init(&dom->completions, gfp);
0636 }
0637
0638 #ifdef CONFIG_CGROUP_WRITEBACK
0639 void wb_domain_exit(struct wb_domain *dom)
0640 {
0641 del_timer_sync(&dom->period_timer);
0642 fprop_global_destroy(&dom->completions);
0643 }
0644 #endif
0645
0646
0647
0648
0649
0650
0651 static unsigned int bdi_min_ratio;
0652
0653 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
0654 {
0655 unsigned int delta;
0656 int ret = 0;
0657
0658 spin_lock_bh(&bdi_lock);
0659 if (min_ratio > bdi->max_ratio) {
0660 ret = -EINVAL;
0661 } else {
0662 if (min_ratio < bdi->min_ratio) {
0663 delta = bdi->min_ratio - min_ratio;
0664 bdi_min_ratio -= delta;
0665 bdi->min_ratio = min_ratio;
0666 } else {
0667 delta = min_ratio - bdi->min_ratio;
0668 if (bdi_min_ratio + delta < 100) {
0669 bdi_min_ratio += delta;
0670 bdi->min_ratio = min_ratio;
0671 } else {
0672 ret = -EINVAL;
0673 }
0674 }
0675 }
0676 spin_unlock_bh(&bdi_lock);
0677
0678 return ret;
0679 }
0680
0681 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
0682 {
0683 int ret = 0;
0684
0685 if (max_ratio > 100)
0686 return -EINVAL;
0687
0688 spin_lock_bh(&bdi_lock);
0689 if (bdi->min_ratio > max_ratio) {
0690 ret = -EINVAL;
0691 } else {
0692 bdi->max_ratio = max_ratio;
0693 bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
0694 }
0695 spin_unlock_bh(&bdi_lock);
0696
0697 return ret;
0698 }
0699 EXPORT_SYMBOL(bdi_set_max_ratio);
0700
0701 static unsigned long dirty_freerun_ceiling(unsigned long thresh,
0702 unsigned long bg_thresh)
0703 {
0704 return (thresh + bg_thresh) / 2;
0705 }
0706
0707 static unsigned long hard_dirty_limit(struct wb_domain *dom,
0708 unsigned long thresh)
0709 {
0710 return max(thresh, dom->dirty_limit);
0711 }
0712
0713
0714
0715
0716
0717 static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
0718 unsigned long filepages, unsigned long headroom)
0719 {
0720 struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
0721 unsigned long clean = filepages - min(filepages, mdtc->dirty);
0722 unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
0723 unsigned long other_clean = global_clean - min(global_clean, clean);
0724
0725 mdtc->avail = filepages + min(headroom, other_clean);
0726 }
0727
0728
0729
0730
0731
0732
0733
0734
0735
0736
0737
0738
0739
0740
0741
0742
0743
0744
0745
0746
0747
0748
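/*
 * __wb_calc_thresh - @wb's share of the domain's dirty throttling threshold
 *
 * Return: @wb's dirty limit in pages, where "dirty" covers both
 * NR_FILE_DIRTY and NR_WRITEBACK.
 *
 * The share is derived from the wb's recently completed fraction of
 * writeback (the floating proportion maintained in dom->completions),
 * then raised and capped according to the bdi's min_ratio/max_ratio
 * settings, so faster devices get a correspondingly larger slice of the
 * global threshold.
 */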
0749 static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
0750 {
0751 struct wb_domain *dom = dtc_dom(dtc);
0752 unsigned long thresh = dtc->thresh;
0753 u64 wb_thresh;
0754 unsigned long numerator, denominator;
0755 unsigned long wb_min_ratio, wb_max_ratio;
0756
0757
0758
0759
0760 fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
0761 &numerator, &denominator);
0762
0763 wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
0764 wb_thresh *= numerator;
0765 wb_thresh = div64_ul(wb_thresh, denominator);
0766
0767 wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);
0768
0769 wb_thresh += (thresh * wb_min_ratio) / 100;
0770 if (wb_thresh > (thresh * wb_max_ratio) / 100)
0771 wb_thresh = thresh * wb_max_ratio / 100;
0772
0773 return wb_thresh;
0774 }
0775
0776 unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
0777 {
0778 struct dirty_throttle_control gdtc = { GDTC_INIT(wb),
0779 .thresh = thresh };
0780 return __wb_calc_thresh(&gdtc);
0781 }
0782
0783
0784
0785
0786
0787
0788
0789
0790
0791
0792
0793
0794
0795
0796
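/*
 * Dirty position control.  pos_ratio_polynom() evaluates, in
 * RATELIMIT_CALC_SHIFT fixed point, the cubic control curve
 *
 *                                 setpoint - dirty 3
 *	pos_ratio(dirty) := 1.0 + (----------------)
 *                                 limit - setpoint
 *
 * clamped to [0, 2]: it is 2.0 with no dirty pages, exactly 1.0 at the
 * setpoint, and falls to 0 as dirty approaches limit.
 *
 * Worked example (illustrative numbers only): with setpoint = 100 and
 * limit = 200, dirty = 50 gives 1 + (50/100)^3 = 1.125, so tasks may
 * dirty slightly faster than the base ratelimit; dirty = 150 gives
 * 1 + (-0.5)^3 = 0.875, throttling them slightly harder.
 */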
0797 static long long pos_ratio_polynom(unsigned long setpoint,
0798 unsigned long dirty,
0799 unsigned long limit)
0800 {
0801 long long pos_ratio;
0802 long x;
0803
0804 x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
0805 (limit - setpoint) | 1);
0806 pos_ratio = x;
0807 pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
0808 pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
0809 pos_ratio += 1 << RATELIMIT_CALC_SHIFT;
0810
0811 return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT);
0812 }
0813
0814
0815
0816
0817
0818
0819
0820
0821
0822
0823
0824
0825
0826
0827
0828
0829
0830
0831
0832
0833
0834
0835
0836
0837
0838
0839
0840
0841
0842
0843
0844
0845
0846
0847
0848
0849
0850
0851
0852
0853
0854
0855
0856
0857
0858
0859
0860
0861
0862
0863
0864
0865
0866
0867
0868
0869
0870
0871
0872
0873
0874
0875
0876
0877
0878
0879
0880
0881
0882
0883
0884
0885
0886
0887
0888
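/*
 * wb_position_ratio - compute dtc->pos_ratio, the position-based feedback
 * that scales the base dirty_ratelimit (RATELIMIT_CALC_SHIFT fixed point).
 *
 * A global cubic curve (pos_ratio_polynom() around the domain setpoint)
 * provides the main control; a per-wb linear ramp around wb_setpoint then
 * scales it further, so a device momentarily writing more or less than its
 * share is slowed down or sped up accordingly.  Very small wb_dirty values
 * (below wb_thresh/2) get an extra boost to keep slow devices from
 * stalling.  pos_ratio is left at 0 once dirty reaches the hard limit.
 *
 * BDI_CAP_STRICTLIMIT devices apply the same cubic to the wb's own
 * counters and use the minimum of the global and per-wb values.
 */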
0889 static void wb_position_ratio(struct dirty_throttle_control *dtc)
0890 {
0891 struct bdi_writeback *wb = dtc->wb;
0892 unsigned long write_bw = READ_ONCE(wb->avg_write_bandwidth);
0893 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
0894 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
0895 unsigned long wb_thresh = dtc->wb_thresh;
0896 unsigned long x_intercept;
0897 unsigned long setpoint;
0898 unsigned long wb_setpoint;
0899 unsigned long span;
0900 long long pos_ratio;
0901 long x;
0902
0903 dtc->pos_ratio = 0;
0904
0905 if (unlikely(dtc->dirty >= limit))
0906 return;
0907
0908
0909
0910
0911
0912
0913 setpoint = (freerun + limit) / 2;
0914 pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit);
0915
0916
0917
0918
0919
0920
0921
0922
0923
0924
0925
0926
0927
0928
0929
0930
0931
0932
0933
0934
0935
0936
0937
0938
0939
0940
0941 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
0942 long long wb_pos_ratio;
0943
0944 if (dtc->wb_dirty < 8) {
0945 dtc->pos_ratio = min_t(long long, pos_ratio * 2,
0946 2 << RATELIMIT_CALC_SHIFT);
0947 return;
0948 }
0949
0950 if (dtc->wb_dirty >= wb_thresh)
0951 return;
0952
0953 wb_setpoint = dirty_freerun_ceiling(wb_thresh,
0954 dtc->wb_bg_thresh);
0955
0956 if (wb_setpoint == 0 || wb_setpoint == wb_thresh)
0957 return;
0958
0959 wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty,
0960 wb_thresh);
0961
0962
0963
0964
0965
0966
0967
0968
0969
0970
0971
0972
0973
0974
0975
0976
0977
0978
0979
0980
0981
0982
0983 dtc->pos_ratio = min(pos_ratio, wb_pos_ratio);
0984 return;
0985 }
0986
0987
0988
0989
0990
0991
0992
0993
0994
0995
0996
0997
0998
0999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018 if (unlikely(wb_thresh > dtc->thresh))
1019 wb_thresh = dtc->thresh;
1020
1021
1022
1023
1024
1025
1026
1027 wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
1028
1029
1030
1031
1032 x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1);
1033 wb_setpoint = setpoint * (u64)x >> 16;
1034
1035
1036
1037
1038
1039
1040
1041
1042 span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16;
1043 x_intercept = wb_setpoint + span;
1044
1045 if (dtc->wb_dirty < x_intercept - span / 4) {
1046 pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty),
1047 (x_intercept - wb_setpoint) | 1);
1048 } else
1049 pos_ratio /= 4;
1050
1051
1052
1053
1054
1055
1056 x_intercept = wb_thresh / 2;
1057 if (dtc->wb_dirty < x_intercept) {
1058 if (dtc->wb_dirty > x_intercept / 8)
1059 pos_ratio = div_u64(pos_ratio * x_intercept,
1060 dtc->wb_dirty);
1061 else
1062 pos_ratio *= 8;
1063 }
1064
1065 dtc->pos_ratio = pos_ratio;
1066 }
1067
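/*
 * Estimate the wb's write bandwidth from the pages written during the last
 * elapsed period:
 *
 *	bw = written * HZ / elapsed
 *
 * blended into the previous estimate over a ~3s window:
 *
 *	write_bandwidth = (bw * elapsed + write_bandwidth * (period - elapsed)) / period
 *
 * wb->avg_write_bandwidth applies one more level of smoothing on top, to
 * filter out sudden spikes.
 */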
1068 static void wb_update_write_bandwidth(struct bdi_writeback *wb,
1069 unsigned long elapsed,
1070 unsigned long written)
1071 {
1072 const unsigned long period = roundup_pow_of_two(3 * HZ);
1073 unsigned long avg = wb->avg_write_bandwidth;
1074 unsigned long old = wb->write_bandwidth;
1075 u64 bw;
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087 bw = written - min(written, wb->written_stamp);
1088 bw *= HZ;
1089 if (unlikely(elapsed > period)) {
1090 bw = div64_ul(bw, elapsed);
1091 avg = bw;
1092 goto out;
1093 }
1094 bw += (u64)wb->write_bandwidth * (period - elapsed);
1095 bw >>= ilog2(period);
1096
1097
1098
1099
1100 if (avg > old && old >= (unsigned long)bw)
1101 avg -= (avg - old) >> 3;
1102
1103 if (avg < old && old <= (unsigned long)bw)
1104 avg += (old - avg) >> 3;
1105
1106 out:
1107
1108 avg = max(avg, 1LU);
1109 if (wb_has_dirty_io(wb)) {
1110 long delta = avg - wb->avg_write_bandwidth;
1111 WARN_ON_ONCE(atomic_long_add_return(delta,
1112 &wb->bdi->tot_write_bandwidth) <= 0);
1113 }
1114 wb->write_bandwidth = bw;
1115 WRITE_ONCE(wb->avg_write_bandwidth, avg);
1116 }
1117
1118 static void update_dirty_limit(struct dirty_throttle_control *dtc)
1119 {
1120 struct wb_domain *dom = dtc_dom(dtc);
1121 unsigned long thresh = dtc->thresh;
1122 unsigned long limit = dom->dirty_limit;
1123
1124
1125
1126
1127 if (limit < thresh) {
1128 limit = thresh;
1129 goto update;
1130 }
1131
1132
1133
1134
1135
1136
1137 thresh = max(thresh, dtc->dirty);
1138 if (limit > thresh) {
1139 limit -= (limit - thresh) >> 5;
1140 goto update;
1141 }
1142 return;
1143 update:
1144 dom->dirty_limit = limit;
1145 }
1146
1147 static void domain_update_dirty_limit(struct dirty_throttle_control *dtc,
1148 unsigned long now)
1149 {
1150 struct wb_domain *dom = dtc_dom(dtc);
1151
1152
1153
1154
1155 if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
1156 return;
1157
1158 spin_lock(&dom->lock);
1159 if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
1160 update_dirty_limit(dtc);
1161 dom->dirty_limit_tstamp = now;
1162 }
1163 spin_unlock(&dom->lock);
1164 }
1165
1166
1167
1168
1169
1170
1171
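/*
 * Maintain wb->dirty_ratelimit, the base throttle rate for tasks dirtying
 * pages against this wb.
 *
 * The target is the rate that, if imposed on all dirtier tasks, would
 * balance the dirtying rate against the device's writeout rate:
 *
 *	balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate
 *
 * wb->dirty_ratelimit is then stepped towards balanced_dirty_ratelimit,
 * but only when the position control (dirty vs. setpoint) agrees with the
 * direction of the step, which filters out estimation noise in either
 * signal alone.
 */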
1172 static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
1173 unsigned long dirtied,
1174 unsigned long elapsed)
1175 {
1176 struct bdi_writeback *wb = dtc->wb;
1177 unsigned long dirty = dtc->dirty;
1178 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
1179 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
1180 unsigned long setpoint = (freerun + limit) / 2;
1181 unsigned long write_bw = wb->avg_write_bandwidth;
1182 unsigned long dirty_ratelimit = wb->dirty_ratelimit;
1183 unsigned long dirty_rate;
1184 unsigned long task_ratelimit;
1185 unsigned long balanced_dirty_ratelimit;
1186 unsigned long step;
1187 unsigned long x;
1188 unsigned long shift;
1189
1190
1191
1192
1193
1194 dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed;
1195
1196
1197
1198
1199 task_ratelimit = (u64)dirty_ratelimit *
1200 dtc->pos_ratio >> RATELIMIT_CALC_SHIFT;
1201 task_ratelimit++;
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233 balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
1234 dirty_rate | 1);
1235
1236
1237
1238 if (unlikely(balanced_dirty_ratelimit > write_bw))
1239 balanced_dirty_ratelimit = write_bw;
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275 step = 0;
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
1289 dirty = dtc->wb_dirty;
1290 if (dtc->wb_dirty < 8)
1291 setpoint = dtc->wb_dirty + 1;
1292 else
1293 setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
1294 }
1295
1296 if (dirty < setpoint) {
1297 x = min3(wb->balanced_dirty_ratelimit,
1298 balanced_dirty_ratelimit, task_ratelimit);
1299 if (dirty_ratelimit < x)
1300 step = x - dirty_ratelimit;
1301 } else {
1302 x = max3(wb->balanced_dirty_ratelimit,
1303 balanced_dirty_ratelimit, task_ratelimit);
1304 if (dirty_ratelimit > x)
1305 step = dirty_ratelimit - x;
1306 }
1307
1308
1309
1310
1311
1312
1313 shift = dirty_ratelimit / (2 * step + 1);
1314 if (shift < BITS_PER_LONG)
1315 step = DIV_ROUND_UP(step >> shift, 8);
1316 else
1317 step = 0;
1318
1319 if (dirty_ratelimit < balanced_dirty_ratelimit)
1320 dirty_ratelimit += step;
1321 else
1322 dirty_ratelimit -= step;
1323
1324 WRITE_ONCE(wb->dirty_ratelimit, max(dirty_ratelimit, 1UL));
1325 wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
1326
1327 trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
1328 }
1329
1330 static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
1331 struct dirty_throttle_control *mdtc,
1332 bool update_ratelimit)
1333 {
1334 struct bdi_writeback *wb = gdtc->wb;
1335 unsigned long now = jiffies;
1336 unsigned long elapsed;
1337 unsigned long dirtied;
1338 unsigned long written;
1339
1340 spin_lock(&wb->list_lock);
1341
1342
1343
1344
1345
1346
1347
1348 elapsed = max(now - wb->bw_time_stamp, 1UL);
1349 dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
1350 written = percpu_counter_read(&wb->stat[WB_WRITTEN]);
1351
1352 if (update_ratelimit) {
1353 domain_update_dirty_limit(gdtc, now);
1354 wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);
1355
1356
1357
1358
1359
1360 if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
1361 domain_update_dirty_limit(mdtc, now);
1362 wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
1363 }
1364 }
1365 wb_update_write_bandwidth(wb, elapsed, written);
1366
1367 wb->dirtied_stamp = dirtied;
1368 wb->written_stamp = written;
1369 WRITE_ONCE(wb->bw_time_stamp, now);
1370 spin_unlock(&wb->list_lock);
1371 }
1372
1373 void wb_update_bandwidth(struct bdi_writeback *wb)
1374 {
1375 struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
1376
1377 __wb_update_bandwidth(&gdtc, NULL, false);
1378 }
1379
1380
1381 #define WB_BANDWIDTH_IDLE_JIF (HZ)
1382
1383 static void wb_bandwidth_estimate_start(struct bdi_writeback *wb)
1384 {
1385 unsigned long now = jiffies;
1386 unsigned long elapsed = now - READ_ONCE(wb->bw_time_stamp);
1387
1388 if (elapsed > WB_BANDWIDTH_IDLE_JIF &&
1389 !atomic_read(&wb->writeback_inodes)) {
1390 spin_lock(&wb->list_lock);
1391 wb->dirtied_stamp = wb_stat(wb, WB_DIRTIED);
1392 wb->written_stamp = wb_stat(wb, WB_WRITTEN);
1393 WRITE_ONCE(wb->bw_time_stamp, now);
1394 spin_unlock(&wb->list_lock);
1395 }
1396 }
1397
1398
1399
1400
1401
1402
1403
1404
1405
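/*
 * After a task has dirtied this number of pages,
 * balance_dirty_pages_ratelimited() will look to see if it needs to start
 * dirty throttling.
 *
 * If the poll interval were too low, big NUMA machines would call the
 * expensive global counters too often, so scale it roughly with the square
 * root of the safety margin (the number of pages that may still be dirtied
 * without exceeding the dirty limit).
 */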
1406 static unsigned long dirty_poll_interval(unsigned long dirty,
1407 unsigned long thresh)
1408 {
1409 if (thresh > dirty)
1410 return 1UL << (ilog2(thresh - dirty) >> 1);
1411
1412 return 1;
1413 }
1414
1415 static unsigned long wb_max_pause(struct bdi_writeback *wb,
1416 unsigned long wb_dirty)
1417 {
1418 unsigned long bw = READ_ONCE(wb->avg_write_bandwidth);
1419 unsigned long t;
1420
1421
1422
1423
1424
1425
1426
1427
1428 t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
1429 t++;
1430
1431 return min_t(unsigned long, t, MAX_PAUSE);
1432 }
1433
1434 static long wb_min_pause(struct bdi_writeback *wb,
1435 long max_pause,
1436 unsigned long task_ratelimit,
1437 unsigned long dirty_ratelimit,
1438 int *nr_dirtied_pause)
1439 {
1440 long hi = ilog2(READ_ONCE(wb->avg_write_bandwidth));
1441 long lo = ilog2(READ_ONCE(wb->dirty_ratelimit));
1442 long t;
1443 long pause;
1444 int pages;
1445
1446
1447 t = max(1, HZ / 100);
1448
1449
1450
1451
1452
1453
1454
1455 if (hi > lo)
1456 t += (hi - lo) * (10 * HZ) / 1024;
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476 t = min(t, 1 + max_pause / 2);
1477 pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487 if (pages < DIRTY_POLL_THRESH) {
1488 t = max_pause;
1489 pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
1490 if (pages > DIRTY_POLL_THRESH) {
1491 pages = DIRTY_POLL_THRESH;
1492 t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
1493 }
1494 }
1495
1496 pause = HZ * pages / (task_ratelimit + 1);
1497 if (pause > max_pause) {
1498 t = max_pause;
1499 pages = task_ratelimit * t / roundup_pow_of_two(HZ);
1500 }
1501
1502 *nr_dirtied_pause = pages;
1503
1504
1505
1506 return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
1507 }
1508
1509 static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
1510 {
1511 struct bdi_writeback *wb = dtc->wb;
1512 unsigned long wb_reclaimable;
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527 dtc->wb_thresh = __wb_calc_thresh(dtc);
1528 dtc->wb_bg_thresh = dtc->thresh ?
1529 div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541 if (dtc->wb_thresh < 2 * wb_stat_error()) {
1542 wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
1543 dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK);
1544 } else {
1545 wb_reclaimable = wb_stat(wb, WB_RECLAIMABLE);
1546 dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK);
1547 }
1548 }
1549
1550
1551
1552
1553
1554
1555
1556
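/*
 * balance_dirty_pages() must be called by processes which are generating
 * dirty data.  It looks at the number of dirty pages in the machine and
 * will force the caller to pause once the freerun ceiling (halfway between
 * the background and the hard dirty threshold) is crossed.  If the system
 * is over the background threshold, the writeback threads are woken to
 * perform some writeout.
 *
 * Return: 0 if the caller may continue, or -EAGAIN when throttling is
 * required but @flags contains BDP_ASYNC and sleeping is not allowed.
 */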
1557 static int balance_dirty_pages(struct bdi_writeback *wb,
1558 unsigned long pages_dirtied, unsigned int flags)
1559 {
1560 struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
1561 struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
1562 struct dirty_throttle_control * const gdtc = &gdtc_stor;
1563 struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
1564 &mdtc_stor : NULL;
1565 struct dirty_throttle_control *sdtc;
1566 unsigned long nr_reclaimable;
1567 long period;
1568 long pause;
1569 long max_pause;
1570 long min_pause;
1571 int nr_dirtied_pause;
1572 bool dirty_exceeded = false;
1573 unsigned long task_ratelimit;
1574 unsigned long dirty_ratelimit;
1575 struct backing_dev_info *bdi = wb->bdi;
1576 bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
1577 unsigned long start_time = jiffies;
1578 int ret = 0;
1579
1580 for (;;) {
1581 unsigned long now = jiffies;
1582 unsigned long dirty, thresh, bg_thresh;
1583 unsigned long m_dirty = 0;
1584 unsigned long m_thresh = 0;
1585 unsigned long m_bg_thresh = 0;
1586
1587 nr_reclaimable = global_node_page_state(NR_FILE_DIRTY);
1588 gdtc->avail = global_dirtyable_memory();
1589 gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);
1590
1591 domain_dirty_limits(gdtc);
1592
1593 if (unlikely(strictlimit)) {
1594 wb_dirty_limits(gdtc);
1595
1596 dirty = gdtc->wb_dirty;
1597 thresh = gdtc->wb_thresh;
1598 bg_thresh = gdtc->wb_bg_thresh;
1599 } else {
1600 dirty = gdtc->dirty;
1601 thresh = gdtc->thresh;
1602 bg_thresh = gdtc->bg_thresh;
1603 }
1604
1605 if (mdtc) {
1606 unsigned long filepages, headroom, writeback;
1607
1608
1609
1610
1611
1612 mem_cgroup_wb_stats(wb, &filepages, &headroom,
1613 &mdtc->dirty, &writeback);
1614 mdtc->dirty += writeback;
1615 mdtc_calc_avail(mdtc, filepages, headroom);
1616
1617 domain_dirty_limits(mdtc);
1618
1619 if (unlikely(strictlimit)) {
1620 wb_dirty_limits(mdtc);
1621 m_dirty = mdtc->wb_dirty;
1622 m_thresh = mdtc->wb_thresh;
1623 m_bg_thresh = mdtc->wb_bg_thresh;
1624 } else {
1625 m_dirty = mdtc->dirty;
1626 m_thresh = mdtc->thresh;
1627 m_bg_thresh = mdtc->bg_thresh;
1628 }
1629 }
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640 if (!laptop_mode && nr_reclaimable > gdtc->bg_thresh &&
1641 !writeback_in_progress(wb))
1642 wb_start_background_writeback(wb);
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656 if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
1657 (!mdtc ||
1658 m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
1659 unsigned long intv;
1660 unsigned long m_intv;
1661
1662 free_running:
1663 intv = dirty_poll_interval(dirty, thresh);
1664 m_intv = ULONG_MAX;
1665
1666 current->dirty_paused_when = now;
1667 current->nr_dirtied = 0;
1668 if (mdtc)
1669 m_intv = dirty_poll_interval(m_dirty, m_thresh);
1670 current->nr_dirtied_pause = min(intv, m_intv);
1671 break;
1672 }
1673
1674
1675 if (unlikely(!writeback_in_progress(wb)))
1676 wb_start_background_writeback(wb);
1677
1678 mem_cgroup_flush_foreign(wb);
1679
1680
1681
1682
1683
1684 if (!strictlimit) {
1685 wb_dirty_limits(gdtc);
1686
1687 if ((current->flags & PF_LOCAL_THROTTLE) &&
1688 gdtc->wb_dirty <
1689 dirty_freerun_ceiling(gdtc->wb_thresh,
1690 gdtc->wb_bg_thresh))
1691
1692
1693
1694
1695 goto free_running;
1696 }
1697
1698 dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
1699 ((gdtc->dirty > gdtc->thresh) || strictlimit);
1700
1701 wb_position_ratio(gdtc);
1702 sdtc = gdtc;
1703
1704 if (mdtc) {
1705
1706
1707
1708
1709
1710
1711 if (!strictlimit) {
1712 wb_dirty_limits(mdtc);
1713
1714 if ((current->flags & PF_LOCAL_THROTTLE) &&
1715 mdtc->wb_dirty <
1716 dirty_freerun_ceiling(mdtc->wb_thresh,
1717 mdtc->wb_bg_thresh))
1718
1719
1720
1721
1722
1723 goto free_running;
1724 }
1725 dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
1726 ((mdtc->dirty > mdtc->thresh) || strictlimit);
1727
1728 wb_position_ratio(mdtc);
1729 if (mdtc->pos_ratio < gdtc->pos_ratio)
1730 sdtc = mdtc;
1731 }
1732
1733 if (dirty_exceeded != wb->dirty_exceeded)
1734 wb->dirty_exceeded = dirty_exceeded;
1735
1736 if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
1737 BANDWIDTH_INTERVAL))
1738 __wb_update_bandwidth(gdtc, mdtc, true);
1739
1740
1741 dirty_ratelimit = READ_ONCE(wb->dirty_ratelimit);
1742 task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
1743 RATELIMIT_CALC_SHIFT;
1744 max_pause = wb_max_pause(wb, sdtc->wb_dirty);
1745 min_pause = wb_min_pause(wb, max_pause,
1746 task_ratelimit, dirty_ratelimit,
1747 &nr_dirtied_pause);
1748
1749 if (unlikely(task_ratelimit == 0)) {
1750 period = max_pause;
1751 pause = max_pause;
1752 goto pause;
1753 }
1754 period = HZ * pages_dirtied / task_ratelimit;
1755 pause = period;
1756 if (current->dirty_paused_when)
1757 pause -= now - current->dirty_paused_when;
1758
1759
1760
1761
1762
1763
1764
1765 if (pause < min_pause) {
1766 trace_balance_dirty_pages(wb,
1767 sdtc->thresh,
1768 sdtc->bg_thresh,
1769 sdtc->dirty,
1770 sdtc->wb_thresh,
1771 sdtc->wb_dirty,
1772 dirty_ratelimit,
1773 task_ratelimit,
1774 pages_dirtied,
1775 period,
1776 min(pause, 0L),
1777 start_time);
1778 if (pause < -HZ) {
1779 current->dirty_paused_when = now;
1780 current->nr_dirtied = 0;
1781 } else if (period) {
1782 current->dirty_paused_when += period;
1783 current->nr_dirtied = 0;
1784 } else if (current->nr_dirtied_pause <= pages_dirtied)
1785 current->nr_dirtied_pause += pages_dirtied;
1786 break;
1787 }
1788 if (unlikely(pause > max_pause)) {
1789
1790 now += min(pause - max_pause, max_pause);
1791 pause = max_pause;
1792 }
1793
1794 pause:
1795 trace_balance_dirty_pages(wb,
1796 sdtc->thresh,
1797 sdtc->bg_thresh,
1798 sdtc->dirty,
1799 sdtc->wb_thresh,
1800 sdtc->wb_dirty,
1801 dirty_ratelimit,
1802 task_ratelimit,
1803 pages_dirtied,
1804 period,
1805 pause,
1806 start_time);
1807 if (flags & BDP_ASYNC) {
1808 ret = -EAGAIN;
1809 break;
1810 }
1811 __set_current_state(TASK_KILLABLE);
1812 wb->dirty_sleep = now;
1813 io_schedule_timeout(pause);
1814
1815 current->dirty_paused_when = now + pause;
1816 current->nr_dirtied = 0;
1817 current->nr_dirtied_pause = nr_dirtied_pause;
1818
1819
1820
1821
1822
1823 if (task_ratelimit)
1824 break;
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836 if (sdtc->wb_dirty <= wb_stat_error())
1837 break;
1838
1839 if (fatal_signal_pending(current))
1840 break;
1841 }
1842 return ret;
1843 }
1844
1845 static DEFINE_PER_CPU(int, bdp_ratelimits);
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861 DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
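/**
 * balance_dirty_pages_ratelimited_flags - Balance dirty memory state.
 * @mapping: address_space which was dirtied.
 * @flags: BDP flags.
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * Return: If @flags contains BDP_ASYNC, it may return -EAGAIN to indicate
 * that memory is out of balance and the caller must wait for I/O to
 * complete.  Otherwise it returns 0, either because memory was already in
 * balance or because the task slept until it was.
 */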
1880 int balance_dirty_pages_ratelimited_flags(struct address_space *mapping,
1881 unsigned int flags)
1882 {
1883 struct inode *inode = mapping->host;
1884 struct backing_dev_info *bdi = inode_to_bdi(inode);
1885 struct bdi_writeback *wb = NULL;
1886 int ratelimit;
1887 int ret = 0;
1888 int *p;
1889
1890 if (!(bdi->capabilities & BDI_CAP_WRITEBACK))
1891 return ret;
1892
1893 if (inode_cgwb_enabled(inode))
1894 wb = wb_get_create_current(bdi, GFP_KERNEL);
1895 if (!wb)
1896 wb = &bdi->wb;
1897
1898 ratelimit = current->nr_dirtied_pause;
1899 if (wb->dirty_exceeded)
1900 ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
1901
1902 preempt_disable();
1903
1904
1905
1906
1907
1908
1909 p = this_cpu_ptr(&bdp_ratelimits);
1910 if (unlikely(current->nr_dirtied >= ratelimit))
1911 *p = 0;
1912 else if (unlikely(*p >= ratelimit_pages)) {
1913 *p = 0;
1914 ratelimit = 0;
1915 }
1916
1917
1918
1919
1920
1921 p = this_cpu_ptr(&dirty_throttle_leaks);
1922 if (*p > 0 && current->nr_dirtied < ratelimit) {
1923 unsigned long nr_pages_dirtied;
1924 nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
1925 *p -= nr_pages_dirtied;
1926 current->nr_dirtied += nr_pages_dirtied;
1927 }
1928 preempt_enable();
1929
1930 if (unlikely(current->nr_dirtied >= ratelimit))
1931 ret = balance_dirty_pages(wb, current->nr_dirtied, flags);
1932
1933 wb_put(wb);
1934 return ret;
1935 }
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
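/**
 * balance_dirty_pages_ratelimited - balance dirty memory state.
 * @mapping: address_space which was dirtied.
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * The common path only compares current->nr_dirtied and a per-CPU counter
 * against their ratelimits; the expensive balance_dirty_pages() is entered
 * only once either quota is exhausted.
 *
 * Illustrative call site in a filesystem's buffered-write loop (a sketch,
 * not taken from any particular filesystem):
 *
 *	while (bytes_left) {
 *		...copy data into the page cache and dirty the folio...
 *		balance_dirty_pages_ratelimited(mapping);
 *	}
 */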
1949 void balance_dirty_pages_ratelimited(struct address_space *mapping)
1950 {
1951 balance_dirty_pages_ratelimited_flags(mapping, 0);
1952 }
1953 EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
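/**
 * wb_over_bg_thresh - does @wb need to be written back?
 * @wb: bdi_writeback of interest
 *
 * Determines whether background writeback should keep writing @wb or it's
 * clean enough.
 *
 * Return: %true if writeback should continue.
 */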
1964 bool wb_over_bg_thresh(struct bdi_writeback *wb)
1965 {
1966 struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
1967 struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
1968 struct dirty_throttle_control * const gdtc = &gdtc_stor;
1969 struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
1970 &mdtc_stor : NULL;
1971 unsigned long reclaimable;
1972 unsigned long thresh;
1973
1974
1975
1976
1977
1978 gdtc->avail = global_dirtyable_memory();
1979 gdtc->dirty = global_node_page_state(NR_FILE_DIRTY);
1980 domain_dirty_limits(gdtc);
1981
1982 if (gdtc->dirty > gdtc->bg_thresh)
1983 return true;
1984
1985 thresh = wb_calc_thresh(gdtc->wb, gdtc->bg_thresh);
1986 if (thresh < 2 * wb_stat_error())
1987 reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
1988 else
1989 reclaimable = wb_stat(wb, WB_RECLAIMABLE);
1990
1991 if (reclaimable > thresh)
1992 return true;
1993
1994 if (mdtc) {
1995 unsigned long filepages, headroom, writeback;
1996
1997 mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty,
1998 &writeback);
1999 mdtc_calc_avail(mdtc, filepages, headroom);
2000 domain_dirty_limits(mdtc);
2001
2002 if (mdtc->dirty > mdtc->bg_thresh)
2003 return true;
2004
2005 thresh = wb_calc_thresh(mdtc->wb, mdtc->bg_thresh);
2006 if (thresh < 2 * wb_stat_error())
2007 reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
2008 else
2009 reclaimable = wb_stat(wb, WB_RECLAIMABLE);
2010
2011 if (reclaimable > thresh)
2012 return true;
2013 }
2014
2015 return false;
2016 }
2017
2018 #ifdef CONFIG_SYSCTL
2019
2020
2021
2022 static int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
2023 void *buffer, size_t *length, loff_t *ppos)
2024 {
2025 unsigned int old_interval = dirty_writeback_interval;
2026 int ret;
2027
2028 ret = proc_dointvec(table, write, buffer, length, ppos);
2029
2030
2031
2032
2033
2034
2035
2036
2037 if (!ret && write && dirty_writeback_interval &&
2038 dirty_writeback_interval != old_interval)
2039 wakeup_flusher_threads(WB_REASON_PERIODIC);
2040
2041 return ret;
2042 }
2043 #endif
2044
2045 void laptop_mode_timer_fn(struct timer_list *t)
2046 {
2047 struct backing_dev_info *backing_dev_info =
2048 from_timer(backing_dev_info, t, laptop_mode_wb_timer);
2049
2050 wakeup_flusher_threads_bdi(backing_dev_info, WB_REASON_LAPTOP_TIMER);
2051 }
2052
2053
2054
2055
2056
2057
2058 void laptop_io_completion(struct backing_dev_info *info)
2059 {
2060 mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
2061 }
2062
2063
2064
2065
2066
2067
2068 void laptop_sync_completion(void)
2069 {
2070 struct backing_dev_info *bdi;
2071
2072 rcu_read_lock();
2073
2074 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
2075 del_timer(&bdi->laptop_mode_wb_timer);
2076
2077 rcu_read_unlock();
2078 }
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089 void writeback_set_ratelimit(void)
2090 {
2091 struct wb_domain *dom = &global_wb_domain;
2092 unsigned long background_thresh;
2093 unsigned long dirty_thresh;
2094
2095 global_dirty_limits(&background_thresh, &dirty_thresh);
2096 dom->dirty_limit = dirty_thresh;
2097 ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
2098 if (ratelimit_pages < 16)
2099 ratelimit_pages = 16;
2100 }
2101
2102 static int page_writeback_cpu_online(unsigned int cpu)
2103 {
2104 writeback_set_ratelimit();
2105 return 0;
2106 }
2107
2108 #ifdef CONFIG_SYSCTL
2109
2110
2111 static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
2112
2113 static struct ctl_table vm_page_writeback_sysctls[] = {
2114 {
2115 .procname = "dirty_background_ratio",
2116 .data = &dirty_background_ratio,
2117 .maxlen = sizeof(dirty_background_ratio),
2118 .mode = 0644,
2119 .proc_handler = dirty_background_ratio_handler,
2120 .extra1 = SYSCTL_ZERO,
2121 .extra2 = SYSCTL_ONE_HUNDRED,
2122 },
2123 {
2124 .procname = "dirty_background_bytes",
2125 .data = &dirty_background_bytes,
2126 .maxlen = sizeof(dirty_background_bytes),
2127 .mode = 0644,
2128 .proc_handler = dirty_background_bytes_handler,
2129 .extra1 = SYSCTL_LONG_ONE,
2130 },
2131 {
2132 .procname = "dirty_ratio",
2133 .data = &vm_dirty_ratio,
2134 .maxlen = sizeof(vm_dirty_ratio),
2135 .mode = 0644,
2136 .proc_handler = dirty_ratio_handler,
2137 .extra1 = SYSCTL_ZERO,
2138 .extra2 = SYSCTL_ONE_HUNDRED,
2139 },
2140 {
2141 .procname = "dirty_bytes",
2142 .data = &vm_dirty_bytes,
2143 .maxlen = sizeof(vm_dirty_bytes),
2144 .mode = 0644,
2145 .proc_handler = dirty_bytes_handler,
2146 .extra1 = (void *)&dirty_bytes_min,
2147 },
2148 {
2149 .procname = "dirty_writeback_centisecs",
2150 .data = &dirty_writeback_interval,
2151 .maxlen = sizeof(dirty_writeback_interval),
2152 .mode = 0644,
2153 .proc_handler = dirty_writeback_centisecs_handler,
2154 },
2155 {
2156 .procname = "dirty_expire_centisecs",
2157 .data = &dirty_expire_interval,
2158 .maxlen = sizeof(dirty_expire_interval),
2159 .mode = 0644,
2160 .proc_handler = proc_dointvec_minmax,
2161 .extra1 = SYSCTL_ZERO,
2162 },
2163 #ifdef CONFIG_HIGHMEM
2164 {
2165 .procname = "highmem_is_dirtyable",
2166 .data = &vm_highmem_is_dirtyable,
2167 .maxlen = sizeof(vm_highmem_is_dirtyable),
2168 .mode = 0644,
2169 .proc_handler = proc_dointvec_minmax,
2170 .extra1 = SYSCTL_ZERO,
2171 .extra2 = SYSCTL_ONE,
2172 },
2173 #endif
2174 {
2175 .procname = "laptop_mode",
2176 .data = &laptop_mode,
2177 .maxlen = sizeof(laptop_mode),
2178 .mode = 0644,
2179 .proc_handler = proc_dointvec_jiffies,
2180 },
2181 {}
2182 };
2183 #endif
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201 void __init page_writeback_init(void)
2202 {
2203 BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
2204
2205 cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online",
2206 page_writeback_cpu_online, NULL);
2207 cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL,
2208 page_writeback_cpu_online);
2209 #ifdef CONFIG_SYSCTL
2210 register_sysctl_init("vm", vm_page_writeback_sysctls);
2211 #endif
2212 }
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
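/**
 * tag_pages_for_writeback - tag pages to be written by write_cache_pages()
 * @mapping: address space structure to write
 * @start: starting page index
 * @end: ending page index (inclusive)
 *
 * This function scans the page range from @start to @end (inclusive) and tags
 * all pages that have the DIRTY tag set with a special TOWRITE tag.  The
 * caller can then use the TOWRITE tag to identify pages eligible for
 * writeback.  This mechanism is used to avoid livelocking of writeback by a
 * process steadily creating new dirty pages in the file.
 */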
2228 void tag_pages_for_writeback(struct address_space *mapping,
2229 pgoff_t start, pgoff_t end)
2230 {
2231 XA_STATE(xas, &mapping->i_pages, start);
2232 unsigned int tagged = 0;
2233 void *page;
2234
2235 xas_lock_irq(&xas);
2236 xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
2237 xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
2238 if (++tagged % XA_CHECK_SCHED)
2239 continue;
2240
2241 xas_pause(&xas);
2242 xas_unlock_irq(&xas);
2243 cond_resched();
2244 xas_lock_irq(&xas);
2245 }
2246 xas_unlock_irq(&xas);
2247 }
2248 EXPORT_SYMBOL(tag_pages_for_writeback);
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
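/**
 * write_cache_pages - walk the list of dirty pages of the given address
 * space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to the writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even if it's
 * dirty.  That is fine for memory-cleaning writeback, but callers that need
 * data integrity (fsync) pass WB_SYNC_ALL and wait on in-flight writeback
 * instead.  To avoid livelocks against processes that keep dirtying new
 * pages, tagged writeback first marks the pages to be written with TOWRITE
 * and then writes only those.
 *
 * Return: %0 on success, or an error returned by @writepage.  See
 * __writepage() below for the simplest possible @writepage callback.
 */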
2281 int write_cache_pages(struct address_space *mapping,
2282 struct writeback_control *wbc, writepage_t writepage,
2283 void *data)
2284 {
2285 int ret = 0;
2286 int done = 0;
2287 int error;
2288 struct pagevec pvec;
2289 int nr_pages;
2290 pgoff_t index;
2291 pgoff_t end;
2292 pgoff_t done_index;
2293 int range_whole = 0;
2294 xa_mark_t tag;
2295
2296 pagevec_init(&pvec);
2297 if (wbc->range_cyclic) {
2298 index = mapping->writeback_index;
2299 end = -1;
2300 } else {
2301 index = wbc->range_start >> PAGE_SHIFT;
2302 end = wbc->range_end >> PAGE_SHIFT;
2303 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2304 range_whole = 1;
2305 }
2306 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
2307 tag_pages_for_writeback(mapping, index, end);
2308 tag = PAGECACHE_TAG_TOWRITE;
2309 } else {
2310 tag = PAGECACHE_TAG_DIRTY;
2311 }
2312 done_index = index;
2313 while (!done && (index <= end)) {
2314 int i;
2315
2316 nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
2317 tag);
2318 if (nr_pages == 0)
2319 break;
2320
2321 for (i = 0; i < nr_pages; i++) {
2322 struct page *page = pvec.pages[i];
2323
2324 done_index = page->index;
2325
2326 lock_page(page);
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336 if (unlikely(page->mapping != mapping)) {
2337 continue_unlock:
2338 unlock_page(page);
2339 continue;
2340 }
2341
2342 if (!PageDirty(page)) {
2343
2344 goto continue_unlock;
2345 }
2346
2347 if (PageWriteback(page)) {
2348 if (wbc->sync_mode != WB_SYNC_NONE)
2349 wait_on_page_writeback(page);
2350 else
2351 goto continue_unlock;
2352 }
2353
2354 BUG_ON(PageWriteback(page));
2355 if (!clear_page_dirty_for_io(page))
2356 goto continue_unlock;
2357
2358 trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
2359 error = (*writepage)(page, wbc, data);
2360 if (unlikely(error)) {
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373 if (error == AOP_WRITEPAGE_ACTIVATE) {
2374 unlock_page(page);
2375 error = 0;
2376 } else if (wbc->sync_mode != WB_SYNC_ALL) {
2377 ret = error;
2378 done_index = page->index + 1;
2379 done = 1;
2380 break;
2381 }
2382 if (!ret)
2383 ret = error;
2384 }
2385
2386
2387
2388
2389
2390
2391
2392 if (--wbc->nr_to_write <= 0 &&
2393 wbc->sync_mode == WB_SYNC_NONE) {
2394 done = 1;
2395 break;
2396 }
2397 }
2398 pagevec_release(&pvec);
2399 cond_resched();
2400 }
2401
2402
2403
2404
2405
2406
2407 if (wbc->range_cyclic && !done)
2408 done_index = 0;
2409 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2410 mapping->writeback_index = done_index;
2411
2412 return ret;
2413 }
2414 EXPORT_SYMBOL(write_cache_pages);
2415
2416
2417
2418
2419
2420 static int __writepage(struct page *page, struct writeback_control *wbc,
2421 void *data)
2422 {
2423 struct address_space *mapping = data;
2424 int ret = mapping->a_ops->writepage(page, wbc);
2425 mapping_set_error(mapping, ret);
2426 return ret;
2427 }
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439 int generic_writepages(struct address_space *mapping,
2440 struct writeback_control *wbc)
2441 {
2442 struct blk_plug plug;
2443 int ret;
2444
2445
2446 if (!mapping->a_ops->writepage)
2447 return 0;
2448
2449 blk_start_plug(&plug);
2450 ret = write_cache_pages(mapping, wbc, __writepage, mapping);
2451 blk_finish_plug(&plug);
2452 return ret;
2453 }
2454
2455 EXPORT_SYMBOL(generic_writepages);
2456
2457 int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
2458 {
2459 int ret;
2460 struct bdi_writeback *wb;
2461
2462 if (wbc->nr_to_write <= 0)
2463 return 0;
2464 wb = inode_to_wb_wbc(mapping->host, wbc);
2465 wb_bandwidth_estimate_start(wb);
2466 while (1) {
2467 if (mapping->a_ops->writepages)
2468 ret = mapping->a_ops->writepages(mapping, wbc);
2469 else
2470 ret = generic_writepages(mapping, wbc);
2471 if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL))
2472 break;
2473
2474
2475
2476
2477
2478
2479
2480 reclaim_throttle(NODE_DATA(numa_node_id()),
2481 VMSCAN_THROTTLE_WRITEBACK);
2482 }
2483
2484
2485
2486
2487
2488 if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
2489 BANDWIDTH_INTERVAL))
2490 wb_update_bandwidth(wb);
2491 return ret;
2492 }
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
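/**
 * folio_write_one - write out a single folio and wait on I/O.
 * @folio: The folio to write.
 *
 * The folio must be locked by the caller and will be unlocked upon return.
 *
 * Note that the mapping's AS_EIO/AS_ENOSPC error flags will be cleared
 * when this function returns.
 *
 * Return: %0 on success, negative error code otherwise.
 */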
2505 int folio_write_one(struct folio *folio)
2506 {
2507 struct address_space *mapping = folio->mapping;
2508 int ret = 0;
2509 struct writeback_control wbc = {
2510 .sync_mode = WB_SYNC_ALL,
2511 .nr_to_write = folio_nr_pages(folio),
2512 };
2513
2514 BUG_ON(!folio_test_locked(folio));
2515
2516 folio_wait_writeback(folio);
2517
2518 if (folio_clear_dirty_for_io(folio)) {
2519 folio_get(folio);
2520 ret = mapping->a_ops->writepage(&folio->page, &wbc);
2521 if (ret == 0)
2522 folio_wait_writeback(folio);
2523 folio_put(folio);
2524 } else {
2525 folio_unlock(folio);
2526 }
2527
2528 if (!ret)
2529 ret = filemap_check_errors(mapping);
2530 return ret;
2531 }
2532 EXPORT_SYMBOL(folio_write_one);
2533
2534
2535
2536
2537 bool noop_dirty_folio(struct address_space *mapping, struct folio *folio)
2538 {
2539 if (!folio_test_dirty(folio))
2540 return !folio_test_set_dirty(folio);
2541 return false;
2542 }
2543 EXPORT_SYMBOL(noop_dirty_folio);
2544
2545
2546
2547
2548
2549
2550
2551
2552 static void folio_account_dirtied(struct folio *folio,
2553 struct address_space *mapping)
2554 {
2555 struct inode *inode = mapping->host;
2556
2557 trace_writeback_dirty_folio(folio, mapping);
2558
2559 if (mapping_can_writeback(mapping)) {
2560 struct bdi_writeback *wb;
2561 long nr = folio_nr_pages(folio);
2562
2563 inode_attach_wb(inode, &folio->page);
2564 wb = inode_to_wb(inode);
2565
2566 __lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, nr);
2567 __zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr);
2568 __node_stat_mod_folio(folio, NR_DIRTIED, nr);
2569 wb_stat_mod(wb, WB_RECLAIMABLE, nr);
2570 wb_stat_mod(wb, WB_DIRTIED, nr);
2571 task_io_account_write(nr * PAGE_SIZE);
2572 current->nr_dirtied += nr;
2573 __this_cpu_add(bdp_ratelimits, nr);
2574
2575 mem_cgroup_track_foreign_dirty(folio, wb);
2576 }
2577 }
2578
2579
2580
2581
2582
2583
2584 void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb)
2585 {
2586 long nr = folio_nr_pages(folio);
2587
2588 lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr);
2589 zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
2590 wb_stat_mod(wb, WB_RECLAIMABLE, -nr);
2591 task_io_account_cancelled_write(nr * PAGE_SIZE);
2592 }
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607 void __folio_mark_dirty(struct folio *folio, struct address_space *mapping,
2608 int warn)
2609 {
2610 unsigned long flags;
2611
2612 xa_lock_irqsave(&mapping->i_pages, flags);
2613 if (folio->mapping) {
2614 WARN_ON_ONCE(warn && !folio_test_uptodate(folio));
2615 folio_account_dirtied(folio, mapping);
2616 __xa_set_mark(&mapping->i_pages, folio_index(folio),
2617 PAGECACHE_TAG_DIRTY);
2618 }
2619 xa_unlock_irqrestore(&mapping->i_pages, flags);
2620 }
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641 bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio)
2642 {
2643 folio_memcg_lock(folio);
2644 if (folio_test_set_dirty(folio)) {
2645 folio_memcg_unlock(folio);
2646 return false;
2647 }
2648
2649 __folio_mark_dirty(folio, mapping, !folio_test_private(folio));
2650 folio_memcg_unlock(folio);
2651
2652 if (mapping->host) {
2653
2654 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
2655 }
2656 return true;
2657 }
2658 EXPORT_SYMBOL(filemap_dirty_folio);
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672 void folio_account_redirty(struct folio *folio)
2673 {
2674 struct address_space *mapping = folio->mapping;
2675
2676 if (mapping && mapping_can_writeback(mapping)) {
2677 struct inode *inode = mapping->host;
2678 struct bdi_writeback *wb;
2679 struct wb_lock_cookie cookie = {};
2680 long nr = folio_nr_pages(folio);
2681
2682 wb = unlocked_inode_to_wb_begin(inode, &cookie);
2683 current->nr_dirtied -= nr;
2684 node_stat_mod_folio(folio, NR_DIRTIED, -nr);
2685 wb_stat_mod(wb, WB_DIRTIED, -nr);
2686 unlocked_inode_to_wb_end(inode, &cookie);
2687 }
2688 }
2689 EXPORT_SYMBOL(folio_account_redirty);
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703 bool folio_redirty_for_writepage(struct writeback_control *wbc,
2704 struct folio *folio)
2705 {
2706 bool ret;
2707 long nr = folio_nr_pages(folio);
2708
2709 wbc->pages_skipped += nr;
2710 ret = filemap_dirty_folio(folio->mapping, folio);
2711 folio_account_redirty(folio);
2712
2713 return ret;
2714 }
2715 EXPORT_SYMBOL(folio_redirty_for_writepage);
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
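/**
 * folio_mark_dirty - Mark a folio as being modified.
 * @folio: The folio.
 *
 * The folio may not be truncated while this function is running.  Holding
 * the folio lock is sufficient to prevent truncation, but some callers
 * instead hold the page table lock for a page table which contains at
 * least one page in this folio.
 *
 * Return: True if the folio was newly dirtied, false if it was already
 * dirty.
 */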
2730 bool folio_mark_dirty(struct folio *folio)
2731 {
2732 struct address_space *mapping = folio_mapping(folio);
2733
2734 if (likely(mapping)) {
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746 if (folio_test_reclaim(folio))
2747 folio_clear_reclaim(folio);
2748 return mapping->a_ops->dirty_folio(mapping, folio);
2749 }
2750
2751 return noop_dirty_folio(mapping, folio);
2752 }
2753 EXPORT_SYMBOL(folio_mark_dirty);
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765 int set_page_dirty_lock(struct page *page)
2766 {
2767 int ret;
2768
2769 lock_page(page);
2770 ret = set_page_dirty(page);
2771 unlock_page(page);
2772 return ret;
2773 }
2774 EXPORT_SYMBOL(set_page_dirty_lock);
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789 void __folio_cancel_dirty(struct folio *folio)
2790 {
2791 struct address_space *mapping = folio_mapping(folio);
2792
2793 if (mapping_can_writeback(mapping)) {
2794 struct inode *inode = mapping->host;
2795 struct bdi_writeback *wb;
2796 struct wb_lock_cookie cookie = {};
2797
2798 folio_memcg_lock(folio);
2799 wb = unlocked_inode_to_wb_begin(inode, &cookie);
2800
2801 if (folio_test_clear_dirty(folio))
2802 folio_account_cleaned(folio, wb);
2803
2804 unlocked_inode_to_wb_end(inode, &cookie);
2805 folio_memcg_unlock(folio);
2806 } else {
2807 folio_clear_dirty(folio);
2808 }
2809 }
2810 EXPORT_SYMBOL(__folio_cancel_dirty);
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
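/*
 * Clear a folio's dirty flag while taking care of the dirty memory
 * accounting, in preparation for putting the folio under writeout.
 *
 * The folio stays tagged dirty in the i_pages xarray so that concurrent
 * sync-style writeback can still find it; PG_dirty and the dirty counters
 * (NR_FILE_DIRTY, WB_RECLAIMABLE, ...) are what get updated here.  The
 * folio is also write-protected via folio_mkclean() first and re-dirtied
 * if any PTE was still writable, so later userspace writes re-dirty it
 * rather than being lost.
 *
 * Return: true if the folio was previously dirty and the caller therefore
 * owns the writeout, false otherwise.
 */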
2826 bool folio_clear_dirty_for_io(struct folio *folio)
2827 {
2828 struct address_space *mapping = folio_mapping(folio);
2829 bool ret = false;
2830
2831 VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
2832
2833 if (mapping && mapping_can_writeback(mapping)) {
2834 struct inode *inode = mapping->host;
2835 struct bdi_writeback *wb;
2836 struct wb_lock_cookie cookie = {};
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863 if (folio_mkclean(folio))
2864 folio_mark_dirty(folio);
2865
2866
2867
2868
2869
2870
2871
2872
2873 wb = unlocked_inode_to_wb_begin(inode, &cookie);
2874 if (folio_test_clear_dirty(folio)) {
2875 long nr = folio_nr_pages(folio);
2876 lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr);
2877 zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
2878 wb_stat_mod(wb, WB_RECLAIMABLE, -nr);
2879 ret = true;
2880 }
2881 unlocked_inode_to_wb_end(inode, &cookie);
2882 return ret;
2883 }
2884 return folio_test_clear_dirty(folio);
2885 }
2886 EXPORT_SYMBOL(folio_clear_dirty_for_io);
2887
2888 static void wb_inode_writeback_start(struct bdi_writeback *wb)
2889 {
2890 atomic_inc(&wb->writeback_inodes);
2891 }
2892
2893 static void wb_inode_writeback_end(struct bdi_writeback *wb)
2894 {
2895 unsigned long flags;
2896 atomic_dec(&wb->writeback_inodes);
2897
2898
2899
2900
2901
2902
2903
2904 spin_lock_irqsave(&wb->work_lock, flags);
2905 if (test_bit(WB_registered, &wb->state))
2906 queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
2907 spin_unlock_irqrestore(&wb->work_lock, flags);
2908 }
2909
2910 bool __folio_end_writeback(struct folio *folio)
2911 {
2912 long nr = folio_nr_pages(folio);
2913 struct address_space *mapping = folio_mapping(folio);
2914 bool ret;
2915
2916 folio_memcg_lock(folio);
2917 if (mapping && mapping_use_writeback_tags(mapping)) {
2918 struct inode *inode = mapping->host;
2919 struct backing_dev_info *bdi = inode_to_bdi(inode);
2920 unsigned long flags;
2921
2922 xa_lock_irqsave(&mapping->i_pages, flags);
2923 ret = folio_test_clear_writeback(folio);
2924 if (ret) {
2925 __xa_clear_mark(&mapping->i_pages, folio_index(folio),
2926 PAGECACHE_TAG_WRITEBACK);
2927 if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
2928 struct bdi_writeback *wb = inode_to_wb(inode);
2929
2930 wb_stat_mod(wb, WB_WRITEBACK, -nr);
2931 __wb_writeout_add(wb, nr);
2932 if (!mapping_tagged(mapping,
2933 PAGECACHE_TAG_WRITEBACK))
2934 wb_inode_writeback_end(wb);
2935 }
2936 }
2937
2938 if (mapping->host && !mapping_tagged(mapping,
2939 PAGECACHE_TAG_WRITEBACK))
2940 sb_clear_inode_writeback(mapping->host);
2941
2942 xa_unlock_irqrestore(&mapping->i_pages, flags);
2943 } else {
2944 ret = folio_test_clear_writeback(folio);
2945 }
2946 if (ret) {
2947 lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr);
2948 zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
2949 node_stat_mod_folio(folio, NR_WRITTEN, nr);
2950 }
2951 folio_memcg_unlock(folio);
2952 return ret;
2953 }
2954
2955 bool __folio_start_writeback(struct folio *folio, bool keep_write)
2956 {
2957 long nr = folio_nr_pages(folio);
2958 struct address_space *mapping = folio_mapping(folio);
2959 bool ret;
2960 int access_ret;
2961
2962 folio_memcg_lock(folio);
2963 if (mapping && mapping_use_writeback_tags(mapping)) {
2964 XA_STATE(xas, &mapping->i_pages, folio_index(folio));
2965 struct inode *inode = mapping->host;
2966 struct backing_dev_info *bdi = inode_to_bdi(inode);
2967 unsigned long flags;
2968
2969 xas_lock_irqsave(&xas, flags);
2970 xas_load(&xas);
2971 ret = folio_test_set_writeback(folio);
2972 if (!ret) {
2973 bool on_wblist;
2974
2975 on_wblist = mapping_tagged(mapping,
2976 PAGECACHE_TAG_WRITEBACK);
2977
2978 xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
2979 if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
2980 struct bdi_writeback *wb = inode_to_wb(inode);
2981
2982 wb_stat_mod(wb, WB_WRITEBACK, nr);
2983 if (!on_wblist)
2984 wb_inode_writeback_start(wb);
2985 }
2986
2987
2988
2989
2990
2991
2992 if (mapping->host && !on_wblist)
2993 sb_mark_inode_writeback(mapping->host);
2994 }
2995 if (!folio_test_dirty(folio))
2996 xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
2997 if (!keep_write)
2998 xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
2999 xas_unlock_irqrestore(&xas, flags);
3000 } else {
3001 ret = folio_test_set_writeback(folio);
3002 }
3003 if (!ret) {
3004 lruvec_stat_mod_folio(folio, NR_WRITEBACK, nr);
3005 zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr);
3006 }
3007 folio_memcg_unlock(folio);
3008 access_ret = arch_make_folio_accessible(folio);
3009
3010
3011
3012
3013 VM_BUG_ON_FOLIO(access_ret != 0, folio);
3014
3015 return ret;
3016 }
3017 EXPORT_SYMBOL(__folio_start_writeback);
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031 void folio_wait_writeback(struct folio *folio)
3032 {
3033 while (folio_test_writeback(folio)) {
3034 trace_folio_wait_writeback(folio, folio_mapping(folio));
3035 folio_wait_bit(folio, PG_writeback);
3036 }
3037 }
3038 EXPORT_SYMBOL_GPL(folio_wait_writeback);
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053 int folio_wait_writeback_killable(struct folio *folio)
3054 {
3055 while (folio_test_writeback(folio)) {
3056 trace_folio_wait_writeback(folio, folio_mapping(folio));
3057 if (folio_wait_bit_killable(folio, PG_writeback))
3058 return -EINTR;
3059 }
3060
3061 return 0;
3062 }
3063 EXPORT_SYMBOL_GPL(folio_wait_writeback_killable);
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
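/**
 * folio_wait_stable() - wait for writeback to finish, if necessary.
 * @folio: The folio to wait on.
 *
 * This function determines if the given folio is related to a backing
 * device that requires folio contents to be held stable during writeback.
 * If so, it waits for any pending writeback to complete before returning.
 */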
3078 void folio_wait_stable(struct folio *folio)
3079 {
3080 if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES)
3081 folio_wait_writeback(folio);
3082 }
3083 EXPORT_SYMBOL_GPL(folio_wait_stable);