0001 /* SPDX-License-Identifier: GPL-2.0 */
0002 /*
0003  * include/linux/writeback.h
0004  */
0005 #ifndef WRITEBACK_H
0006 #define WRITEBACK_H
0007 
0008 #include <linux/sched.h>
0009 #include <linux/workqueue.h>
0010 #include <linux/fs.h>
0011 #include <linux/flex_proportions.h>
0012 #include <linux/backing-dev-defs.h>
0013 #include <linux/blk_types.h>
0014 
struct bio;

/*
 * Per-CPU residue counter used by the dirty-throttling ratelimit code in
 * mm/page-writeback.c.
 * NOTE(review): exact semantics are not visible from this header — it appears
 * to accumulate pages dirtied past the per-task ratelimit; confirm against
 * balance_dirty_pages_ratelimited().
 */
DECLARE_PER_CPU(int, dirty_throttle_leaks);
0018 
/*
 * The 1/4 region under the global dirty thresh is for smooth dirty throttling:
 *
 *  (thresh - thresh/DIRTY_FULL_SCOPE, thresh)
 *
 * Further beyond, all dirtier tasks will enter a loop waiting (possibly long
 * time) for the dirty pages to drop, unless written enough pages.
 *
 * The global dirty threshold is normally equal to the global dirty limit,
 * except when the system suddenly allocates a lot of anonymous memory and
 * knocks down the global dirty threshold quickly, in which case the global
 * dirty limit will follow down slowly to prevent livelocking all dirtier tasks.
 */
#define DIRTY_SCOPE     8
#define DIRTY_FULL_SCOPE    (DIRTY_SCOPE / 2)   /* == 4, the "1/4 region" above */
0034 
0035 struct backing_dev_info;
0036 
0037 /*
0038  * fs/fs-writeback.c
0039  */
/* How much waiting a writeback pass does for in-flight IO. */
enum writeback_sync_modes {
    WB_SYNC_NONE,   /* Don't wait on anything */
    WB_SYNC_ALL,    /* Wait on every mapping */
};
0044 
/*
 * A control structure which tells the writeback code what to do.  These are
 * always on the stack, and hence need no locking.  They are always initialised
 * in a manner such that unspecified fields are set to zero.
 */
struct writeback_control {
    long nr_to_write;       /* Write this many pages, and decrement
                       this for each page written */
    long pages_skipped;     /* Pages which were not written */

    /*
     * For a_ops->writepages(): if start or end are non-zero then this is
     * a hint that the filesystem need only write out the pages inside that
     * byterange.  The byte at `end' is included in the writeout request.
     */
    loff_t range_start;
    loff_t range_end;

    enum writeback_sync_modes sync_mode;

    unsigned for_kupdate:1;     /* A kupdate writeback */
    unsigned for_background:1;  /* A background writeback */
    unsigned tagged_writepages:1;   /* tag-and-write to avoid livelock */
    unsigned for_reclaim:1;     /* Invoked from the page allocator */
    unsigned range_cyclic:1;    /* range_start is cyclic */
    unsigned for_sync:1;        /* sync(2) WB_SYNC_ALL writeback */
    unsigned unpinned_fscache_wb:1; /* Cleared I_PINNING_FSCACHE_WB */

    /*
     * When writeback IOs are bounced through async layers, only the
     * initial synchronous phase should be accounted towards inode
     * cgroup ownership arbitration to avoid confusion.  Later stages
     * can set the following flag to disable the accounting.
     */
    unsigned no_cgroup_owner:1;

    unsigned punt_to_cgroup:1;  /* cgrp punting, see __REQ_CGROUP_PUNT */

    /*
     * To enable batching of swap writes to non-block-device backends,
     * "swap_plug" can be set to point to a 'struct swap_iocb *'.  Once all
     * swap writes have been submitted and the swap_iocb is not NULL,
     * swap_write_unplug() should be called.
     */
    struct swap_iocb **swap_plug;

#ifdef CONFIG_CGROUP_WRITEBACK
    struct bdi_writeback *wb;   /* wb this writeback is issued under */
    struct inode *inode;        /* inode being written out */

    /* foreign inode detection, see wbc_detach_inode() */
    int wb_id;          /* current wb id */
    int wb_lcand_id;        /* last foreign candidate wb id */
    int wb_tcand_id;        /* this foreign candidate wb id */
    size_t wb_bytes;        /* bytes written by current wb */
    size_t wb_lcand_bytes;      /* bytes written by last candidate */
    size_t wb_tcand_bytes;      /* bytes written by this candidate */
#endif
};
0103 
0104 static inline blk_opf_t wbc_to_write_flags(struct writeback_control *wbc)
0105 {
0106     blk_opf_t flags = 0;
0107 
0108     if (wbc->punt_to_cgroup)
0109         flags = REQ_CGROUP_PUNT;
0110 
0111     if (wbc->sync_mode == WB_SYNC_ALL)
0112         flags |= REQ_SYNC;
0113     else if (wbc->for_kupdate || wbc->for_background)
0114         flags |= REQ_BACKGROUND;
0115 
0116     return flags;
0117 }
0118 
#ifdef CONFIG_CGROUP_WRITEBACK
/* blkcg css this writeback is accounted to; root css when no wb is attached. */
#define wbc_blkcg_css(wbc) \
    ((wbc)->wb ? (wbc)->wb->blkcg_css : blkcg_root_css)
#else
#define wbc_blkcg_css(wbc)      (blkcg_root_css)
#endif /* CONFIG_CGROUP_WRITEBACK */
0125 
/*
 * A wb_domain represents a domain that wb's (bdi_writeback's) belong to
 * and are measured against each other in.  There always is one global
 * domain, global_wb_domain, that every wb in the system is a member of.
 * This allows measuring the relative bandwidth of each wb to distribute
 * dirtyable memory accordingly.
 */
struct wb_domain {
    spinlock_t lock;

    /*
     * Scale the writeback cache size proportional to the relative
     * writeout speed.
     *
     * We do this by keeping a floating proportion between BDIs, based
     * on page writeback completions [end_page_writeback()]. Those
     * devices that write out pages fastest will get the larger share,
     * while the slower will get a smaller share.
     *
     * We use page writeout completions because we are interested in
     * getting rid of dirty pages. Having them written out is the
     * primary goal.
     *
     * We introduce a concept of time, a period over which we measure
     * these events, because demand can/will vary over time. The length
     * of this period itself is measured in page writeback completions.
     */
    struct fprop_global completions;
    struct timer_list period_timer; /* timer for aging of completions */
    unsigned long period_time;

    /*
     * The dirtyable memory and dirty threshold could be suddenly
     * knocked down by a large amount (eg. on the startup of KVM in a
     * swapless system). This may throw the system into deep dirty
     * exceeded state and throttle heavy/light dirtiers alike. To
     * retain good responsiveness, maintain global_dirty_limit for
     * tracking slowly down to the knocked down dirty threshold.
     *
     * Both fields are protected by ->lock.
     */
    unsigned long dirty_limit_tstamp;   /* jiffies of last dirty_limit update */
    unsigned long dirty_limit;      /* slowly tracked dirty threshold */
};
0170 
0171 /**
0172  * wb_domain_size_changed - memory available to a wb_domain has changed
0173  * @dom: wb_domain of interest
0174  *
0175  * This function should be called when the amount of memory available to
0176  * @dom has changed.  It resets @dom's dirty limit parameters to prevent
0177  * the past values which don't match the current configuration from skewing
0178  * dirty throttling.  Without this, when memory size of a wb_domain is
0179  * greatly reduced, the dirty throttling logic may allow too many pages to
0180  * be dirtied leading to consecutive unnecessary OOMs and may get stuck in
0181  * that situation.
0182  */
0183 static inline void wb_domain_size_changed(struct wb_domain *dom)
0184 {
0185     spin_lock(&dom->lock);
0186     dom->dirty_limit_tstamp = jiffies;
0187     dom->dirty_limit = 0;
0188     spin_unlock(&dom->lock);
0189 }
0190 
/*
 * fs/fs-writeback.c
 */
struct bdi_writeback;

/* Per-superblock writeback entry points. */
void writeback_inodes_sb(struct super_block *, enum wb_reason reason);
void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
                            enum wb_reason reason);
void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason);
void sync_inodes_sb(struct super_block *);

/* Kick the flusher threads for all bdis, or for one specific bdi. */
void wakeup_flusher_threads(enum wb_reason reason);
void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
                enum wb_reason reason);
void inode_wait_for_writeback(struct inode *inode);
void inode_io_list_del(struct inode *inode);
0205 
/* writeback.h requires fs.h; it, too, is not included from here. */
static inline void wait_on_inode(struct inode *inode)
{
    /* Sleep until __I_NEW is cleared, i.e. the inode is fully set up. */
    might_sleep();
    wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE);
}
0212 
0213 #ifdef CONFIG_CGROUP_WRITEBACK
0214 
0215 #include <linux/cgroup.h>
0216 #include <linux/bio.h>
0217 
/* cgroup writeback support; implemented in fs/fs-writeback.c. */
void __inode_attach_wb(struct inode *inode, struct page *page);
void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
                 struct inode *inode)
    __releases(&inode->i_lock);
void wbc_detach_inode(struct writeback_control *wbc);
void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
                  size_t bytes);
int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
               enum wb_reason reason, struct wb_completion *done);
void cgroup_writeback_umount(void);
bool cleanup_offline_cgwb(struct bdi_writeback *wb);
0229 
/**
 * inode_attach_wb - associate an inode with its wb
 * @inode: inode of interest
 * @page: page being dirtied (may be NULL)
 *
 * If @inode doesn't have its wb, associate it with the wb matching the
 * memcg of @page or, if @page is NULL, %current.  May be called w/ or w/o
 * @inode->i_lock.
 */
static inline void inode_attach_wb(struct inode *inode, struct page *page)
{
    /* Common case: already associated — skip the out-of-line slow path. */
    if (!inode->i_wb)
        __inode_attach_wb(inode, page);
}
0244 
0245 /**
0246  * inode_detach_wb - disassociate an inode from its wb
0247  * @inode: inode of interest
0248  *
0249  * @inode is being freed.  Detach from its wb.
0250  */
0251 static inline void inode_detach_wb(struct inode *inode)
0252 {
0253     if (inode->i_wb) {
0254         WARN_ON_ONCE(!(inode->i_state & I_CLEAR));
0255         wb_put(inode->i_wb);
0256         inode->i_wb = NULL;
0257     }
0258 }
0259 
/**
 * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite
 * @wbc: writeback_control of interest
 * @inode: target inode
 *
 * This function is to be used by __filemap_fdatawrite_range(), which is an
 * alternative entry point into writeback code, and first ensures @inode is
 * associated with a bdi_writeback and attaches it to @wbc.
 */
static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
                           struct inode *inode)
{
    /* i_lock is taken here and released by wbc_attach_and_unlock_inode(). */
    spin_lock(&inode->i_lock);
    inode_attach_wb(inode, NULL);
    wbc_attach_and_unlock_inode(wbc, inode);
}
0276 
/**
 * wbc_init_bio - writeback specific initialization of bio
 * @wbc: writeback_control for the writeback in progress
 * @bio: bio to be initialized
 *
 * @bio is a part of the writeback in progress controlled by @wbc.  Perform
 * writeback specific initialization.  This is used to apply the cgroup
 * writeback context.  Must be called after the bio has been associated with
 * a device.
 */
static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
{
    /*
     * pageout() path doesn't attach @wbc to the inode being written
     * out.  This is intentional as we don't want the function to block
     * behind a slow cgroup.  Ultimately, we want pageout() to kick off
     * regular writeback instead of writing things out itself.
     */
    if (wbc->wb)
        bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css);
}
0298 
0299 #else   /* CONFIG_CGROUP_WRITEBACK */
0300 
/*
 * !CONFIG_CGROUP_WRITEBACK: inodes are never associated with per-cgroup
 * wbs, so the attach/detach/accounting hooks all compile to no-ops.
 */
static inline void inode_attach_wb(struct inode *inode, struct page *page)
{
}

static inline void inode_detach_wb(struct inode *inode)
{
}

/* Still honours the locking contract of the real version: drops i_lock. */
static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
                           struct inode *inode)
    __releases(&inode->i_lock)
{
    spin_unlock(&inode->i_lock);
}

static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
                           struct inode *inode)
{
}

static inline void wbc_detach_inode(struct writeback_control *wbc)
{
}

static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
{
}

static inline void wbc_account_cgroup_owner(struct writeback_control *wbc,
                        struct page *page, size_t bytes)
{
}

static inline void cgroup_writeback_umount(void)
{
}
0337 
0338 #endif  /* CONFIG_CGROUP_WRITEBACK */
0339 
/*
 * mm/page-writeback.c
 */
/* laptop_mode delayed-writeback hooks */
void laptop_io_completion(struct backing_dev_info *info);
void laptop_sync_completion(void);
void laptop_mode_timer_fn(struct timer_list *t);
bool node_dirty_ok(struct pglist_data *pgdat);
int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
#ifdef CONFIG_CGROUP_WRITEBACK
void wb_domain_exit(struct wb_domain *dom);
#endif

extern struct wb_domain global_wb_domain;

/* These are exported to sysctl. */
extern unsigned int dirty_writeback_interval;
extern unsigned int dirty_expire_interval;
extern unsigned int dirtytime_expire_interval;
extern int laptop_mode;

int dirtytime_interval_handler(struct ctl_table *table, int write,
        void *buffer, size_t *lenp, loff_t *ppos);

void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);

void wb_update_bandwidth(struct bdi_writeback *wb);

/* Invoke balance dirty pages in async mode. */
#define BDP_ASYNC 0x0001

void balance_dirty_pages_ratelimited(struct address_space *mapping);
/* @flags takes BDP_* bits, e.g. BDP_ASYNC above. */
int balance_dirty_pages_ratelimited_flags(struct address_space *mapping,
        unsigned int flags);

bool wb_over_bg_thresh(struct bdi_writeback *wb);
0376 
0377 typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
0378                 void *data);
0379 
0380 int generic_writepages(struct address_space *mapping,
0381                struct writeback_control *wbc);
0382 void tag_pages_for_writeback(struct address_space *mapping,
0383                  pgoff_t start, pgoff_t end);
0384 int write_cache_pages(struct address_space *mapping,
0385               struct writeback_control *wbc, writepage_t writepage,
0386               void *data);
0387 int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
0388 void writeback_set_ratelimit(void);
0389 void tag_pages_for_writeback(struct address_space *mapping,
0390                  pgoff_t start, pgoff_t end);
0391 
bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio);
void folio_account_redirty(struct folio *folio);
/* Legacy page-based wrapper around folio_account_redirty(). */
static inline void account_page_redirty(struct page *page)
{
    folio_account_redirty(page_folio(page));
}
bool folio_redirty_for_writepage(struct writeback_control *, struct folio *);
bool redirty_page_for_writepage(struct writeback_control *, struct page *);

void sb_mark_inode_writeback(struct inode *inode);
void sb_clear_inode_writeback(struct inode *inode);
0403 
0404 #endif      /* WRITEBACK_H */