0001 /*
0002  * fs/mpage.c
0003  *
0004  * Copyright (C) 2002, Linus Torvalds.
0005  *
0006  * Contains functions related to preparing and submitting BIOs which contain
0007  * multiple pagecache pages.
0008  *
0009  * 15May2002    Andrew Morton
0010  *      Initial version
0011  * 27Jun2002    axboe@suse.de
0012  *      use bio_add_page() to build bio's just the right size
0013  */
0014 
0015 #include <linux/kernel.h>
0016 #include <linux/export.h>
0017 #include <linux/mm.h>
0018 #include <linux/kdev_t.h>
0019 #include <linux/gfp.h>
0020 #include <linux/bio.h>
0021 #include <linux/fs.h>
0022 #include <linux/buffer_head.h>
0023 #include <linux/blkdev.h>
0024 #include <linux/highmem.h>
0025 #include <linux/prefetch.h>
0026 #include <linux/mpage.h>
0027 #include <linux/mm_inline.h>
0028 #include <linux/writeback.h>
0029 #include <linux/backing-dev.h>
0030 #include <linux/pagevec.h>
0031 #include <linux/cleancache.h>
0032 #include "internal.h"
0033 
0034 /*
0035  * I/O completion handler for multipage BIOs.
0036  *
0037  * The mpage code never puts partial pages into a BIO (except for end-of-file).
0038  * If a page does not map to a contiguous run of blocks then it simply falls
0039  * back to block_read_full_page().
0040  *
0041  * Why is this?  If a page's completion depends on a number of different BIOs
0042  * which can complete in any order (or at the same time) then determining the
0043  * status of that page is hard.  See end_buffer_async_read() for the details.
0044  * There is no point in duplicating all that complexity.
0045  */
0046 static void mpage_end_io(struct bio *bio)
0047 {
0048     struct bio_vec *bv;
0049     int i;
0050 
0051     bio_for_each_segment_all(bv, bio, i) {
0052         struct page *page = bv->bv_page;
0053         page_endio(page, op_is_write(bio_op(bio)), bio->bi_error);
0054     }
0055 
0056     bio_put(bio);
0057 }
0058 
0059 static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio)
0060 {
0061     bio->bi_end_io = mpage_end_io;
0062     bio_set_op_attrs(bio, op, op_flags);
0063     guard_bio_eod(op, bio);
0064     submit_bio(bio);
0065     return NULL;
0066 }
0067 
0068 static struct bio *
0069 mpage_alloc(struct block_device *bdev,
0070         sector_t first_sector, int nr_vecs,
0071         gfp_t gfp_flags)
0072 {
0073     struct bio *bio;
0074 
0075     /* Restrict the given (page cache) mask for slab allocations */
0076     gfp_flags &= GFP_KERNEL;
0077     bio = bio_alloc(gfp_flags, nr_vecs);
0078 
0079     if (bio == NULL && (current->flags & PF_MEMALLOC)) {
0080         while (!bio && (nr_vecs /= 2))
0081             bio = bio_alloc(gfp_flags, nr_vecs);
0082     }
0083 
0084     if (bio) {
0085         bio->bi_bdev = bdev;
0086         bio->bi_iter.bi_sector = first_sector;
0087     }
0088     return bio;
0089 }
0090 
0091 /*
0092  * support function for mpage_readpages.  The fs-supplied get_block might
0093  * return an up-to-date buffer.  This is used to map that buffer into
0094  * the page, which allows readpage to avoid triggering a duplicate call
0095  * to get_block.
0096  *
0097  * The idea is to avoid adding buffers to pages that don't already have
0098  * them.  So when the buffer is up to date and the page size == block size,
0099  * this marks the page up to date instead of adding new buffers.
0100  */
0101 static void 
0102 map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) 
0103 {
0104     struct inode *inode = page->mapping->host;
0105     struct buffer_head *page_bh, *head;
0106     int block = 0;
0107 
0108     if (!page_has_buffers(page)) {
0109         /*
0110          * don't make any buffers if there is only one buffer on
0111          * the page and the page just needs to be set up to date
0112          */
0113         if (inode->i_blkbits == PAGE_SHIFT &&
0114             buffer_uptodate(bh)) {
0115             SetPageUptodate(page);    
0116             return;
0117         }
0118         create_empty_buffers(page, 1 << inode->i_blkbits, 0);
0119     }
0120     head = page_buffers(page);
0121     page_bh = head;
0122     do {
0123         if (block == page_block) {
0124             page_bh->b_state = bh->b_state;
0125             page_bh->b_bdev = bh->b_bdev;
0126             page_bh->b_blocknr = bh->b_blocknr;
0127             break;
0128         }
0129         page_bh = page_bh->b_this_page;
0130         block++;
0131     } while (page_bh != head);
0132 }
0133 
0134 /*
0135  * This is the worker routine which does all the work of mapping the disk
0136  * blocks and constructing the largest possible BIOs: a BIO is submitted
0137  * as soon as the blocks stop being contiguous on disk.
0138  *
0139  * We pass a buffer_head back and forth and use its buffer_mapped() flag to
0140  * represent the validity of its disk mapping and to decide when to do the next
0141  * get_block() call.
0142  */
0143 static struct bio *
0144 do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
0145         sector_t *last_block_in_bio, struct buffer_head *map_bh,
0146         unsigned long *first_logical_block, get_block_t get_block,
0147         gfp_t gfp)
0148 {
0149     struct inode *inode = page->mapping->host;
0150     const unsigned blkbits = inode->i_blkbits;
0151     const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
0152     const unsigned blocksize = 1 << blkbits;
0153     sector_t block_in_file;
0154     sector_t last_block;
0155     sector_t last_block_in_file;
0156     sector_t blocks[MAX_BUF_PER_PAGE];
0157     unsigned page_block;
0158     unsigned first_hole = blocks_per_page;
0159     struct block_device *bdev = NULL;
0160     int length;
0161     int fully_mapped = 1;
0162     unsigned nblocks;
0163     unsigned relative_block;
0164 
0165     if (page_has_buffers(page))
0166         goto confused;
0167 
0168     block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
0169     last_block = block_in_file + nr_pages * blocks_per_page;
0170     last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
0171     if (last_block > last_block_in_file)
0172         last_block = last_block_in_file;
0173     page_block = 0;
0174 
0175     /*
0176      * Map blocks using the result from the previous get_blocks call first.
0177      */
0178     nblocks = map_bh->b_size >> blkbits;
0179     if (buffer_mapped(map_bh) && block_in_file > *first_logical_block &&
0180             block_in_file < (*first_logical_block + nblocks)) {
0181         unsigned map_offset = block_in_file - *first_logical_block;
0182         unsigned last = nblocks - map_offset;
0183 
0184         for (relative_block = 0; ; relative_block++) {
0185             if (relative_block == last) {
0186                 clear_buffer_mapped(map_bh);
0187                 break;
0188             }
0189             if (page_block == blocks_per_page)
0190                 break;
0191             blocks[page_block] = map_bh->b_blocknr + map_offset +
0192                         relative_block;
0193             page_block++;
0194             block_in_file++;
0195         }
0196         bdev = map_bh->b_bdev;
0197     }
0198 
0199     /*
0200      * Then do more get_blocks calls until we are done with this page.
0201      */
0202     map_bh->b_page = page;
0203     while (page_block < blocks_per_page) {
0204         map_bh->b_state = 0;
0205         map_bh->b_size = 0;
0206 
0207         if (block_in_file < last_block) {
0208             map_bh->b_size = (last_block-block_in_file) << blkbits;
0209             if (get_block(inode, block_in_file, map_bh, 0))
0210                 goto confused;
0211             *first_logical_block = block_in_file;
0212         }
0213 
0214         if (!buffer_mapped(map_bh)) {
0215             fully_mapped = 0;
0216             if (first_hole == blocks_per_page)
0217                 first_hole = page_block;
0218             page_block++;
0219             block_in_file++;
0220             continue;
0221         }
0222 
0223         /* some filesystems will copy data into the page during
0224          * the get_block call, in which case we don't want to
0225          * read it again.  map_buffer_to_page copies the data
0226          * we just collected from get_block into the page's buffers
0227          * so readpage doesn't have to repeat the get_block call
0228          */
0229         if (buffer_uptodate(map_bh)) {
0230             map_buffer_to_page(page, map_bh, page_block);
0231             goto confused;
0232         }
0233     
0234         if (first_hole != blocks_per_page)
0235             goto confused;      /* hole -> non-hole */
0236 
0237         /* Contiguous blocks? */
0238         if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1)
0239             goto confused;
0240         nblocks = map_bh->b_size >> blkbits;
0241         for (relative_block = 0; ; relative_block++) {
0242             if (relative_block == nblocks) {
0243                 clear_buffer_mapped(map_bh);
0244                 break;
0245             } else if (page_block == blocks_per_page)
0246                 break;
0247             blocks[page_block] = map_bh->b_blocknr+relative_block;
0248             page_block++;
0249             block_in_file++;
0250         }
0251         bdev = map_bh->b_bdev;
0252     }
0253 
0254     if (first_hole != blocks_per_page) {
0255         zero_user_segment(page, first_hole << blkbits, PAGE_SIZE);
0256         if (first_hole == 0) {
0257             SetPageUptodate(page);
0258             unlock_page(page);
0259             goto out;
0260         }
0261     } else if (fully_mapped) {
0262         SetPageMappedToDisk(page);
0263     }
0264 
0265     if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) &&
0266         cleancache_get_page(page) == 0) {
0267         SetPageUptodate(page);
0268         goto confused;
0269     }
0270 
0271     /*
0272      * This page will go to BIO.  Do we need to send this BIO off first?
0273      */
0274     if (bio && (*last_block_in_bio != blocks[0] - 1))
0275         bio = mpage_bio_submit(REQ_OP_READ, 0, bio);
0276 
0277 alloc_new:
0278     if (bio == NULL) {
0279         if (first_hole == blocks_per_page) {
0280             if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9),
0281                                 page))
0282                 goto out;
0283         }
0284         bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
0285                 min_t(int, nr_pages, BIO_MAX_PAGES), gfp);
0286         if (bio == NULL)
0287             goto confused;
0288     }
0289 
0290     length = first_hole << blkbits;
0291     if (bio_add_page(bio, page, length, 0) < length) {
0292         bio = mpage_bio_submit(REQ_OP_READ, 0, bio);
0293         goto alloc_new;
0294     }
0295 
0296     relative_block = block_in_file - *first_logical_block;
0297     nblocks = map_bh->b_size >> blkbits;
0298     if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
0299         (first_hole != blocks_per_page))
0300         bio = mpage_bio_submit(REQ_OP_READ, 0, bio);
0301     else
0302         *last_block_in_bio = blocks[blocks_per_page - 1];
0303 out:
0304     return bio;
0305 
0306 confused:
0307     if (bio)
0308         bio = mpage_bio_submit(REQ_OP_READ, 0, bio);
0309     if (!PageUptodate(page))
0310         block_read_full_page(page, get_block);
0311     else
0312         unlock_page(page);
0313     goto out;
0314 }
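
/*
 * Illustrative only (not part of this file): a minimal sketch of the
 * get_block() contract that do_mpage_readpage() relies on.  The helpers
 * example_lookup_extent() and example_next_lookup_needs_io() are
 * hypothetical; a real filesystem performs its own block lookup here.
 *
 *	static int example_get_block(struct inode *inode, sector_t iblock,
 *			struct buffer_head *bh_result, int create)
 *	{
 *		sector_t phys;
 *		unsigned long nr;	// contiguous blocks found (hypothetical)
 *
 *		if (!example_lookup_extent(inode, iblock, &phys, &nr))
 *			return 0;	// bh left unmapped: this block is a hole
 *
 *		map_bh(bh_result, inode->i_sb, phys);
 *
 *		// The read path presets b_size to the most it can use, so a
 *		// filesystem may map several contiguous blocks in one call.
 *		bh_result->b_size = min_t(unsigned long, bh_result->b_size,
 *					  nr << inode->i_blkbits);
 *
 *		// If mapping the block after this extent needs a metadata
 *		// read (e.g. an indirect block), set BH_Boundary so the
 *		// accumulated BIO is pushed first (see the comment below).
 *		if (example_next_lookup_needs_io(inode, iblock + nr))
 *			set_buffer_boundary(bh_result);
 *		return 0;
 *	}
 */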
0315 
0316 /**
0317  * mpage_readpages - populate an address space with some pages & start reads against them
0318  * @mapping: the address_space
0319  * @pages: The address of a list_head which contains the target pages.  These
0320  *   pages have their ->index populated and are otherwise uninitialised.
0321  *   The page at @pages->prev has the lowest file offset, and reads should be
0322  *   issued in @pages->prev to @pages->next order.
0323  * @nr_pages: The number of pages at *@pages
0324  * @get_block: The filesystem's block mapper function.
0325  *
0326  * This function walks the pages and the blocks within each page, building and
0327  * emitting large BIOs.
0328  *
0329  * If anything unusual happens, such as:
0330  *
0331  * - encountering a page which has buffers
0332  * - encountering a page which has a non-hole after a hole
0333  * - encountering a page with non-contiguous blocks
0334  *
0335  * then this code just gives up and calls the buffer_head-based read function.
0336  * It does handle a page which has holes at the end - that is a common case:
0337  * the end-of-file on blocksize < PAGE_SIZE setups.
0338  *
0339  * BH_Boundary explanation:
0340  *
0341  * There is a problem.  The mpage read code assembles several pages, gets all
0342  * their disk mappings, and then submits them all.  That's fine, but obtaining
0343  * the disk mappings may require I/O.  Reads of indirect blocks, for example.
0344  *
0345  * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be
0346  * submitted in the following order:
0347  *  12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
0348  *
0349  * because the indirect block has to be read to get the mappings of blocks
0350  * 13,14,15,16.  Obviously, this impacts performance.
0351  *
0352  * So what we do is allow the filesystem's get_block() function to set
0353  * BH_Boundary when it maps block 11.  BH_Boundary says: mapping of the block
0354  * after this one will require I/O against a block which is probably close to
0355  * this one.  So you should push what I/O you have currently accumulated.
0356  *
0357  * This all causes the disk requests to be issued in the correct order.
0358  */
0359 int
0360 mpage_readpages(struct address_space *mapping, struct list_head *pages,
0361                 unsigned nr_pages, get_block_t get_block)
0362 {
0363     struct bio *bio = NULL;
0364     unsigned page_idx;
0365     sector_t last_block_in_bio = 0;
0366     struct buffer_head map_bh;
0367     unsigned long first_logical_block = 0;
0368     gfp_t gfp = readahead_gfp_mask(mapping);
0369 
0370     map_bh.b_state = 0;
0371     map_bh.b_size = 0;
0372     for (page_idx = 0; page_idx < nr_pages; page_idx++) {
0373         struct page *page = lru_to_page(pages);
0374 
0375         prefetchw(&page->flags);
0376         list_del(&page->lru);
0377         if (!add_to_page_cache_lru(page, mapping,
0378                     page->index,
0379                     gfp)) {
0380             bio = do_mpage_readpage(bio, page,
0381                     nr_pages - page_idx,
0382                     &last_block_in_bio, &map_bh,
0383                     &first_logical_block,
0384                     get_block, gfp);
0385         }
0386         put_page(page);
0387     }
0388     BUG_ON(!list_empty(pages));
0389     if (bio)
0390         mpage_bio_submit(REQ_OP_READ, 0, bio);
0391     return 0;
0392 }
0393 EXPORT_SYMBOL(mpage_readpages);
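
/*
 * Typical usage (an illustrative sketch, not part of this file): a
 * filesystem points its address_space_operations at these helpers and
 * supplies its own block mapper.  The example_* names are hypothetical;
 * ext2-style filesystems follow this pattern.
 *
 *	static int example_readpage(struct file *file, struct page *page)
 *	{
 *		return mpage_readpage(page, example_get_block);
 *	}
 *
 *	static int example_readpages(struct file *file,
 *			struct address_space *mapping,
 *			struct list_head *pages, unsigned nr_pages)
 *	{
 *		return mpage_readpages(mapping, pages, nr_pages,
 *					example_get_block);
 *	}
 *
 *	static const struct address_space_operations example_aops = {
 *		.readpage	= example_readpage,
 *		.readpages	= example_readpages,
 *		// writepage/writepages etc. omitted
 *	};
 */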
0394 
0395 /*
0396  * This isn't called much at all
0397  */
0398 int mpage_readpage(struct page *page, get_block_t get_block)
0399 {
0400     struct bio *bio = NULL;
0401     sector_t last_block_in_bio = 0;
0402     struct buffer_head map_bh;
0403     unsigned long first_logical_block = 0;
0404     gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
0405 
0406     map_bh.b_state = 0;
0407     map_bh.b_size = 0;
0408     bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
0409             &map_bh, &first_logical_block, get_block, gfp);
0410     if (bio)
0411         mpage_bio_submit(REQ_OP_READ, 0, bio);
0412     return 0;
0413 }
0414 EXPORT_SYMBOL(mpage_readpage);
0415 
0416 /*
0417  * Writing is not so simple.
0418  *
0419  * If the page has buffers then they will be used for obtaining the disk
0420  * mapping.  We only support pages which are fully mapped-and-dirty, with a
0421  * special case for pages which are unmapped at the end: end-of-file.
0422  *
0423  * If the page has no buffers (preferred) then the page is mapped here.
0424  *
0425  * If all blocks are found to be contiguous then the page can go into the
0426  * BIO.  Otherwise fall back to the mapping's writepage().
0427  * 
0428  * FIXME: This code wants an estimate of how many pages are still to be
0429  * written, so it can intelligently allocate a suitably-sized BIO.  For now,
0430  * just allocate full-size (BIO_MAX_PAGES) BIOs.
0431  */
0432 
0433 struct mpage_data {
0434     struct bio *bio;
0435     sector_t last_block_in_bio;
0436     get_block_t *get_block;
0437     unsigned use_writepage;
0438 };
0439 
0440 /*
0441  * We have our BIO, so we can now mark the buffers clean.  Make
0442  * sure to only clean buffers which we know we'll be writing.
0443  */
0444 static void clean_buffers(struct page *page, unsigned first_unmapped)
0445 {
0446     unsigned buffer_counter = 0;
0447     struct buffer_head *bh, *head;
0448     if (!page_has_buffers(page))
0449         return;
0450     head = page_buffers(page);
0451     bh = head;
0452 
0453     do {
0454         if (buffer_counter++ == first_unmapped)
0455             break;
0456         clear_buffer_dirty(bh);
0457         bh = bh->b_this_page;
0458     } while (bh != head);
0459 
0460     /*
0461      * we cannot drop the bh if the page is not uptodate or a concurrent
0462      * readpage would fail to serialize with the bh and it would read from
0463      * disk before we reach the platter.
0464      */
0465     if (buffer_heads_over_limit && PageUptodate(page))
0466         try_to_free_buffers(page);
0467 }
0468 
0469 static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
0470               void *data)
0471 {
0472     struct mpage_data *mpd = data;
0473     struct bio *bio = mpd->bio;
0474     struct address_space *mapping = page->mapping;
0475     struct inode *inode = page->mapping->host;
0476     const unsigned blkbits = inode->i_blkbits;
0477     unsigned long end_index;
0478     const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
0479     sector_t last_block;
0480     sector_t block_in_file;
0481     sector_t blocks[MAX_BUF_PER_PAGE];
0482     unsigned page_block;
0483     unsigned first_unmapped = blocks_per_page;
0484     struct block_device *bdev = NULL;
0485     int boundary = 0;
0486     sector_t boundary_block = 0;
0487     struct block_device *boundary_bdev = NULL;
0488     int length;
0489     struct buffer_head map_bh;
0490     loff_t i_size = i_size_read(inode);
0491     int ret = 0;
0492     int op_flags = wbc_to_write_flags(wbc);
0493 
0494     if (page_has_buffers(page)) {
0495         struct buffer_head *head = page_buffers(page);
0496         struct buffer_head *bh = head;
0497 
0498         /* If they're all mapped and dirty, do it */
0499         page_block = 0;
0500         do {
0501             BUG_ON(buffer_locked(bh));
0502             if (!buffer_mapped(bh)) {
0503                 /*
0504                  * unmapped dirty buffers are created by
0505                  * __set_page_dirty_buffers -> mmapped data
0506                  */
0507                 if (buffer_dirty(bh))
0508                     goto confused;
0509                 if (first_unmapped == blocks_per_page)
0510                     first_unmapped = page_block;
0511                 continue;
0512             }
0513 
0514             if (first_unmapped != blocks_per_page)
0515                 goto confused;  /* hole -> non-hole */
0516 
0517             if (!buffer_dirty(bh) || !buffer_uptodate(bh))
0518                 goto confused;
0519             if (page_block) {
0520                 if (bh->b_blocknr != blocks[page_block-1] + 1)
0521                     goto confused;
0522             }
0523             blocks[page_block++] = bh->b_blocknr;
0524             boundary = buffer_boundary(bh);
0525             if (boundary) {
0526                 boundary_block = bh->b_blocknr;
0527                 boundary_bdev = bh->b_bdev;
0528             }
0529             bdev = bh->b_bdev;
0530         } while ((bh = bh->b_this_page) != head);
0531 
0532         if (first_unmapped)
0533             goto page_is_mapped;
0534 
0535         /*
0536          * Page has buffers, but they are all unmapped. The page was
0537          * created by pagein or read over a hole which was handled by
0538          * block_read_full_page().  If this address_space is also
0539          * using mpage_readpages then this will rarely happen.
0540          */
0541         goto confused;
0542     }
0543 
0544     /*
0545      * The page has no buffers: map it to disk
0546      */
0547     BUG_ON(!PageUptodate(page));
0548     block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
0549     last_block = (i_size - 1) >> blkbits;
0550     map_bh.b_page = page;
0551     for (page_block = 0; page_block < blocks_per_page; ) {
0552 
0553         map_bh.b_state = 0;
0554         map_bh.b_size = 1 << blkbits;
0555         if (mpd->get_block(inode, block_in_file, &map_bh, 1))
0556             goto confused;
0557         if (buffer_new(&map_bh))
0558             clean_bdev_bh_alias(&map_bh);
0559         if (buffer_boundary(&map_bh)) {
0560             boundary_block = map_bh.b_blocknr;
0561             boundary_bdev = map_bh.b_bdev;
0562         }
0563         if (page_block) {
0564             if (map_bh.b_blocknr != blocks[page_block-1] + 1)
0565                 goto confused;
0566         }
0567         blocks[page_block++] = map_bh.b_blocknr;
0568         boundary = buffer_boundary(&map_bh);
0569         bdev = map_bh.b_bdev;
0570         if (block_in_file == last_block)
0571             break;
0572         block_in_file++;
0573     }
0574     BUG_ON(page_block == 0);
0575 
0576     first_unmapped = page_block;
0577 
0578 page_is_mapped:
0579     end_index = i_size >> PAGE_SHIFT;
0580     if (page->index >= end_index) {
0581         /*
0582          * The page straddles i_size.  It must be zeroed out on each
0583          * and every writepage invocation because it may be mmapped.
0584          * "A file is mapped in multiples of the page size.  For a file
0585          * that is not a multiple of the page size, the remaining memory
0586          * is zeroed when mapped, and writes to that region are not
0587          * written out to the file."
0588          */
0589         unsigned offset = i_size & (PAGE_SIZE - 1);
0590 
0591         if (page->index > end_index || !offset)
0592             goto confused;
0593         zero_user_segment(page, offset, PAGE_SIZE);
0594     }
0595 
0596     /*
0597      * This page will go to BIO.  Do we need to send this BIO off first?
0598      */
0599     if (bio && mpd->last_block_in_bio != blocks[0] - 1)
0600         bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
0601 
0602 alloc_new:
0603     if (bio == NULL) {
0604         if (first_unmapped == blocks_per_page) {
0605             if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
0606                                 page, wbc)) {
0607                 clean_buffers(page, first_unmapped);
0608                 goto out;
0609             }
0610         }
0611         bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
0612                 BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH);
0613         if (bio == NULL)
0614             goto confused;
0615 
0616         wbc_init_bio(wbc, bio);
0617     }
0618 
0619     /*
0620      * Must try to add the page before marking the buffer clean or
0621      * the confused fail path above (OOM) will be very confused when
0622      * it finds all bh marked clean (i.e. it will not write anything)
0623      */
0624     wbc_account_io(wbc, page, PAGE_SIZE);
0625     length = first_unmapped << blkbits;
0626     if (bio_add_page(bio, page, length, 0) < length) {
0627         bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
0628         goto alloc_new;
0629     }
0630 
0631     clean_buffers(page, first_unmapped);
0632 
0633     BUG_ON(PageWriteback(page));
0634     set_page_writeback(page);
0635     unlock_page(page);
0636     if (boundary || (first_unmapped != blocks_per_page)) {
0637         bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
0638         if (boundary_block) {
0639             write_boundary_block(boundary_bdev,
0640                     boundary_block, 1 << blkbits);
0641         }
0642     } else {
0643         mpd->last_block_in_bio = blocks[blocks_per_page - 1];
0644     }
0645     goto out;
0646 
0647 confused:
0648     if (bio)
0649         bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
0650 
0651     if (mpd->use_writepage) {
0652         ret = mapping->a_ops->writepage(page, wbc);
0653     } else {
0654         ret = -EAGAIN;
0655         goto out;
0656     }
0657     /*
0658      * The caller has a ref on the inode, so *mapping is stable
0659      */
0660     mapping_set_error(mapping, ret);
0661 out:
0662     mpd->bio = bio;
0663     return ret;
0664 }
0665 
0666 /**
0667  * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them
0668  * @mapping: address space structure to write
0669  * @wbc: subtract the number of written pages from *@wbc->nr_to_write
0670  * @get_block: the filesystem's block mapper function.
0671  *             If this is NULL then use a_ops->writepage.  Otherwise, go
0672  *             direct-to-BIO.
0673  *
0674  * This is a library function, which implements the writepages()
0675  * address_space_operation.
0676  *
0677  * If a page is already under I/O, generic_writepages() skips it, even
0678  * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
0679  * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
0680  * and msync() need to guarantee that all the data which was dirty at the time
0681  * the call was made get new I/O started against them.  If wbc->sync_mode is
0682  * WB_SYNC_ALL then we were called for data integrity and we must wait for
0683  * existing IO to complete.
0684  */
0685 int
0686 mpage_writepages(struct address_space *mapping,
0687         struct writeback_control *wbc, get_block_t get_block)
0688 {
0689     struct blk_plug plug;
0690     int ret;
0691 
0692     blk_start_plug(&plug);
0693 
0694     if (!get_block)
0695         ret = generic_writepages(mapping, wbc);
0696     else {
0697         struct mpage_data mpd = {
0698             .bio = NULL,
0699             .last_block_in_bio = 0,
0700             .get_block = get_block,
0701             .use_writepage = 1,
0702         };
0703 
0704         ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
0705         if (mpd.bio) {
0706             int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
0707                   REQ_SYNC : 0);
0708             mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
0709         }
0710     }
0711     blk_finish_plug(&plug);
0712     return ret;
0713 }
0714 EXPORT_SYMBOL(mpage_writepages);
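
/*
 * Typical usage (an illustrative sketch, not part of this file): the write
 * side is wired up like the read side, with the same hypothetical
 * example_get_block.  Passing a NULL get_block instead makes
 * mpage_writepages() fall back to generic_writepages(), i.e. per-page
 * ->writepage calls.
 *
 *	static int example_writepages(struct address_space *mapping,
 *			struct writeback_control *wbc)
 *	{
 *		return mpage_writepages(mapping, wbc, example_get_block);
 *	}
 */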
0715 
0716 int mpage_writepage(struct page *page, get_block_t get_block,
0717     struct writeback_control *wbc)
0718 {
0719     struct mpage_data mpd = {
0720         .bio = NULL,
0721         .last_block_in_bio = 0,
0722         .get_block = get_block,
0723         .use_writepage = 0,
0724     };
0725     int ret = __mpage_writepage(page, wbc, &mpd);
0726     if (mpd.bio) {
0727         int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
0728               REQ_SYNC : 0);
0729         mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
0730     }
0731     return ret;
0732 }
0733 EXPORT_SYMBOL(mpage_writepage);
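
/*
 * Typical usage (an illustrative sketch, not part of this file): a single
 * page ->writepage can also be built on mpage_writepage().  Because
 * use_writepage is 0 in this path, a page that cannot be handled makes
 * mpage_writepage() return -EAGAIN rather than recursing into ->writepage.
 * example_get_block is the same hypothetical block mapper as above.
 *
 *	static int example_writepage(struct page *page,
 *			struct writeback_control *wbc)
 *	{
 *		return mpage_writepage(page, example_get_block, wbc);
 *	}
 */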