/*
 * mm/truncate.c - code for taking down pages from address_spaces
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 10Sep2002    Andrew Morton
 *      Initial version.
 */

#include <linux/kernel.h>
#include <linux/backing-dev.h>
#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/export.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/buffer_head.h>  /* grr. try_to_release_page,
                   do_invalidatepage */
#include <linux/cleancache.h>
#include <linux/rmap.h>
#include "internal.h"

static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
                   void *entry)
{
    struct radix_tree_node *node;
    void **slot;

    spin_lock_irq(&mapping->tree_lock);
    /*
     * Regular page slots are stabilized by the page lock even
     * without the tree itself locked.  These unlocked entries
     * need verification under the tree lock.
     */
    if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot))
        goto unlock;
    if (*slot != entry)
        goto unlock;
    __radix_tree_replace(&mapping->page_tree, node, slot, NULL,
                 workingset_update_node, mapping);
    mapping->nrexceptional--;
unlock:
    spin_unlock_irq(&mapping->tree_lock);
}

/*
 * Unconditionally remove exceptional entry. Usually called from truncate path.
 */
static void truncate_exceptional_entry(struct address_space *mapping,
                       pgoff_t index, void *entry)
{
    /* Handled by shmem itself */
    if (shmem_mapping(mapping))
        return;

    if (dax_mapping(mapping)) {
        dax_delete_mapping_entry(mapping, index);
        return;
    }
    clear_shadow_entry(mapping, index, entry);
}

/*
 * Invalidate exceptional entry if easily possible. This handles exceptional
 * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and
 * clean entries.
 */
static int invalidate_exceptional_entry(struct address_space *mapping,
                    pgoff_t index, void *entry)
{
    /* Handled by shmem itself */
    if (shmem_mapping(mapping))
        return 1;
    if (dax_mapping(mapping))
        return dax_invalidate_mapping_entry(mapping, index);
    clear_shadow_entry(mapping, index, entry);
    return 1;
}

/*
 * Invalidate exceptional entry if clean. This handles exceptional entries for
 * invalidate_inode_pages2() so for DAX it evicts only clean entries.
 */
static int invalidate_exceptional_entry2(struct address_space *mapping,
                     pgoff_t index, void *entry)
{
    /* Handled by shmem itself */
    if (shmem_mapping(mapping))
        return 1;
    if (dax_mapping(mapping))
        return dax_invalidate_mapping_entry_sync(mapping, index);
    clear_shadow_entry(mapping, index, entry);
    return 1;
}

/**
 * do_invalidatepage - invalidate part or all of a page
 * @page: the page which is affected
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * do_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * do_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point, because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */
void do_invalidatepage(struct page *page, unsigned int offset,
               unsigned int length)
{
    void (*invalidatepage)(struct page *, unsigned int, unsigned int);

    invalidatepage = page->mapping->a_ops->invalidatepage;
#ifdef CONFIG_BLOCK
    if (!invalidatepage)
        invalidatepage = block_invalidatepage;
#endif
    if (invalidatepage)
        (*invalidatepage)(page, offset, length);
}

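/*
 * Illustrative sketch, not part of mm/truncate.c: a block-based filesystem
 * usually meets the do_invalidatepage() contract above simply by wiring
 * ->invalidatepage to block_invalidatepage(), which forgets buffers in the
 * invalidated byte range.  "foo" is a hypothetical filesystem name.
 */
#if 0
static const struct address_space_operations foo_aops = {
    .invalidatepage = block_invalidatepage,
    /* other address_space operations omitted */
};
#endif
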
/*
 * If truncate cannot remove the fs-private metadata from the page, the page
 * becomes orphaned.  It will be left on the LRU and may even be mapped into
 * user pagetables if we're racing with filemap_fault().
 *
 * We need to bale out if page->mapping is no longer equal to the original
 * mapping.  This happens a) when the VM reclaimed the page while we waited on
 * its lock, b) when a concurrent invalidate_mapping_pages got there first and
 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
 */
static int
truncate_complete_page(struct address_space *mapping, struct page *page)
{
    if (page->mapping != mapping)
        return -EIO;

    if (page_has_private(page))
        do_invalidatepage(page, 0, PAGE_SIZE);

    /*
     * Some filesystems seem to re-dirty the page even after
     * the VM has canceled the dirty bit (eg ext3 journaling).
     * Hence dirty accounting check is placed after invalidation.
     */
    cancel_dirty_page(page);
    ClearPageMappedToDisk(page);
    delete_from_page_cache(page);
    return 0;
}

/*
 * This is for invalidate_mapping_pages().  That function can be called at
 * any time, and is not supposed to throw away dirty pages.  But pages can
 * be marked dirty at any time too, so use remove_mapping which safely
 * discards clean, unused pages.
 *
 * Returns non-zero if the page was successfully invalidated.
 */
static int
invalidate_complete_page(struct address_space *mapping, struct page *page)
{
    int ret;

    if (page->mapping != mapping)
        return 0;

    if (page_has_private(page) && !try_to_release_page(page, 0))
        return 0;

    ret = remove_mapping(mapping, page);

    return ret;
}

int truncate_inode_page(struct address_space *mapping, struct page *page)
{
    loff_t holelen;
    VM_BUG_ON_PAGE(PageTail(page), page);

    holelen = PageTransHuge(page) ? HPAGE_PMD_SIZE : PAGE_SIZE;
    if (page_mapped(page)) {
        unmap_mapping_range(mapping,
                   (loff_t)page->index << PAGE_SHIFT,
                   holelen, 0);
    }
    return truncate_complete_page(mapping, page);
}

/*
 * Used to get rid of pages on hardware memory corruption.
 */
int generic_error_remove_page(struct address_space *mapping, struct page *page)
{
    if (!mapping)
        return -EINVAL;
    /*
     * Only punch for normal data pages for now.
     * Handling other types like directories would need more auditing.
     */
    if (!S_ISREG(mapping->host->i_mode))
        return -EIO;
    return truncate_inode_page(mapping, page);
}
EXPORT_SYMBOL(generic_error_remove_page);

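/*
 * Illustrative sketch, not part of mm/truncate.c: filesystems opt in to the
 * helper above from the memory-failure (hwpoison) path by pointing the
 * ->error_remove_page method of their address_space_operations at it.
 * "foo" is a hypothetical filesystem name.
 */
#if 0
static const struct address_space_operations foo_aops = {
    .error_remove_page = generic_error_remove_page,
    /* other address_space operations omitted */
};
#endif
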
/*
 * Safely invalidate one page from its pagecache mapping.
 * It only drops clean, unused pages. The page must be locked.
 *
 * Returns 1 if the page is successfully invalidated, otherwise 0.
 */
int invalidate_inode_page(struct page *page)
{
    struct address_space *mapping = page_mapping(page);
    if (!mapping)
        return 0;
    if (PageDirty(page) || PageWriteback(page))
        return 0;
    if (page_mapped(page))
        return 0;
    return invalidate_complete_page(mapping, page);
}

/**
 * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 * @lend: offset to which to truncate (inclusive)
 *
 * Truncate the page cache, removing the pages that are between
 * specified offsets (and zeroing out partial pages
 * if lstart or lend + 1 is not page aligned).
 *
 * Truncate takes two passes - the first pass is nonblocking.  It will not
 * block on page locks and it will not block on writeback.  The second pass
 * will wait.  This is to prevent as much IO as possible in the affected region.
 * The first pass will remove most pages, so the search cost of the second pass
 * is low.
 *
 * We pass down the cache-hot hint to the page freeing code.  Even if the
 * mapping is large, it is probably the case that the final pages are the most
 * recently touched, and freeing happens in ascending file offset order.
 *
 * Note that since ->invalidatepage() accepts a range to invalidate,
 * truncate_inode_pages_range is able to properly handle cases where
 * lend + 1 is not page aligned.
 */
void truncate_inode_pages_range(struct address_space *mapping,
                loff_t lstart, loff_t lend)
{
    pgoff_t     start;      /* inclusive */
    pgoff_t     end;        /* exclusive */
    unsigned int    partial_start;  /* inclusive */
    unsigned int    partial_end;    /* exclusive */
    struct pagevec  pvec;
    pgoff_t     indices[PAGEVEC_SIZE];
    pgoff_t     index;
    int     i;

    cleancache_invalidate_inode(mapping);
    if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
        return;

    /* Offsets within partial pages */
    partial_start = lstart & (PAGE_SIZE - 1);
    partial_end = (lend + 1) & (PAGE_SIZE - 1);

    /*
     * 'start' and 'end' always cover the range of pages to be fully
     * truncated. Partial pages are covered with 'partial_start' at the
     * start of the range and 'partial_end' at the end of the range.
     * Note that 'end' is exclusive while 'lend' is inclusive.
     */
    start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
    if (lend == -1)
        /*
         * lend == -1 indicates end-of-file so we have to set 'end'
         * to the highest possible pgoff_t and since the type is
         * unsigned we're using -1.
         */
        end = -1;
    else
        end = (lend + 1) >> PAGE_SHIFT;

    pagevec_init(&pvec, 0);
    index = start;
    while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
            min(end - index, (pgoff_t)PAGEVEC_SIZE),
            indices)) {
        for (i = 0; i < pagevec_count(&pvec); i++) {
            struct page *page = pvec.pages[i];

            /* We rely upon deletion not changing page->index */
            index = indices[i];
            if (index >= end)
                break;

            if (radix_tree_exceptional_entry(page)) {
                truncate_exceptional_entry(mapping, index,
                               page);
                continue;
            }

            if (!trylock_page(page))
                continue;
            WARN_ON(page_to_index(page) != index);
            if (PageWriteback(page)) {
                unlock_page(page);
                continue;
            }
            truncate_inode_page(mapping, page);
            unlock_page(page);
        }
        pagevec_remove_exceptionals(&pvec);
        pagevec_release(&pvec);
        cond_resched();
        index++;
    }

    if (partial_start) {
        struct page *page = find_lock_page(mapping, start - 1);
        if (page) {
            unsigned int top = PAGE_SIZE;
            if (start > end) {
                /* Truncation within a single page */
                top = partial_end;
                partial_end = 0;
            }
            wait_on_page_writeback(page);
            zero_user_segment(page, partial_start, top);
            cleancache_invalidate_page(mapping, page);
            if (page_has_private(page))
                do_invalidatepage(page, partial_start,
                          top - partial_start);
            unlock_page(page);
            put_page(page);
        }
    }
    if (partial_end) {
        struct page *page = find_lock_page(mapping, end);
        if (page) {
            wait_on_page_writeback(page);
            zero_user_segment(page, 0, partial_end);
            cleancache_invalidate_page(mapping, page);
            if (page_has_private(page))
                do_invalidatepage(page, 0,
                          partial_end);
            unlock_page(page);
            put_page(page);
        }
    }
    /*
     * If the truncation happened within a single page no pages
     * will be released, just zeroed, so we can bail out now.
     */
    if (start >= end)
        return;

    index = start;
    for ( ; ; ) {
        cond_resched();
        if (!pagevec_lookup_entries(&pvec, mapping, index,
            min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) {
            /* If all gone from start onwards, we're done */
            if (index == start)
                break;
            /* Otherwise restart to make sure all gone */
            index = start;
            continue;
        }
        if (index == start && indices[0] >= end) {
            /* All gone out of hole to be punched, we're done */
            pagevec_remove_exceptionals(&pvec);
            pagevec_release(&pvec);
            break;
        }
        for (i = 0; i < pagevec_count(&pvec); i++) {
            struct page *page = pvec.pages[i];

            /* We rely upon deletion not changing page->index */
            index = indices[i];
            if (index >= end) {
                /* Restart punch to make sure all gone */
                index = start - 1;
                break;
            }

            if (radix_tree_exceptional_entry(page)) {
                truncate_exceptional_entry(mapping, index,
                               page);
                continue;
            }

            lock_page(page);
            WARN_ON(page_to_index(page) != index);
            wait_on_page_writeback(page);
            truncate_inode_page(mapping, page);
            unlock_page(page);
        }
        pagevec_remove_exceptionals(&pvec);
        pagevec_release(&pvec);
        index++;
    }
    cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(truncate_inode_pages_range);

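/*
 * Illustrative sketch, not part of mm/truncate.c: lstart and lend are byte
 * offsets and lend is inclusive, so removing the pages caching bytes
 * 0..8191 of a file (two whole pages on a 4KiB-page system, hence no
 * partial-page zeroing) looks like this.  "foo" is hypothetical.
 */
#if 0
static void foo_drop_first_two_pages(struct inode *inode)
{
    /* bytes 0..8191 inclusive: two whole 4KiB pages */
    truncate_inode_pages_range(inode->i_mapping, 0, 8192 - 1);
}
#endif
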
/**
 * truncate_inode_pages - truncate *all* the pages from an offset
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 *
 * Called under (and serialised by) inode->i_mutex.
 *
 * Note: When this function returns, there can be a page in the process of
 * deletion (inside __delete_from_page_cache()) in the specified range.  Thus
 * mapping->nrpages can be non-zero when this function returns even after
 * truncation of the whole mapping.
 */
void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
{
    truncate_inode_pages_range(mapping, lstart, (loff_t)-1);
}
EXPORT_SYMBOL(truncate_inode_pages);

/**
 * truncate_inode_pages_final - truncate *all* pages before inode dies
 * @mapping: mapping to truncate
 *
 * Called under (and serialized by) inode->i_mutex.
 *
 * Filesystems have to use this in the .evict_inode path to inform the
 * VM that this is the final truncate and the inode is going away.
 */
void truncate_inode_pages_final(struct address_space *mapping)
{
    unsigned long nrexceptional;
    unsigned long nrpages;

    /*
     * Page reclaim can not participate in regular inode lifetime
     * management (can't call iput()) and thus can race with the
     * inode teardown.  Tell it when the address space is exiting,
     * so that it does not install eviction information after the
     * final truncate has begun.
     */
    mapping_set_exiting(mapping);

    /*
     * When reclaim installs eviction entries, it increases
     * nrexceptional first, then decreases nrpages.  Make sure we see
     * this in the right order or we might miss an entry.
     */
    nrpages = mapping->nrpages;
    smp_rmb();
    nrexceptional = mapping->nrexceptional;

    if (nrpages || nrexceptional) {
        /*
         * As truncation uses a lockless tree lookup, cycle
         * the tree lock to make sure any ongoing tree
         * modification that does not see AS_EXITING is
         * completed before starting the final truncate.
         */
        spin_lock_irq(&mapping->tree_lock);
        spin_unlock_irq(&mapping->tree_lock);

        truncate_inode_pages(mapping, 0);
    }
}
EXPORT_SYMBOL(truncate_inode_pages_final);

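/*
 * Illustrative sketch, not part of mm/truncate.c: the usual shape of a
 * filesystem's ->evict_inode method, which must use this final flavour of
 * truncation before the inode goes away.  "foo" is a hypothetical
 * filesystem; real implementations also release fs-private state.
 */
#if 0
static void foo_evict_inode(struct inode *inode)
{
    truncate_inode_pages_final(&inode->i_data);
    clear_inode(inode);
}

static const struct super_operations foo_super_ops = {
    .evict_inode = foo_evict_inode,
    /* other super operations omitted */
};
#endif
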
/**
 * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
 * @mapping: the address_space which holds the pages to invalidate
 * @start: the offset 'from' which to invalidate
 * @end: the offset 'to' which to invalidate (inclusive)
 *
 * This function only removes the unlocked pages; if you want to
 * remove all the pages of one inode, you must call truncate_inode_pages.
 *
 * invalidate_mapping_pages() will not block on IO activity. It will not
 * invalidate pages which are dirty, locked, under writeback or mapped into
 * pagetables.
 */
unsigned long invalidate_mapping_pages(struct address_space *mapping,
        pgoff_t start, pgoff_t end)
{
    pgoff_t indices[PAGEVEC_SIZE];
    struct pagevec pvec;
    pgoff_t index = start;
    unsigned long ret;
    unsigned long count = 0;
    int i;

    pagevec_init(&pvec, 0);
    while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
            min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
            indices)) {
        for (i = 0; i < pagevec_count(&pvec); i++) {
            struct page *page = pvec.pages[i];

            /* We rely upon deletion not changing page->index */
            index = indices[i];
            if (index > end)
                break;

            if (radix_tree_exceptional_entry(page)) {
                invalidate_exceptional_entry(mapping, index,
                                 page);
                continue;
            }

            if (!trylock_page(page))
                continue;

            WARN_ON(page_to_index(page) != index);

            /* Middle of THP: skip */
            if (PageTransTail(page)) {
                unlock_page(page);
                continue;
            } else if (PageTransHuge(page)) {
                index += HPAGE_PMD_NR - 1;
                i += HPAGE_PMD_NR - 1;
                /* 'end' is in the middle of THP */
                if (index ==  round_down(end, HPAGE_PMD_NR))
                    continue;
            }

            ret = invalidate_inode_page(page);
            unlock_page(page);
            /*
             * Invalidation is a hint that the page is no longer
             * of interest, so try to speed up its reclaim.
             */
            if (!ret)
                deactivate_file_page(page);
            count += ret;
        }
        pagevec_remove_exceptionals(&pvec);
        pagevec_release(&pvec);
        cond_resched();
        index++;
    }
    return count;
}
EXPORT_SYMBOL(invalidate_mapping_pages);

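/*
 * Illustrative sketch, not part of mm/truncate.c: a "drop what you easily
 * can" pass over a whole mapping, in the style of the drop_caches and
 * POSIX_FADV_DONTNEED paths.  "foo" and "inode" are hypothetical; the
 * return value is the number of pages actually invalidated.
 */
#if 0
static unsigned long foo_drop_clean_cache(struct inode *inode)
{
    /* start == 0, end == -1 covers every possible page offset */
    return invalidate_mapping_pages(inode->i_mapping, 0, -1);
}
#endif
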
/*
 * This is like invalidate_complete_page(), except it ignores the page's
 * refcount.  We do this because invalidate_inode_pages2() needs stronger
 * invalidation guarantees, and cannot afford to leave pages behind because
 * shrink_page_list() has a temp ref on them, or because they're transiently
 * sitting in the lru_cache_add() pagevecs.
 */
static int
invalidate_complete_page2(struct address_space *mapping, struct page *page)
{
    unsigned long flags;

    if (page->mapping != mapping)
        return 0;

    if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
        return 0;

    spin_lock_irqsave(&mapping->tree_lock, flags);
    if (PageDirty(page))
        goto failed;

    BUG_ON(page_has_private(page));
    __delete_from_page_cache(page, NULL);
    spin_unlock_irqrestore(&mapping->tree_lock, flags);

    if (mapping->a_ops->freepage)
        mapping->a_ops->freepage(page);

    put_page(page); /* pagecache ref */
    return 1;
failed:
    spin_unlock_irqrestore(&mapping->tree_lock, flags);
    return 0;
}

static int do_launder_page(struct address_space *mapping, struct page *page)
{
    if (!PageDirty(page))
        return 0;
    if (page->mapping != mapping || mapping->a_ops->launder_page == NULL)
        return 0;
    return mapping->a_ops->launder_page(page);
}

/**
 * invalidate_inode_pages2_range - remove range of pages from an address_space
 * @mapping: the address_space
 * @start: the page offset 'from' which to invalidate
 * @end: the page offset 'to' which to invalidate (inclusive)
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Returns -EBUSY if any pages could not be invalidated.
 */
int invalidate_inode_pages2_range(struct address_space *mapping,
                  pgoff_t start, pgoff_t end)
{
    pgoff_t indices[PAGEVEC_SIZE];
    struct pagevec pvec;
    pgoff_t index;
    int i;
    int ret = 0;
    int ret2 = 0;
    int did_range_unmap = 0;

    cleancache_invalidate_inode(mapping);
    pagevec_init(&pvec, 0);
    index = start;
    while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
            min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
            indices)) {
        for (i = 0; i < pagevec_count(&pvec); i++) {
            struct page *page = pvec.pages[i];

            /* We rely upon deletion not changing page->index */
            index = indices[i];
            if (index > end)
                break;

            if (radix_tree_exceptional_entry(page)) {
                if (!invalidate_exceptional_entry2(mapping,
                                   index, page))
                    ret = -EBUSY;
                continue;
            }

            lock_page(page);
            WARN_ON(page_to_index(page) != index);
            if (page->mapping != mapping) {
                unlock_page(page);
                continue;
            }
            wait_on_page_writeback(page);
            if (page_mapped(page)) {
                if (!did_range_unmap) {
                    /*
                     * Zap the rest of the file in one hit.
                     */
                    unmap_mapping_range(mapping,
                       (loff_t)index << PAGE_SHIFT,
                       (loff_t)(1 + end - index)
                             << PAGE_SHIFT,
                             0);
                    did_range_unmap = 1;
                } else {
                    /*
                     * Just zap this page
                     */
                    unmap_mapping_range(mapping,
                       (loff_t)index << PAGE_SHIFT,
                       PAGE_SIZE, 0);
                }
            }
            BUG_ON(page_mapped(page));
            ret2 = do_launder_page(mapping, page);
            if (ret2 == 0) {
                if (!invalidate_complete_page2(mapping, page))
                    ret2 = -EBUSY;
            }
            if (ret2 < 0)
                ret = ret2;
            unlock_page(page);
        }
        pagevec_remove_exceptionals(&pvec);
        pagevec_release(&pvec);
        cond_resched();
        index++;
    }
    cleancache_invalidate_inode(mapping);
    return ret;
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);

/**
 * invalidate_inode_pages2 - remove all pages from an address_space
 * @mapping: the address_space
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Returns -EBUSY if any pages could not be invalidated.
 */
int invalidate_inode_pages2(struct address_space *mapping)
{
    return invalidate_inode_pages2_range(mapping, 0, -1);
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);

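/*
 * Illustrative sketch, not part of mm/truncate.c: after writing bytes
 * [pos, pos + count) around the page cache (e.g. O_DIRECT), a caller can use
 * the _range variant to force the now-stale cached pages out, treating
 * -EBUSY as "someone still holds a page".  All identifiers here are
 * hypothetical, and count is assumed to be non-zero.
 */
#if 0
static int foo_invalidate_after_dio(struct address_space *mapping,
                                    loff_t pos, size_t count)
{
    return invalidate_inode_pages2_range(mapping,
                pos >> PAGE_SHIFT,
                (pos + count - 1) >> PAGE_SHIFT);
}
#endif
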
/**
 * truncate_pagecache - unmap and remove pagecache that has been truncated
 * @inode: inode
 * @newsize: new file size
 *
 * inode's new i_size must already be written before truncate_pagecache
 * is called.
 *
 * This function should typically be called before the filesystem
 * releases resources associated with the freed range (eg. deallocates
 * blocks). This way, pagecache will always stay logically coherent
 * with on-disk format, and the filesystem would not have to deal with
 * situations such as writepage being called for a page that has already
 * had its underlying blocks deallocated.
 */
void truncate_pagecache(struct inode *inode, loff_t newsize)
{
    struct address_space *mapping = inode->i_mapping;
    loff_t holebegin = round_up(newsize, PAGE_SIZE);

    /*
     * unmap_mapping_range is called twice, first simply for
     * efficiency so that truncate_inode_pages does fewer
     * single-page unmaps.  However after this first call, and
     * before truncate_inode_pages finishes, it is possible for
     * private pages to be COWed, which remain after
     * truncate_inode_pages finishes, hence the second
     * unmap_mapping_range call must be made for correctness.
     */
    unmap_mapping_range(mapping, holebegin, 0, 1);
    truncate_inode_pages(mapping, newsize);
    unmap_mapping_range(mapping, holebegin, 0, 1);
}
EXPORT_SYMBOL(truncate_pagecache);

/**
 * truncate_setsize - update inode and pagecache for a new file size
 * @inode: inode
 * @newsize: new file size
 *
 * truncate_setsize updates i_size and performs pagecache truncation (if
 * necessary) to @newsize. It will typically be called from the filesystem's
 * setattr function when ATTR_SIZE is passed in.
 *
 * Must be called with a lock serializing truncates and writes (generally
 * i_mutex but e.g. xfs uses a different lock) and before all filesystem
 * specific block truncation has been performed.
 */
void truncate_setsize(struct inode *inode, loff_t newsize)
{
    loff_t oldsize = inode->i_size;

    i_size_write(inode, newsize);
    if (newsize > oldsize)
        pagecache_isize_extended(inode, oldsize, newsize);
    truncate_pagecache(inode, newsize);
}
EXPORT_SYMBOL(truncate_setsize);

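/*
 * Illustrative sketch, not part of mm/truncate.c: the typical ->setattr
 * shape for a simple filesystem, calling truncate_setsize() for ATTR_SIZE
 * before updating the remaining attributes.  "foo" is a hypothetical
 * filesystem; a real one would also truncate its on-disk blocks.
 */
#if 0
static int foo_setattr(struct dentry *dentry, struct iattr *attr)
{
    struct inode *inode = d_inode(dentry);
    int error;

    error = setattr_prepare(dentry, attr);
    if (error)
        return error;

    if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode))
        truncate_setsize(inode, attr->ia_size);

    setattr_copy(inode, attr);
    mark_inode_dirty(inode);
    return 0;
}
#endif
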
/**
 * pagecache_isize_extended - update pagecache after extension of i_size
 * @inode:  inode for which i_size was extended
 * @from:   original inode size
 * @to:     new inode size
 *
 * Handle extension of inode size either caused by extending truncate or by
 * write starting after current i_size. We mark the page straddling current
 * i_size RO so that page_mkwrite() is called on the nearest write access to
 * the page.  This way filesystem can be sure that page_mkwrite() is called on
 * the page before user writes to the page via mmap after the i_size has been
 * changed.
 *
 * The function must be called after i_size is updated so that page fault
 * coming after we unlock the page will already see the new i_size.
 * The function must be called while we still hold i_mutex - this not only
 * makes sure i_size is stable but also that userspace cannot observe new
 * i_size value before we are prepared to store mmap writes at new inode size.
 */
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
{
    int bsize = 1 << inode->i_blkbits;
    loff_t rounded_from;
    struct page *page;
    pgoff_t index;

    WARN_ON(to > inode->i_size);

    if (from >= to || bsize == PAGE_SIZE)
        return;
    /* Page straddling @from will not have any hole block created? */
    rounded_from = round_up(from, bsize);
    if (to <= rounded_from || !(rounded_from & (PAGE_SIZE - 1)))
        return;

    index = from >> PAGE_SHIFT;
    page = find_lock_page(inode->i_mapping, index);
    /* Page not cached? Nothing to do */
    if (!page)
        return;
    /*
     * See clear_page_dirty_for_io() for details why set_page_dirty()
     * is needed.
     */
    if (page_mkclean(page))
        set_page_dirty(page);
    unlock_page(page);
    put_page(page);
}
EXPORT_SYMBOL(pagecache_isize_extended);

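/*
 * Illustrative sketch, not part of mm/truncate.c: a write or fallocate path
 * that grows i_size calls this helper right after publishing the new size
 * (under the same serializing lock), so the old EOF page gets
 * write-protected as described above.  All identifiers are hypothetical.
 */
#if 0
static void foo_extend_isize(struct inode *inode, loff_t newsize)
{
    loff_t oldsize = i_size_read(inode);

    i_size_write(inode, newsize);
    pagecache_isize_extended(inode, oldsize, newsize);
}
#endif
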
/**
 * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
 * @inode: inode
 * @lstart: offset of beginning of hole
 * @lend: offset of last byte of hole
 *
 * This function should typically be called before the filesystem
 * releases resources associated with the freed range (eg. deallocates
 * blocks). This way, pagecache will always stay logically coherent
 * with on-disk format, and the filesystem would not have to deal with
 * situations such as writepage being called for a page that has already
 * had its underlying blocks deallocated.
 */
void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
{
    struct address_space *mapping = inode->i_mapping;
    loff_t unmap_start = round_up(lstart, PAGE_SIZE);
    loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1;
    /*
     * This rounding is currently just for example: unmap_mapping_range
     * expands its hole outwards, whereas we want it to contract the hole
     * inwards.  However, existing callers of truncate_pagecache_range are
     * doing their own page rounding first.  Note that unmap_mapping_range
     * allows holelen 0 for all, and we allow lend -1 for end of file.
     */

    /*
     * Unlike in truncate_pagecache, unmap_mapping_range is called only
     * once (before truncating pagecache), and without "even_cows" flag:
     * hole-punching should not remove private COWed pages from the hole.
     */
    if ((u64)unmap_end > (u64)unmap_start)
        unmap_mapping_range(mapping, unmap_start,
                    1 + unmap_end - unmap_start, 0);
    truncate_inode_pages_range(mapping, lstart, lend);
}
EXPORT_SYMBOL(truncate_pagecache_range);
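
/*
 * Illustrative sketch, not part of mm/truncate.c: a hole-punching
 * implementation (e.g. fallocate(FALLOC_FL_PUNCH_HOLE)) removes the cached
 * pages of the hole before freeing the underlying blocks.  "foo" and
 * foo_free_blocks() are hypothetical.
 */
#if 0
static int foo_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
    /* lend is the last byte of the hole, hence the -1 */
    truncate_pagecache_range(inode, offset, offset + len - 1);
    return foo_free_blocks(inode, offset, len);
}
#endif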