Back to home page

LXR

 
 

    


0001 /*
0002  * mm/fadvise.c
0003  *
0004  * Copyright (C) 2002, Linus Torvalds
0005  *
0006  * 11Jan2003    Andrew Morton
0007  *      Initial version.
0008  */
0009 
0010 #include <linux/kernel.h>
0011 #include <linux/file.h>
0012 #include <linux/fs.h>
0013 #include <linux/mm.h>
0014 #include <linux/pagemap.h>
0015 #include <linux/backing-dev.h>
0016 #include <linux/pagevec.h>
0017 #include <linux/fadvise.h>
0018 #include <linux/writeback.h>
0019 #include <linux/syscalls.h>
0020 #include <linux/swap.h>
0021 
0022 #include <asm/unistd.h>
0023 
0024 /*
0025  * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
0026  * deactivate the pages and clear PG_Referenced.
0027  */
0028 SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
0029 {
0030     struct fd f = fdget(fd);
0031     struct inode *inode;
0032     struct address_space *mapping;
0033     struct backing_dev_info *bdi;
0034     loff_t endbyte;         /* inclusive */
0035     pgoff_t start_index;
0036     pgoff_t end_index;
0037     unsigned long nrpages;
0038     int ret = 0;
0039 
0040     if (!f.file)
0041         return -EBADF;
0042 
0043     inode = file_inode(f.file);
0044     if (S_ISFIFO(inode->i_mode)) {
0045         ret = -ESPIPE;
0046         goto out;
0047     }
0048 
0049     mapping = f.file->f_mapping;
0050     if (!mapping || len < 0) {
0051         ret = -EINVAL;
0052         goto out;
0053     }
0054 
0055     if (IS_DAX(inode)) {
0056         switch (advice) {
0057         case POSIX_FADV_NORMAL:
0058         case POSIX_FADV_RANDOM:
0059         case POSIX_FADV_SEQUENTIAL:
0060         case POSIX_FADV_WILLNEED:
0061         case POSIX_FADV_NOREUSE:
0062         case POSIX_FADV_DONTNEED:
0063             /* no bad return value, but ignore advice */
0064             break;
0065         default:
0066             ret = -EINVAL;
0067         }
0068         goto out;
0069     }
0070 
0071     /* Careful about overflows. Len == 0 means "as much as possible" */
0072     endbyte = offset + len;
0073     if (!len || endbyte < len)
0074         endbyte = -1;
0075     else
0076         endbyte--;      /* inclusive */
0077 
0078     bdi = inode_to_bdi(mapping->host);
0079 
0080     switch (advice) {
0081     case POSIX_FADV_NORMAL:
0082         f.file->f_ra.ra_pages = bdi->ra_pages;
0083         spin_lock(&f.file->f_lock);
0084         f.file->f_mode &= ~FMODE_RANDOM;
0085         spin_unlock(&f.file->f_lock);
0086         break;
0087     case POSIX_FADV_RANDOM:
0088         spin_lock(&f.file->f_lock);
0089         f.file->f_mode |= FMODE_RANDOM;
0090         spin_unlock(&f.file->f_lock);
0091         break;
0092     case POSIX_FADV_SEQUENTIAL:
0093         f.file->f_ra.ra_pages = bdi->ra_pages * 2;
0094         spin_lock(&f.file->f_lock);
0095         f.file->f_mode &= ~FMODE_RANDOM;
0096         spin_unlock(&f.file->f_lock);
0097         break;
0098     case POSIX_FADV_WILLNEED:
0099         /* First and last PARTIAL page! */
0100         start_index = offset >> PAGE_SHIFT;
0101         end_index = endbyte >> PAGE_SHIFT;
0102 
0103         /* Careful about overflow on the "+1" */
0104         nrpages = end_index - start_index + 1;
0105         if (!nrpages)
0106             nrpages = ~0UL;
0107 
0108         /*
0109          * Ignore return value because fadvise() shall return
0110          * success even if filesystem can't retrieve a hint,
0111          */
0112         force_page_cache_readahead(mapping, f.file, start_index,
0113                        nrpages);
0114         break;
0115     case POSIX_FADV_NOREUSE:
0116         break;
0117     case POSIX_FADV_DONTNEED:
0118         if (!inode_write_congested(mapping->host))
0119             __filemap_fdatawrite_range(mapping, offset, endbyte,
0120                            WB_SYNC_NONE);
0121 
0122         /*
0123          * First and last FULL page! Partial pages are deliberately
0124          * preserved on the expectation that it is better to preserve
0125          * needed memory than to discard unneeded memory.
0126          */
0127         start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
0128         end_index = (endbyte >> PAGE_SHIFT);
0129         if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK) {
0130             /* First page is tricky as 0 - 1 = -1, but pgoff_t
0131              * is unsigned, so the end_index >= start_index
0132              * check below would be true and we'll discard the whole
0133              * file cache which is not what was asked.
0134              */
0135             if (end_index == 0)
0136                 break;
0137 
0138             end_index--;
0139         }
0140 
0141         if (end_index >= start_index) {
0142             unsigned long count;
0143 
0144             /*
0145              * It's common to FADV_DONTNEED right after
0146              * the read or write that instantiates the
0147              * pages, in which case there will be some
0148              * sitting on the local LRU cache. Try to
0149              * avoid the expensive remote drain and the
0150              * second cache tree walk below by flushing
0151              * them out right away.
0152              */
0153             lru_add_drain();
0154 
0155             count = invalidate_mapping_pages(mapping,
0156                         start_index, end_index);
0157 
0158             /*
0159              * If fewer pages were invalidated than expected then
0160              * it is possible that some of the pages were on
0161              * a per-cpu pagevec for a remote CPU. Drain all
0162              * pagevecs and try again.
0163              */
0164             if (count < (end_index - start_index + 1)) {
0165                 lru_add_drain_all();
0166                 invalidate_mapping_pages(mapping, start_index,
0167                         end_index);
0168             }
0169         }
0170         break;
0171     default:
0172         ret = -EINVAL;
0173     }
0174 out:
0175     fdput(f);
0176     return ret;
0177 }
0178 
#ifdef __ARCH_WANT_SYS_FADVISE64

/*
 * fadvise64 - variant of fadvise64_64 whose length argument is a size_t
 * @fd:     descriptor of the file the advice applies to
 * @offset: first byte of the range
 * @len:    length of the range in bytes
 * @advice: one of the POSIX_FADV_* values
 *
 * Only built when the architecture defines __ARCH_WANT_SYS_FADVISE64.
 * All the work is done by sys_fadvise64_64(); its return value is passed
 * back unchanged.
 */
SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
{
    /* fadvise64_64 takes its length as a loff_t. */
    loff_t nbytes = len;

    return sys_fadvise64_64(fd, offset, nbytes, advice);
}

#endif /* __ARCH_WANT_SYS_FADVISE64 */