Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * mm/fadvise.c
0004  *
0005  * Copyright (C) 2002, Linus Torvalds
0006  *
0007  * 11Jan2003    Andrew Morton
0008  *      Initial version.
0009  */
0010 
0011 #include <linux/kernel.h>
0012 #include <linux/file.h>
0013 #include <linux/fs.h>
0014 #include <linux/mm.h>
0015 #include <linux/pagemap.h>
0016 #include <linux/backing-dev.h>
0017 #include <linux/pagevec.h>
0018 #include <linux/fadvise.h>
0019 #include <linux/writeback.h>
0020 #include <linux/syscalls.h>
0021 #include <linux/swap.h>
0022 
0023 #include <asm/unistd.h>
0024 
0025 #include "internal.h"
0026 
0027 /*
0028  * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
0029  * deactivate the pages and clear PG_Referenced.
0030  */
0031 
0032 int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
0033 {
0034     struct inode *inode;
0035     struct address_space *mapping;
0036     struct backing_dev_info *bdi;
0037     loff_t endbyte;         /* inclusive */
0038     pgoff_t start_index;
0039     pgoff_t end_index;
0040     unsigned long nrpages;
0041 
0042     inode = file_inode(file);
0043     if (S_ISFIFO(inode->i_mode))
0044         return -ESPIPE;
0045 
0046     mapping = file->f_mapping;
0047     if (!mapping || len < 0)
0048         return -EINVAL;
0049 
0050     bdi = inode_to_bdi(mapping->host);
0051 
0052     if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) {
0053         switch (advice) {
0054         case POSIX_FADV_NORMAL:
0055         case POSIX_FADV_RANDOM:
0056         case POSIX_FADV_SEQUENTIAL:
0057         case POSIX_FADV_WILLNEED:
0058         case POSIX_FADV_NOREUSE:
0059         case POSIX_FADV_DONTNEED:
0060             /* no bad return value, but ignore advice */
0061             break;
0062         default:
0063             return -EINVAL;
0064         }
0065         return 0;
0066     }
0067 
0068     /*
0069      * Careful about overflows. Len == 0 means "as much as possible".  Use
0070      * unsigned math because signed overflows are undefined and UBSan
0071      * complains.
0072      */
0073     endbyte = (u64)offset + (u64)len;
0074     if (!len || endbyte < len)
0075         endbyte = -1;
0076     else
0077         endbyte--;      /* inclusive */
0078 
0079     switch (advice) {
0080     case POSIX_FADV_NORMAL:
0081         file->f_ra.ra_pages = bdi->ra_pages;
0082         spin_lock(&file->f_lock);
0083         file->f_mode &= ~FMODE_RANDOM;
0084         spin_unlock(&file->f_lock);
0085         break;
0086     case POSIX_FADV_RANDOM:
0087         spin_lock(&file->f_lock);
0088         file->f_mode |= FMODE_RANDOM;
0089         spin_unlock(&file->f_lock);
0090         break;
0091     case POSIX_FADV_SEQUENTIAL:
0092         file->f_ra.ra_pages = bdi->ra_pages * 2;
0093         spin_lock(&file->f_lock);
0094         file->f_mode &= ~FMODE_RANDOM;
0095         spin_unlock(&file->f_lock);
0096         break;
0097     case POSIX_FADV_WILLNEED:
0098         /* First and last PARTIAL page! */
0099         start_index = offset >> PAGE_SHIFT;
0100         end_index = endbyte >> PAGE_SHIFT;
0101 
0102         /* Careful about overflow on the "+1" */
0103         nrpages = end_index - start_index + 1;
0104         if (!nrpages)
0105             nrpages = ~0UL;
0106 
0107         force_page_cache_readahead(mapping, file, start_index, nrpages);
0108         break;
0109     case POSIX_FADV_NOREUSE:
0110         break;
0111     case POSIX_FADV_DONTNEED:
0112         __filemap_fdatawrite_range(mapping, offset, endbyte,
0113                        WB_SYNC_NONE);
0114 
0115         /*
0116          * First and last FULL page! Partial pages are deliberately
0117          * preserved on the expectation that it is better to preserve
0118          * needed memory than to discard unneeded memory.
0119          */
0120         start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
0121         end_index = (endbyte >> PAGE_SHIFT);
0122         /*
0123          * The page at end_index will be inclusively discarded according
0124          * by invalidate_mapping_pages(), so subtracting 1 from
0125          * end_index means we will skip the last page.  But if endbyte
0126          * is page aligned or is at the end of file, we should not skip
0127          * that page - discarding the last page is safe enough.
0128          */
0129         if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK &&
0130                 endbyte != inode->i_size - 1) {
0131             /* First page is tricky as 0 - 1 = -1, but pgoff_t
0132              * is unsigned, so the end_index >= start_index
0133              * check below would be true and we'll discard the whole
0134              * file cache which is not what was asked.
0135              */
0136             if (end_index == 0)
0137                 break;
0138 
0139             end_index--;
0140         }
0141 
0142         if (end_index >= start_index) {
0143             unsigned long nr_pagevec = 0;
0144 
0145             /*
0146              * It's common to FADV_DONTNEED right after
0147              * the read or write that instantiates the
0148              * pages, in which case there will be some
0149              * sitting on the local LRU cache. Try to
0150              * avoid the expensive remote drain and the
0151              * second cache tree walk below by flushing
0152              * them out right away.
0153              */
0154             lru_add_drain();
0155 
0156             invalidate_mapping_pagevec(mapping,
0157                         start_index, end_index,
0158                         &nr_pagevec);
0159 
0160             /*
0161              * If fewer pages were invalidated than expected then
0162              * it is possible that some of the pages were on
0163              * a per-cpu pagevec for a remote CPU. Drain all
0164              * pagevecs and try again.
0165              */
0166             if (nr_pagevec) {
0167                 lru_add_drain_all();
0168                 invalidate_mapping_pages(mapping, start_index,
0169                         end_index);
0170             }
0171         }
0172         break;
0173     default:
0174         return -EINVAL;
0175     }
0176     return 0;
0177 }
0178 EXPORT_SYMBOL(generic_fadvise);
0179 
0180 int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
0181 {
0182     if (file->f_op->fadvise)
0183         return file->f_op->fadvise(file, offset, len, advice);
0184 
0185     return generic_fadvise(file, offset, len, advice);
0186 }
0187 EXPORT_SYMBOL(vfs_fadvise);
0188 
0189 #ifdef CONFIG_ADVISE_SYSCALLS
0190 
0191 int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
0192 {
0193     struct fd f = fdget(fd);
0194     int ret;
0195 
0196     if (!f.file)
0197         return -EBADF;
0198 
0199     ret = vfs_fadvise(f.file, offset, len, advice);
0200 
0201     fdput(f);
0202     return ret;
0203 }
0204 
0205 SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
0206 {
0207     return ksys_fadvise64_64(fd, offset, len, advice);
0208 }
0209 
0210 #ifdef __ARCH_WANT_SYS_FADVISE64
0211 
0212 SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
0213 {
0214     return ksys_fadvise64_64(fd, offset, len, advice);
0215 }
0216 
0217 #endif
0218 
0219 #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FADVISE64_64)
0220 
0221 COMPAT_SYSCALL_DEFINE6(fadvise64_64, int, fd, compat_arg_u64_dual(offset),
0222                compat_arg_u64_dual(len), int, advice)
0223 {
0224     return ksys_fadvise64_64(fd, compat_arg_u64_glue(offset),
0225                  compat_arg_u64_glue(len), advice);
0226 }
0227 
0228 #endif
0229 #endif