Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  *  linux/fs/read_write.c
0004  *
0005  *  Copyright (C) 1991, 1992  Linus Torvalds
0006  */
0007 
0008 #include <linux/slab.h>
0009 #include <linux/stat.h>
0010 #include <linux/sched/xacct.h>
0011 #include <linux/fcntl.h>
0012 #include <linux/file.h>
0013 #include <linux/uio.h>
0014 #include <linux/fsnotify.h>
0015 #include <linux/security.h>
0016 #include <linux/export.h>
0017 #include <linux/syscalls.h>
0018 #include <linux/pagemap.h>
0019 #include <linux/splice.h>
0020 #include <linux/compat.h>
0021 #include <linux/mount.h>
0022 #include <linux/fs.h>
0023 #include "internal.h"
0024 
0025 #include <linux/uaccess.h>
0026 #include <asm/unistd.h>
0027 
0028 const struct file_operations generic_ro_fops = {
0029     .llseek     = generic_file_llseek,
0030     .read_iter  = generic_file_read_iter,
0031     .mmap       = generic_file_readonly_mmap,
0032     .splice_read    = generic_file_splice_read,
0033 };
0034 
0035 EXPORT_SYMBOL(generic_ro_fops);
0036 
0037 static inline bool unsigned_offsets(struct file *file)
0038 {
0039     return file->f_mode & FMODE_UNSIGNED_OFFSET;
0040 }
0041 
0042 /**
0043  * vfs_setpos - update the file offset for lseek
0044  * @file:   file structure in question
0045  * @offset: file offset to seek to
0046  * @maxsize:    maximum file size
0047  *
0048  * This is a low-level filesystem helper for updating the file offset to
0049  * the value specified by @offset if the given offset is valid and it is
0050  * not equal to the current file offset.
0051  *
0052  * Return the specified offset on success and -EINVAL on invalid offset.
0053  */
0054 loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
0055 {
0056     if (offset < 0 && !unsigned_offsets(file))
0057         return -EINVAL;
0058     if (offset > maxsize)
0059         return -EINVAL;
0060 
0061     if (offset != file->f_pos) {
0062         file->f_pos = offset;
0063         file->f_version = 0;
0064     }
0065     return offset;
0066 }
0067 EXPORT_SYMBOL(vfs_setpos);
0068 
0069 /**
0070  * generic_file_llseek_size - generic llseek implementation for regular files
0071  * @file:   file structure to seek on
0072  * @offset: file offset to seek to
0073  * @whence: type of seek
0074  * @size:   max size of this file in file system
0075  * @eof:    offset used for SEEK_END position
0076  *
0077  * This is a variant of generic_file_llseek that allows passing in a custom
0078  * maximum file size and a custom EOF position, for e.g. hashed directories
0079  *
0080  * Synchronization:
0081  * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
0082  * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
0083  * read/writes behave like SEEK_SET against seeks.
0084  */
0085 loff_t
0086 generic_file_llseek_size(struct file *file, loff_t offset, int whence,
0087         loff_t maxsize, loff_t eof)
0088 {
0089     switch (whence) {
0090     case SEEK_END:
0091         offset += eof;
0092         break;
0093     case SEEK_CUR:
0094         /*
0095          * Here we special-case the lseek(fd, 0, SEEK_CUR)
0096          * position-querying operation.  Avoid rewriting the "same"
0097          * f_pos value back to the file because a concurrent read(),
0098          * write() or lseek() might have altered it
0099          */
0100         if (offset == 0)
0101             return file->f_pos;
0102         /*
0103          * f_lock protects against read/modify/write race with other
0104          * SEEK_CURs. Note that parallel writes and reads behave
0105          * like SEEK_SET.
0106          */
0107         spin_lock(&file->f_lock);
0108         offset = vfs_setpos(file, file->f_pos + offset, maxsize);
0109         spin_unlock(&file->f_lock);
0110         return offset;
0111     case SEEK_DATA:
0112         /*
0113          * In the generic case the entire file is data, so as long as
0114          * offset isn't at the end of the file then the offset is data.
0115          */
0116         if ((unsigned long long)offset >= eof)
0117             return -ENXIO;
0118         break;
0119     case SEEK_HOLE:
0120         /*
0121          * There is a virtual hole at the end of the file, so as long as
0122          * offset isn't i_size or larger, return i_size.
0123          */
0124         if ((unsigned long long)offset >= eof)
0125             return -ENXIO;
0126         offset = eof;
0127         break;
0128     }
0129 
0130     return vfs_setpos(file, offset, maxsize);
0131 }
0132 EXPORT_SYMBOL(generic_file_llseek_size);
0133 
0134 /**
0135  * generic_file_llseek - generic llseek implementation for regular files
0136  * @file:   file structure to seek on
0137  * @offset: file offset to seek to
0138  * @whence: type of seek
0139  *
0140  * This is a generic implemenation of ->llseek useable for all normal local
0141  * filesystems.  It just updates the file offset to the value specified by
0142  * @offset and @whence.
0143  */
0144 loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
0145 {
0146     struct inode *inode = file->f_mapping->host;
0147 
0148     return generic_file_llseek_size(file, offset, whence,
0149                     inode->i_sb->s_maxbytes,
0150                     i_size_read(inode));
0151 }
0152 EXPORT_SYMBOL(generic_file_llseek);
0153 
0154 /**
0155  * fixed_size_llseek - llseek implementation for fixed-sized devices
0156  * @file:   file structure to seek on
0157  * @offset: file offset to seek to
0158  * @whence: type of seek
0159  * @size:   size of the file
0160  *
0161  */
0162 loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
0163 {
0164     switch (whence) {
0165     case SEEK_SET: case SEEK_CUR: case SEEK_END:
0166         return generic_file_llseek_size(file, offset, whence,
0167                         size, size);
0168     default:
0169         return -EINVAL;
0170     }
0171 }
0172 EXPORT_SYMBOL(fixed_size_llseek);
0173 
0174 /**
0175  * no_seek_end_llseek - llseek implementation for fixed-sized devices
0176  * @file:   file structure to seek on
0177  * @offset: file offset to seek to
0178  * @whence: type of seek
0179  *
0180  */
0181 loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
0182 {
0183     switch (whence) {
0184     case SEEK_SET: case SEEK_CUR:
0185         return generic_file_llseek_size(file, offset, whence,
0186                         OFFSET_MAX, 0);
0187     default:
0188         return -EINVAL;
0189     }
0190 }
0191 EXPORT_SYMBOL(no_seek_end_llseek);
0192 
0193 /**
0194  * no_seek_end_llseek_size - llseek implementation for fixed-sized devices
0195  * @file:   file structure to seek on
0196  * @offset: file offset to seek to
0197  * @whence: type of seek
0198  * @size:   maximal offset allowed
0199  *
0200  */
0201 loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size)
0202 {
0203     switch (whence) {
0204     case SEEK_SET: case SEEK_CUR:
0205         return generic_file_llseek_size(file, offset, whence,
0206                         size, 0);
0207     default:
0208         return -EINVAL;
0209     }
0210 }
0211 EXPORT_SYMBOL(no_seek_end_llseek_size);
0212 
0213 /**
0214  * noop_llseek - No Operation Performed llseek implementation
0215  * @file:   file structure to seek on
0216  * @offset: file offset to seek to
0217  * @whence: type of seek
0218  *
0219  * This is an implementation of ->llseek useable for the rare special case when
0220  * userspace expects the seek to succeed but the (device) file is actually not
0221  * able to perform the seek. In this case you use noop_llseek() instead of
0222  * falling back to the default implementation of ->llseek.
0223  */
0224 loff_t noop_llseek(struct file *file, loff_t offset, int whence)
0225 {
0226     return file->f_pos;
0227 }
0228 EXPORT_SYMBOL(noop_llseek);
0229 
0230 loff_t default_llseek(struct file *file, loff_t offset, int whence)
0231 {
0232     struct inode *inode = file_inode(file);
0233     loff_t retval;
0234 
0235     inode_lock(inode);
0236     switch (whence) {
0237         case SEEK_END:
0238             offset += i_size_read(inode);
0239             break;
0240         case SEEK_CUR:
0241             if (offset == 0) {
0242                 retval = file->f_pos;
0243                 goto out;
0244             }
0245             offset += file->f_pos;
0246             break;
0247         case SEEK_DATA:
0248             /*
0249              * In the generic case the entire file is data, so as
0250              * long as offset isn't at the end of the file then the
0251              * offset is data.
0252              */
0253             if (offset >= inode->i_size) {
0254                 retval = -ENXIO;
0255                 goto out;
0256             }
0257             break;
0258         case SEEK_HOLE:
0259             /*
0260              * There is a virtual hole at the end of the file, so
0261              * as long as offset isn't i_size or larger, return
0262              * i_size.
0263              */
0264             if (offset >= inode->i_size) {
0265                 retval = -ENXIO;
0266                 goto out;
0267             }
0268             offset = inode->i_size;
0269             break;
0270     }
0271     retval = -EINVAL;
0272     if (offset >= 0 || unsigned_offsets(file)) {
0273         if (offset != file->f_pos) {
0274             file->f_pos = offset;
0275             file->f_version = 0;
0276         }
0277         retval = offset;
0278     }
0279 out:
0280     inode_unlock(inode);
0281     return retval;
0282 }
0283 EXPORT_SYMBOL(default_llseek);
0284 
0285 loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
0286 {
0287     if (!(file->f_mode & FMODE_LSEEK))
0288         return -ESPIPE;
0289     return file->f_op->llseek(file, offset, whence);
0290 }
0291 EXPORT_SYMBOL(vfs_llseek);
0292 
0293 static off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence)
0294 {
0295     off_t retval;
0296     struct fd f = fdget_pos(fd);
0297     if (!f.file)
0298         return -EBADF;
0299 
0300     retval = -EINVAL;
0301     if (whence <= SEEK_MAX) {
0302         loff_t res = vfs_llseek(f.file, offset, whence);
0303         retval = res;
0304         if (res != (loff_t)retval)
0305             retval = -EOVERFLOW;    /* LFS: should only happen on 32 bit platforms */
0306     }
0307     fdput_pos(f);
0308     return retval;
0309 }
0310 
0311 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
0312 {
0313     return ksys_lseek(fd, offset, whence);
0314 }
0315 
#ifdef CONFIG_COMPAT
/* Compat lseek syscall: the compat_off_t offset is handed to ksys_lseek(). */
COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
{
	off_t newpos = ksys_lseek(fd, offset, whence);

	return newpos;
}
#endif
0322 
0323 #if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT) || \
0324     defined(__ARCH_WANT_SYS_LLSEEK)
0325 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
0326         unsigned long, offset_low, loff_t __user *, result,
0327         unsigned int, whence)
0328 {
0329     int retval;
0330     struct fd f = fdget_pos(fd);
0331     loff_t offset;
0332 
0333     if (!f.file)
0334         return -EBADF;
0335 
0336     retval = -EINVAL;
0337     if (whence > SEEK_MAX)
0338         goto out_putf;
0339 
0340     offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
0341             whence);
0342 
0343     retval = (int)offset;
0344     if (offset >= 0) {
0345         retval = -EFAULT;
0346         if (!copy_to_user(result, &offset, sizeof(offset)))
0347             retval = 0;
0348     }
0349 out_putf:
0350     fdput_pos(f);
0351     return retval;
0352 }
0353 #endif
0354 
0355 int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
0356 {
0357     if (unlikely((ssize_t) count < 0))
0358         return -EINVAL;
0359 
0360     if (ppos) {
0361         loff_t pos = *ppos;
0362 
0363         if (unlikely(pos < 0)) {
0364             if (!unsigned_offsets(file))
0365                 return -EINVAL;
0366             if (count >= -pos) /* both values are in 0..LLONG_MAX */
0367                 return -EOVERFLOW;
0368         } else if (unlikely((loff_t) (pos + count) < 0)) {
0369             if (!unsigned_offsets(file))
0370                 return -EINVAL;
0371         }
0372     }
0373 
0374     return security_file_permission(file,
0375                 read_write == READ ? MAY_READ : MAY_WRITE);
0376 }
0377 EXPORT_SYMBOL(rw_verify_area);
0378 
0379 static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
0380 {
0381     struct kiocb kiocb;
0382     struct iov_iter iter;
0383     ssize_t ret;
0384 
0385     init_sync_kiocb(&kiocb, filp);
0386     kiocb.ki_pos = (ppos ? *ppos : 0);
0387     iov_iter_ubuf(&iter, READ, buf, len);
0388 
0389     ret = call_read_iter(filp, &kiocb, &iter);
0390     BUG_ON(ret == -EIOCBQUEUED);
0391     if (ppos)
0392         *ppos = kiocb.ki_pos;
0393     return ret;
0394 }
0395 
0396 static int warn_unsupported(struct file *file, const char *op)
0397 {
0398     pr_warn_ratelimited(
0399         "kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
0400         op, file, current->pid, current->comm);
0401     return -EINVAL;
0402 }
0403 
0404 ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
0405 {
0406     struct kvec iov = {
0407         .iov_base   = buf,
0408         .iov_len    = min_t(size_t, count, MAX_RW_COUNT),
0409     };
0410     struct kiocb kiocb;
0411     struct iov_iter iter;
0412     ssize_t ret;
0413 
0414     if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ)))
0415         return -EINVAL;
0416     if (!(file->f_mode & FMODE_CAN_READ))
0417         return -EINVAL;
0418     /*
0419      * Also fail if ->read_iter and ->read are both wired up as that
0420      * implies very convoluted semantics.
0421      */
0422     if (unlikely(!file->f_op->read_iter || file->f_op->read))
0423         return warn_unsupported(file, "read");
0424 
0425     init_sync_kiocb(&kiocb, file);
0426     kiocb.ki_pos = pos ? *pos : 0;
0427     iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len);
0428     ret = file->f_op->read_iter(&kiocb, &iter);
0429     if (ret > 0) {
0430         if (pos)
0431             *pos = kiocb.ki_pos;
0432         fsnotify_access(file);
0433         add_rchar(current, ret);
0434     }
0435     inc_syscr(current);
0436     return ret;
0437 }
0438 
0439 ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
0440 {
0441     ssize_t ret;
0442 
0443     ret = rw_verify_area(READ, file, pos, count);
0444     if (ret)
0445         return ret;
0446     return __kernel_read(file, buf, count, pos);
0447 }
0448 EXPORT_SYMBOL(kernel_read);
0449 
0450 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
0451 {
0452     ssize_t ret;
0453 
0454     if (!(file->f_mode & FMODE_READ))
0455         return -EBADF;
0456     if (!(file->f_mode & FMODE_CAN_READ))
0457         return -EINVAL;
0458     if (unlikely(!access_ok(buf, count)))
0459         return -EFAULT;
0460 
0461     ret = rw_verify_area(READ, file, pos, count);
0462     if (ret)
0463         return ret;
0464     if (count > MAX_RW_COUNT)
0465         count =  MAX_RW_COUNT;
0466 
0467     if (file->f_op->read)
0468         ret = file->f_op->read(file, buf, count, pos);
0469     else if (file->f_op->read_iter)
0470         ret = new_sync_read(file, buf, count, pos);
0471     else
0472         ret = -EINVAL;
0473     if (ret > 0) {
0474         fsnotify_access(file);
0475         add_rchar(current, ret);
0476     }
0477     inc_syscr(current);
0478     return ret;
0479 }
0480 
0481 static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
0482 {
0483     struct kiocb kiocb;
0484     struct iov_iter iter;
0485     ssize_t ret;
0486 
0487     init_sync_kiocb(&kiocb, filp);
0488     kiocb.ki_pos = (ppos ? *ppos : 0);
0489     iov_iter_ubuf(&iter, WRITE, (void __user *)buf, len);
0490 
0491     ret = call_write_iter(filp, &kiocb, &iter);
0492     BUG_ON(ret == -EIOCBQUEUED);
0493     if (ret > 0 && ppos)
0494         *ppos = kiocb.ki_pos;
0495     return ret;
0496 }
0497 
0498 /* caller is responsible for file_start_write/file_end_write */
0499 ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos)
0500 {
0501     struct kiocb kiocb;
0502     ssize_t ret;
0503 
0504     if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE)))
0505         return -EBADF;
0506     if (!(file->f_mode & FMODE_CAN_WRITE))
0507         return -EINVAL;
0508     /*
0509      * Also fail if ->write_iter and ->write are both wired up as that
0510      * implies very convoluted semantics.
0511      */
0512     if (unlikely(!file->f_op->write_iter || file->f_op->write))
0513         return warn_unsupported(file, "write");
0514 
0515     init_sync_kiocb(&kiocb, file);
0516     kiocb.ki_pos = pos ? *pos : 0;
0517     ret = file->f_op->write_iter(&kiocb, from);
0518     if (ret > 0) {
0519         if (pos)
0520             *pos = kiocb.ki_pos;
0521         fsnotify_modify(file);
0522         add_wchar(current, ret);
0523     }
0524     inc_syscw(current);
0525     return ret;
0526 }
0527 
0528 /* caller is responsible for file_start_write/file_end_write */
0529 ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
0530 {
0531     struct kvec iov = {
0532         .iov_base   = (void *)buf,
0533         .iov_len    = min_t(size_t, count, MAX_RW_COUNT),
0534     };
0535     struct iov_iter iter;
0536     iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len);
0537     return __kernel_write_iter(file, &iter, pos);
0538 }
0539 /*
0540  * This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()",
0541  * but autofs is one of the few internal kernel users that actually
0542  * wants this _and_ can be built as a module. So we need to export
0543  * this symbol for autofs, even though it really isn't appropriate
0544  * for any other kernel modules.
0545  */
0546 EXPORT_SYMBOL_GPL(__kernel_write);
0547 
0548 ssize_t kernel_write(struct file *file, const void *buf, size_t count,
0549                 loff_t *pos)
0550 {
0551     ssize_t ret;
0552 
0553     ret = rw_verify_area(WRITE, file, pos, count);
0554     if (ret)
0555         return ret;
0556 
0557     file_start_write(file);
0558     ret =  __kernel_write(file, buf, count, pos);
0559     file_end_write(file);
0560     return ret;
0561 }
0562 EXPORT_SYMBOL(kernel_write);
0563 
0564 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
0565 {
0566     ssize_t ret;
0567 
0568     if (!(file->f_mode & FMODE_WRITE))
0569         return -EBADF;
0570     if (!(file->f_mode & FMODE_CAN_WRITE))
0571         return -EINVAL;
0572     if (unlikely(!access_ok(buf, count)))
0573         return -EFAULT;
0574 
0575     ret = rw_verify_area(WRITE, file, pos, count);
0576     if (ret)
0577         return ret;
0578     if (count > MAX_RW_COUNT)
0579         count =  MAX_RW_COUNT;
0580     file_start_write(file);
0581     if (file->f_op->write)
0582         ret = file->f_op->write(file, buf, count, pos);
0583     else if (file->f_op->write_iter)
0584         ret = new_sync_write(file, buf, count, pos);
0585     else
0586         ret = -EINVAL;
0587     if (ret > 0) {
0588         fsnotify_modify(file);
0589         add_wchar(current, ret);
0590     }
0591     inc_syscw(current);
0592     file_end_write(file);
0593     return ret;
0594 }
0595 
0596 /* file_ppos returns &file->f_pos or NULL if file is stream */
0597 static inline loff_t *file_ppos(struct file *file)
0598 {
0599     return file->f_mode & FMODE_STREAM ? NULL : &file->f_pos;
0600 }
0601 
0602 ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
0603 {
0604     struct fd f = fdget_pos(fd);
0605     ssize_t ret = -EBADF;
0606 
0607     if (f.file) {
0608         loff_t pos, *ppos = file_ppos(f.file);
0609         if (ppos) {
0610             pos = *ppos;
0611             ppos = &pos;
0612         }
0613         ret = vfs_read(f.file, buf, count, ppos);
0614         if (ret >= 0 && ppos)
0615             f.file->f_pos = pos;
0616         fdput_pos(f);
0617     }
0618     return ret;
0619 }
0620 
0621 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
0622 {
0623     return ksys_read(fd, buf, count);
0624 }
0625 
0626 ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count)
0627 {
0628     struct fd f = fdget_pos(fd);
0629     ssize_t ret = -EBADF;
0630 
0631     if (f.file) {
0632         loff_t pos, *ppos = file_ppos(f.file);
0633         if (ppos) {
0634             pos = *ppos;
0635             ppos = &pos;
0636         }
0637         ret = vfs_write(f.file, buf, count, ppos);
0638         if (ret >= 0 && ppos)
0639             f.file->f_pos = pos;
0640         fdput_pos(f);
0641     }
0642 
0643     return ret;
0644 }
0645 
0646 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
0647         size_t, count)
0648 {
0649     return ksys_write(fd, buf, count);
0650 }
0651 
0652 ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count,
0653              loff_t pos)
0654 {
0655     struct fd f;
0656     ssize_t ret = -EBADF;
0657 
0658     if (pos < 0)
0659         return -EINVAL;
0660 
0661     f = fdget(fd);
0662     if (f.file) {
0663         ret = -ESPIPE;
0664         if (f.file->f_mode & FMODE_PREAD)
0665             ret = vfs_read(f.file, buf, count, &pos);
0666         fdput(f);
0667     }
0668 
0669     return ret;
0670 }
0671 
0672 SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
0673             size_t, count, loff_t, pos)
0674 {
0675     return ksys_pread64(fd, buf, count, pos);
0676 }
0677 
#if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_PREAD64)
/*
 * Compat pread64 syscall: the offset arrives via compat_arg_u64_dual() and
 * is recombined with compat_arg_u64_glue() before calling ksys_pread64().
 */
COMPAT_SYSCALL_DEFINE5(pread64, unsigned int, fd, char __user *, buf,
		       size_t, count, compat_arg_u64_dual(pos))
{
	ssize_t nread = ksys_pread64(fd, buf, count, compat_arg_u64_glue(pos));

	return nread;
}
#endif
0685 
0686 ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf,
0687               size_t count, loff_t pos)
0688 {
0689     struct fd f;
0690     ssize_t ret = -EBADF;
0691 
0692     if (pos < 0)
0693         return -EINVAL;
0694 
0695     f = fdget(fd);
0696     if (f.file) {
0697         ret = -ESPIPE;
0698         if (f.file->f_mode & FMODE_PWRITE)  
0699             ret = vfs_write(f.file, buf, count, &pos);
0700         fdput(f);
0701     }
0702 
0703     return ret;
0704 }
0705 
0706 SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
0707              size_t, count, loff_t, pos)
0708 {
0709     return ksys_pwrite64(fd, buf, count, pos);
0710 }
0711 
#if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_PWRITE64)
/*
 * Compat pwrite64 syscall: the offset arrives via compat_arg_u64_dual() and
 * is recombined with compat_arg_u64_glue() before calling ksys_pwrite64().
 */
COMPAT_SYSCALL_DEFINE5(pwrite64, unsigned int, fd, const char __user *, buf,
		       size_t, count, compat_arg_u64_dual(pos))
{
	ssize_t nwritten = ksys_pwrite64(fd, buf, count, compat_arg_u64_glue(pos));

	return nwritten;
}
#endif
0719 
0720 static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
0721         loff_t *ppos, int type, rwf_t flags)
0722 {
0723     struct kiocb kiocb;
0724     ssize_t ret;
0725 
0726     init_sync_kiocb(&kiocb, filp);
0727     ret = kiocb_set_rw_flags(&kiocb, flags);
0728     if (ret)
0729         return ret;
0730     kiocb.ki_pos = (ppos ? *ppos : 0);
0731 
0732     if (type == READ)
0733         ret = call_read_iter(filp, &kiocb, iter);
0734     else
0735         ret = call_write_iter(filp, &kiocb, iter);
0736     BUG_ON(ret == -EIOCBQUEUED);
0737     if (ppos)
0738         *ppos = kiocb.ki_pos;
0739     return ret;
0740 }
0741 
0742 /* Do it by hand, with file-ops */
0743 static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
0744         loff_t *ppos, int type, rwf_t flags)
0745 {
0746     ssize_t ret = 0;
0747 
0748     if (flags & ~RWF_HIPRI)
0749         return -EOPNOTSUPP;
0750 
0751     while (iov_iter_count(iter)) {
0752         struct iovec iovec = iov_iter_iovec(iter);
0753         ssize_t nr;
0754 
0755         if (type == READ) {
0756             nr = filp->f_op->read(filp, iovec.iov_base,
0757                           iovec.iov_len, ppos);
0758         } else {
0759             nr = filp->f_op->write(filp, iovec.iov_base,
0760                            iovec.iov_len, ppos);
0761         }
0762 
0763         if (nr < 0) {
0764             if (!ret)
0765                 ret = nr;
0766             break;
0767         }
0768         ret += nr;
0769         if (nr != iovec.iov_len)
0770             break;
0771         iov_iter_advance(iter, nr);
0772     }
0773 
0774     return ret;
0775 }
0776 
0777 static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
0778         loff_t *pos, rwf_t flags)
0779 {
0780     size_t tot_len;
0781     ssize_t ret = 0;
0782 
0783     if (!(file->f_mode & FMODE_READ))
0784         return -EBADF;
0785     if (!(file->f_mode & FMODE_CAN_READ))
0786         return -EINVAL;
0787 
0788     tot_len = iov_iter_count(iter);
0789     if (!tot_len)
0790         goto out;
0791     ret = rw_verify_area(READ, file, pos, tot_len);
0792     if (ret < 0)
0793         return ret;
0794 
0795     if (file->f_op->read_iter)
0796         ret = do_iter_readv_writev(file, iter, pos, READ, flags);
0797     else
0798         ret = do_loop_readv_writev(file, iter, pos, READ, flags);
0799 out:
0800     if (ret >= 0)
0801         fsnotify_access(file);
0802     return ret;
0803 }
0804 
0805 ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
0806                struct iov_iter *iter)
0807 {
0808     size_t tot_len;
0809     ssize_t ret = 0;
0810 
0811     if (!file->f_op->read_iter)
0812         return -EINVAL;
0813     if (!(file->f_mode & FMODE_READ))
0814         return -EBADF;
0815     if (!(file->f_mode & FMODE_CAN_READ))
0816         return -EINVAL;
0817 
0818     tot_len = iov_iter_count(iter);
0819     if (!tot_len)
0820         goto out;
0821     ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len);
0822     if (ret < 0)
0823         return ret;
0824 
0825     ret = call_read_iter(file, iocb, iter);
0826 out:
0827     if (ret >= 0)
0828         fsnotify_access(file);
0829     return ret;
0830 }
0831 EXPORT_SYMBOL(vfs_iocb_iter_read);
0832 
0833 ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
0834         rwf_t flags)
0835 {
0836     if (!file->f_op->read_iter)
0837         return -EINVAL;
0838     return do_iter_read(file, iter, ppos, flags);
0839 }
0840 EXPORT_SYMBOL(vfs_iter_read);
0841 
0842 static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
0843         loff_t *pos, rwf_t flags)
0844 {
0845     size_t tot_len;
0846     ssize_t ret = 0;
0847 
0848     if (!(file->f_mode & FMODE_WRITE))
0849         return -EBADF;
0850     if (!(file->f_mode & FMODE_CAN_WRITE))
0851         return -EINVAL;
0852 
0853     tot_len = iov_iter_count(iter);
0854     if (!tot_len)
0855         return 0;
0856     ret = rw_verify_area(WRITE, file, pos, tot_len);
0857     if (ret < 0)
0858         return ret;
0859 
0860     if (file->f_op->write_iter)
0861         ret = do_iter_readv_writev(file, iter, pos, WRITE, flags);
0862     else
0863         ret = do_loop_readv_writev(file, iter, pos, WRITE, flags);
0864     if (ret > 0)
0865         fsnotify_modify(file);
0866     return ret;
0867 }
0868 
0869 ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
0870                 struct iov_iter *iter)
0871 {
0872     size_t tot_len;
0873     ssize_t ret = 0;
0874 
0875     if (!file->f_op->write_iter)
0876         return -EINVAL;
0877     if (!(file->f_mode & FMODE_WRITE))
0878         return -EBADF;
0879     if (!(file->f_mode & FMODE_CAN_WRITE))
0880         return -EINVAL;
0881 
0882     tot_len = iov_iter_count(iter);
0883     if (!tot_len)
0884         return 0;
0885     ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len);
0886     if (ret < 0)
0887         return ret;
0888 
0889     ret = call_write_iter(file, iocb, iter);
0890     if (ret > 0)
0891         fsnotify_modify(file);
0892 
0893     return ret;
0894 }
0895 EXPORT_SYMBOL(vfs_iocb_iter_write);
0896 
0897 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
0898         rwf_t flags)
0899 {
0900     if (!file->f_op->write_iter)
0901         return -EINVAL;
0902     return do_iter_write(file, iter, ppos, flags);
0903 }
0904 EXPORT_SYMBOL(vfs_iter_write);
0905 
0906 static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
0907           unsigned long vlen, loff_t *pos, rwf_t flags)
0908 {
0909     struct iovec iovstack[UIO_FASTIOV];
0910     struct iovec *iov = iovstack;
0911     struct iov_iter iter;
0912     ssize_t ret;
0913 
0914     ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
0915     if (ret >= 0) {
0916         ret = do_iter_read(file, &iter, pos, flags);
0917         kfree(iov);
0918     }
0919 
0920     return ret;
0921 }
0922 
0923 static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
0924            unsigned long vlen, loff_t *pos, rwf_t flags)
0925 {
0926     struct iovec iovstack[UIO_FASTIOV];
0927     struct iovec *iov = iovstack;
0928     struct iov_iter iter;
0929     ssize_t ret;
0930 
0931     ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
0932     if (ret >= 0) {
0933         file_start_write(file);
0934         ret = do_iter_write(file, &iter, pos, flags);
0935         file_end_write(file);
0936         kfree(iov);
0937     }
0938     return ret;
0939 }
0940 
0941 static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
0942             unsigned long vlen, rwf_t flags)
0943 {
0944     struct fd f = fdget_pos(fd);
0945     ssize_t ret = -EBADF;
0946 
0947     if (f.file) {
0948         loff_t pos, *ppos = file_ppos(f.file);
0949         if (ppos) {
0950             pos = *ppos;
0951             ppos = &pos;
0952         }
0953         ret = vfs_readv(f.file, vec, vlen, ppos, flags);
0954         if (ret >= 0 && ppos)
0955             f.file->f_pos = pos;
0956         fdput_pos(f);
0957     }
0958 
0959     if (ret > 0)
0960         add_rchar(current, ret);
0961     inc_syscr(current);
0962     return ret;
0963 }
0964 
0965 static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
0966              unsigned long vlen, rwf_t flags)
0967 {
0968     struct fd f = fdget_pos(fd);
0969     ssize_t ret = -EBADF;
0970 
0971     if (f.file) {
0972         loff_t pos, *ppos = file_ppos(f.file);
0973         if (ppos) {
0974             pos = *ppos;
0975             ppos = &pos;
0976         }
0977         ret = vfs_writev(f.file, vec, vlen, ppos, flags);
0978         if (ret >= 0 && ppos)
0979             f.file->f_pos = pos;
0980         fdput_pos(f);
0981     }
0982 
0983     if (ret > 0)
0984         add_wchar(current, ret);
0985     inc_syscw(current);
0986     return ret;
0987 }
0988 
0989 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
0990 {
0991 #define HALF_LONG_BITS (BITS_PER_LONG / 2)
0992     return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
0993 }
0994 
0995 static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
0996              unsigned long vlen, loff_t pos, rwf_t flags)
0997 {
0998     struct fd f;
0999     ssize_t ret = -EBADF;
1000 
1001     if (pos < 0)
1002         return -EINVAL;
1003 
1004     f = fdget(fd);
1005     if (f.file) {
1006         ret = -ESPIPE;
1007         if (f.file->f_mode & FMODE_PREAD)
1008             ret = vfs_readv(f.file, vec, vlen, &pos, flags);
1009         fdput(f);
1010     }
1011 
1012     if (ret > 0)
1013         add_rchar(current, ret);
1014     inc_syscr(current);
1015     return ret;
1016 }
1017 
1018 static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
1019               unsigned long vlen, loff_t pos, rwf_t flags)
1020 {
1021     struct fd f;
1022     ssize_t ret = -EBADF;
1023 
1024     if (pos < 0)
1025         return -EINVAL;
1026 
1027     f = fdget(fd);
1028     if (f.file) {
1029         ret = -ESPIPE;
1030         if (f.file->f_mode & FMODE_PWRITE)
1031             ret = vfs_writev(f.file, vec, vlen, &pos, flags);
1032         fdput(f);
1033     }
1034 
1035     if (ret > 0)
1036         add_wchar(current, ret);
1037     inc_syscw(current);
1038     return ret;
1039 }
1040 
1041 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
1042         unsigned long, vlen)
1043 {
1044     return do_readv(fd, vec, vlen, 0);
1045 }
1046 
1047 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
1048         unsigned long, vlen)
1049 {
1050     return do_writev(fd, vec, vlen, 0);
1051 }
1052 
1053 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
1054         unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
1055 {
1056     loff_t pos = pos_from_hilo(pos_h, pos_l);
1057 
1058     return do_preadv(fd, vec, vlen, pos, 0);
1059 }
1060 
1061 SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
1062         unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
1063         rwf_t, flags)
1064 {
1065     loff_t pos = pos_from_hilo(pos_h, pos_l);
1066 
1067     if (pos == -1)
1068         return do_readv(fd, vec, vlen, flags);
1069 
1070     return do_preadv(fd, vec, vlen, pos, flags);
1071 }
1072 
1073 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
1074         unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
1075 {
1076     loff_t pos = pos_from_hilo(pos_h, pos_l);
1077 
1078     return do_pwritev(fd, vec, vlen, pos, 0);
1079 }
1080 
1081 SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
1082         unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
1083         rwf_t, flags)
1084 {
1085     loff_t pos = pos_from_hilo(pos_h, pos_l);
1086 
1087     if (pos == -1)
1088         return do_writev(fd, vec, vlen, flags);
1089 
1090     return do_pwritev(fd, vec, vlen, pos, flags);
1091 }
1092 
1093 /*
1094  * Various compat syscalls.  Note that they all pretend to take a native
1095  * iovec - import_iovec will properly treat those as compat_iovecs based on
1096  * in_compat_syscall().
1097  */
1098 #ifdef CONFIG_COMPAT
1099 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
1100 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
1101         const struct iovec __user *, vec,
1102         unsigned long, vlen, loff_t, pos)
1103 {
1104     return do_preadv(fd, vec, vlen, pos, 0);
1105 }
1106 #endif
1107 
1108 COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
1109         const struct iovec __user *, vec,
1110         compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
1111 {
1112     loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1113 
1114     return do_preadv(fd, vec, vlen, pos, 0);
1115 }
1116 
1117 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
1118 COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
1119         const struct iovec __user *, vec,
1120         unsigned long, vlen, loff_t, pos, rwf_t, flags)
1121 {
1122     if (pos == -1)
1123         return do_readv(fd, vec, vlen, flags);
1124     return do_preadv(fd, vec, vlen, pos, flags);
1125 }
1126 #endif
1127 
1128 COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
1129         const struct iovec __user *, vec,
1130         compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
1131         rwf_t, flags)
1132 {
1133     loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1134 
1135     if (pos == -1)
1136         return do_readv(fd, vec, vlen, flags);
1137     return do_preadv(fd, vec, vlen, pos, flags);
1138 }
1139 
1140 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
1141 COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
1142         const struct iovec __user *, vec,
1143         unsigned long, vlen, loff_t, pos)
1144 {
1145     return do_pwritev(fd, vec, vlen, pos, 0);
1146 }
1147 #endif
1148 
1149 COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
1150         const struct iovec __user *,vec,
1151         compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
1152 {
1153     loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1154 
1155     return do_pwritev(fd, vec, vlen, pos, 0);
1156 }
1157 
1158 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
1159 COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
1160         const struct iovec __user *, vec,
1161         unsigned long, vlen, loff_t, pos, rwf_t, flags)
1162 {
1163     if (pos == -1)
1164         return do_writev(fd, vec, vlen, flags);
1165     return do_pwritev(fd, vec, vlen, pos, flags);
1166 }
1167 #endif
1168 
1169 COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
1170         const struct iovec __user *,vec,
1171         compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags)
1172 {
1173     loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1174 
1175     if (pos == -1)
1176         return do_writev(fd, vec, vlen, flags);
1177     return do_pwritev(fd, vec, vlen, pos, flags);
1178 }
1179 #endif /* CONFIG_COMPAT */
1180 
1181 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
1182                size_t count, loff_t max)
1183 {
1184     struct fd in, out;
1185     struct inode *in_inode, *out_inode;
1186     struct pipe_inode_info *opipe;
1187     loff_t pos;
1188     loff_t out_pos;
1189     ssize_t retval;
1190     int fl;
1191 
1192     /*
1193      * Get input file, and verify that it is ok..
1194      */
1195     retval = -EBADF;
1196     in = fdget(in_fd);
1197     if (!in.file)
1198         goto out;
1199     if (!(in.file->f_mode & FMODE_READ))
1200         goto fput_in;
1201     retval = -ESPIPE;
1202     if (!ppos) {
1203         pos = in.file->f_pos;
1204     } else {
1205         pos = *ppos;
1206         if (!(in.file->f_mode & FMODE_PREAD))
1207             goto fput_in;
1208     }
1209     retval = rw_verify_area(READ, in.file, &pos, count);
1210     if (retval < 0)
1211         goto fput_in;
1212     if (count > MAX_RW_COUNT)
1213         count =  MAX_RW_COUNT;
1214 
1215     /*
1216      * Get output file, and verify that it is ok..
1217      */
1218     retval = -EBADF;
1219     out = fdget(out_fd);
1220     if (!out.file)
1221         goto fput_in;
1222     if (!(out.file->f_mode & FMODE_WRITE))
1223         goto fput_out;
1224     in_inode = file_inode(in.file);
1225     out_inode = file_inode(out.file);
1226     out_pos = out.file->f_pos;
1227 
1228     if (!max)
1229         max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
1230 
1231     if (unlikely(pos + count > max)) {
1232         retval = -EOVERFLOW;
1233         if (pos >= max)
1234             goto fput_out;
1235         count = max - pos;
1236     }
1237 
1238     fl = 0;
1239 #if 0
1240     /*
1241      * We need to debate whether we can enable this or not. The
1242      * man page documents EAGAIN return for the output at least,
1243      * and the application is arguably buggy if it doesn't expect
1244      * EAGAIN on a non-blocking file descriptor.
1245      */
1246     if (in.file->f_flags & O_NONBLOCK)
1247         fl = SPLICE_F_NONBLOCK;
1248 #endif
1249     opipe = get_pipe_info(out.file, true);
1250     if (!opipe) {
1251         retval = rw_verify_area(WRITE, out.file, &out_pos, count);
1252         if (retval < 0)
1253             goto fput_out;
1254         file_start_write(out.file);
1255         retval = do_splice_direct(in.file, &pos, out.file, &out_pos,
1256                       count, fl);
1257         file_end_write(out.file);
1258     } else {
1259         if (out.file->f_flags & O_NONBLOCK)
1260             fl |= SPLICE_F_NONBLOCK;
1261 
1262         retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl);
1263     }
1264 
1265     if (retval > 0) {
1266         add_rchar(current, retval);
1267         add_wchar(current, retval);
1268         fsnotify_access(in.file);
1269         fsnotify_modify(out.file);
1270         out.file->f_pos = out_pos;
1271         if (ppos)
1272             *ppos = pos;
1273         else
1274             in.file->f_pos = pos;
1275     }
1276 
1277     inc_syscr(current);
1278     inc_syscw(current);
1279     if (pos > max)
1280         retval = -EOVERFLOW;
1281 
1282 fput_out:
1283     fdput(out);
1284 fput_in:
1285     fdput(in);
1286 out:
1287     return retval;
1288 }
1289 
1290 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
1291 {
1292     loff_t pos;
1293     off_t off;
1294     ssize_t ret;
1295 
1296     if (offset) {
1297         if (unlikely(get_user(off, offset)))
1298             return -EFAULT;
1299         pos = off;
1300         ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1301         if (unlikely(put_user(pos, offset)))
1302             return -EFAULT;
1303         return ret;
1304     }
1305 
1306     return do_sendfile(out_fd, in_fd, NULL, count, 0);
1307 }
1308 
1309 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
1310 {
1311     loff_t pos;
1312     ssize_t ret;
1313 
1314     if (offset) {
1315         if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1316             return -EFAULT;
1317         ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1318         if (unlikely(put_user(pos, offset)))
1319             return -EFAULT;
1320         return ret;
1321     }
1322 
1323     return do_sendfile(out_fd, in_fd, NULL, count, 0);
1324 }
1325 
1326 #ifdef CONFIG_COMPAT
1327 COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
1328         compat_off_t __user *, offset, compat_size_t, count)
1329 {
1330     loff_t pos;
1331     off_t off;
1332     ssize_t ret;
1333 
1334     if (offset) {
1335         if (unlikely(get_user(off, offset)))
1336             return -EFAULT;
1337         pos = off;
1338         ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1339         if (unlikely(put_user(pos, offset)))
1340             return -EFAULT;
1341         return ret;
1342     }
1343 
1344     return do_sendfile(out_fd, in_fd, NULL, count, 0);
1345 }
1346 
1347 COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
1348         compat_loff_t __user *, offset, compat_size_t, count)
1349 {
1350     loff_t pos;
1351     ssize_t ret;
1352 
1353     if (offset) {
1354         if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1355             return -EFAULT;
1356         ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1357         if (unlikely(put_user(pos, offset)))
1358             return -EFAULT;
1359         return ret;
1360     }
1361 
1362     return do_sendfile(out_fd, in_fd, NULL, count, 0);
1363 }
1364 #endif
1365 
1366 /**
1367  * generic_copy_file_range - copy data between two files
1368  * @file_in:    file structure to read from
1369  * @pos_in: file offset to read from
1370  * @file_out:   file structure to write data to
1371  * @pos_out:    file offset to write data to
1372  * @len:    amount of data to copy
1373  * @flags:  copy flags
1374  *
1375  * This is a generic filesystem helper to copy data from one file to another.
1376  * It has no constraints on the source or destination file owners - the files
1377  * can belong to different superblocks and different filesystem types. Short
1378  * copies are allowed.
1379  *
1380  * This should be called from the @file_out filesystem, as per the
1381  * ->copy_file_range() method.
1382  *
1383  * Returns the number of bytes copied or a negative error indicating the
1384  * failure.
1385  */
1386 
1387 ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
1388                 struct file *file_out, loff_t pos_out,
1389                 size_t len, unsigned int flags)
1390 {
1391     return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
1392                 len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
1393 }
1394 EXPORT_SYMBOL(generic_copy_file_range);
1395 
1396 /*
1397  * Performs necessary checks before doing a file copy
1398  *
1399  * Can adjust amount of bytes to copy via @req_count argument.
1400  * Returns appropriate error code that caller should return or
1401  * zero in case the copy should be allowed.
1402  */
1403 static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
1404                     struct file *file_out, loff_t pos_out,
1405                     size_t *req_count, unsigned int flags)
1406 {
1407     struct inode *inode_in = file_inode(file_in);
1408     struct inode *inode_out = file_inode(file_out);
1409     uint64_t count = *req_count;
1410     loff_t size_in;
1411     int ret;
1412 
1413     ret = generic_file_rw_checks(file_in, file_out);
1414     if (ret)
1415         return ret;
1416 
1417     /*
1418      * We allow some filesystems to handle cross sb copy, but passing
1419      * a file of the wrong filesystem type to filesystem driver can result
1420      * in an attempt to dereference the wrong type of ->private_data, so
1421      * avoid doing that until we really have a good reason.
1422      *
1423      * nfs and cifs define several different file_system_type structures
1424      * and several different sets of file_operations, but they all end up
1425      * using the same ->copy_file_range() function pointer.
1426      */
1427     if (file_out->f_op->copy_file_range) {
1428         if (file_in->f_op->copy_file_range !=
1429             file_out->f_op->copy_file_range)
1430             return -EXDEV;
1431     } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) {
1432         return -EXDEV;
1433     }
1434 
1435     /* Don't touch certain kinds of inodes */
1436     if (IS_IMMUTABLE(inode_out))
1437         return -EPERM;
1438 
1439     if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
1440         return -ETXTBSY;
1441 
1442     /* Ensure offsets don't wrap. */
1443     if (pos_in + count < pos_in || pos_out + count < pos_out)
1444         return -EOVERFLOW;
1445 
1446     /* Shorten the copy to EOF */
1447     size_in = i_size_read(inode_in);
1448     if (pos_in >= size_in)
1449         count = 0;
1450     else
1451         count = min(count, size_in - (uint64_t)pos_in);
1452 
1453     ret = generic_write_check_limits(file_out, pos_out, &count);
1454     if (ret)
1455         return ret;
1456 
1457     /* Don't allow overlapped copying within the same file. */
1458     if (inode_in == inode_out &&
1459         pos_out + count > pos_in &&
1460         pos_out < pos_in + count)
1461         return -EINVAL;
1462 
1463     *req_count = count;
1464     return 0;
1465 }
1466 
1467 /*
1468  * copy_file_range() differs from regular file read and write in that it
1469  * specifically allows return partial success.  When it does so is up to
1470  * the copy_file_range method.
1471  */
1472 ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
1473                 struct file *file_out, loff_t pos_out,
1474                 size_t len, unsigned int flags)
1475 {
1476     ssize_t ret;
1477 
1478     if (flags != 0)
1479         return -EINVAL;
1480 
1481     ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
1482                        flags);
1483     if (unlikely(ret))
1484         return ret;
1485 
1486     ret = rw_verify_area(READ, file_in, &pos_in, len);
1487     if (unlikely(ret))
1488         return ret;
1489 
1490     ret = rw_verify_area(WRITE, file_out, &pos_out, len);
1491     if (unlikely(ret))
1492         return ret;
1493 
1494     if (len == 0)
1495         return 0;
1496 
1497     file_start_write(file_out);
1498 
1499     /*
1500      * Cloning is supported by more file systems, so we implement copy on
1501      * same sb using clone, but for filesystems where both clone and copy
1502      * are supported (e.g. nfs,cifs), we only call the copy method.
1503      */
1504     if (file_out->f_op->copy_file_range) {
1505         ret = file_out->f_op->copy_file_range(file_in, pos_in,
1506                               file_out, pos_out,
1507                               len, flags);
1508         goto done;
1509     }
1510 
1511     if (file_in->f_op->remap_file_range &&
1512         file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
1513         ret = file_in->f_op->remap_file_range(file_in, pos_in,
1514                 file_out, pos_out,
1515                 min_t(loff_t, MAX_RW_COUNT, len),
1516                 REMAP_FILE_CAN_SHORTEN);
1517         if (ret > 0)
1518             goto done;
1519     }
1520 
1521     /*
1522      * We can get here for same sb copy of filesystems that do not implement
1523      * ->copy_file_range() in case filesystem does not support clone or in
1524      * case filesystem supports clone but rejected the clone request (e.g.
1525      * because it was not block aligned).
1526      *
1527      * In both cases, fall back to kernel copy so we are able to maintain a
1528      * consistent story about which filesystems support copy_file_range()
1529      * and which filesystems do not, that will allow userspace tools to
1530      * make consistent desicions w.r.t using copy_file_range().
1531      */
1532     ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
1533                       flags);
1534 
1535 done:
1536     if (ret > 0) {
1537         fsnotify_access(file_in);
1538         add_rchar(current, ret);
1539         fsnotify_modify(file_out);
1540         add_wchar(current, ret);
1541     }
1542 
1543     inc_syscr(current);
1544     inc_syscw(current);
1545 
1546     file_end_write(file_out);
1547 
1548     return ret;
1549 }
1550 EXPORT_SYMBOL(vfs_copy_file_range);
1551 
1552 SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
1553         int, fd_out, loff_t __user *, off_out,
1554         size_t, len, unsigned int, flags)
1555 {
1556     loff_t pos_in;
1557     loff_t pos_out;
1558     struct fd f_in;
1559     struct fd f_out;
1560     ssize_t ret = -EBADF;
1561 
1562     f_in = fdget(fd_in);
1563     if (!f_in.file)
1564         goto out2;
1565 
1566     f_out = fdget(fd_out);
1567     if (!f_out.file)
1568         goto out1;
1569 
1570     ret = -EFAULT;
1571     if (off_in) {
1572         if (copy_from_user(&pos_in, off_in, sizeof(loff_t)))
1573             goto out;
1574     } else {
1575         pos_in = f_in.file->f_pos;
1576     }
1577 
1578     if (off_out) {
1579         if (copy_from_user(&pos_out, off_out, sizeof(loff_t)))
1580             goto out;
1581     } else {
1582         pos_out = f_out.file->f_pos;
1583     }
1584 
1585     ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
1586                   flags);
1587     if (ret > 0) {
1588         pos_in += ret;
1589         pos_out += ret;
1590 
1591         if (off_in) {
1592             if (copy_to_user(off_in, &pos_in, sizeof(loff_t)))
1593                 ret = -EFAULT;
1594         } else {
1595             f_in.file->f_pos = pos_in;
1596         }
1597 
1598         if (off_out) {
1599             if (copy_to_user(off_out, &pos_out, sizeof(loff_t)))
1600                 ret = -EFAULT;
1601         } else {
1602             f_out.file->f_pos = pos_out;
1603         }
1604     }
1605 
1606 out:
1607     fdput(f_out);
1608 out1:
1609     fdput(f_in);
1610 out2:
1611     return ret;
1612 }
1613 
1614 /*
1615  * Don't operate on ranges the page cache doesn't support, and don't exceed the
1616  * LFS limits.  If pos is under the limit it becomes a short access.  If it
1617  * exceeds the limit we return -EFBIG.
1618  */
1619 int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count)
1620 {
1621     struct inode *inode = file->f_mapping->host;
1622     loff_t max_size = inode->i_sb->s_maxbytes;
1623     loff_t limit = rlimit(RLIMIT_FSIZE);
1624 
1625     if (limit != RLIM_INFINITY) {
1626         if (pos >= limit) {
1627             send_sig(SIGXFSZ, current, 0);
1628             return -EFBIG;
1629         }
1630         *count = min(*count, limit - pos);
1631     }
1632 
1633     if (!(file->f_flags & O_LARGEFILE))
1634         max_size = MAX_NON_LFS;
1635 
1636     if (unlikely(pos >= max_size))
1637         return -EFBIG;
1638 
1639     *count = min(*count, max_size - pos);
1640 
1641     return 0;
1642 }
1643 
1644 /* Like generic_write_checks(), but takes size of write instead of iter. */
1645 int generic_write_checks_count(struct kiocb *iocb, loff_t *count)
1646 {
1647     struct file *file = iocb->ki_filp;
1648     struct inode *inode = file->f_mapping->host;
1649 
1650     if (IS_SWAPFILE(inode))
1651         return -ETXTBSY;
1652 
1653     if (!*count)
1654         return 0;
1655 
1656     if (iocb->ki_flags & IOCB_APPEND)
1657         iocb->ki_pos = i_size_read(inode);
1658 
1659     if ((iocb->ki_flags & IOCB_NOWAIT) &&
1660         !((iocb->ki_flags & IOCB_DIRECT) ||
1661           (file->f_mode & FMODE_BUF_WASYNC)))
1662         return -EINVAL;
1663 
1664     return generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count);
1665 }
1666 EXPORT_SYMBOL(generic_write_checks_count);
1667 
1668 /*
1669  * Performs necessary checks before doing a write
1670  *
1671  * Can adjust writing position or amount of bytes to write.
1672  * Returns appropriate error code that caller should return or
1673  * zero in case that write should be allowed.
1674  */
1675 ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
1676 {
1677     loff_t count = iov_iter_count(from);
1678     int ret;
1679 
1680     ret = generic_write_checks_count(iocb, &count);
1681     if (ret)
1682         return ret;
1683 
1684     iov_iter_truncate(from, count);
1685     return iov_iter_count(from);
1686 }
1687 EXPORT_SYMBOL(generic_write_checks);
1688 
1689 /*
1690  * Performs common checks before doing a file copy/clone
1691  * from @file_in to @file_out.
1692  */
1693 int generic_file_rw_checks(struct file *file_in, struct file *file_out)
1694 {
1695     struct inode *inode_in = file_inode(file_in);
1696     struct inode *inode_out = file_inode(file_out);
1697 
1698     /* Don't copy dirs, pipes, sockets... */
1699     if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1700         return -EISDIR;
1701     if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1702         return -EINVAL;
1703 
1704     if (!(file_in->f_mode & FMODE_READ) ||
1705         !(file_out->f_mode & FMODE_WRITE) ||
1706         (file_out->f_flags & O_APPEND))
1707         return -EBADF;
1708 
1709     return 0;
1710 }