Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Copyright (C) 2017 Red Hat, Inc.
0004  */
0005 
0006 #include <linux/cred.h>
0007 #include <linux/file.h>
0008 #include <linux/mount.h>
0009 #include <linux/xattr.h>
0010 #include <linux/uio.h>
0011 #include <linux/uaccess.h>
0012 #include <linux/splice.h>
0013 #include <linux/security.h>
0014 #include <linux/mm.h>
0015 #include <linux/fs.h>
0016 #include "overlayfs.h"
0017 
0018 struct ovl_aio_req {
0019     struct kiocb iocb;
0020     refcount_t ref;
0021     struct kiocb *orig_iocb;
0022     struct fd fd;
0023 };
0024 
0025 static struct kmem_cache *ovl_aio_request_cachep;
0026 
/*
 * Single-character tag for @realinode, used in the open debug message:
 *   'l' - @realinode is not the upper inode of @inode
 *   'u' - it is the upper inode and ovl_has_upperdata() is true
 *   'm' - it is the upper inode and ovl_has_upperdata() is false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode != ovl_inode_upper(inode))
		return 'l';

	return ovl_has_upperdata(inode) ? 'u' : 'm';
}
0036 
/*
 * Flags OR-ed into every open of a real file: no atime modification and
 * no notification on the underlying file.
 */
#define OVL_OPEN_FLAGS (FMODE_NONOTIFY | O_NOATIME)
0039 
0040 static struct file *ovl_open_realfile(const struct file *file,
0041                       struct path *realpath)
0042 {
0043     struct inode *realinode = d_inode(realpath->dentry);
0044     struct inode *inode = file_inode(file);
0045     struct user_namespace *real_mnt_userns;
0046     struct file *realfile;
0047     const struct cred *old_cred;
0048     int flags = file->f_flags | OVL_OPEN_FLAGS;
0049     int acc_mode = ACC_MODE(flags);
0050     int err;
0051 
0052     if (flags & O_APPEND)
0053         acc_mode |= MAY_APPEND;
0054 
0055     old_cred = ovl_override_creds(inode->i_sb);
0056     real_mnt_userns = mnt_user_ns(realpath->mnt);
0057     err = inode_permission(real_mnt_userns, realinode, MAY_OPEN | acc_mode);
0058     if (err) {
0059         realfile = ERR_PTR(err);
0060     } else {
0061         if (!inode_owner_or_capable(real_mnt_userns, realinode))
0062             flags &= ~O_NOATIME;
0063 
0064         realfile = open_with_fake_path(&file->f_path, flags, realinode,
0065                            current_cred());
0066     }
0067     revert_creds(old_cred);
0068 
0069     pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
0070          file, file, ovl_whatisit(inode, realinode), file->f_flags,
0071          realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
0072 
0073     return realfile;
0074 }
0075 
/* The only f_flags bits that ovl_change_flags() rewrites on a file */
#define OVL_SETFL_MASK (O_DIRECT | O_NDELAY | O_NONBLOCK | O_APPEND)
0077 
0078 static int ovl_change_flags(struct file *file, unsigned int flags)
0079 {
0080     struct inode *inode = file_inode(file);
0081     int err;
0082 
0083     flags &= OVL_SETFL_MASK;
0084 
0085     if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
0086         return -EPERM;
0087 
0088     if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
0089         return -EINVAL;
0090 
0091     if (file->f_op->check_flags) {
0092         err = file->f_op->check_flags(flags);
0093         if (err)
0094             return err;
0095     }
0096 
0097     spin_lock(&file->f_lock);
0098     file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
0099     spin_unlock(&file->f_lock);
0100 
0101     return 0;
0102 }
0103 
0104 static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
0105                    bool allow_meta)
0106 {
0107     struct dentry *dentry = file_dentry(file);
0108     struct path realpath;
0109 
0110     real->flags = 0;
0111     real->file = file->private_data;
0112 
0113     if (allow_meta)
0114         ovl_path_real(dentry, &realpath);
0115     else
0116         ovl_path_realdata(dentry, &realpath);
0117 
0118     /* Has it been copied up since we'd opened it? */
0119     if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) {
0120         real->flags = FDPUT_FPUT;
0121         real->file = ovl_open_realfile(file, &realpath);
0122 
0123         return PTR_ERR_OR_ZERO(real->file);
0124     }
0125 
0126     /* Did the flags change since open? */
0127     if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
0128         return ovl_change_flags(real->file, file->f_flags);
0129 
0130     return 0;
0131 }
0132 
0133 static int ovl_real_fdget(const struct file *file, struct fd *real)
0134 {
0135     if (d_is_dir(file_dentry(file))) {
0136         real->flags = 0;
0137         real->file = ovl_dir_real_file(file, false);
0138 
0139         return PTR_ERR_OR_ZERO(real->file);
0140     }
0141 
0142     return ovl_real_fdget_meta(file, real, false);
0143 }
0144 
0145 static int ovl_open(struct inode *inode, struct file *file)
0146 {
0147     struct dentry *dentry = file_dentry(file);
0148     struct file *realfile;
0149     struct path realpath;
0150     int err;
0151 
0152     err = ovl_maybe_copy_up(dentry, file->f_flags);
0153     if (err)
0154         return err;
0155 
0156     /* No longer need these flags, so don't pass them on to underlying fs */
0157     file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
0158 
0159     ovl_path_realdata(dentry, &realpath);
0160     realfile = ovl_open_realfile(file, &realpath);
0161     if (IS_ERR(realfile))
0162         return PTR_ERR(realfile);
0163 
0164     file->private_data = realfile;
0165 
0166     return 0;
0167 }
0168 
0169 static int ovl_release(struct inode *inode, struct file *file)
0170 {
0171     fput(file->private_data);
0172 
0173     return 0;
0174 }
0175 
0176 static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
0177 {
0178     struct inode *inode = file_inode(file);
0179     struct fd real;
0180     const struct cred *old_cred;
0181     loff_t ret;
0182 
0183     /*
0184      * The two special cases below do not need to involve real fs,
0185      * so we can optimizing concurrent callers.
0186      */
0187     if (offset == 0) {
0188         if (whence == SEEK_CUR)
0189             return file->f_pos;
0190 
0191         if (whence == SEEK_SET)
0192             return vfs_setpos(file, 0, 0);
0193     }
0194 
0195     ret = ovl_real_fdget(file, &real);
0196     if (ret)
0197         return ret;
0198 
0199     /*
0200      * Overlay file f_pos is the master copy that is preserved
0201      * through copy up and modified on read/write, but only real
0202      * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
0203      * limitations that are more strict than ->s_maxbytes for specific
0204      * files, so we use the real file to perform seeks.
0205      */
0206     ovl_inode_lock(inode);
0207     real.file->f_pos = file->f_pos;
0208 
0209     old_cred = ovl_override_creds(inode->i_sb);
0210     ret = vfs_llseek(real.file, offset, whence);
0211     revert_creds(old_cred);
0212 
0213     file->f_pos = real.file->f_pos;
0214     ovl_inode_unlock(inode);
0215 
0216     fdput(real);
0217 
0218     return ret;
0219 }
0220 
0221 static void ovl_file_accessed(struct file *file)
0222 {
0223     struct inode *inode, *upperinode;
0224 
0225     if (file->f_flags & O_NOATIME)
0226         return;
0227 
0228     inode = file_inode(file);
0229     upperinode = ovl_inode_upper(inode);
0230 
0231     if (!upperinode)
0232         return;
0233 
0234     if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
0235          !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
0236         inode->i_mtime = upperinode->i_mtime;
0237         inode->i_ctime = upperinode->i_ctime;
0238     }
0239 
0240     touch_atime(&file->f_path);
0241 }
0242 
0243 static rwf_t ovl_iocb_to_rwf(int ifl)
0244 {
0245     rwf_t flags = 0;
0246 
0247     if (ifl & IOCB_NOWAIT)
0248         flags |= RWF_NOWAIT;
0249     if (ifl & IOCB_HIPRI)
0250         flags |= RWF_HIPRI;
0251     if (ifl & IOCB_DSYNC)
0252         flags |= RWF_DSYNC;
0253     if (ifl & IOCB_SYNC)
0254         flags |= RWF_SYNC;
0255 
0256     return flags;
0257 }
0258 
0259 static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
0260 {
0261     if (refcount_dec_and_test(&aio_req->ref)) {
0262         fdput(aio_req->fd);
0263         kmem_cache_free(ovl_aio_request_cachep, aio_req);
0264     }
0265 }
0266 
0267 static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
0268 {
0269     struct kiocb *iocb = &aio_req->iocb;
0270     struct kiocb *orig_iocb = aio_req->orig_iocb;
0271 
0272     if (iocb->ki_flags & IOCB_WRITE) {
0273         struct inode *inode = file_inode(orig_iocb->ki_filp);
0274 
0275         /* Actually acquired in ovl_write_iter() */
0276         __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
0277                       SB_FREEZE_WRITE);
0278         file_end_write(iocb->ki_filp);
0279         ovl_copyattr(inode);
0280     }
0281 
0282     orig_iocb->ki_pos = iocb->ki_pos;
0283     ovl_aio_put(aio_req);
0284 }
0285 
0286 static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
0287 {
0288     struct ovl_aio_req *aio_req = container_of(iocb,
0289                            struct ovl_aio_req, iocb);
0290     struct kiocb *orig_iocb = aio_req->orig_iocb;
0291 
0292     ovl_aio_cleanup_handler(aio_req);
0293     orig_iocb->ki_complete(orig_iocb, res);
0294 }
0295 
0296 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
0297 {
0298     struct file *file = iocb->ki_filp;
0299     struct fd real;
0300     const struct cred *old_cred;
0301     ssize_t ret;
0302 
0303     if (!iov_iter_count(iter))
0304         return 0;
0305 
0306     ret = ovl_real_fdget(file, &real);
0307     if (ret)
0308         return ret;
0309 
0310     ret = -EINVAL;
0311     if (iocb->ki_flags & IOCB_DIRECT &&
0312         !(real.file->f_mode & FMODE_CAN_ODIRECT))
0313         goto out_fdput;
0314 
0315     old_cred = ovl_override_creds(file_inode(file)->i_sb);
0316     if (is_sync_kiocb(iocb)) {
0317         ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
0318                     ovl_iocb_to_rwf(iocb->ki_flags));
0319     } else {
0320         struct ovl_aio_req *aio_req;
0321 
0322         ret = -ENOMEM;
0323         aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
0324         if (!aio_req)
0325             goto out;
0326 
0327         aio_req->fd = real;
0328         real.flags = 0;
0329         aio_req->orig_iocb = iocb;
0330         kiocb_clone(&aio_req->iocb, iocb, real.file);
0331         aio_req->iocb.ki_complete = ovl_aio_rw_complete;
0332         refcount_set(&aio_req->ref, 2);
0333         ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
0334         ovl_aio_put(aio_req);
0335         if (ret != -EIOCBQUEUED)
0336             ovl_aio_cleanup_handler(aio_req);
0337     }
0338 out:
0339     revert_creds(old_cred);
0340     ovl_file_accessed(file);
0341 out_fdput:
0342     fdput(real);
0343 
0344     return ret;
0345 }
0346 
0347 static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
0348 {
0349     struct file *file = iocb->ki_filp;
0350     struct inode *inode = file_inode(file);
0351     struct fd real;
0352     const struct cred *old_cred;
0353     ssize_t ret;
0354     int ifl = iocb->ki_flags;
0355 
0356     if (!iov_iter_count(iter))
0357         return 0;
0358 
0359     inode_lock(inode);
0360     /* Update mode */
0361     ovl_copyattr(inode);
0362     ret = file_remove_privs(file);
0363     if (ret)
0364         goto out_unlock;
0365 
0366     ret = ovl_real_fdget(file, &real);
0367     if (ret)
0368         goto out_unlock;
0369 
0370     ret = -EINVAL;
0371     if (iocb->ki_flags & IOCB_DIRECT &&
0372         !(real.file->f_mode & FMODE_CAN_ODIRECT))
0373         goto out_fdput;
0374 
0375     if (!ovl_should_sync(OVL_FS(inode->i_sb)))
0376         ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
0377 
0378     old_cred = ovl_override_creds(file_inode(file)->i_sb);
0379     if (is_sync_kiocb(iocb)) {
0380         file_start_write(real.file);
0381         ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
0382                      ovl_iocb_to_rwf(ifl));
0383         file_end_write(real.file);
0384         /* Update size */
0385         ovl_copyattr(inode);
0386     } else {
0387         struct ovl_aio_req *aio_req;
0388 
0389         ret = -ENOMEM;
0390         aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
0391         if (!aio_req)
0392             goto out;
0393 
0394         file_start_write(real.file);
0395         /* Pacify lockdep, same trick as done in aio_write() */
0396         __sb_writers_release(file_inode(real.file)->i_sb,
0397                      SB_FREEZE_WRITE);
0398         aio_req->fd = real;
0399         real.flags = 0;
0400         aio_req->orig_iocb = iocb;
0401         kiocb_clone(&aio_req->iocb, iocb, real.file);
0402         aio_req->iocb.ki_flags = ifl;
0403         aio_req->iocb.ki_complete = ovl_aio_rw_complete;
0404         refcount_set(&aio_req->ref, 2);
0405         ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
0406         ovl_aio_put(aio_req);
0407         if (ret != -EIOCBQUEUED)
0408             ovl_aio_cleanup_handler(aio_req);
0409     }
0410 out:
0411     revert_creds(old_cred);
0412 out_fdput:
0413     fdput(real);
0414 
0415 out_unlock:
0416     inode_unlock(inode);
0417 
0418     return ret;
0419 }
0420 
0421 /*
0422  * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
0423  * due to lock order inversion between pipe->mutex in iter_file_splice_write()
0424  * and file_start_write(real.file) in ovl_write_iter().
0425  *
0426  * So do everything ovl_write_iter() does and call iter_file_splice_write() on
0427  * the real file.
0428  */
0429 static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
0430                 loff_t *ppos, size_t len, unsigned int flags)
0431 {
0432     struct fd real;
0433     const struct cred *old_cred;
0434     struct inode *inode = file_inode(out);
0435     ssize_t ret;
0436 
0437     inode_lock(inode);
0438     /* Update mode */
0439     ovl_copyattr(inode);
0440     ret = file_remove_privs(out);
0441     if (ret)
0442         goto out_unlock;
0443 
0444     ret = ovl_real_fdget(out, &real);
0445     if (ret)
0446         goto out_unlock;
0447 
0448     old_cred = ovl_override_creds(inode->i_sb);
0449     file_start_write(real.file);
0450 
0451     ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
0452 
0453     file_end_write(real.file);
0454     /* Update size */
0455     ovl_copyattr(inode);
0456     revert_creds(old_cred);
0457     fdput(real);
0458 
0459 out_unlock:
0460     inode_unlock(inode);
0461 
0462     return ret;
0463 }
0464 
0465 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
0466 {
0467     struct fd real;
0468     const struct cred *old_cred;
0469     int ret;
0470 
0471     ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
0472     if (ret <= 0)
0473         return ret;
0474 
0475     ret = ovl_real_fdget_meta(file, &real, !datasync);
0476     if (ret)
0477         return ret;
0478 
0479     /* Don't sync lower file for fear of receiving EROFS error */
0480     if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
0481         old_cred = ovl_override_creds(file_inode(file)->i_sb);
0482         ret = vfs_fsync_range(real.file, start, end, datasync);
0483         revert_creds(old_cred);
0484     }
0485 
0486     fdput(real);
0487 
0488     return ret;
0489 }
0490 
0491 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
0492 {
0493     struct file *realfile = file->private_data;
0494     const struct cred *old_cred;
0495     int ret;
0496 
0497     if (!realfile->f_op->mmap)
0498         return -ENODEV;
0499 
0500     if (WARN_ON(file != vma->vm_file))
0501         return -EIO;
0502 
0503     vma_set_file(vma, realfile);
0504 
0505     old_cred = ovl_override_creds(file_inode(file)->i_sb);
0506     ret = call_mmap(vma->vm_file, vma);
0507     revert_creds(old_cred);
0508     ovl_file_accessed(file);
0509 
0510     return ret;
0511 }
0512 
0513 static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
0514 {
0515     struct inode *inode = file_inode(file);
0516     struct fd real;
0517     const struct cred *old_cred;
0518     int ret;
0519 
0520     ret = ovl_real_fdget(file, &real);
0521     if (ret)
0522         return ret;
0523 
0524     old_cred = ovl_override_creds(file_inode(file)->i_sb);
0525     ret = vfs_fallocate(real.file, mode, offset, len);
0526     revert_creds(old_cred);
0527 
0528     /* Update size */
0529     ovl_copyattr(inode);
0530 
0531     fdput(real);
0532 
0533     return ret;
0534 }
0535 
0536 static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
0537 {
0538     struct fd real;
0539     const struct cred *old_cred;
0540     int ret;
0541 
0542     ret = ovl_real_fdget(file, &real);
0543     if (ret)
0544         return ret;
0545 
0546     old_cred = ovl_override_creds(file_inode(file)->i_sb);
0547     ret = vfs_fadvise(real.file, offset, len, advice);
0548     revert_creds(old_cred);
0549 
0550     fdput(real);
0551 
0552     return ret;
0553 }
0554 
/* Selects which VFS helper ovl_copyfile() calls on the real files */
enum ovl_copyop {
	OVL_COPY,	/* vfs_copy_file_range() */
	OVL_CLONE,	/* vfs_clone_file_range() */
	OVL_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
0560 
0561 static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
0562                 struct file *file_out, loff_t pos_out,
0563                 loff_t len, unsigned int flags, enum ovl_copyop op)
0564 {
0565     struct inode *inode_out = file_inode(file_out);
0566     struct fd real_in, real_out;
0567     const struct cred *old_cred;
0568     loff_t ret;
0569 
0570     ret = ovl_real_fdget(file_out, &real_out);
0571     if (ret)
0572         return ret;
0573 
0574     ret = ovl_real_fdget(file_in, &real_in);
0575     if (ret) {
0576         fdput(real_out);
0577         return ret;
0578     }
0579 
0580     old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
0581     switch (op) {
0582     case OVL_COPY:
0583         ret = vfs_copy_file_range(real_in.file, pos_in,
0584                       real_out.file, pos_out, len, flags);
0585         break;
0586 
0587     case OVL_CLONE:
0588         ret = vfs_clone_file_range(real_in.file, pos_in,
0589                        real_out.file, pos_out, len, flags);
0590         break;
0591 
0592     case OVL_DEDUPE:
0593         ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
0594                         real_out.file, pos_out, len,
0595                         flags);
0596         break;
0597     }
0598     revert_creds(old_cred);
0599 
0600     /* Update size */
0601     ovl_copyattr(inode_out);
0602 
0603     fdput(real_in);
0604     fdput(real_out);
0605 
0606     return ret;
0607 }
0608 
0609 static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
0610                    struct file *file_out, loff_t pos_out,
0611                    size_t len, unsigned int flags)
0612 {
0613     return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
0614                 OVL_COPY);
0615 }
0616 
0617 static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
0618                    struct file *file_out, loff_t pos_out,
0619                    loff_t len, unsigned int remap_flags)
0620 {
0621     enum ovl_copyop op;
0622 
0623     if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
0624         return -EINVAL;
0625 
0626     if (remap_flags & REMAP_FILE_DEDUP)
0627         op = OVL_DEDUPE;
0628     else
0629         op = OVL_CLONE;
0630 
0631     /*
0632      * Don't copy up because of a dedupe request, this wouldn't make sense
0633      * most of the time (data would be duplicated instead of deduplicated).
0634      */
0635     if (op == OVL_DEDUPE &&
0636         (!ovl_inode_upper(file_inode(file_in)) ||
0637          !ovl_inode_upper(file_inode(file_out))))
0638         return -EPERM;
0639 
0640     return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
0641                 remap_flags, op);
0642 }
0643 
0644 static int ovl_flush(struct file *file, fl_owner_t id)
0645 {
0646     struct fd real;
0647     const struct cred *old_cred;
0648     int err;
0649 
0650     err = ovl_real_fdget(file, &real);
0651     if (err)
0652         return err;
0653 
0654     if (real.file->f_op->flush) {
0655         old_cred = ovl_override_creds(file_inode(file)->i_sb);
0656         err = real.file->f_op->flush(real.file, id);
0657         revert_creds(old_cred);
0658     }
0659     fdput(real);
0660 
0661     return err;
0662 }
0663 
0664 const struct file_operations ovl_file_operations = {
0665     .open       = ovl_open,
0666     .release    = ovl_release,
0667     .llseek     = ovl_llseek,
0668     .read_iter  = ovl_read_iter,
0669     .write_iter = ovl_write_iter,
0670     .fsync      = ovl_fsync,
0671     .mmap       = ovl_mmap,
0672     .fallocate  = ovl_fallocate,
0673     .fadvise    = ovl_fadvise,
0674     .flush      = ovl_flush,
0675     .splice_read    = generic_file_splice_read,
0676     .splice_write   = ovl_splice_write,
0677 
0678     .copy_file_range    = ovl_copy_file_range,
0679     .remap_file_range   = ovl_remap_file_range,
0680 };
0681 
0682 int __init ovl_aio_request_cache_init(void)
0683 {
0684     ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
0685                            sizeof(struct ovl_aio_req),
0686                            0, SLAB_HWCACHE_ALIGN, NULL);
0687     if (!ovl_aio_request_cachep)
0688         return -ENOMEM;
0689 
0690     return 0;
0691 }
0692 
0693 void ovl_aio_request_cache_destroy(void)
0694 {
0695     kmem_cache_destroy(ovl_aio_request_cachep);
0696 }