0001
0002
0003
0004
0005
0006 #include <linux/cred.h>
0007 #include <linux/file.h>
0008 #include <linux/mount.h>
0009 #include <linux/xattr.h>
0010 #include <linux/uio.h>
0011 #include <linux/uaccess.h>
0012 #include <linux/splice.h>
0013 #include <linux/security.h>
0014 #include <linux/mm.h>
0015 #include <linux/fs.h>
0016 #include "overlayfs.h"
0017
0018 struct ovl_aio_req {
0019 struct kiocb iocb;
0020 refcount_t ref;
0021 struct kiocb *orig_iocb;
0022 struct fd fd;
0023 };
0024
0025 static struct kmem_cache *ovl_aio_request_cachep;
0026
0027 static char ovl_whatisit(struct inode *inode, struct inode *realinode)
0028 {
0029 if (realinode != ovl_inode_upper(inode))
0030 return 'l';
0031 if (ovl_has_upperdata(inode))
0032 return 'u';
0033 else
0034 return 'm';
0035 }
0036
0037
0038 #define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
0039
0040 static struct file *ovl_open_realfile(const struct file *file,
0041 struct path *realpath)
0042 {
0043 struct inode *realinode = d_inode(realpath->dentry);
0044 struct inode *inode = file_inode(file);
0045 struct user_namespace *real_mnt_userns;
0046 struct file *realfile;
0047 const struct cred *old_cred;
0048 int flags = file->f_flags | OVL_OPEN_FLAGS;
0049 int acc_mode = ACC_MODE(flags);
0050 int err;
0051
0052 if (flags & O_APPEND)
0053 acc_mode |= MAY_APPEND;
0054
0055 old_cred = ovl_override_creds(inode->i_sb);
0056 real_mnt_userns = mnt_user_ns(realpath->mnt);
0057 err = inode_permission(real_mnt_userns, realinode, MAY_OPEN | acc_mode);
0058 if (err) {
0059 realfile = ERR_PTR(err);
0060 } else {
0061 if (!inode_owner_or_capable(real_mnt_userns, realinode))
0062 flags &= ~O_NOATIME;
0063
0064 realfile = open_with_fake_path(&file->f_path, flags, realinode,
0065 current_cred());
0066 }
0067 revert_creds(old_cred);
0068
0069 pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
0070 file, file, ovl_whatisit(inode, realinode), file->f_flags,
0071 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
0072
0073 return realfile;
0074 }
0075
0076 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
0077
0078 static int ovl_change_flags(struct file *file, unsigned int flags)
0079 {
0080 struct inode *inode = file_inode(file);
0081 int err;
0082
0083 flags &= OVL_SETFL_MASK;
0084
0085 if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
0086 return -EPERM;
0087
0088 if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
0089 return -EINVAL;
0090
0091 if (file->f_op->check_flags) {
0092 err = file->f_op->check_flags(flags);
0093 if (err)
0094 return err;
0095 }
0096
0097 spin_lock(&file->f_lock);
0098 file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
0099 spin_unlock(&file->f_lock);
0100
0101 return 0;
0102 }
0103
0104 static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
0105 bool allow_meta)
0106 {
0107 struct dentry *dentry = file_dentry(file);
0108 struct path realpath;
0109
0110 real->flags = 0;
0111 real->file = file->private_data;
0112
0113 if (allow_meta)
0114 ovl_path_real(dentry, &realpath);
0115 else
0116 ovl_path_realdata(dentry, &realpath);
0117
0118
0119 if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) {
0120 real->flags = FDPUT_FPUT;
0121 real->file = ovl_open_realfile(file, &realpath);
0122
0123 return PTR_ERR_OR_ZERO(real->file);
0124 }
0125
0126
0127 if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
0128 return ovl_change_flags(real->file, file->f_flags);
0129
0130 return 0;
0131 }
0132
0133 static int ovl_real_fdget(const struct file *file, struct fd *real)
0134 {
0135 if (d_is_dir(file_dentry(file))) {
0136 real->flags = 0;
0137 real->file = ovl_dir_real_file(file, false);
0138
0139 return PTR_ERR_OR_ZERO(real->file);
0140 }
0141
0142 return ovl_real_fdget_meta(file, real, false);
0143 }
0144
0145 static int ovl_open(struct inode *inode, struct file *file)
0146 {
0147 struct dentry *dentry = file_dentry(file);
0148 struct file *realfile;
0149 struct path realpath;
0150 int err;
0151
0152 err = ovl_maybe_copy_up(dentry, file->f_flags);
0153 if (err)
0154 return err;
0155
0156
0157 file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
0158
0159 ovl_path_realdata(dentry, &realpath);
0160 realfile = ovl_open_realfile(file, &realpath);
0161 if (IS_ERR(realfile))
0162 return PTR_ERR(realfile);
0163
0164 file->private_data = realfile;
0165
0166 return 0;
0167 }
0168
0169 static int ovl_release(struct inode *inode, struct file *file)
0170 {
0171 fput(file->private_data);
0172
0173 return 0;
0174 }
0175
0176 static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
0177 {
0178 struct inode *inode = file_inode(file);
0179 struct fd real;
0180 const struct cred *old_cred;
0181 loff_t ret;
0182
0183
0184
0185
0186
0187 if (offset == 0) {
0188 if (whence == SEEK_CUR)
0189 return file->f_pos;
0190
0191 if (whence == SEEK_SET)
0192 return vfs_setpos(file, 0, 0);
0193 }
0194
0195 ret = ovl_real_fdget(file, &real);
0196 if (ret)
0197 return ret;
0198
0199
0200
0201
0202
0203
0204
0205
0206 ovl_inode_lock(inode);
0207 real.file->f_pos = file->f_pos;
0208
0209 old_cred = ovl_override_creds(inode->i_sb);
0210 ret = vfs_llseek(real.file, offset, whence);
0211 revert_creds(old_cred);
0212
0213 file->f_pos = real.file->f_pos;
0214 ovl_inode_unlock(inode);
0215
0216 fdput(real);
0217
0218 return ret;
0219 }
0220
0221 static void ovl_file_accessed(struct file *file)
0222 {
0223 struct inode *inode, *upperinode;
0224
0225 if (file->f_flags & O_NOATIME)
0226 return;
0227
0228 inode = file_inode(file);
0229 upperinode = ovl_inode_upper(inode);
0230
0231 if (!upperinode)
0232 return;
0233
0234 if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
0235 !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
0236 inode->i_mtime = upperinode->i_mtime;
0237 inode->i_ctime = upperinode->i_ctime;
0238 }
0239
0240 touch_atime(&file->f_path);
0241 }
0242
0243 static rwf_t ovl_iocb_to_rwf(int ifl)
0244 {
0245 rwf_t flags = 0;
0246
0247 if (ifl & IOCB_NOWAIT)
0248 flags |= RWF_NOWAIT;
0249 if (ifl & IOCB_HIPRI)
0250 flags |= RWF_HIPRI;
0251 if (ifl & IOCB_DSYNC)
0252 flags |= RWF_DSYNC;
0253 if (ifl & IOCB_SYNC)
0254 flags |= RWF_SYNC;
0255
0256 return flags;
0257 }
0258
0259 static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
0260 {
0261 if (refcount_dec_and_test(&aio_req->ref)) {
0262 fdput(aio_req->fd);
0263 kmem_cache_free(ovl_aio_request_cachep, aio_req);
0264 }
0265 }
0266
0267 static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
0268 {
0269 struct kiocb *iocb = &aio_req->iocb;
0270 struct kiocb *orig_iocb = aio_req->orig_iocb;
0271
0272 if (iocb->ki_flags & IOCB_WRITE) {
0273 struct inode *inode = file_inode(orig_iocb->ki_filp);
0274
0275
0276 __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
0277 SB_FREEZE_WRITE);
0278 file_end_write(iocb->ki_filp);
0279 ovl_copyattr(inode);
0280 }
0281
0282 orig_iocb->ki_pos = iocb->ki_pos;
0283 ovl_aio_put(aio_req);
0284 }
0285
0286 static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
0287 {
0288 struct ovl_aio_req *aio_req = container_of(iocb,
0289 struct ovl_aio_req, iocb);
0290 struct kiocb *orig_iocb = aio_req->orig_iocb;
0291
0292 ovl_aio_cleanup_handler(aio_req);
0293 orig_iocb->ki_complete(orig_iocb, res);
0294 }
0295
0296 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
0297 {
0298 struct file *file = iocb->ki_filp;
0299 struct fd real;
0300 const struct cred *old_cred;
0301 ssize_t ret;
0302
0303 if (!iov_iter_count(iter))
0304 return 0;
0305
0306 ret = ovl_real_fdget(file, &real);
0307 if (ret)
0308 return ret;
0309
0310 ret = -EINVAL;
0311 if (iocb->ki_flags & IOCB_DIRECT &&
0312 !(real.file->f_mode & FMODE_CAN_ODIRECT))
0313 goto out_fdput;
0314
0315 old_cred = ovl_override_creds(file_inode(file)->i_sb);
0316 if (is_sync_kiocb(iocb)) {
0317 ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
0318 ovl_iocb_to_rwf(iocb->ki_flags));
0319 } else {
0320 struct ovl_aio_req *aio_req;
0321
0322 ret = -ENOMEM;
0323 aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
0324 if (!aio_req)
0325 goto out;
0326
0327 aio_req->fd = real;
0328 real.flags = 0;
0329 aio_req->orig_iocb = iocb;
0330 kiocb_clone(&aio_req->iocb, iocb, real.file);
0331 aio_req->iocb.ki_complete = ovl_aio_rw_complete;
0332 refcount_set(&aio_req->ref, 2);
0333 ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
0334 ovl_aio_put(aio_req);
0335 if (ret != -EIOCBQUEUED)
0336 ovl_aio_cleanup_handler(aio_req);
0337 }
0338 out:
0339 revert_creds(old_cred);
0340 ovl_file_accessed(file);
0341 out_fdput:
0342 fdput(real);
0343
0344 return ret;
0345 }
0346
0347 static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
0348 {
0349 struct file *file = iocb->ki_filp;
0350 struct inode *inode = file_inode(file);
0351 struct fd real;
0352 const struct cred *old_cred;
0353 ssize_t ret;
0354 int ifl = iocb->ki_flags;
0355
0356 if (!iov_iter_count(iter))
0357 return 0;
0358
0359 inode_lock(inode);
0360
0361 ovl_copyattr(inode);
0362 ret = file_remove_privs(file);
0363 if (ret)
0364 goto out_unlock;
0365
0366 ret = ovl_real_fdget(file, &real);
0367 if (ret)
0368 goto out_unlock;
0369
0370 ret = -EINVAL;
0371 if (iocb->ki_flags & IOCB_DIRECT &&
0372 !(real.file->f_mode & FMODE_CAN_ODIRECT))
0373 goto out_fdput;
0374
0375 if (!ovl_should_sync(OVL_FS(inode->i_sb)))
0376 ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
0377
0378 old_cred = ovl_override_creds(file_inode(file)->i_sb);
0379 if (is_sync_kiocb(iocb)) {
0380 file_start_write(real.file);
0381 ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
0382 ovl_iocb_to_rwf(ifl));
0383 file_end_write(real.file);
0384
0385 ovl_copyattr(inode);
0386 } else {
0387 struct ovl_aio_req *aio_req;
0388
0389 ret = -ENOMEM;
0390 aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
0391 if (!aio_req)
0392 goto out;
0393
0394 file_start_write(real.file);
0395
0396 __sb_writers_release(file_inode(real.file)->i_sb,
0397 SB_FREEZE_WRITE);
0398 aio_req->fd = real;
0399 real.flags = 0;
0400 aio_req->orig_iocb = iocb;
0401 kiocb_clone(&aio_req->iocb, iocb, real.file);
0402 aio_req->iocb.ki_flags = ifl;
0403 aio_req->iocb.ki_complete = ovl_aio_rw_complete;
0404 refcount_set(&aio_req->ref, 2);
0405 ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
0406 ovl_aio_put(aio_req);
0407 if (ret != -EIOCBQUEUED)
0408 ovl_aio_cleanup_handler(aio_req);
0409 }
0410 out:
0411 revert_creds(old_cred);
0412 out_fdput:
0413 fdput(real);
0414
0415 out_unlock:
0416 inode_unlock(inode);
0417
0418 return ret;
0419 }
0420
0421
0422
0423
0424
0425
0426
0427
0428
0429 static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
0430 loff_t *ppos, size_t len, unsigned int flags)
0431 {
0432 struct fd real;
0433 const struct cred *old_cred;
0434 struct inode *inode = file_inode(out);
0435 ssize_t ret;
0436
0437 inode_lock(inode);
0438
0439 ovl_copyattr(inode);
0440 ret = file_remove_privs(out);
0441 if (ret)
0442 goto out_unlock;
0443
0444 ret = ovl_real_fdget(out, &real);
0445 if (ret)
0446 goto out_unlock;
0447
0448 old_cred = ovl_override_creds(inode->i_sb);
0449 file_start_write(real.file);
0450
0451 ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
0452
0453 file_end_write(real.file);
0454
0455 ovl_copyattr(inode);
0456 revert_creds(old_cred);
0457 fdput(real);
0458
0459 out_unlock:
0460 inode_unlock(inode);
0461
0462 return ret;
0463 }
0464
0465 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
0466 {
0467 struct fd real;
0468 const struct cred *old_cred;
0469 int ret;
0470
0471 ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
0472 if (ret <= 0)
0473 return ret;
0474
0475 ret = ovl_real_fdget_meta(file, &real, !datasync);
0476 if (ret)
0477 return ret;
0478
0479
0480 if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
0481 old_cred = ovl_override_creds(file_inode(file)->i_sb);
0482 ret = vfs_fsync_range(real.file, start, end, datasync);
0483 revert_creds(old_cred);
0484 }
0485
0486 fdput(real);
0487
0488 return ret;
0489 }
0490
0491 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
0492 {
0493 struct file *realfile = file->private_data;
0494 const struct cred *old_cred;
0495 int ret;
0496
0497 if (!realfile->f_op->mmap)
0498 return -ENODEV;
0499
0500 if (WARN_ON(file != vma->vm_file))
0501 return -EIO;
0502
0503 vma_set_file(vma, realfile);
0504
0505 old_cred = ovl_override_creds(file_inode(file)->i_sb);
0506 ret = call_mmap(vma->vm_file, vma);
0507 revert_creds(old_cred);
0508 ovl_file_accessed(file);
0509
0510 return ret;
0511 }
0512
0513 static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
0514 {
0515 struct inode *inode = file_inode(file);
0516 struct fd real;
0517 const struct cred *old_cred;
0518 int ret;
0519
0520 ret = ovl_real_fdget(file, &real);
0521 if (ret)
0522 return ret;
0523
0524 old_cred = ovl_override_creds(file_inode(file)->i_sb);
0525 ret = vfs_fallocate(real.file, mode, offset, len);
0526 revert_creds(old_cred);
0527
0528
0529 ovl_copyattr(inode);
0530
0531 fdput(real);
0532
0533 return ret;
0534 }
0535
0536 static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
0537 {
0538 struct fd real;
0539 const struct cred *old_cred;
0540 int ret;
0541
0542 ret = ovl_real_fdget(file, &real);
0543 if (ret)
0544 return ret;
0545
0546 old_cred = ovl_override_creds(file_inode(file)->i_sb);
0547 ret = vfs_fadvise(real.file, offset, len, advice);
0548 revert_creds(old_cred);
0549
0550 fdput(real);
0551
0552 return ret;
0553 }
0554
/* Selects which VFS primitive ovl_copyfile() invokes on the real files. */
enum ovl_copyop {
	OVL_COPY,	/* vfs_copy_file_range() */
	OVL_CLONE,	/* vfs_clone_file_range() */
	OVL_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
0560
0561 static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
0562 struct file *file_out, loff_t pos_out,
0563 loff_t len, unsigned int flags, enum ovl_copyop op)
0564 {
0565 struct inode *inode_out = file_inode(file_out);
0566 struct fd real_in, real_out;
0567 const struct cred *old_cred;
0568 loff_t ret;
0569
0570 ret = ovl_real_fdget(file_out, &real_out);
0571 if (ret)
0572 return ret;
0573
0574 ret = ovl_real_fdget(file_in, &real_in);
0575 if (ret) {
0576 fdput(real_out);
0577 return ret;
0578 }
0579
0580 old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
0581 switch (op) {
0582 case OVL_COPY:
0583 ret = vfs_copy_file_range(real_in.file, pos_in,
0584 real_out.file, pos_out, len, flags);
0585 break;
0586
0587 case OVL_CLONE:
0588 ret = vfs_clone_file_range(real_in.file, pos_in,
0589 real_out.file, pos_out, len, flags);
0590 break;
0591
0592 case OVL_DEDUPE:
0593 ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
0594 real_out.file, pos_out, len,
0595 flags);
0596 break;
0597 }
0598 revert_creds(old_cred);
0599
0600
0601 ovl_copyattr(inode_out);
0602
0603 fdput(real_in);
0604 fdput(real_out);
0605
0606 return ret;
0607 }
0608
0609 static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
0610 struct file *file_out, loff_t pos_out,
0611 size_t len, unsigned int flags)
0612 {
0613 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
0614 OVL_COPY);
0615 }
0616
0617 static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
0618 struct file *file_out, loff_t pos_out,
0619 loff_t len, unsigned int remap_flags)
0620 {
0621 enum ovl_copyop op;
0622
0623 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
0624 return -EINVAL;
0625
0626 if (remap_flags & REMAP_FILE_DEDUP)
0627 op = OVL_DEDUPE;
0628 else
0629 op = OVL_CLONE;
0630
0631
0632
0633
0634
0635 if (op == OVL_DEDUPE &&
0636 (!ovl_inode_upper(file_inode(file_in)) ||
0637 !ovl_inode_upper(file_inode(file_out))))
0638 return -EPERM;
0639
0640 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
0641 remap_flags, op);
0642 }
0643
0644 static int ovl_flush(struct file *file, fl_owner_t id)
0645 {
0646 struct fd real;
0647 const struct cred *old_cred;
0648 int err;
0649
0650 err = ovl_real_fdget(file, &real);
0651 if (err)
0652 return err;
0653
0654 if (real.file->f_op->flush) {
0655 old_cred = ovl_override_creds(file_inode(file)->i_sb);
0656 err = real.file->f_op->flush(real.file, id);
0657 revert_creds(old_cred);
0658 }
0659 fdput(real);
0660
0661 return err;
0662 }
0663
0664 const struct file_operations ovl_file_operations = {
0665 .open = ovl_open,
0666 .release = ovl_release,
0667 .llseek = ovl_llseek,
0668 .read_iter = ovl_read_iter,
0669 .write_iter = ovl_write_iter,
0670 .fsync = ovl_fsync,
0671 .mmap = ovl_mmap,
0672 .fallocate = ovl_fallocate,
0673 .fadvise = ovl_fadvise,
0674 .flush = ovl_flush,
0675 .splice_read = generic_file_splice_read,
0676 .splice_write = ovl_splice_write,
0677
0678 .copy_file_range = ovl_copy_file_range,
0679 .remap_file_range = ovl_remap_file_range,
0680 };
0681
0682 int __init ovl_aio_request_cache_init(void)
0683 {
0684 ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
0685 sizeof(struct ovl_aio_req),
0686 0, SLAB_HWCACHE_ALIGN, NULL);
0687 if (!ovl_aio_request_cachep)
0688 return -ENOMEM;
0689
0690 return 0;
0691 }
0692
0693 void ovl_aio_request_cache_destroy(void)
0694 {
0695 kmem_cache_destroy(ovl_aio_request_cachep);
0696 }