/*
 * Excerpt of linux/fs/fcntl.c as rendered by the OSCL-LXR source
 * cross-referencer; viewer navigation chrome removed.
 */
0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  *  linux/fs/fcntl.c
0004  *
0005  *  Copyright (C) 1991, 1992  Linus Torvalds
0006  */
0007 
0008 #include <linux/syscalls.h>
0009 #include <linux/init.h>
0010 #include <linux/mm.h>
0011 #include <linux/sched/task.h>
0012 #include <linux/fs.h>
0013 #include <linux/file.h>
0014 #include <linux/fdtable.h>
0015 #include <linux/capability.h>
0016 #include <linux/dnotify.h>
0017 #include <linux/slab.h>
0018 #include <linux/module.h>
0019 #include <linux/pipe_fs_i.h>
0020 #include <linux/security.h>
0021 #include <linux/ptrace.h>
0022 #include <linux/signal.h>
0023 #include <linux/rcupdate.h>
0024 #include <linux/pid_namespace.h>
0025 #include <linux/user_namespace.h>
0026 #include <linux/memfd.h>
0027 #include <linux/compat.h>
0028 #include <linux/mount.h>
0029 
0030 #include <linux/poll.h>
0031 #include <asm/siginfo.h>
0032 #include <linux/uaccess.h>
0033 
0034 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
0035 
/*
 * setfl - apply an F_SETFL flag change to an open file.
 * @fd:   descriptor number, needed only for the ->fasync() callback
 * @filp: file being modified
 * @arg:  flag set requested by userspace
 *
 * Only the bits in SETFL_MASK can actually change; all other bits of
 * @arg are ignored.  Returns 0 on success or a negative errno.
 */
static int setfl(int fd, struct file * filp, unsigned long arg)
{
    struct inode * inode = file_inode(filp);
    int error = 0;

    /*
     * O_APPEND cannot be cleared if the file is marked as append-only
     * and the file is open for write.
     */
    if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
        return -EPERM;

    /* O_NOATIME can only be set by the owner or superuser */
    if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
        if (!inode_owner_or_capable(file_mnt_user_ns(filp), inode))
            return -EPERM;

    /* required for strict SunOS emulation */
    if (O_NONBLOCK != O_NDELAY)
           if (arg & O_NDELAY)
           arg |= O_NONBLOCK;

    /* Pipe packetized mode is controlled by O_DIRECT flag */
    if (!S_ISFIFO(inode->i_mode) &&
        (arg & O_DIRECT) &&
        !(filp->f_mode & FMODE_CAN_ODIRECT))
        return -EINVAL;

    /* give the filesystem/driver a chance to veto the new flags */
    if (filp->f_op->check_flags)
        error = filp->f_op->check_flags(arg);
    if (error)
        return error;

    /*
     * ->fasync() is responsible for setting the FASYNC bit.
     */
    if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
        error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
        if (error < 0)
            goto out;
        if (error > 0)
            error = 0; /* positive ->fasync() results are not errors */
    }
    /* f_lock serialises f_flags updates against concurrent fcntl() */
    spin_lock(&filp->f_lock);
    filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
    filp->f_iocb_flags = iocb_flags(filp);
    spin_unlock(&filp->f_lock);

 out:
    return error;
}
0087 
/*
 * f_modown - set or clear the owner (async-signal recipient) of a file.
 * @filp:  file whose ->f_owner is updated
 * @pid:   new owner pid, or NULL to clear the owner
 * @type:  how @pid is interpreted (PIDTYPE_PID/TGID/PGID)
 * @force: if false, install @pid only when no owner is set yet
 *
 * Also snapshots the caller's uid/euid; sigio_perm() checks these
 * before any signal is delivered.
 */
static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
                     int force)
{
    write_lock_irq(&filp->f_owner.lock);
    if (force || !filp->f_owner.pid) {
        /* drop the reference on the previous owner, if any */
        put_pid(filp->f_owner.pid);
        filp->f_owner.pid = get_pid(pid);
        filp->f_owner.pid_type = type;

        if (pid) {
            const struct cred *cred = current_cred();
            filp->f_owner.uid = cred->uid;
            filp->f_owner.euid = cred->euid;
        }
    }
    write_unlock_irq(&filp->f_owner.lock);
}
0105 
/*
 * __f_setown - set the file owner after notifying the security module.
 *
 * security_file_set_fowner() lets the LSM record who is claiming
 * ownership; f_modown() performs the actual update.
 */
void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
        int force)
{
    security_file_set_fowner(filp);
    f_modown(filp, pid, type, force);
}
EXPORT_SYMBOL(__f_setown);
0113 
/*
 * f_setown - implement F_SETOWN.
 * @arg: pid to own the file; a negative value means "process group -arg"
 *
 * Returns 0 on success, -EINVAL for INT_MIN (whose negation would
 * overflow), or -ESRCH if the pid is not found in the caller's pid
 * namespace.
 */
int f_setown(struct file *filp, unsigned long arg, int force)
{
    enum pid_type type;
    struct pid *pid = NULL;
    int who = arg, ret = 0;

    type = PIDTYPE_TGID;
    if (who < 0) {
        /* avoid overflow below */
        if (who == INT_MIN)
            return -EINVAL;

        type = PIDTYPE_PGID;
        who = -who;
    }

    /* RCU keeps the pid valid until __f_setown() takes its reference */
    rcu_read_lock();
    if (who) {
        pid = find_vpid(who);
        if (!pid)
            ret = -ESRCH;
    }

    if (!ret)
        __f_setown(filp, pid, type, force);
    rcu_read_unlock();

    return ret;
}
EXPORT_SYMBOL(f_setown);
0144 
/* f_delown - unconditionally clear the owner of @filp. */
void f_delown(struct file *filp)
{
    f_modown(filp, NULL, PIDTYPE_TGID, 1);
}
0149 
/*
 * f_getown - implement F_GETOWN.
 *
 * Returns the owner pid as seen from the caller's pid namespace,
 * negated when the owner is a process group, or 0 when there is no
 * owner or the owning task is gone.
 */
pid_t f_getown(struct file *filp)
{
    pid_t pid = 0;

    read_lock_irq(&filp->f_owner.lock);
    rcu_read_lock();
    /* only report owners that still have a live task attached */
    if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) {
        pid = pid_vnr(filp->f_owner.pid);
        if (filp->f_owner.pid_type == PIDTYPE_PGID)
            pid = -pid;
    }
    rcu_read_unlock();
    read_unlock_irq(&filp->f_owner.lock);
    return pid;
}
0165 
/*
 * f_setown_ex - implement F_SETOWN_EX.
 * @arg: user pointer to a struct f_owner_ex naming the new owner
 *
 * Unlike plain F_SETOWN, this can also target a single thread
 * (F_OWNER_TID).  Returns 0, -EFAULT on a bad pointer, -EINVAL for an
 * unknown owner type, or -ESRCH if the pid does not exist.
 */
static int f_setown_ex(struct file *filp, unsigned long arg)
{
    struct f_owner_ex __user *owner_p = (void __user *)arg;
    struct f_owner_ex owner;
    struct pid *pid;
    int type;
    int ret;

    ret = copy_from_user(&owner, owner_p, sizeof(owner));
    if (ret)
        return -EFAULT;

    /* translate the UAPI owner type into a kernel pid type */
    switch (owner.type) {
    case F_OWNER_TID:
        type = PIDTYPE_PID;
        break;

    case F_OWNER_PID:
        type = PIDTYPE_TGID;
        break;

    case F_OWNER_PGRP:
        type = PIDTYPE_PGID;
        break;

    default:
        return -EINVAL;
    }

    rcu_read_lock();
    pid = find_vpid(owner.pid);
    if (owner.pid && !pid)
        ret = -ESRCH;
    else
         __f_setown(filp, pid, type, 1);
    rcu_read_unlock();

    return ret;
}
0205 
/*
 * f_getown_ex - implement F_GETOWN_EX.
 * @arg: user pointer receiving a struct f_owner_ex
 *
 * Reads the owner pid and type under f_owner.lock so the pair is
 * consistent, then copies the result out after dropping the lock.
 */
static int f_getown_ex(struct file *filp, unsigned long arg)
{
    struct f_owner_ex __user *owner_p = (void __user *)arg;
    struct f_owner_ex owner = {};
    int ret = 0;

    read_lock_irq(&filp->f_owner.lock);
    rcu_read_lock();
    /* report 0 if the owning task no longer exists */
    if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type))
        owner.pid = pid_vnr(filp->f_owner.pid);
    rcu_read_unlock();
    switch (filp->f_owner.pid_type) {
    case PIDTYPE_PID:
        owner.type = F_OWNER_TID;
        break;

    case PIDTYPE_TGID:
        owner.type = F_OWNER_PID;
        break;

    case PIDTYPE_PGID:
        owner.type = F_OWNER_PGRP;
        break;

    default:
        WARN_ON(1);
        ret = -EINVAL;
        break;
    }
    read_unlock_irq(&filp->f_owner.lock);

    if (!ret) {
        ret = copy_to_user(owner_p, &owner, sizeof(owner));
        if (ret)
            ret = -EFAULT;
    }
    return ret;
}
0244 
#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * f_getowner_uids - implement F_GETOWNER_UIDS (checkpoint/restore only).
 *
 * Copies the recorded owner uid and euid, translated into the caller's
 * user namespace, to the uid_t[2] array at @arg.
 */
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
    struct user_namespace *user_ns = current_user_ns();
    uid_t __user *dst = (void __user *)arg;
    uid_t src[2];
    int err;

    read_lock_irq(&filp->f_owner.lock);
    src[0] = from_kuid(user_ns, filp->f_owner.uid);
    src[1] = from_kuid(user_ns, filp->f_owner.euid);
    read_unlock_irq(&filp->f_owner.lock);

    err  = put_user(src[0], &dst[0]);
    err |= put_user(src[1], &dst[1]);

    return err;
}
#else
/* Without checkpoint/restore support the command is simply invalid. */
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
    return -EINVAL;
}
#endif
0269 
0270 static bool rw_hint_valid(enum rw_hint hint)
0271 {
0272     switch (hint) {
0273     case RWH_WRITE_LIFE_NOT_SET:
0274     case RWH_WRITE_LIFE_NONE:
0275     case RWH_WRITE_LIFE_SHORT:
0276     case RWH_WRITE_LIFE_MEDIUM:
0277     case RWH_WRITE_LIFE_LONG:
0278     case RWH_WRITE_LIFE_EXTREME:
0279         return true;
0280     default:
0281         return false;
0282     }
0283 }
0284 
/*
 * fcntl_rw_hint - implement F_GET_RW_HINT / F_SET_RW_HINT.
 *
 * The write-lifetime hint is stored per inode and exchanged with
 * userspace as a u64.  Setting it takes the inode lock so concurrent
 * setters serialise.
 */
static long fcntl_rw_hint(struct file *file, unsigned int cmd,
              unsigned long arg)
{
    struct inode *inode = file_inode(file);
    u64 __user *argp = (u64 __user *)arg;
    enum rw_hint hint;
    u64 h;

    switch (cmd) {
    case F_GET_RW_HINT:
        h = inode->i_write_hint;
        if (copy_to_user(argp, &h, sizeof(*argp)))
            return -EFAULT;
        return 0;
    case F_SET_RW_HINT:
        if (copy_from_user(&h, argp, sizeof(h)))
            return -EFAULT;
        hint = (enum rw_hint) h;
        if (!rw_hint_valid(hint))
            return -EINVAL;

        inode_lock(inode);
        inode->i_write_hint = hint;
        inode_unlock(inode);
        return 0;
    default:
        return -EINVAL;
    }
}
0314 
/*
 * do_fcntl - dispatch a single fcntl command on an already-resolved file.
 *
 * Shared by the native and compat entry points.  Unknown commands fall
 * through to the default and return the initial -EINVAL.  On 32-bit
 * the 64-bit/OFD lock commands are handled by fcntl64() instead.
 */
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
        struct file *filp)
{
    void __user *argp = (void __user *)arg;
    struct flock flock;
    long err = -EINVAL;

    switch (cmd) {
    case F_DUPFD:
        err = f_dupfd(arg, filp, 0);
        break;
    case F_DUPFD_CLOEXEC:
        err = f_dupfd(arg, filp, O_CLOEXEC);
        break;
    case F_GETFD:
        err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
        break;
    case F_SETFD:
        err = 0;
        set_close_on_exec(fd, arg & FD_CLOEXEC);
        break;
    case F_GETFL:
        err = filp->f_flags;
        break;
    case F_SETFL:
        err = setfl(fd, filp, arg);
        break;
#if BITS_PER_LONG != 32
    /* 32-bit arches must use fcntl64() */
    case F_OFD_GETLK:
#endif
    case F_GETLK:
        if (copy_from_user(&flock, argp, sizeof(flock)))
            return -EFAULT;
        err = fcntl_getlk(filp, cmd, &flock);
        if (!err && copy_to_user(argp, &flock, sizeof(flock)))
            return -EFAULT;
        break;
#if BITS_PER_LONG != 32
    /* 32-bit arches must use fcntl64() */
    case F_OFD_SETLK:
    case F_OFD_SETLKW:
        fallthrough;
#endif
    case F_SETLK:
    case F_SETLKW:
        if (copy_from_user(&flock, argp, sizeof(flock)))
            return -EFAULT;
        err = fcntl_setlk(fd, filp, cmd, &flock);
        break;
    case F_GETOWN:
        /*
         * XXX If f_owner is a process group, the
         * negative return value will get converted
         * into an error.  Oops.  If we keep the
         * current syscall conventions, the only way
         * to fix this will be in libc.
         */
        err = f_getown(filp);
        force_successful_syscall_return();
        break;
    case F_SETOWN:
        err = f_setown(filp, arg, 1);
        break;
    case F_GETOWN_EX:
        err = f_getown_ex(filp, arg);
        break;
    case F_SETOWN_EX:
        err = f_setown_ex(filp, arg);
        break;
    case F_GETOWNER_UIDS:
        err = f_getowner_uids(filp, arg);
        break;
    case F_GETSIG:
        err = filp->f_owner.signum;
        break;
    case F_SETSIG:
        /* arg == 0 restores default behaviour. */
        if (!valid_signal(arg)) {
            break;
        }
        err = 0;
        filp->f_owner.signum = arg;
        break;
    case F_GETLEASE:
        err = fcntl_getlease(filp);
        break;
    case F_SETLEASE:
        err = fcntl_setlease(fd, filp, arg);
        break;
    case F_NOTIFY:
        err = fcntl_dirnotify(fd, filp, arg);
        break;
    case F_SETPIPE_SZ:
    case F_GETPIPE_SZ:
        err = pipe_fcntl(filp, cmd, arg);
        break;
    case F_ADD_SEALS:
    case F_GET_SEALS:
        err = memfd_fcntl(filp, cmd, arg);
        break;
    case F_GET_RW_HINT:
    case F_SET_RW_HINT:
        err = fcntl_rw_hint(filp, cmd, arg);
        break;
    default:
        break;
    }
    return err;
}
0425 
0426 static int check_fcntl_cmd(unsigned cmd)
0427 {
0428     switch (cmd) {
0429     case F_DUPFD:
0430     case F_DUPFD_CLOEXEC:
0431     case F_GETFD:
0432     case F_SETFD:
0433     case F_GETFL:
0434         return 1;
0435     }
0436     return 0;
0437 }
0438 
/*
 * fcntl() - native syscall entry point.
 *
 * Resolves the descriptor, restricts FMODE_PATH (O_PATH-style) files to
 * the small descriptor-only command set, runs the LSM hook, and then
 * dispatches to do_fcntl().
 */
SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
    struct fd f = fdget_raw(fd);
    long err = -EBADF;

    if (!f.file)
        goto out;

    if (unlikely(f.file->f_mode & FMODE_PATH)) {
        if (!check_fcntl_cmd(cmd))
            goto out1;
    }

    err = security_file_fcntl(f.file, cmd, arg);
    if (!err)
        err = do_fcntl(fd, cmd, arg, f.file);

out1:
    fdput(f);
out:
    return err;
}
0461 
#if BITS_PER_LONG == 32
/*
 * fcntl64() - 32-bit-only syscall entry point.
 *
 * Handles the 64-bit and OFD lock commands here with struct flock64;
 * every other command is forwarded to the common do_fcntl().
 */
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
        unsigned long, arg)
{
    void __user *argp = (void __user *)arg;
    struct fd f = fdget_raw(fd);
    struct flock64 flock;
    long err = -EBADF;

    if (!f.file)
        goto out;

    /* FMODE_PATH files only allow the descriptor-level commands */
    if (unlikely(f.file->f_mode & FMODE_PATH)) {
        if (!check_fcntl_cmd(cmd))
            goto out1;
    }

    err = security_file_fcntl(f.file, cmd, arg);
    if (err)
        goto out1;

    switch (cmd) {
    case F_GETLK64:
    case F_OFD_GETLK:
        err = -EFAULT;
        if (copy_from_user(&flock, argp, sizeof(flock)))
            break;
        err = fcntl_getlk64(f.file, cmd, &flock);
        if (!err && copy_to_user(argp, &flock, sizeof(flock)))
            err = -EFAULT;
        break;
    case F_SETLK64:
    case F_SETLKW64:
    case F_OFD_SETLK:
    case F_OFD_SETLKW:
        err = -EFAULT;
        if (copy_from_user(&flock, argp, sizeof(flock)))
            break;
        err = fcntl_setlk64(fd, f.file, cmd, &flock);
        break;
    default:
        err = do_fcntl(fd, cmd, arg, f.file);
        break;
    }
out1:
    fdput(f);
out:
    return err;
}
#endif
0512 
#ifdef CONFIG_COMPAT
/* careful - don't use anywhere else */
/*
 * Field-by-field copy between the native and compat flock layouts.
 * Deliberately NOT a do { } while (0) macro; it is only ever expanded
 * as a full statement in the four helpers below.
 */
#define copy_flock_fields(dst, src)     \
    (dst)->l_type = (src)->l_type;      \
    (dst)->l_whence = (src)->l_whence;  \
    (dst)->l_start = (src)->l_start;    \
    (dst)->l_len = (src)->l_len;        \
    (dst)->l_pid = (src)->l_pid;
0521 
/* Fetch a compat_flock from userspace into a native struct flock. */
static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl)
{
    struct compat_flock fl;

    if (copy_from_user(&fl, ufl, sizeof(struct compat_flock)))
        return -EFAULT;
    copy_flock_fields(kfl, &fl);
    return 0;
}
0531 
/* Fetch a compat_flock64 from userspace into a native struct flock. */
static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl)
{
    struct compat_flock64 fl;

    if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64)))
        return -EFAULT;
    copy_flock_fields(kfl, &fl);
    return 0;
}
0541 
/* Copy a native struct flock out to userspace as a compat_flock. */
static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl)
{
    struct compat_flock fl;

    /* zero the whole struct so padding cannot leak kernel stack */
    memset(&fl, 0, sizeof(struct compat_flock));
    copy_flock_fields(&fl, kfl);
    if (copy_to_user(ufl, &fl, sizeof(struct compat_flock)))
        return -EFAULT;
    return 0;
}
0552 
/* Copy a native struct flock out to userspace as a compat_flock64. */
static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl)
{
    struct compat_flock64 fl;

    /* compile-time proof that the native fields fit the compat64 layout */
    BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start));
    BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len));

    /* zero the whole struct so padding cannot leak kernel stack */
    memset(&fl, 0, sizeof(struct compat_flock64));
    copy_flock_fields(&fl, kfl);
    if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64)))
        return -EFAULT;
    return 0;
}
0566 #undef copy_flock_fields
0567 
0568 static unsigned int
0569 convert_fcntl_cmd(unsigned int cmd)
0570 {
0571     switch (cmd) {
0572     case F_GETLK64:
0573         return F_GETLK;
0574     case F_SETLK64:
0575         return F_SETLK;
0576     case F_SETLKW64:
0577         return F_SETLKW;
0578     }
0579 
0580     return cmd;
0581 }
0582 
/*
 * GETLK was successful and we need to return the data, but it needs to fit in
 * the compat structure.
 * l_start shouldn't be too big, unless the original start + end is greater than
 * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return
 * -EOVERFLOW in that case.  l_len could be too big, in which case we just
 * truncate it, and only allow the app to see that part of the conflicting lock
 * that might make sense to it anyway
 */
static int fixup_compat_flock(struct flock *flock)
{
    if (flock->l_start > COMPAT_OFF_T_MAX)
        return -EOVERFLOW;
    /* truncate rather than fail: show the representable part of the lock */
    if (flock->l_len > COMPAT_OFF_T_MAX)
        flock->l_len = COMPAT_OFF_T_MAX;
    return 0;
}
0600 
/*
 * do_compat_fcntl64 - common worker for both compat fcntl entry points.
 *
 * Translates the compat flock layouts (32- and 64-bit variants) to the
 * native struct flock, invokes the generic lock code with the command
 * remapped via convert_fcntl_cmd(), and forwards everything else to
 * do_fcntl().
 */
static long do_compat_fcntl64(unsigned int fd, unsigned int cmd,
                 compat_ulong_t arg)
{
    struct fd f = fdget_raw(fd);
    struct flock flock;
    long err = -EBADF;

    if (!f.file)
        return err;

    /* FMODE_PATH files only allow the descriptor-level commands */
    if (unlikely(f.file->f_mode & FMODE_PATH)) {
        if (!check_fcntl_cmd(cmd))
            goto out_put;
    }

    err = security_file_fcntl(f.file, cmd, arg);
    if (err)
        goto out_put;

    switch (cmd) {
    case F_GETLK:
        err = get_compat_flock(&flock, compat_ptr(arg));
        if (err)
            break;
        err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
        if (err)
            break;
        /* result must fit the 32-bit off_t of the compat layout */
        err = fixup_compat_flock(&flock);
        if (!err)
            err = put_compat_flock(&flock, compat_ptr(arg));
        break;
    case F_GETLK64:
    case F_OFD_GETLK:
        err = get_compat_flock64(&flock, compat_ptr(arg));
        if (err)
            break;
        err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
        if (!err)
            err = put_compat_flock64(&flock, compat_ptr(arg));
        break;
    case F_SETLK:
    case F_SETLKW:
        err = get_compat_flock(&flock, compat_ptr(arg));
        if (err)
            break;
        err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
        break;
    case F_SETLK64:
    case F_SETLKW64:
    case F_OFD_SETLK:
    case F_OFD_SETLKW:
        err = get_compat_flock64(&flock, compat_ptr(arg));
        if (err)
            break;
        err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
        break;
    default:
        err = do_fcntl(fd, cmd, arg, f.file);
        break;
    }
out_put:
    fdput(f);
    return err;
}
0665 
/* compat fcntl64(): accepts the full command set, including F_*64. */
COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
               compat_ulong_t, arg)
{
    return do_compat_fcntl64(fd, cmd, arg);
}
0671 
/*
 * compat fcntl(): like compat fcntl64() but the 64-bit/OFD lock
 * commands are rejected — 32-bit callers must use fcntl64() for those.
 */
COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
               compat_ulong_t, arg)
{
    switch (cmd) {
    case F_GETLK64:
    case F_SETLK64:
    case F_SETLKW64:
    case F_OFD_GETLK:
    case F_OFD_SETLK:
    case F_OFD_SETLKW:
        return -EINVAL;
    }
    return do_compat_fcntl64(fd, cmd, arg);
}
0686 #endif
0687 
/* Table to convert sigio signal codes into poll band bitmaps */

/* Indexed by (reason - POLL_IN); see send_sigio_to_task(). */
static const __poll_t band_table[NSIGPOLL] = {
    EPOLLIN | EPOLLRDNORM,          /* POLL_IN */
    EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND,   /* POLL_OUT */
    EPOLLIN | EPOLLRDNORM | EPOLLMSG,       /* POLL_MSG */
    EPOLLERR,               /* POLL_ERR */
    EPOLLPRI | EPOLLRDBAND,         /* POLL_PRI */
    EPOLLHUP | EPOLLERR         /* POLL_HUP */
};
0698 
/*
 * sigio_perm - may the owner recorded in @fown send @sig to task @p?
 *
 * Permitted when the owner's euid is root, or its uid/euid matches the
 * target's uid or saved uid, and the security module does not object.
 * The target's credentials are sampled under RCU.
 */
static inline int sigio_perm(struct task_struct *p,
                             struct fown_struct *fown, int sig)
{
    const struct cred *cred;
    int ret;

    rcu_read_lock();
    cred = __task_cred(p);
    ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
        uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
        uid_eq(fown->uid,  cred->suid) || uid_eq(fown->uid,  cred->uid)) &&
           !security_file_send_sigiotask(p, fown, sig));
    rcu_read_unlock();
    return ret;
}
0714 
/*
 * send_sigio_to_task - queue the async-I/O signal for one task.
 * @p:      recipient task
 * @fown:   file owner descriptor (holds signum, uid/euid)
 * @fd:     descriptor reported in si_fd
 * @reason: POLL_* code reported (possibly mapped) in si_code/si_band
 * @type:   pid type used for delivery
 *
 * If the owner configured a signal via F_SETSIG, an rt siginfo-carrying
 * signal is queued; if that fails (or signum is 0) we fall back to a
 * plain SIGIO without siginfo.
 */
static void send_sigio_to_task(struct task_struct *p,
                   struct fown_struct *fown,
                   int fd, int reason, enum pid_type type)
{
    /*
     * F_SETSIG can change ->signum lockless in parallel, make
     * sure we read it once and use the same value throughout.
     */
    int signum = READ_ONCE(fown->signum);

    if (!sigio_perm(p, fown, signum))
        return;

    switch (signum) {
        default: {
            kernel_siginfo_t si;

            /* Queue a rt signal with the appropriate fd as its
               value.  We use SI_SIGIO as the source, not 
               SI_KERNEL, since kernel signals always get 
               delivered even if we can't queue.  Failure to
               queue in this case _should_ be reported; we fall
               back to SIGIO in that case. --sct */
            clear_siginfo(&si);
            si.si_signo = signum;
            si.si_errno = 0;
                si.si_code  = reason;
            /*
             * Posix definies POLL_IN and friends to be signal
             * specific si_codes for SIG_POLL.  Linux extended
             * these si_codes to other signals in a way that is
             * ambiguous if other signals also have signal
             * specific si_codes.  In that case use SI_SIGIO instead
             * to remove the ambiguity.
             */
            if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
                si.si_code = SI_SIGIO;

            /* Make sure we are called with one of the POLL_*
               reasons, otherwise we could leak kernel stack into
               userspace.  */
            BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL));
            if (reason - POLL_IN >= NSIGPOLL)
                si.si_band  = ~0L;
            else
                si.si_band = mangle_poll(band_table[reason - POLL_IN]);
            si.si_fd    = fd;
            if (!do_send_sig_info(signum, &si, p, type))
                break;
        }
            fallthrough;    /* fall back on the old plain SIGIO signal */
        case 0:
            do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
    }
}
0770 
/*
 * send_sigio - deliver the async-I/O signal to the owner of @fown.
 * @fd:   descriptor to report in si_fd
 * @band: POLL_* reason code
 *
 * Single-task and thread-group owners are resolved under RCU; wider pid
 * types (process groups) require walking the member list under
 * tasklist_lock.  No-op when no owner is set.
 */
void send_sigio(struct fown_struct *fown, int fd, int band)
{
    struct task_struct *p;
    enum pid_type type;
    unsigned long flags;
    struct pid *pid;
    
    read_lock_irqsave(&fown->lock, flags);

    type = fown->pid_type;
    pid = fown->pid;
    if (!pid)
        goto out_unlock_fown;

    if (type <= PIDTYPE_TGID) {
        rcu_read_lock();
        p = pid_task(pid, PIDTYPE_PID);
        if (p)
            send_sigio_to_task(p, fown, fd, band, type);
        rcu_read_unlock();
    } else {
        read_lock(&tasklist_lock);
        do_each_pid_task(pid, type, p) {
            send_sigio_to_task(p, fown, fd, band, type);
        } while_each_pid_task(pid, type, p);
        read_unlock(&tasklist_lock);
    }
 out_unlock_fown:
    read_unlock_irqrestore(&fown->lock, flags);
}
0801 
0802 static void send_sigurg_to_task(struct task_struct *p,
0803                 struct fown_struct *fown, enum pid_type type)
0804 {
0805     if (sigio_perm(p, fown, SIGURG))
0806         do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
0807 }
0808 
/*
 * send_sigurg - deliver SIGURG to the owner of @fown, if any.
 *
 * Returns 1 when an owner was set (regardless of whether a signal was
 * actually delivered), 0 otherwise.  Same lookup scheme as
 * send_sigio(): RCU for task/tgid owners, tasklist_lock for groups.
 */
int send_sigurg(struct fown_struct *fown)
{
    struct task_struct *p;
    enum pid_type type;
    struct pid *pid;
    unsigned long flags;
    int ret = 0;
    
    read_lock_irqsave(&fown->lock, flags);

    type = fown->pid_type;
    pid = fown->pid;
    if (!pid)
        goto out_unlock_fown;

    ret = 1;

    if (type <= PIDTYPE_TGID) {
        rcu_read_lock();
        p = pid_task(pid, PIDTYPE_PID);
        if (p)
            send_sigurg_to_task(p, fown, type);
        rcu_read_unlock();
    } else {
        read_lock(&tasklist_lock);
        do_each_pid_task(pid, type, p) {
            send_sigurg_to_task(p, fown, type);
        } while_each_pid_task(pid, type, p);
        read_unlock(&tasklist_lock);
    }
 out_unlock_fown:
    read_unlock_irqrestore(&fown->lock, flags);
    return ret;
}
0843 
0844 static DEFINE_SPINLOCK(fasync_lock);
0845 static struct kmem_cache *fasync_cache __read_mostly;
0846 
0847 static void fasync_free_rcu(struct rcu_head *head)
0848 {
0849     kmem_cache_free(fasync_cache,
0850             container_of(head, struct fasync_struct, fa_rcu));
0851 }
0852 
/*
 * Remove a fasync entry. If successfully removed, return
 * positive and clear the FASYNC flag. If no entry exists,
 * do nothing and return 0.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 *
 */
int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
{
    struct fasync_struct *fa, **fp;
    int result = 0;

    /* f_lock nests outside fasync_lock; same order as insertion */
    spin_lock(&filp->f_lock);
    spin_lock(&fasync_lock);
    for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
        if (fa->fa_file != filp)
            continue;

        /* detach from the file first so signal delivery skips it */
        write_lock_irq(&fa->fa_lock);
        fa->fa_file = NULL;
        write_unlock_irq(&fa->fa_lock);

        *fp = fa->fa_next;
        /* list may have concurrent RCU readers: defer the free */
        call_rcu(&fa->fa_rcu, fasync_free_rcu);
        filp->f_flags &= ~FASYNC;
        result = 1;
        break;
    }
    spin_unlock(&fasync_lock);
    spin_unlock(&filp->f_lock);
    return result;
}
0887 
/* Allocate a not-yet-linked fasync entry; GFP_KERNEL, so may sleep. */
struct fasync_struct *fasync_alloc(void)
{
    return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
}
0892 
/*
 * NOTE! This can be used only for unused fasync entries:
 * entries that actually got inserted on the fasync list
 * need to be released by rcu - see fasync_remove_entry.
 */
void fasync_free(struct fasync_struct *new)
{
    /* never linked, so no RCU grace period is needed */
    kmem_cache_free(fasync_cache, new);
}
0902 
0903 /*
0904  * Insert a new entry into the fasync list.  Return the pointer to the
0905  * old one if we didn't use the new one.
0906  *
0907  * NOTE! It is very important that the FASYNC flag always
0908  * match the state "is the filp on a fasync list".
0909  */
0910 struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
0911 {
0912         struct fasync_struct *fa, **fp;
0913 
0914     spin_lock(&filp->f_lock);
0915     spin_lock(&fasync_lock);
0916     for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
0917         if (fa->fa_file != filp)
0918             continue;
0919 
0920         write_lock_irq(&fa->fa_lock);
0921         fa->fa_fd = fd;
0922         write_unlock_irq(&fa->fa_lock);
0923         goto out;
0924     }
0925 
0926     rwlock_init(&new->fa_lock);
0927     new->magic = FASYNC_MAGIC;
0928     new->fa_file = filp;
0929     new->fa_fd = fd;
0930     new->fa_next = *fapp;
0931     rcu_assign_pointer(*fapp, new);
0932     filp->f_flags |= FASYNC;
0933 
0934 out:
0935     spin_unlock(&fasync_lock);
0936     spin_unlock(&filp->f_lock);
0937     return fa;
0938 }
0939 
0940 /*
0941  * Add a fasync entry. Return negative on error, positive if
0942  * added, and zero if did nothing but change an existing one.
0943  */
0944 static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
0945 {
0946     struct fasync_struct *new;
0947 
0948     new = fasync_alloc();
0949     if (!new)
0950         return -ENOMEM;
0951 
0952     /*
0953      * fasync_insert_entry() returns the old (update) entry if
0954      * it existed.
0955      *
0956      * So free the (unused) new entry and return 0 to let the
0957      * caller know that we didn't add any new fasync entries.
0958      */
0959     if (fasync_insert_entry(fd, filp, fapp, new)) {
0960         fasync_free(new);
0961         return 0;
0962     }
0963 
0964     return 1;
0965 }
0966 
/*
 * fasync_helper() is used by almost all character device drivers
 * to set up the fasync queue, and for regular files by the file
 * lease code. It returns negative on error, 0 if it did no changes
 * and positive if it added/deleted the entry.
 */
int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
    return on ? fasync_add_entry(fd, filp, fapp)
              : fasync_remove_entry(filp, fapp);
}

EXPORT_SYMBOL(fasync_helper);
0981 
/*
 * rcu_read_lock() is held
 */
static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
    /* walk the list, signalling the owner of every still-live entry */
    while (fa) {
        struct fown_struct *fown;
        unsigned long flags;

        /* corruption check: bail out rather than chase bad pointers */
        if (fa->magic != FASYNC_MAGIC) {
            printk(KERN_ERR "kill_fasync: bad magic number in "
                   "fasync_struct!\n");
            return;
        }
        read_lock_irqsave(&fa->fa_lock, flags);
        /* fa_file is NULL once fasync_remove_entry() detached us */
        if (fa->fa_file) {
            fown = &fa->fa_file->f_owner;
            /* Don't send SIGURG to processes which have not set a
               queued signum: SIGURG has its own default signalling
               mechanism. */
            if (!(sig == SIGURG && fown->signum == 0))
                send_sigio(fown, fa->fa_fd, band);
        }
        read_unlock_irqrestore(&fa->fa_lock, flags);
        fa = rcu_dereference(fa->fa_next);
    }
}
1009 
/*
 * kill_fasync - notify every fasync entry on *@fp of new activity.
 * @sig:  signal that prompted the notification (e.g. SIGIO, SIGURG)
 * @band: POLL_* reason passed through to send_sigio()
 */
void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
    /* First a quick test without locking: usually
     * the list is empty.
     */
    if (*fp) {
        rcu_read_lock();
        kill_fasync_rcu(rcu_dereference(*fp), sig, band);
        rcu_read_unlock();
    }
}
EXPORT_SYMBOL(kill_fasync);
1022 
/*
 * fcntl_init - boot-time setup: sanity-check the open-flag space and
 * create the slab cache for fasync entries.
 */
static int __init fcntl_init(void)
{
    /*
     * Please add new bits here to ensure allocation uniqueness.
     * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
     * is defined as O_NONBLOCK on some platforms and not on others.
     */
    BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
        HWEIGHT32(
            (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
            __FMODE_EXEC | __FMODE_NONOTIFY));

    /* SLAB_PANIC: boot cannot proceed without this cache */
    fasync_cache = kmem_cache_create("fasync_cache",
                     sizeof(struct fasync_struct), 0,
                     SLAB_PANIC | SLAB_ACCOUNT, NULL);
    return 0;
}
1040 
1041 module_init(fcntl_init)