Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * fs/ioprio.c
0004  *
0005  * Copyright (C) 2004 Jens Axboe <axboe@kernel.dk>
0006  *
0007  * Helper functions for setting/querying io priorities of processes. The
0008  * system calls closely mimic getpriority/setpriority, see the man page for
0009  * those. The prio argument is a composite of prio class and prio data, where
0010  * the data argument has meaning within that class. The standard scheduling
0011  * classes have 8 distinct prio levels, with 0 being the highest prio and 7
0012  * being the lowest.
0013  *
0014  * IOW, setting BE scheduling class with prio 2 is done ala:
0015  *
0016  * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2;
0017  *
0018  * ioprio_set(PRIO_PROCESS, pid, prio);
0019  *
0020  * See also Documentation/block/ioprio.rst
0021  *
0022  */
0023 #include <linux/gfp.h>
0024 #include <linux/kernel.h>
0025 #include <linux/ioprio.h>
0026 #include <linux/cred.h>
0027 #include <linux/blkdev.h>
0028 #include <linux/capability.h>
0029 #include <linux/syscalls.h>
0030 #include <linux/security.h>
0031 #include <linux/pid_namespace.h>
0032 
0033 int ioprio_check_cap(int ioprio)
0034 {
0035     int class = IOPRIO_PRIO_CLASS(ioprio);
0036     int data = IOPRIO_PRIO_DATA(ioprio);
0037 
0038     switch (class) {
0039         case IOPRIO_CLASS_RT:
0040             /*
0041              * Originally this only checked for CAP_SYS_ADMIN,
0042              * which was implicitly allowed for pid 0 by security
0043              * modules such as SELinux. Make sure we check
0044              * CAP_SYS_ADMIN first to avoid a denial/avc for
0045              * possibly missing CAP_SYS_NICE permission.
0046              */
0047             if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
0048                 return -EPERM;
0049             fallthrough;
0050             /* rt has prio field too */
0051         case IOPRIO_CLASS_BE:
0052             if (data >= IOPRIO_NR_LEVELS || data < 0)
0053                 return -EINVAL;
0054             break;
0055         case IOPRIO_CLASS_IDLE:
0056             break;
0057         case IOPRIO_CLASS_NONE:
0058             if (data)
0059                 return -EINVAL;
0060             break;
0061         default:
0062             return -EINVAL;
0063     }
0064 
0065     return 0;
0066 }
0067 
0068 SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
0069 {
0070     struct task_struct *p, *g;
0071     struct user_struct *user;
0072     struct pid *pgrp;
0073     kuid_t uid;
0074     int ret;
0075 
0076     ret = ioprio_check_cap(ioprio);
0077     if (ret)
0078         return ret;
0079 
0080     ret = -ESRCH;
0081     rcu_read_lock();
0082     switch (which) {
0083         case IOPRIO_WHO_PROCESS:
0084             if (!who)
0085                 p = current;
0086             else
0087                 p = find_task_by_vpid(who);
0088             if (p)
0089                 ret = set_task_ioprio(p, ioprio);
0090             break;
0091         case IOPRIO_WHO_PGRP:
0092             if (!who)
0093                 pgrp = task_pgrp(current);
0094             else
0095                 pgrp = find_vpid(who);
0096 
0097             read_lock(&tasklist_lock);
0098             do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
0099                 ret = set_task_ioprio(p, ioprio);
0100                 if (ret) {
0101                     read_unlock(&tasklist_lock);
0102                     goto out;
0103                 }
0104             } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
0105             read_unlock(&tasklist_lock);
0106 
0107             break;
0108         case IOPRIO_WHO_USER:
0109             uid = make_kuid(current_user_ns(), who);
0110             if (!uid_valid(uid))
0111                 break;
0112             if (!who)
0113                 user = current_user();
0114             else
0115                 user = find_user(uid);
0116 
0117             if (!user)
0118                 break;
0119 
0120             for_each_process_thread(g, p) {
0121                 if (!uid_eq(task_uid(p), uid) ||
0122                     !task_pid_vnr(p))
0123                     continue;
0124                 ret = set_task_ioprio(p, ioprio);
0125                 if (ret)
0126                     goto free_uid;
0127             }
0128 free_uid:
0129             if (who)
0130                 free_uid(user);
0131             break;
0132         default:
0133             ret = -EINVAL;
0134     }
0135 
0136 out:
0137     rcu_read_unlock();
0138     return ret;
0139 }
0140 
0141 /*
0142  * If the task has set an I/O priority, use that. Otherwise, return
0143  * the default I/O priority.
0144  *
0145  * Expected to be called for current task or with task_lock() held to keep
0146  * io_context stable.
0147  */
0148 int __get_task_ioprio(struct task_struct *p)
0149 {
0150     struct io_context *ioc = p->io_context;
0151     int prio;
0152 
0153     if (p != current)
0154         lockdep_assert_held(&p->alloc_lock);
0155     if (ioc)
0156         prio = ioc->ioprio;
0157     else
0158         prio = IOPRIO_DEFAULT;
0159 
0160     if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE)
0161         prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p),
0162                      task_nice_ioprio(p));
0163     return prio;
0164 }
0165 EXPORT_SYMBOL_GPL(__get_task_ioprio);
0166 
/*
 * Return the effective I/O priority of @p, or the non-zero value returned
 * by the security_task_getioprio() hook if that hook rejects the query.
 * The priority is read under task_lock().
 */
static int get_task_ioprio(struct task_struct *p)
{
	int prio;
	int err = security_task_getioprio(p);

	if (err)
		return err;

	task_lock(p);
	prio = __get_task_ioprio(p);
	task_unlock(p);

	return prio;
}
0180 
0181 /*
0182  * Return raw IO priority value as set by userspace. We use this for
0183  * ioprio_get(pid, IOPRIO_WHO_PROCESS) so that we keep historical behavior and
0184  * also so that userspace can distinguish unset IO priority (which just gets
0185  * overriden based on task's nice value) from IO priority set to some value.
0186  */
0187 static int get_task_raw_ioprio(struct task_struct *p)
0188 {
0189     int ret;
0190 
0191     ret = security_task_getioprio(p);
0192     if (ret)
0193         goto out;
0194     task_lock(p);
0195     if (p->io_context)
0196         ret = p->io_context->ioprio;
0197     else
0198         ret = IOPRIO_DEFAULT;
0199     task_unlock(p);
0200 out:
0201     return ret;
0202 }
0203 
/*
 * Pick the numerically smaller of two I/O priority values; ioprio_get()
 * uses it to combine the priorities of several tasks into one result.
 */
static int ioprio_best(unsigned short aprio, unsigned short bprio)
{
	return aprio < bprio ? aprio : bprio;
}
0208 
0209 SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
0210 {
0211     struct task_struct *g, *p;
0212     struct user_struct *user;
0213     struct pid *pgrp;
0214     kuid_t uid;
0215     int ret = -ESRCH;
0216     int tmpio;
0217 
0218     rcu_read_lock();
0219     switch (which) {
0220         case IOPRIO_WHO_PROCESS:
0221             if (!who)
0222                 p = current;
0223             else
0224                 p = find_task_by_vpid(who);
0225             if (p)
0226                 ret = get_task_raw_ioprio(p);
0227             break;
0228         case IOPRIO_WHO_PGRP:
0229             if (!who)
0230                 pgrp = task_pgrp(current);
0231             else
0232                 pgrp = find_vpid(who);
0233             read_lock(&tasklist_lock);
0234             do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
0235                 tmpio = get_task_ioprio(p);
0236                 if (tmpio < 0)
0237                     continue;
0238                 if (ret == -ESRCH)
0239                     ret = tmpio;
0240                 else
0241                     ret = ioprio_best(ret, tmpio);
0242             } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
0243             read_unlock(&tasklist_lock);
0244 
0245             break;
0246         case IOPRIO_WHO_USER:
0247             uid = make_kuid(current_user_ns(), who);
0248             if (!who)
0249                 user = current_user();
0250             else
0251                 user = find_user(uid);
0252 
0253             if (!user)
0254                 break;
0255 
0256             for_each_process_thread(g, p) {
0257                 if (!uid_eq(task_uid(p), user->uid) ||
0258                     !task_pid_vnr(p))
0259                     continue;
0260                 tmpio = get_task_ioprio(p);
0261                 if (tmpio < 0)
0262                     continue;
0263                 if (ret == -ESRCH)
0264                     ret = tmpio;
0265                 else
0266                     ret = ioprio_best(ret, tmpio);
0267             }
0268 
0269             if (who)
0270                 free_uid(user);
0271             break;
0272         default:
0273             ret = -EINVAL;
0274     }
0275 
0276     rcu_read_unlock();
0277     return ret;
0278 }