0001 ===================
0002 Block io priorities
0003 ===================
0004
0005
0006 Intro
0007 -----
0008
0009 With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
0010 priorities are supported for reads on files. This enables users to io nice
0011 processes or process groups, similar to what has been possible with cpu
0012 scheduling for ages. This document mainly details the current possibilities
0013 with cfq; other io schedulers do not support io priorities thus far.
0014
0015 Scheduling classes
0016 ------------------
0017
0018 CFQ implements three generic scheduling classes that determine how io is
0019 served for a process.
0020
IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
higher priority than any other in the system: processes from this class are
given first access to the disk every time. It therefore needs to be used with
some care, since a single RT io process can starve the entire system. Within
the RT class, there are 8 levels of class data that determine exactly how much
time this process needs the disk for on each service. In the future this might
change to be more directly mappable to performance, by passing in a wanted
data rate instead.
0029
IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
for any process that hasn't set a specific io priority. The class data
determines how much io bandwidth the process will get; it is directly mappable
to the cpu nice levels, just more coarsely implemented. 0 is the highest
BE prio level, 7 is the lowest. The mapping between cpu nice level and io
nice level is determined as: io_nice = (cpu_nice + 20) / 5.
0036
IOPRIO_CLASS_IDLE: This is the idle scheduling class. Processes running at this
level only get io time when no one else needs the disk. The idle class has no
class data, since it doesn't really apply here.
0040
0041 Tools
0042 -----
0043
0044 See below for a sample ionice tool. Usage::
0045
0046 # ionice -c<class> -n<level> -p<pid>
0047
0048 If pid isn't given, the current process is assumed. IO priority settings
0049 are inherited on fork, so you can use ionice to start the process at a given
0050 level::
0051
0052 # ionice -c2 -n0 /bin/ls
0053
0054 will run ls at the best-effort scheduling class at the highest priority.
0055 For a running process, you can give the pid instead::
0056
0057 # ionice -c1 -n2 -p100
0058
0059 will change pid 100 to run at the realtime scheduling class, at priority 2.
0060
0061 ionice.c tool::
0062
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h>
#include <getopt.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <asm/unistd.h>
0070
/*
 * Prototypes kept from the original sample. Nothing in this program calls
 * them; the system calls are reached through the syscall() wrappers below.
 */
extern int sys_ioprio_set(int, int, int);
extern int sys_ioprio_get(int, int);

/*
 * System call numbers for ioprio_set/ioprio_get.
 *
 * <asm/unistd.h> normally supplies these already, so the per-architecture
 * table is only a fallback for headers that predate the calls. Guarding it
 * avoids redefining the kernel's values and stops the build from failing
 * with "Unsupported arch" on architectures whose headers do provide them.
 */
#if !defined(__NR_ioprio_set) || !defined(__NR_ioprio_get)
#undef __NR_ioprio_set
#undef __NR_ioprio_get
#if defined(__i386__)
#define __NR_ioprio_set 289
#define __NR_ioprio_get 290
#elif defined(__ppc__) || defined(__powerpc__)
#define __NR_ioprio_set 273
#define __NR_ioprio_get 274
#elif defined(__x86_64__)
#define __NR_ioprio_set 251
#define __NR_ioprio_get 252
#elif defined(__ia64__)
#define __NR_ioprio_set 1274
#define __NR_ioprio_get 1275
#else
#error "Unsupported arch"
#endif
#endif
0089
0090 static inline int ioprio_set(int which, int who, int ioprio)
0091 {
0092 return syscall(__NR_ioprio_set, which, who, ioprio);
0093 }
0094
0095 static inline int ioprio_get(int which, int who)
0096 {
0097 return syscall(__NR_ioprio_get, which, who);
0098 }
0099
/* Scheduling classes; main() shifts this value into the packed ioprio. */
enum {
	IOPRIO_CLASS_NONE = 0,
	IOPRIO_CLASS_RT   = 1,
	IOPRIO_CLASS_BE   = 2,
	IOPRIO_CLASS_IDLE = 3,
};

/*
 * Selectors passed as the first argument of ioprio_set()/ioprio_get().
 * Only IOPRIO_WHO_PROCESS is used by this program.
 */
enum {
	IOPRIO_WHO_PROCESS = 1,
	IOPRIO_WHO_PGRP    = 2,
	IOPRIO_WHO_USER    = 3,
};

/* Bit position of the class within a packed ioprio value. */
#define IOPRIO_CLASS_SHIFT 13

/* Printable class names, indexed by IOPRIO_CLASS_* value. */
const char *to_prio[] = {
	[IOPRIO_CLASS_NONE] = "none",
	[IOPRIO_CLASS_RT]   = "realtime",
	[IOPRIO_CLASS_BE]   = "best-effort",
	[IOPRIO_CLASS_IDLE] = "idle",
};
0116
0117 int main(int argc, char *argv[])
0118 {
0119 int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
0120 int c, pid = 0;
0121
0122 while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
0123 switch (c) {
0124 case 'n':
0125 ioprio = strtol(optarg, NULL, 10);
0126 set = 1;
0127 break;
0128 case 'c':
0129 ioprio_class = strtol(optarg, NULL, 10);
0130 set = 1;
0131 break;
0132 case 'p':
0133 pid = strtol(optarg, NULL, 10);
0134 break;
0135 }
0136 }
0137
0138 switch (ioprio_class) {
0139 case IOPRIO_CLASS_NONE:
0140 ioprio_class = IOPRIO_CLASS_BE;
0141 break;
0142 case IOPRIO_CLASS_RT:
0143 case IOPRIO_CLASS_BE:
0144 break;
0145 case IOPRIO_CLASS_IDLE:
0146 ioprio = 7;
0147 break;
0148 default:
0149 printf("bad prio class %d\n", ioprio_class);
0150 return 1;
0151 }
0152
0153 if (!set) {
0154 if (!pid && argv[optind])
0155 pid = strtol(argv[optind], NULL, 10);
0156
0157 ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
0158
0159 printf("pid=%d, %d\n", pid, ioprio);
0160
0161 if (ioprio == -1)
0162 perror("ioprio_get");
0163 else {
0164 ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
0165 ioprio = ioprio & 0xff;
0166 printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
0167 }
0168 } else {
0169 if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
0170 perror("ioprio_set");
0171 return 1;
0172 }
0173
0174 if (argv[optind])
0175 execvp(argv[optind], &argv[optind]);
0176 }
0177
0178 return 0;
0179 }
0180
0181
0182 March 11 2005, Jens Axboe <jens.axboe@oracle.com>