Back to home page

OSCL-LXR

 
 

    


0001 ===================
0002 Block io priorities
0003 ===================
0004 
0005 
0006 Intro
0007 -----
0008 
0009 With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
0010 priorities are supported for reads on files.  This enables users to io nice
0011 processes or process groups, similar to what has been possible with cpu
0012 scheduling for ages.  This document mainly details the current possibilities
0013 with cfq; other io schedulers do not support io priorities thus far.
0014 
0015 Scheduling classes
0016 ------------------
0017 
0018 CFQ implements three generic scheduling classes that determine how io is
0019 served for a process.
0020 
0021 IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
0022 higher priority than any other in the system; processes from this class are
0023 given first access to the disk every time. Thus it needs to be used with some
0024 care: one RT io process can starve the entire system. Within the RT class,
0025 there are 8 levels of class data that determine exactly how much time this
0026 process needs the disk for on each service. In the future this might change
0027 to be more directly mappable to performance, by passing in a wanted data
0028 rate instead.
0029 
0030 IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
0031 for any process that hasn't set a specific io priority. The class data
0032 determines how much io bandwidth the process will get; it's directly mappable
0033 to the cpu nice levels, just more coarsely implemented. 0 is the highest
0034 BE prio level, 7 is the lowest. The mapping between cpu nice level and io
0035 nice level is determined as: io_nice = (cpu_nice + 20) / 5.
0036 
0037 IOPRIO_CLASS_IDLE: This is the idle scheduling class; processes running at this
0038 level only get io time when no one else needs the disk. The idle class has no
0039 class data, since it doesn't really apply here.
0040 
0041 Tools
0042 -----
0043 
0044 See below for a sample ionice tool. Usage::
0045 
0046         # ionice -c<class> -n<level> -p<pid>
0047 
0048 If pid isn't given, the current process is assumed. IO priority settings
0049 are inherited on fork, so you can use ionice to start the process at a given
0050 level::
0051 
0052         # ionice -c2 -n0 /bin/ls
0053 
0054 will run ls at the best-effort scheduling class at the highest priority.
0055 For a running process, you can give the pid instead::
0056 
0057         # ionice -c1 -n2 -p100
0058 
0059 will change pid 100 to run at the realtime scheduling class, at priority 2.
0060 
0061 ionice.c tool::
0062 
0063   #include <stdio.h>
0064   #include <stdlib.h>
0065   #include <errno.h>
0066   #include <getopt.h>
0067   #include <unistd.h>
0068   #include <sys/ptrace.h>
0069   #include <asm/unistd.h>
0070 
0071   extern int sys_ioprio_set(int, int, int);
0072   extern int sys_ioprio_get(int, int);
0073 
0074   #if defined(__i386__)
0075   #define __NR_ioprio_set               289
0076   #define __NR_ioprio_get               290
0077   #elif defined(__ppc__)
0078   #define __NR_ioprio_set               273
0079   #define __NR_ioprio_get               274
0080   #elif defined(__x86_64__)
0081   #define __NR_ioprio_set               251
0082   #define __NR_ioprio_get               252
0083   #elif defined(__ia64__)
0084   #define __NR_ioprio_set               1274
0085   #define __NR_ioprio_get               1275
0086   #else
0087   #error "Unsupported arch"
0088   #endif
0089 
0090   static inline int ioprio_set(int which, int who, int ioprio)
0091   {
0092         return syscall(__NR_ioprio_set, which, who, ioprio);
0093   }
0094 
0095   static inline int ioprio_get(int which, int who)
0096   {
0097         return syscall(__NR_ioprio_get, which, who);
0098   }
0099 
0100   enum {
0101         IOPRIO_CLASS_NONE,
0102         IOPRIO_CLASS_RT,
0103         IOPRIO_CLASS_BE,
0104         IOPRIO_CLASS_IDLE,
0105   };
0106 
0107   enum {
0108         IOPRIO_WHO_PROCESS = 1,
0109         IOPRIO_WHO_PGRP,
0110         IOPRIO_WHO_USER,
0111   };
0112 
0113   #define IOPRIO_CLASS_SHIFT    13
0114 
0115   const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
0116 
0117   int main(int argc, char *argv[])
0118   {
0119         int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
0120         int c, pid = 0;
0121 
0122         while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
0123                 switch (c) {
0124                 case 'n':
0125                         ioprio = strtol(optarg, NULL, 10);
0126                         set = 1;
0127                         break;
0128                 case 'c':
0129                         ioprio_class = strtol(optarg, NULL, 10);
0130                         set = 1;
0131                         break;
0132                 case 'p':
0133                         pid = strtol(optarg, NULL, 10);
0134                         break;
0135                 }
0136         }
0137 
0138         switch (ioprio_class) {
0139                 case IOPRIO_CLASS_NONE:
0140                         ioprio_class = IOPRIO_CLASS_BE;
0141                         break;
0142                 case IOPRIO_CLASS_RT:
0143                 case IOPRIO_CLASS_BE:
0144                         break;
0145                 case IOPRIO_CLASS_IDLE:
0146                         ioprio = 7;
0147                         break;
0148                 default:
0149                         printf("bad prio class %d\n", ioprio_class);
0150                         return 1;
0151         }
0152 
0153         if (!set) {
0154                 if (!pid && argv[optind])
0155                         pid = strtol(argv[optind], NULL, 10);
0156 
0157                 ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
0158 
0159                 printf("pid=%d, %d\n", pid, ioprio);
0160 
0161                 if (ioprio == -1)
0162                         perror("ioprio_get");
0163                 else {
0164                         ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
0165                         ioprio = ioprio & 0xff;
0166                         printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
0167                 }
0168         } else {
0169                 if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
0170                         perror("ioprio_set");
0171                         return 1;
0172                 }
0173 
0174                 if (argv[optind])
0175                         execvp(argv[optind], &argv[optind]);
0176         }
0177 
0178         return 0;
0179   }
0180 
0181 
0182 March 11 2005, Jens Axboe <jens.axboe@oracle.com>