Back to home page

LXR

 
 

    


0001 /*
0002  * device_cgroup.c - device cgroup subsystem
0003  *
0004  * Copyright 2007 IBM Corp
0005  */
0006 
0007 #include <linux/device_cgroup.h>
0008 #include <linux/cgroup.h>
0009 #include <linux/ctype.h>
0010 #include <linux/list.h>
0011 #include <linux/uaccess.h>
0012 #include <linux/seq_file.h>
0013 #include <linux/slab.h>
0014 #include <linux/rcupdate.h>
0015 #include <linux/mutex.h>
0016 
/* Access bits stored in an exception's access mask. */
#define ACC_MKNOD (1 << 0)
#define ACC_READ  (1 << 1)
#define ACC_WRITE (1 << 2)
/* Every valid access bit combined. */
#define ACC_MASK  (ACC_MKNOD | ACC_READ | ACC_WRITE)
0021 
/* Device type bits stored in an exception's type field. */
#define DEV_BLOCK (1 << 0)
#define DEV_CHAR  (1 << 1)
#define DEV_ALL   (1 << 2)  /* this represents all devices */
0025 
/*
 * Serializes updates to every device cgroup's behavior and exception list;
 * the list-modifying helpers in this file assert that it is held.
 */
0026 static DEFINE_MUTEX(devcgroup_mutex);
0027 
/*
 * Default policy of a device cgroup.
 *
 * DEVCG_DEFAULT_NONE marks a cgroup that is not online (set at allocation
 * and again when the cgroup goes offline).  With DEVCG_DEFAULT_ALLOW the
 * exception list holds what is denied; with DEVCG_DEFAULT_DENY it holds
 * what is allowed.
 */
enum devcg_behavior {
    DEVCG_DEFAULT_NONE  = 0,
    DEVCG_DEFAULT_ALLOW = 1,
    DEVCG_DEFAULT_DENY  = 2,
};
0033 
0034 /*
0035  * exception list locking rules:
0036  * hold devcgroup_mutex for update/read.
0037  * hold rcu_read_lock() for read.
0038  */
0039 
0040 struct dev_exception_item {
0041     u32 major, minor;
0042     short type;
0043     short access;
0044     struct list_head list;
0045     struct rcu_head rcu;
0046 };
0047 
/*
 * Per-cgroup state of the device controller: the embedded css, the list of
 * dev_exception_item entries and the default policy they are exceptions to.
 */
0048 struct dev_cgroup {
0049     struct cgroup_subsys_state css;
0050     struct list_head exceptions;
0051     enum devcg_behavior behavior;
0052 };
0053 
0054 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
0055 {
0056     return s ? container_of(s, struct dev_cgroup, css) : NULL;
0057 }
0058 
0059 static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
0060 {
0061     return css_to_devcgroup(task_css(task, devices_cgrp_id));
0062 }
0063 
0064 /*
0065  * called under devcgroup_mutex
0066  */
0067 static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
0068 {
0069     struct dev_exception_item *ex, *tmp, *new;
0070 
0071     lockdep_assert_held(&devcgroup_mutex);
0072 
0073     list_for_each_entry(ex, orig, list) {
0074         new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
0075         if (!new)
0076             goto free_and_exit;
0077         list_add_tail(&new->list, dest);
0078     }
0079 
0080     return 0;
0081 
0082 free_and_exit:
0083     list_for_each_entry_safe(ex, tmp, dest, list) {
0084         list_del(&ex->list);
0085         kfree(ex);
0086     }
0087     return -ENOMEM;
0088 }
0089 
0090 /*
0091  * called under devcgroup_mutex
0092  */
0093 static int dev_exception_add(struct dev_cgroup *dev_cgroup,
0094                  struct dev_exception_item *ex)
0095 {
0096     struct dev_exception_item *excopy, *walk;
0097 
0098     lockdep_assert_held(&devcgroup_mutex);
0099 
0100     excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
0101     if (!excopy)
0102         return -ENOMEM;
0103 
0104     list_for_each_entry(walk, &dev_cgroup->exceptions, list) {
0105         if (walk->type != ex->type)
0106             continue;
0107         if (walk->major != ex->major)
0108             continue;
0109         if (walk->minor != ex->minor)
0110             continue;
0111 
0112         walk->access |= ex->access;
0113         kfree(excopy);
0114         excopy = NULL;
0115     }
0116 
0117     if (excopy != NULL)
0118         list_add_tail_rcu(&excopy->list, &dev_cgroup->exceptions);
0119     return 0;
0120 }
0121 
0122 /*
0123  * called under devcgroup_mutex
0124  */
0125 static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
0126                  struct dev_exception_item *ex)
0127 {
0128     struct dev_exception_item *walk, *tmp;
0129 
0130     lockdep_assert_held(&devcgroup_mutex);
0131 
0132     list_for_each_entry_safe(walk, tmp, &dev_cgroup->exceptions, list) {
0133         if (walk->type != ex->type)
0134             continue;
0135         if (walk->major != ex->major)
0136             continue;
0137         if (walk->minor != ex->minor)
0138             continue;
0139 
0140         walk->access &= ~ex->access;
0141         if (!walk->access) {
0142             list_del_rcu(&walk->list);
0143             kfree_rcu(walk, rcu);
0144         }
0145     }
0146 }
0147 
0148 static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
0149 {
0150     struct dev_exception_item *ex, *tmp;
0151 
0152     list_for_each_entry_safe(ex, tmp, &dev_cgroup->exceptions, list) {
0153         list_del_rcu(&ex->list);
0154         kfree_rcu(ex, rcu);
0155     }
0156 }
0157 
0158 /**
0159  * dev_exception_clean - frees all entries of the exception list
0160  * @dev_cgroup: dev_cgroup with the exception list to be cleaned
0161  *
0162  * called under devcgroup_mutex
0163  */
0164 static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
0165 {
0166     lockdep_assert_held(&devcgroup_mutex);
0167 
0168     __dev_exception_clean(dev_cgroup);
0169 }
0170 
0171 static inline bool is_devcg_online(const struct dev_cgroup *devcg)
0172 {
0173     return (devcg->behavior != DEVCG_DEFAULT_NONE);
0174 }
0175 
0176 /**
0177  * devcgroup_online - initializes devcgroup's behavior and exceptions based on
0178  *            parent's
0179  * @css: css getting online
0180  * returns 0 in case of success, error code otherwise
0181  */
0182 static int devcgroup_online(struct cgroup_subsys_state *css)
0183 {
0184     struct dev_cgroup *dev_cgroup = css_to_devcgroup(css);
0185     struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css->parent);
0186     int ret = 0;
0187 
0188     mutex_lock(&devcgroup_mutex);
0189 
0190     if (parent_dev_cgroup == NULL)
0191         dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
0192     else {
0193         ret = dev_exceptions_copy(&dev_cgroup->exceptions,
0194                       &parent_dev_cgroup->exceptions);
0195         if (!ret)
0196             dev_cgroup->behavior = parent_dev_cgroup->behavior;
0197     }
0198     mutex_unlock(&devcgroup_mutex);
0199 
0200     return ret;
0201 }
0202 
0203 static void devcgroup_offline(struct cgroup_subsys_state *css)
0204 {
0205     struct dev_cgroup *dev_cgroup = css_to_devcgroup(css);
0206 
0207     mutex_lock(&devcgroup_mutex);
0208     dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
0209     mutex_unlock(&devcgroup_mutex);
0210 }
0211 
0212 /*
0213  * called from kernel/cgroup.c with cgroup_lock() held.
0214  */
0215 static struct cgroup_subsys_state *
0216 devcgroup_css_alloc(struct cgroup_subsys_state *parent_css)
0217 {
0218     struct dev_cgroup *dev_cgroup;
0219 
0220     dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
0221     if (!dev_cgroup)
0222         return ERR_PTR(-ENOMEM);
0223     INIT_LIST_HEAD(&dev_cgroup->exceptions);
0224     dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
0225 
0226     return &dev_cgroup->css;
0227 }
0228 
/* Release the cgroup's remaining exceptions, then the cgroup itself. */
static void devcgroup_css_free(struct cgroup_subsys_state *css)
{
    struct dev_cgroup *devcg = css_to_devcgroup(css);

    __dev_exception_clean(devcg);
    kfree(devcg);
}
0236 
/* Identifies which control file (allow, deny or list) is being accessed. */
enum {
    DEVCG_ALLOW = 1,
    DEVCG_DENY  = 2,
    DEVCG_LIST  = 3,
};
0240 
/* Buffer sizes used when formatting an exception for the list file. */
enum {
    MAJMINLEN = 13, /* room for a major or minor rendered as text */
    ACCLEN    = 4,  /* up to three access letters plus the terminator */
};
0243 
0244 static void set_access(char *acc, short access)
0245 {
0246     int idx = 0;
0247     memset(acc, 0, ACCLEN);
0248     if (access & ACC_READ)
0249         acc[idx++] = 'r';
0250     if (access & ACC_WRITE)
0251         acc[idx++] = 'w';
0252     if (access & ACC_MKNOD)
0253         acc[idx++] = 'm';
0254 }
0255 
0256 static char type_to_char(short type)
0257 {
0258     if (type == DEV_ALL)
0259         return 'a';
0260     if (type == DEV_CHAR)
0261         return 'c';
0262     if (type == DEV_BLOCK)
0263         return 'b';
0264     return 'X';
0265 }
0266 
/*
 * Format a major or minor number into @str: the wildcard value ~0 is
 * written as "*", anything else as an unsigned decimal.
 */
static void set_majmin(char *str, unsigned m)
{
    if (m != ~0) {
        sprintf(str, "%u", m);
        return;
    }

    strcpy(str, "*");
}
0274 
0275 static int devcgroup_seq_show(struct seq_file *m, void *v)
0276 {
0277     struct dev_cgroup *devcgroup = css_to_devcgroup(seq_css(m));
0278     struct dev_exception_item *ex;
0279     char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
0280 
0281     rcu_read_lock();
0282     /*
0283      * To preserve the compatibility:
0284      * - Only show the "all devices" when the default policy is to allow
0285      * - List the exceptions in case the default policy is to deny
0286      * This way, the file remains as a "whitelist of devices"
0287      */
0288     if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
0289         set_access(acc, ACC_MASK);
0290         set_majmin(maj, ~0);
0291         set_majmin(min, ~0);
0292         seq_printf(m, "%c %s:%s %s\n", type_to_char(DEV_ALL),
0293                maj, min, acc);
0294     } else {
0295         list_for_each_entry_rcu(ex, &devcgroup->exceptions, list) {
0296             set_access(acc, ex->access);
0297             set_majmin(maj, ex->major);
0298             set_majmin(min, ex->minor);
0299             seq_printf(m, "%c %s:%s %s\n", type_to_char(ex->type),
0300                    maj, min, acc);
0301         }
0302     }
0303     rcu_read_unlock();
0304 
0305     return 0;
0306 }
0307 
0308 /**
0309  * match_exception  - iterates the exception list trying to find a complete match
0310  * @exceptions: list of exceptions
0311  * @type: device type (DEV_BLOCK or DEV_CHAR)
0312  * @major: device file major number, ~0 to match all
0313  * @minor: device file minor number, ~0 to match all
0314  * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD)
0315  *
0316  * It is considered a complete match if an exception is found that will
0317  * contain the entire range of provided parameters.
0318  *
0319  * Return: true in case it matches an exception completely
0320  */
0321 static bool match_exception(struct list_head *exceptions, short type,
0322                 u32 major, u32 minor, short access)
0323 {
0324     struct dev_exception_item *ex;
0325 
0326     list_for_each_entry_rcu(ex, exceptions, list) {
0327         if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
0328             continue;
0329         if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR))
0330             continue;
0331         if (ex->major != ~0 && ex->major != major)
0332             continue;
0333         if (ex->minor != ~0 && ex->minor != minor)
0334             continue;
0335         /* provided access cannot have more than the exception rule */
0336         if (access & (~ex->access))
0337             continue;
0338         return true;
0339     }
0340     return false;
0341 }
0342 
0343 /**
0344  * match_exception_partial - iterates the exception list trying to find a partial match
0345  * @exceptions: list of exceptions
0346  * @type: device type (DEV_BLOCK or DEV_CHAR)
0347  * @major: device file major number, ~0 to match all
0348  * @minor: device file minor number, ~0 to match all
0349  * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD)
0350  *
0351  * It is considered a partial match if an exception's range is found to
0352  * contain *any* of the devices specified by provided parameters. This is
0353  * used to make sure no extra access is being granted that is forbidden by
0354  * any of the exception list.
0355  *
0356  * Return: true in case the provided range mat matches an exception completely
0357  */
0358 static bool match_exception_partial(struct list_head *exceptions, short type,
0359                     u32 major, u32 minor, short access)
0360 {
0361     struct dev_exception_item *ex;
0362 
0363     list_for_each_entry_rcu(ex, exceptions, list) {
0364         if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
0365             continue;
0366         if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR))
0367             continue;
0368         /*
0369          * We must be sure that both the exception and the provided
0370          * range aren't masking all devices
0371          */
0372         if (ex->major != ~0 && major != ~0 && ex->major != major)
0373             continue;
0374         if (ex->minor != ~0 && minor != ~0 && ex->minor != minor)
0375             continue;
0376         /*
0377          * In order to make sure the provided range isn't matching
0378          * an exception, all its access bits shouldn't match the
0379          * exception's access bits
0380          */
0381         if (!(access & ex->access))
0382             continue;
0383         return true;
0384     }
0385     return false;
0386 }
0387 
0388 /**
0389  * verify_new_ex - verifies if a new exception is allowed by parent cgroup's permissions
0390  * @dev_cgroup: dev cgroup to be tested against
0391  * @refex: new exception
0392  * @behavior: behavior of the exception's dev_cgroup
0393  *
0394  * This is used to make sure a child cgroup won't have more privileges
0395  * than its parent
0396  */
0397 static bool verify_new_ex(struct dev_cgroup *dev_cgroup,
0398                   struct dev_exception_item *refex,
0399                   enum devcg_behavior behavior)
0400 {
0401     bool match = false;
0402 
0403     RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&
0404              !lockdep_is_held(&devcgroup_mutex),
0405              "device_cgroup:verify_new_ex called without proper synchronization");
0406 
0407     if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
0408         if (behavior == DEVCG_DEFAULT_ALLOW) {
0409             /*
0410              * new exception in the child doesn't matter, only
0411              * adding extra restrictions
0412              */ 
0413             return true;
0414         } else {
0415             /*
0416              * new exception in the child will add more devices
0417              * that can be acessed, so it can't match any of
0418              * parent's exceptions, even slightly
0419              */ 
0420             match = match_exception_partial(&dev_cgroup->exceptions,
0421                             refex->type,
0422                             refex->major,
0423                             refex->minor,
0424                             refex->access);
0425 
0426             if (match)
0427                 return false;
0428             return true;
0429         }
0430     } else {
0431         /*
0432          * Only behavior == DEVCG_DEFAULT_DENY allowed here, therefore
0433          * the new exception will add access to more devices and must
0434          * be contained completely in an parent's exception to be
0435          * allowed
0436          */
0437         match = match_exception(&dev_cgroup->exceptions, refex->type,
0438                     refex->major, refex->minor,
0439                     refex->access);
0440 
0441         if (match)
0442             /* parent has an exception that matches the proposed */
0443             return true;
0444         else
0445             return false;
0446     }
0447     return false;
0448 }
0449 
0450 /*
0451  * parent_has_perm:
0452  * when adding a new allow rule to a device exception list, the rule
0453  * must be allowed in the parent device
0454  */
0455 static int parent_has_perm(struct dev_cgroup *childcg,
0456                   struct dev_exception_item *ex)
0457 {
0458     struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent);
0459 
0460     if (!parent)
0461         return 1;
0462     return verify_new_ex(parent, ex, childcg->behavior);
0463 }
0464 
0465 /**
0466  * parent_allows_removal - verify if it's ok to remove an exception
0467  * @childcg: child cgroup from where the exception will be removed
0468  * @ex: exception being removed
0469  *
0470  * When removing an exception in cgroups with default ALLOW policy, it must
0471  * be checked if removing it will give the child cgroup more access than the
0472  * parent.
0473  *
0474  * Return: true if it's ok to remove exception, false otherwise
0475  */
0476 static bool parent_allows_removal(struct dev_cgroup *childcg,
0477                   struct dev_exception_item *ex)
0478 {
0479     struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent);
0480 
0481     if (!parent)
0482         return true;
0483 
0484     /* It's always allowed to remove access to devices */
0485     if (childcg->behavior == DEVCG_DEFAULT_DENY)
0486         return true;
0487 
0488     /*
0489      * Make sure you're not removing part or a whole exception existing in
0490      * the parent cgroup
0491      */
0492     return !match_exception_partial(&parent->exceptions, ex->type,
0493                     ex->major, ex->minor, ex->access);
0494 }
0495 
0496 /**
0497  * may_allow_all - checks if it's possible to change the behavior to
0498  *         allow based on parent's rules.
0499  * @parent: device cgroup's parent
0500  * returns: != 0 in case it's allowed, 0 otherwise
0501  */
0502 static inline int may_allow_all(struct dev_cgroup *parent)
0503 {
0504     if (!parent)
0505         return 1;
0506     return parent->behavior == DEVCG_DEFAULT_ALLOW;
0507 }
0508 
0509 /**
0510  * revalidate_active_exceptions - walks through the active exception list and
0511  *                revalidates the exceptions based on parent's
0512  *                behavior and exceptions. The exceptions that
0513  *                are no longer valid will be removed.
0514  *                Called with devcgroup_mutex held.
0515  * @devcg: cgroup which exceptions will be checked
0516  *
0517  * This is one of the three key functions for hierarchy implementation.
0518  * This function is responsible for re-evaluating all the cgroup's active
0519  * exceptions due to a parent's exception change.
0520  * Refer to Documentation/cgroups/devices.txt for more details.
0521  */
0522 static void revalidate_active_exceptions(struct dev_cgroup *devcg)
0523 {
0524     struct dev_exception_item *ex;
0525     struct list_head *this, *tmp;
0526 
0527     list_for_each_safe(this, tmp, &devcg->exceptions) {
0528         ex = container_of(this, struct dev_exception_item, list);
0529         if (!parent_has_perm(devcg, ex))
0530             dev_exception_rm(devcg, ex);
0531     }
0532 }
0533 
0534 /**
0535  * propagate_exception - propagates a new exception to the children
0536  * @devcg_root: device cgroup that added a new exception
0537  * @ex: new exception to be propagated
0538  *
0539  * returns: 0 in case of success, != 0 in case of error
0540  */
0541 static int propagate_exception(struct dev_cgroup *devcg_root,
0542                    struct dev_exception_item *ex)
0543 {
0544     struct cgroup_subsys_state *pos;
0545     int rc = 0;
0546 
0547     rcu_read_lock();
0548 
0549     css_for_each_descendant_pre(pos, &devcg_root->css) {
0550         struct dev_cgroup *devcg = css_to_devcgroup(pos);
0551 
0552         /*
0553          * Because devcgroup_mutex is held, no devcg will become
0554          * online or offline during the tree walk (see on/offline
0555          * methods), and online ones are safe to access outside RCU
0556          * read lock without bumping refcnt.
0557          */
0558         if (pos == &devcg_root->css || !is_devcg_online(devcg))
0559             continue;
0560 
0561         rcu_read_unlock();
0562 
0563         /*
0564          * in case both root's behavior and devcg is allow, a new
0565          * restriction means adding to the exception list
0566          */
0567         if (devcg_root->behavior == DEVCG_DEFAULT_ALLOW &&
0568             devcg->behavior == DEVCG_DEFAULT_ALLOW) {
0569             rc = dev_exception_add(devcg, ex);
0570             if (rc)
0571                 break;
0572         } else {
0573             /*
0574              * in the other possible cases:
0575              * root's behavior: allow, devcg's: deny
0576              * root's behavior: deny, devcg's: deny
0577              * the exception will be removed
0578              */
0579             dev_exception_rm(devcg, ex);
0580         }
0581         revalidate_active_exceptions(devcg);
0582 
0583         rcu_read_lock();
0584     }
0585 
0586     rcu_read_unlock();
0587     return rc;
0588 }
0589 
0590 /*
0591  * Modify the exception list using allow/deny rules.
0592  * CAP_SYS_ADMIN is needed for this.  It's at least separate from CAP_MKNOD
0593  * so we can give a container CAP_MKNOD to let it create devices but not
0594  * modify the exception list.
0595  * It seems likely we'll want to add a CAP_CONTAINER capability to allow
0596  * us to also grant CAP_SYS_ADMIN to containers without giving away the
0597  * device exception list controls, but for now we'll stick with CAP_SYS_ADMIN
0598  *
0599  * Taking rules away is always allowed (given CAP_SYS_ADMIN).  Granting
0600  * new access is only allowed if you're in the top-level cgroup, or your
0601  * parent cgroup has the access you're asking for.
0602  */
0603 static int devcgroup_update_access(struct dev_cgroup *devcgroup,
0604                    int filetype, char *buffer)
0605 {
0606     const char *b;
0607     char temp[12];      /* 11 + 1 characters needed for a u32 */
0608     int count, rc = 0;
0609     struct dev_exception_item ex;
0610     struct dev_cgroup *parent = css_to_devcgroup(devcgroup->css.parent);
0611 
0612     if (!capable(CAP_SYS_ADMIN))
0613         return -EPERM;
0614 
0615     memset(&ex, 0, sizeof(ex));
0616     b = buffer;
0617 
0618     switch (*b) {
0619     case 'a':
0620         switch (filetype) {
0621         case DEVCG_ALLOW:
0622             if (css_has_online_children(&devcgroup->css))
0623                 return -EINVAL;
0624 
0625             if (!may_allow_all(parent))
0626                 return -EPERM;
0627             dev_exception_clean(devcgroup);
0628             devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
0629             if (!parent)
0630                 break;
0631 
0632             rc = dev_exceptions_copy(&devcgroup->exceptions,
0633                          &parent->exceptions);
0634             if (rc)
0635                 return rc;
0636             break;
0637         case DEVCG_DENY:
0638             if (css_has_online_children(&devcgroup->css))
0639                 return -EINVAL;
0640 
0641             dev_exception_clean(devcgroup);
0642             devcgroup->behavior = DEVCG_DEFAULT_DENY;
0643             break;
0644         default:
0645             return -EINVAL;
0646         }
0647         return 0;
0648     case 'b':
0649         ex.type = DEV_BLOCK;
0650         break;
0651     case 'c':
0652         ex.type = DEV_CHAR;
0653         break;
0654     default:
0655         return -EINVAL;
0656     }
0657     b++;
0658     if (!isspace(*b))
0659         return -EINVAL;
0660     b++;
0661     if (*b == '*') {
0662         ex.major = ~0;
0663         b++;
0664     } else if (isdigit(*b)) {
0665         memset(temp, 0, sizeof(temp));
0666         for (count = 0; count < sizeof(temp) - 1; count++) {
0667             temp[count] = *b;
0668             b++;
0669             if (!isdigit(*b))
0670                 break;
0671         }
0672         rc = kstrtou32(temp, 10, &ex.major);
0673         if (rc)
0674             return -EINVAL;
0675     } else {
0676         return -EINVAL;
0677     }
0678     if (*b != ':')
0679         return -EINVAL;
0680     b++;
0681 
0682     /* read minor */
0683     if (*b == '*') {
0684         ex.minor = ~0;
0685         b++;
0686     } else if (isdigit(*b)) {
0687         memset(temp, 0, sizeof(temp));
0688         for (count = 0; count < sizeof(temp) - 1; count++) {
0689             temp[count] = *b;
0690             b++;
0691             if (!isdigit(*b))
0692                 break;
0693         }
0694         rc = kstrtou32(temp, 10, &ex.minor);
0695         if (rc)
0696             return -EINVAL;
0697     } else {
0698         return -EINVAL;
0699     }
0700     if (!isspace(*b))
0701         return -EINVAL;
0702     for (b++, count = 0; count < 3; count++, b++) {
0703         switch (*b) {
0704         case 'r':
0705             ex.access |= ACC_READ;
0706             break;
0707         case 'w':
0708             ex.access |= ACC_WRITE;
0709             break;
0710         case 'm':
0711             ex.access |= ACC_MKNOD;
0712             break;
0713         case '\n':
0714         case '\0':
0715             count = 3;
0716             break;
0717         default:
0718             return -EINVAL;
0719         }
0720     }
0721 
0722     switch (filetype) {
0723     case DEVCG_ALLOW:
0724         /*
0725          * If the default policy is to allow by default, try to remove
0726          * an matching exception instead. And be silent about it: we
0727          * don't want to break compatibility
0728          */
0729         if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
0730             /* Check if the parent allows removing it first */
0731             if (!parent_allows_removal(devcgroup, &ex))
0732                 return -EPERM;
0733             dev_exception_rm(devcgroup, &ex);
0734             break;
0735         }
0736 
0737         if (!parent_has_perm(devcgroup, &ex))
0738             return -EPERM;
0739         rc = dev_exception_add(devcgroup, &ex);
0740         break;
0741     case DEVCG_DENY:
0742         /*
0743          * If the default policy is to deny by default, try to remove
0744          * an matching exception instead. And be silent about it: we
0745          * don't want to break compatibility
0746          */
0747         if (devcgroup->behavior == DEVCG_DEFAULT_DENY)
0748             dev_exception_rm(devcgroup, &ex);
0749         else
0750             rc = dev_exception_add(devcgroup, &ex);
0751 
0752         if (rc)
0753             break;
0754         /* we only propagate new restrictions */
0755         rc = propagate_exception(devcgroup, &ex);
0756         break;
0757     default:
0758         rc = -EINVAL;
0759     }
0760     return rc;
0761 }
0762 
0763 static ssize_t devcgroup_access_write(struct kernfs_open_file *of,
0764                       char *buf, size_t nbytes, loff_t off)
0765 {
0766     int retval;
0767 
0768     mutex_lock(&devcgroup_mutex);
0769     retval = devcgroup_update_access(css_to_devcgroup(of_css(of)),
0770                      of_cft(of)->private, strstrip(buf));
0771     mutex_unlock(&devcgroup_mutex);
0772     return retval ?: nbytes;
0773 }
0774 
0775 static struct cftype dev_cgroup_files[] = {
0776     {
0777         .name = "allow",
0778         .write = devcgroup_access_write,
0779         .private = DEVCG_ALLOW,
0780     },
0781     {
0782         .name = "deny",
0783         .write = devcgroup_access_write,
0784         .private = DEVCG_DENY,
0785     },
0786     {
0787         .name = "list",
0788         .seq_show = devcgroup_seq_show,
0789         .private = DEVCG_LIST,
0790     },
0791     { } /* terminate */
0792 };
0793 
0794 struct cgroup_subsys devices_cgrp_subsys = {
0795     .css_alloc = devcgroup_css_alloc,
0796     .css_free = devcgroup_css_free,
0797     .css_online = devcgroup_online,
0798     .css_offline = devcgroup_offline,
0799     .legacy_cftypes = dev_cgroup_files,
0800 };
0801 
0802 /**
0803  * __devcgroup_check_permission - checks if an inode operation is permitted
0804  * @dev_cgroup: the dev cgroup to be tested against
0805  * @type: device type
0806  * @major: device major number
0807  * @minor: device minor number
0808  * @access: combination of ACC_WRITE, ACC_READ and ACC_MKNOD
0809  *
0810  * returns 0 on success, -EPERM case the operation is not permitted
0811  */
0812 static int __devcgroup_check_permission(short type, u32 major, u32 minor,
0813                         short access)
0814 {
0815     struct dev_cgroup *dev_cgroup;
0816     bool rc;
0817 
0818     rcu_read_lock();
0819     dev_cgroup = task_devcgroup(current);
0820     if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW)
0821         /* Can't match any of the exceptions, even partially */
0822         rc = !match_exception_partial(&dev_cgroup->exceptions,
0823                           type, major, minor, access);
0824     else
0825         /* Need to match completely one exception to be allowed */
0826         rc = match_exception(&dev_cgroup->exceptions, type, major,
0827                      minor, access);
0828     rcu_read_unlock();
0829 
0830     if (!rc)
0831         return -EPERM;
0832 
0833     return 0;
0834 }
0835 
0836 int __devcgroup_inode_permission(struct inode *inode, int mask)
0837 {
0838     short type, access = 0;
0839 
0840     if (S_ISBLK(inode->i_mode))
0841         type = DEV_BLOCK;
0842     if (S_ISCHR(inode->i_mode))
0843         type = DEV_CHAR;
0844     if (mask & MAY_WRITE)
0845         access |= ACC_WRITE;
0846     if (mask & MAY_READ)
0847         access |= ACC_READ;
0848 
0849     return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
0850             access);
0851 }
0852 
0853 int devcgroup_inode_mknod(int mode, dev_t dev)
0854 {
0855     short type;
0856 
0857     if (!S_ISBLK(mode) && !S_ISCHR(mode))
0858         return 0;
0859 
0860     if (S_ISBLK(mode))
0861         type = DEV_BLOCK;
0862     else
0863         type = DEV_CHAR;
0864 
0865     return __devcgroup_check_permission(type, MAJOR(dev), MINOR(dev),
0866             ACC_MKNOD);
0867 
0868 }