0001 /*
0002  *  This program is free software; you can redistribute it and/or
0003  *  modify it under the terms of the GNU General Public License as
0004  *  published by the Free Software Foundation, version 2 of the
0005  *  License.
0006  */
0007 
0008 #include <linux/export.h>
0009 #include <linux/nsproxy.h>
0010 #include <linux/slab.h>
0011 #include <linux/user_namespace.h>
0012 #include <linux/proc_ns.h>
0013 #include <linux/highuid.h>
0014 #include <linux/cred.h>
0015 #include <linux/securebits.h>
0016 #include <linux/keyctl.h>
0017 #include <linux/key-type.h>
0018 #include <keys/user-type.h>
0019 #include <linux/seq_file.h>
0020 #include <linux/fs.h>
0021 #include <linux/uaccess.h>
0022 #include <linux/ctype.h>
0023 #include <linux/projid.h>
0024 #include <linux/fs_struct.h>
0025 
0026 static struct kmem_cache *user_ns_cachep __read_mostly;
0027 static DEFINE_MUTEX(userns_state_mutex);
0028 
0029 static bool new_idmap_permitted(const struct file *file,
0030                 struct user_namespace *ns, int cap_setid,
0031                 struct uid_gid_map *map);
0032 static void free_user_ns(struct work_struct *work);
0033 
0034 static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid)
0035 {
0036     return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES);
0037 }
0038 
0039 static void dec_user_namespaces(struct ucounts *ucounts)
0040 {
0041     return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES);
0042 }
0043 
0044 static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
0045 {
0046     /* Start with the same capabilities as init, but they are useless for
0047      * doing anything, as the capabilities are bound to the new user namespace.
0048      */
0049     cred->securebits = SECUREBITS_DEFAULT;
0050     cred->cap_inheritable = CAP_EMPTY_SET;
0051     cred->cap_permitted = CAP_FULL_SET;
0052     cred->cap_effective = CAP_FULL_SET;
0053     cred->cap_ambient = CAP_EMPTY_SET;
0054     cred->cap_bset = CAP_FULL_SET;
0055 #ifdef CONFIG_KEYS
0056     key_put(cred->request_key_auth);
0057     cred->request_key_auth = NULL;
0058 #endif
0059     /* tgcred will be cleared in our caller because CLONE_THREAD won't be set */
0060     cred->user_ns = user_ns;
0061 }
0062 
0063 /*
0064  * Create a new user namespace, deriving the creator from the user in the
0065  * passed credentials, and replacing that user with the new root user for the
0066  * new namespace.
0067  *
0068  * This is called by copy_creds(), which will finish setting the target task's
0069  * credentials.
0070  */
0071 int create_user_ns(struct cred *new)
0072 {
0073     struct user_namespace *ns, *parent_ns = new->user_ns;
0074     kuid_t owner = new->euid;
0075     kgid_t group = new->egid;
0076     struct ucounts *ucounts;
0077     int ret, i;
0078 
0079     ret = -ENOSPC;
0080     if (parent_ns->level > 32)
0081         goto fail;
0082 
0083     ucounts = inc_user_namespaces(parent_ns, owner);
0084     if (!ucounts)
0085         goto fail;
0086 
0087     /*
0088      * Verify that we cannot violate the policy of which files
0089      * may be accessed that is specified by the root directory,
0090      * by verifying that the root directory is at the root of the
0091      * mount namespace, which allows all files to be accessed.
0092      */
0093     ret = -EPERM;
0094     if (current_chrooted())
0095         goto fail_dec;
0096 
0097     /* The creator needs a mapping in the parent user namespace
0098      * or else we won't be able to reasonably tell userspace who
0099      * created a user_namespace.
0100      */
0101     ret = -EPERM;
0102     if (!kuid_has_mapping(parent_ns, owner) ||
0103         !kgid_has_mapping(parent_ns, group))
0104         goto fail_dec;
0105 
0106     ret = -ENOMEM;
0107     ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
0108     if (!ns)
0109         goto fail_dec;
0110 
0111     ret = ns_alloc_inum(&ns->ns);
0112     if (ret)
0113         goto fail_free;
0114     ns->ns.ops = &userns_operations;
0115 
0116     atomic_set(&ns->count, 1);
0117     /* Leave the new->user_ns reference with the new user namespace. */
0118     ns->parent = parent_ns;
0119     ns->level = parent_ns->level + 1;
0120     ns->owner = owner;
0121     ns->group = group;
0122     INIT_WORK(&ns->work, free_user_ns);
0123     for (i = 0; i < UCOUNT_COUNTS; i++) {
0124         ns->ucount_max[i] = INT_MAX;
0125     }
0126     ns->ucounts = ucounts;
0127 
0128     /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
0129     mutex_lock(&userns_state_mutex);
0130     ns->flags = parent_ns->flags;
0131     mutex_unlock(&userns_state_mutex);
0132 
0133 #ifdef CONFIG_PERSISTENT_KEYRINGS
0134     init_rwsem(&ns->persistent_keyring_register_sem);
0135 #endif
0136     ret = -ENOMEM;
0137     if (!setup_userns_sysctls(ns))
0138         goto fail_keyring;
0139 
0140     set_cred_user_ns(new, ns);
0141     return 0;
0142 fail_keyring:
0143 #ifdef CONFIG_PERSISTENT_KEYRINGS
0144     key_put(ns->persistent_keyring_register);
0145 #endif
0146     ns_free_inum(&ns->ns);
0147 fail_free:
0148     kmem_cache_free(user_ns_cachep, ns);
0149 fail_dec:
0150     dec_user_namespaces(ucounts);
0151 fail:
0152     return ret;
0153 }
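/*
 * Editor's note: an illustrative, non-kernel sketch of how this path is
 * usually reached from userspace.  unshare(CLONE_NEWUSER) ends up in
 * unshare_userns() below, which calls create_user_ns() on freshly prepared
 * credentials.  The program is an example under stated assumptions, not
 * part of this file:
 *
 *     #define _GNU_SOURCE
 *     #include <sched.h>
 *     #include <stdio.h>
 *     #include <unistd.h>
 *
 *     int main(void)
 *     {
 *         // Create a new user namespace; the caller becomes its owner
 *         // and holds a full capability set inside it.
 *         if (unshare(CLONE_NEWUSER) != 0) {
 *             perror("unshare(CLONE_NEWUSER)");
 *             return 1;
 *         }
 *         // Until uid_map is written, unmapped ids appear as the
 *         // overflow uid (typically 65534).
 *         printf("uid in new ns: %u\n", (unsigned int)getuid());
 *         return 0;
 *     }
 */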
0154 
0155 int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
0156 {
0157     struct cred *cred;
0158     int err = -ENOMEM;
0159 
0160     if (!(unshare_flags & CLONE_NEWUSER))
0161         return 0;
0162 
0163     cred = prepare_creds();
0164     if (cred) {
0165         err = create_user_ns(cred);
0166         if (err)
0167             put_cred(cred);
0168         else
0169             *new_cred = cred;
0170     }
0171 
0172     return err;
0173 }
0174 
0175 static void free_user_ns(struct work_struct *work)
0176 {
0177     struct user_namespace *parent, *ns =
0178         container_of(work, struct user_namespace, work);
0179 
0180     do {
0181         struct ucounts *ucounts = ns->ucounts;
0182         parent = ns->parent;
0183         retire_userns_sysctls(ns);
0184 #ifdef CONFIG_PERSISTENT_KEYRINGS
0185         key_put(ns->persistent_keyring_register);
0186 #endif
0187         ns_free_inum(&ns->ns);
0188         kmem_cache_free(user_ns_cachep, ns);
0189         dec_user_namespaces(ucounts);
0190         ns = parent;
0191     } while (atomic_dec_and_test(&parent->count));
0192 }
0193 
0194 void __put_user_ns(struct user_namespace *ns)
0195 {
0196     schedule_work(&ns->work);
0197 }
0198 EXPORT_SYMBOL(__put_user_ns);
0199 
0200 static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
0201 {
0202     unsigned idx, extents;
0203     u32 first, last, id2;
0204 
0205     id2 = id + count - 1;
0206 
0207     /* Find the matching extent */
0208     extents = map->nr_extents;
0209     smp_rmb();
0210     for (idx = 0; idx < extents; idx++) {
0211         first = map->extent[idx].first;
0212         last = first + map->extent[idx].count - 1;
0213         if (id >= first && id <= last &&
0214             (id2 >= first && id2 <= last))
0215             break;
0216     }
0217     /* Map the id or note failure */
0218     if (idx < extents)
0219         id = (id - first) + map->extent[idx].lower_first;
0220     else
0221         id = (u32) -1;
0222 
0223     return id;
0224 }
0225 
0226 static u32 map_id_down(struct uid_gid_map *map, u32 id)
0227 {
0228     unsigned idx, extents;
0229     u32 first, last;
0230 
0231     /* Find the matching extent */
0232     extents = map->nr_extents;
0233     smp_rmb();
0234     for (idx = 0; idx < extents; idx++) {
0235         first = map->extent[idx].first;
0236         last = first + map->extent[idx].count - 1;
0237         if (id >= first && id <= last)
0238             break;
0239     }
0240     /* Map the id or note failure */
0241     if (idx < extents)
0242         id = (id - first) + map->extent[idx].lower_first;
0243     else
0244         id = (u32) -1;
0245 
0246     return id;
0247 }
0248 
0249 static u32 map_id_up(struct uid_gid_map *map, u32 id)
0250 {
0251     unsigned idx, extents;
0252     u32 first, last;
0253 
0254     /* Find the matching extent */
0255     extents = map->nr_extents;
0256     smp_rmb();
0257     for (idx = 0; idx < extents; idx++) {
0258         first = map->extent[idx].lower_first;
0259         last = first + map->extent[idx].count - 1;
0260         if (id >= first && id <= last)
0261             break;
0262     }
0263     /* Map the id or note failure */
0264     if (idx < extents)
0265         id = (id - first) + map->extent[idx].first;
0266     else
0267         id = (u32) -1;
0268 
0269     return id;
0270 }
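/*
 * Editor's note: a worked example of the extent lookups above, assuming a
 * single-extent map written as "0 100000 65536" (first = 0,
 * lower_first = 100000, count = 65536):
 *
 *     map_id_down(map, 1000)  == 101000    // 0 <= 1000 <= 65535
 *     map_id_down(map, 70000) == (u32) -1  // outside every extent
 *     map_id_up(map, 101000)  == 1000      // inverse direction
 *     map_id_up(map, 1000)    == (u32) -1  // only 100000..165535 map up
 *
 * map_id_range_down() additionally requires the whole range
 * [id, id + count - 1] to fit inside a single extent, so a range that
 * straddles two extents fails even if each endpoint is mapped.
 */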
0271 
0272 /**
0273  *  make_kuid - Map a user-namespace uid pair into a kuid.
0274  *  @ns:  User namespace that the uid is in
0275  *  @uid: User identifier
0276  *
0277  *  Maps a user-namespace uid pair into a kernel internal kuid,
0278  *  and returns that kuid.
0279  *
0280  *  When there is no mapping defined for the user-namespace uid
0281  *  pair INVALID_UID is returned.  Callers are expected to test
0282  *  for and handle INVALID_UID being returned.  INVALID_UID
0283  *  may be tested for using uid_valid().
0284  */
0285 kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
0286 {
0287     /* Map the uid to a global kernel uid */
0288     return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
0289 }
0290 EXPORT_SYMBOL(make_kuid);
0291 
0292 /**
0293  *  from_kuid - Create a uid from a kuid user-namespace pair.
0294  *  @targ: The user namespace we want a uid in.
0295  *  @kuid: The kernel internal uid to start with.
0296  *
0297  *  Map @kuid into the user-namespace specified by @targ and
0298  *  return the resulting uid.
0299  *
0300  *  There is always a mapping into the initial user_namespace.
0301  *
0302  *  If @kuid has no mapping in @targ (uid_t)-1 is returned.
0303  */
0304 uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
0305 {
0306     /* Map the uid from a global kernel uid */
0307     return map_id_up(&targ->uid_map, __kuid_val(kuid));
0308 }
0309 EXPORT_SYMBOL(from_kuid);
0310 
0311 /**
0312  *  from_kuid_munged - Create a uid from a kuid user-namespace pair.
0313  *  @targ: The user namespace we want a uid in.
0314  *  @kuid: The kernel internal uid to start with.
0315  *
0316  *  Map @kuid into the user-namespace specified by @targ and
0317  *  return the resulting uid.
0318  *
0319  *  There is always a mapping into the initial user_namespace.
0320  *
0321  *  Unlike from_kuid, from_kuid_munged never fails and always
0322  *  returns a valid uid.  This makes from_kuid_munged appropriate
0323  *  for use in syscalls like stat and getuid where failing the
0324  *  system call and failing to provide a valid uid are not
0325  *  options.
0326  *
0327  *  If @kuid has no mapping in @targ overflowuid is returned.
0328  */
0329 uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
0330 {
0331     uid_t uid;
0332     uid = from_kuid(targ, kuid);
0333 
0334     if (uid == (uid_t) -1)
0335         uid = overflowuid;
0336     return uid;
0337 }
0338 EXPORT_SYMBOL(from_kuid_munged);
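/*
 * Editor's note: a small usage sketch (variable names are placeholders).
 * When a kuid has no mapping in the target namespace, from_kuid() reports
 * failure while from_kuid_munged() degrades to the overflow uid, which is
 * usually 65534 and is configurable via /proc/sys/kernel/overflowuid:
 *
 *     uid_t a = from_kuid(ns, uid);         // (uid_t) -1 if unmapped
 *     uid_t b = from_kuid_munged(ns, uid);  // overflowuid if unmapped
 */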
0339 
0340 /**
0341  *  make_kgid - Map a user-namespace gid pair into a kgid.
0342  *  @ns:  User namespace that the gid is in
0343  *  @gid: group identifier
0344  *
0345  *  Maps a user-namespace gid pair into a kernel internal kgid,
0346  *  and returns that kgid.
0347  *
0348  *  When there is no mapping defined for the user-namespace gid
0349  *  pair INVALID_GID is returned.  Callers are expected to test
0350  *  for and handle INVALID_GID being returned.  INVALID_GID may be
0351  *  tested for using gid_valid().
0352  */
0353 kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
0354 {
0355     /* Map the gid to a global kernel gid */
0356     return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
0357 }
0358 EXPORT_SYMBOL(make_kgid);
0359 
0360 /**
0361  *  from_kgid - Create a gid from a kgid user-namespace pair.
0362  *  @targ: The user namespace we want a gid in.
0363  *  @kgid: The kernel internal gid to start with.
0364  *
0365  *  Map @kgid into the user-namespace specified by @targ and
0366  *  return the resulting gid.
0367  *
0368  *  There is always a mapping into the initial user_namespace.
0369  *
0370  *  If @kgid has no mapping in @targ (gid_t)-1 is returned.
0371  */
0372 gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
0373 {
0374     /* Map the gid from a global kernel gid */
0375     return map_id_up(&targ->gid_map, __kgid_val(kgid));
0376 }
0377 EXPORT_SYMBOL(from_kgid);
0378 
0379 /**
0380  *  from_kgid_munged - Create a gid from a kgid user-namespace pair.
0381  *  @targ: The user namespace we want a gid in.
0382  *  @kgid: The kernel internal gid to start with.
0383  *
0384  *  Map @kgid into the user-namespace specified by @targ and
0385  *  return the resulting gid.
0386  *
0387  *  There is always a mapping into the initial user_namespace.
0388  *
0389  *  Unlike from_kgid, from_kgid_munged never fails and always
0390  *  returns a valid gid.  This makes from_kgid_munged appropriate
0391  *  for use in syscalls like stat and getgid where failing the
0392  *  system call and failing to provide a valid gid are not options.
0393  *
0394  *  If @kgid has no mapping in @targ overflowgid is returned.
0395  */
0396 gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
0397 {
0398     gid_t gid;
0399     gid = from_kgid(targ, kgid);
0400 
0401     if (gid == (gid_t) -1)
0402         gid = overflowgid;
0403     return gid;
0404 }
0405 EXPORT_SYMBOL(from_kgid_munged);
0406 
0407 /**
0408  *  make_kprojid - Map a user-namespace projid pair into a kprojid.
0409  *  @ns:  User namespace that the projid is in
0410  *  @projid: Project identifier
0411  *
0412  *  Maps a user-namespace projid pair into a kernel internal kprojid,
0413  *  and returns that kprojid.
0414  *
0415  *  When there is no mapping defined for the user-namespace projid
0416  *  pair INVALID_PROJID is returned.  Callers are expected to test
0417  *  for and handle INVALID_PROJID being returned.  INVALID_PROJID
0418  *  may be tested for using projid_valid().
0419  */
0420 kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
0421 {
0422     /* Map the projid to a global kernel projid */
0423     return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
0424 }
0425 EXPORT_SYMBOL(make_kprojid);
0426 
0427 /**
0428  *  from_kprojid - Create a projid from a kprojid user-namespace pair.
0429  *  @targ: The user namespace we want a projid in.
0430  *  @kprojid: The kernel internal project identifier to start with.
0431  *
0432  *  Map @kprojid into the user-namespace specified by @targ and
0433  *  return the resulting projid.
0434  *
0435  *  There is always a mapping into the initial user_namespace.
0436  *
0437  *  If @kprojid has no mapping in @targ (projid_t)-1 is returned.
0438  */
0439 projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
0440 {
0441     /* Map the projid from a global kernel projid */
0442     return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
0443 }
0444 EXPORT_SYMBOL(from_kprojid);
0445 
0446 /**
0447  *  from_kprojid_munged - Create a projid from a kprojid user-namespace pair.
0448  *  @targ: The user namespace we want a projid in.
0449  *  @kprojid: The kernel internal projid to start with.
0450  *
0451  *  Map @kprojid into the user-namespace specified by @targ and
0452  *  return the resulting projid.
0453  *
0454  *  There is always a mapping into the initial user_namespace.
0455  *
0456  *  Unlike from_kprojid, from_kprojid_munged never fails and always
0457  *  returns a valid projid.  This makes from_kprojid_munged
0458  *  appropriate for use in syscalls like stat where failing
0459  *  the system call and failing to provide a valid projid are
0460  *  not options.
0461  *
0462  *  If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
0463  */
0464 projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
0465 {
0466     projid_t projid;
0467     projid = from_kprojid(targ, kprojid);
0468 
0469     if (projid == (projid_t) -1)
0470         projid = OVERFLOW_PROJID;
0471     return projid;
0472 }
0473 EXPORT_SYMBOL(from_kprojid_munged);
0474 
0475 
0476 static int uid_m_show(struct seq_file *seq, void *v)
0477 {
0478     struct user_namespace *ns = seq->private;
0479     struct uid_gid_extent *extent = v;
0480     struct user_namespace *lower_ns;
0481     uid_t lower;
0482 
0483     lower_ns = seq_user_ns(seq);
0484     if ((lower_ns == ns) && lower_ns->parent)
0485         lower_ns = lower_ns->parent;
0486 
0487     lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));
0488 
0489     seq_printf(seq, "%10u %10u %10u\n",
0490         extent->first,
0491         lower,
0492         extent->count);
0493 
0494     return 0;
0495 }
0496 
0497 static int gid_m_show(struct seq_file *seq, void *v)
0498 {
0499     struct user_namespace *ns = seq->private;
0500     struct uid_gid_extent *extent = v;
0501     struct user_namespace *lower_ns;
0502     gid_t lower;
0503 
0504     lower_ns = seq_user_ns(seq);
0505     if ((lower_ns == ns) && lower_ns->parent)
0506         lower_ns = lower_ns->parent;
0507 
0508     lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));
0509 
0510     seq_printf(seq, "%10u %10u %10u\n",
0511         extent->first,
0512         lower,
0513         extent->count);
0514 
0515     return 0;
0516 }
0517 
0518 static int projid_m_show(struct seq_file *seq, void *v)
0519 {
0520     struct user_namespace *ns = seq->private;
0521     struct uid_gid_extent *extent = v;
0522     struct user_namespace *lower_ns;
0523     projid_t lower;
0524 
0525     lower_ns = seq_user_ns(seq);
0526     if ((lower_ns == ns) && lower_ns->parent)
0527         lower_ns = lower_ns->parent;
0528 
0529     lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));
0530 
0531     seq_printf(seq, "%10u %10u %10u\n",
0532         extent->first,
0533         lower,
0534         extent->count);
0535 
0536     return 0;
0537 }
0538 
0539 static void *m_start(struct seq_file *seq, loff_t *ppos,
0540              struct uid_gid_map *map)
0541 {
0542     struct uid_gid_extent *extent = NULL;
0543     loff_t pos = *ppos;
0544 
0545     if (pos < map->nr_extents)
0546         extent = &map->extent[pos];
0547 
0548     return extent;
0549 }
0550 
0551 static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
0552 {
0553     struct user_namespace *ns = seq->private;
0554 
0555     return m_start(seq, ppos, &ns->uid_map);
0556 }
0557 
0558 static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
0559 {
0560     struct user_namespace *ns = seq->private;
0561 
0562     return m_start(seq, ppos, &ns->gid_map);
0563 }
0564 
0565 static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
0566 {
0567     struct user_namespace *ns = seq->private;
0568 
0569     return m_start(seq, ppos, &ns->projid_map);
0570 }
0571 
0572 static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
0573 {
0574     (*pos)++;
0575     return seq->op->start(seq, pos);
0576 }
0577 
0578 static void m_stop(struct seq_file *seq, void *v)
0579 {
0580     return;
0581 }
0582 
0583 const struct seq_operations proc_uid_seq_operations = {
0584     .start = uid_m_start,
0585     .stop = m_stop,
0586     .next = m_next,
0587     .show = uid_m_show,
0588 };
0589 
0590 const struct seq_operations proc_gid_seq_operations = {
0591     .start = gid_m_start,
0592     .stop = m_stop,
0593     .next = m_next,
0594     .show = gid_m_show,
0595 };
0596 
0597 const struct seq_operations proc_projid_seq_operations = {
0598     .start = projid_m_start,
0599     .stop = m_stop,
0600     .next = m_next,
0601     .show = projid_m_show,
0602 };
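/*
 * Editor's note: these seq_operations back /proc/<pid>/uid_map, gid_map and
 * projid_map.  Reading one of those files prints one extent per line as
 * "first lower count", with "lower" translated into the reader's user
 * namespace by the *_m_show() helpers above, for example:
 *
 *              0     100000      65536
 */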
0603 
0604 static bool mappings_overlap(struct uid_gid_map *new_map,
0605                  struct uid_gid_extent *extent)
0606 {
0607     u32 upper_first, lower_first, upper_last, lower_last;
0608     unsigned idx;
0609 
0610     upper_first = extent->first;
0611     lower_first = extent->lower_first;
0612     upper_last = upper_first + extent->count - 1;
0613     lower_last = lower_first + extent->count - 1;
0614 
0615     for (idx = 0; idx < new_map->nr_extents; idx++) {
0616         u32 prev_upper_first, prev_lower_first;
0617         u32 prev_upper_last, prev_lower_last;
0618         struct uid_gid_extent *prev;
0619 
0620         prev = &new_map->extent[idx];
0621 
0622         prev_upper_first = prev->first;
0623         prev_lower_first = prev->lower_first;
0624         prev_upper_last = prev_upper_first + prev->count - 1;
0625         prev_lower_last = prev_lower_first + prev->count - 1;
0626 
0627         /* Does the upper range intersect a previous extent? */
0628         if ((prev_upper_first <= upper_last) &&
0629             (prev_upper_last >= upper_first))
0630             return true;
0631 
0632         /* Does the lower range intersect a previous extent? */
0633         if ((prev_lower_first <= lower_last) &&
0634             (prev_lower_last >= lower_first))
0635             return true;
0636     }
0637     return false;
0638 }
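/*
 * Editor's note: a worked example of the overlap check, with assumed
 * numbers.  Suppose new_map already holds the extent "0 100000 1000"
 * (upper ids 0..999, lower ids 100000..100999).  A candidate extent
 * "500 200000 10" is rejected because its upper range 500..509 intersects
 * 0..999, and "2000 100500 10" is rejected because its lower range
 * 100500..100509 intersects 100000..100999.  Both ranges of a new extent
 * must be disjoint from every previously accepted extent.
 */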
0639 
0640 static ssize_t map_write(struct file *file, const char __user *buf,
0641              size_t count, loff_t *ppos,
0642              int cap_setid,
0643              struct uid_gid_map *map,
0644              struct uid_gid_map *parent_map)
0645 {
0646     struct seq_file *seq = file->private_data;
0647     struct user_namespace *ns = seq->private;
0648     struct uid_gid_map new_map;
0649     unsigned idx;
0650     struct uid_gid_extent *extent = NULL;
0651     char *kbuf = NULL, *pos, *next_line;
0652     ssize_t ret = -EINVAL;
0653 
0654     /*
0655      * The userns_state_mutex serializes all writes to any given map.
0656      *
0657      * Any map is only ever written once.
0658      *
0659      * An id map fits within 1 cache line on most architectures.
0660      *
0661      * On read nothing needs to be done unless you are on an
0662      * architecture with a crazy cache coherency model like alpha.
0663      *
0664      * There is a one time data dependency between reading the
0665      * count of the extents and the values of the extents.  The
0666      * desired behavior is to see the values of the extents that
0667      * were written before the count of the extents.
0668      *
0669      * To achieve this smp_wmb() is used to guarantee the write
0670      * order and smp_rmb() guarantees that we don't have crazy
0671      * architectures returning stale data.
0672      */
0673     mutex_lock(&userns_state_mutex);
0674 
0675     ret = -EPERM;
0676     /* Only allow one successful write to the map */
0677     if (map->nr_extents != 0)
0678         goto out;
0679 
0680     /*
0681      * Adjusting namespace settings requires capabilities on the target.
0682      */
0683     if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
0684         goto out;
0685 
0686     /* Only allow < page size writes at the beginning of the file */
0687     ret = -EINVAL;
0688     if ((*ppos != 0) || (count >= PAGE_SIZE))
0689         goto out;
0690 
0691     /* Slurp in the user data */
0692     kbuf = memdup_user_nul(buf, count);
0693     if (IS_ERR(kbuf)) {
0694         ret = PTR_ERR(kbuf);
0695         kbuf = NULL;
0696         goto out;
0697     }
0698 
0699     /* Parse the user data */
0700     ret = -EINVAL;
0701     pos = kbuf;
0702     new_map.nr_extents = 0;
0703     for (; pos; pos = next_line) {
0704         extent = &new_map.extent[new_map.nr_extents];
0705 
0706         /* Find the end of line and ensure I don't look past it */
0707         next_line = strchr(pos, '\n');
0708         if (next_line) {
0709             *next_line = '\0';
0710             next_line++;
0711             if (*next_line == '\0')
0712                 next_line = NULL;
0713         }
0714 
0715         pos = skip_spaces(pos);
0716         extent->first = simple_strtoul(pos, &pos, 10);
0717         if (!isspace(*pos))
0718             goto out;
0719 
0720         pos = skip_spaces(pos);
0721         extent->lower_first = simple_strtoul(pos, &pos, 10);
0722         if (!isspace(*pos))
0723             goto out;
0724 
0725         pos = skip_spaces(pos);
0726         extent->count = simple_strtoul(pos, &pos, 10);
0727         if (*pos && !isspace(*pos))
0728             goto out;
0729 
0730         /* Verify there is no trailing junk on the line */
0731         pos = skip_spaces(pos);
0732         if (*pos != '\0')
0733             goto out;
0734 
0735         /* Verify we have been given valid starting values */
0736         if ((extent->first == (u32) -1) ||
0737             (extent->lower_first == (u32) -1))
0738             goto out;
0739 
0740         /* Verify count is not zero and does not cause the
0741          * extent to wrap
0742          */
0743         if ((extent->first + extent->count) <= extent->first)
0744             goto out;
0745         if ((extent->lower_first + extent->count) <=
0746              extent->lower_first)
0747             goto out;
0748 
0749         /* Do the ranges in extent overlap any previous extents? */
0750         if (mappings_overlap(&new_map, extent))
0751             goto out;
0752 
0753         new_map.nr_extents++;
0754 
0755         /* Fail if the file contains too many extents */
0756         if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
0757             (next_line != NULL))
0758             goto out;
0759     }
0760     /* Be very certain the new map actually exists */
0761     if (new_map.nr_extents == 0)
0762         goto out;
0763 
0764     ret = -EPERM;
0765     /* Validate that the user is allowed to use the user ids being mapped to. */
0766     if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
0767         goto out;
0768 
0769     /* Map the lower ids from the parent user namespace to the
0770      * kernel global id space.
0771      */
0772     for (idx = 0; idx < new_map.nr_extents; idx++) {
0773         u32 lower_first;
0774         extent = &new_map.extent[idx];
0775 
0776         lower_first = map_id_range_down(parent_map,
0777                         extent->lower_first,
0778                         extent->count);
0779 
0780         /* Fail if we can not map the specified extent to
0781          * the kernel global id space.
0782          */
0783         if (lower_first == (u32) -1)
0784             goto out;
0785 
0786         extent->lower_first = lower_first;
0787     }
0788 
0789     /* Install the map */
0790     memcpy(map->extent, new_map.extent,
0791         new_map.nr_extents*sizeof(new_map.extent[0]));
0792     smp_wmb();
0793     map->nr_extents = new_map.nr_extents;
0794 
0795     *ppos = count;
0796     ret = count;
0797 out:
0798     mutex_unlock(&userns_state_mutex);
0799     kfree(kbuf);
0800     return ret;
0801 }
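/*
 * Editor's note: an illustrative userspace sketch of the format parsed
 * above (the pid and values are examples).  Each line is
 * "<first> <lower_first> <count>", at most UID_GID_MAP_MAX_EXTENTS extents,
 * supplied in a single write of less than one page at offset 0; only the
 * first successful write is accepted:
 *
 *     #include <fcntl.h>
 *     #include <unistd.h>
 *
 *     // Map uids 0..65535 in the child namespace onto
 *     // uids 100000..165535 in the parent namespace.
 *     int fd = open("/proc/1234/uid_map", O_WRONLY);
 *     if (fd >= 0) {
 *         static const char map[] = "0 100000 65536\n";
 *         (void) write(fd, map, sizeof(map) - 1);  // a second write fails
 *         close(fd);
 *     }
 */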
0802 
0803 ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
0804                size_t size, loff_t *ppos)
0805 {
0806     struct seq_file *seq = file->private_data;
0807     struct user_namespace *ns = seq->private;
0808     struct user_namespace *seq_ns = seq_user_ns(seq);
0809 
0810     if (!ns->parent)
0811         return -EPERM;
0812 
0813     if ((seq_ns != ns) && (seq_ns != ns->parent))
0814         return -EPERM;
0815 
0816     return map_write(file, buf, size, ppos, CAP_SETUID,
0817              &ns->uid_map, &ns->parent->uid_map);
0818 }
0819 
0820 ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
0821                size_t size, loff_t *ppos)
0822 {
0823     struct seq_file *seq = file->private_data;
0824     struct user_namespace *ns = seq->private;
0825     struct user_namespace *seq_ns = seq_user_ns(seq);
0826 
0827     if (!ns->parent)
0828         return -EPERM;
0829 
0830     if ((seq_ns != ns) && (seq_ns != ns->parent))
0831         return -EPERM;
0832 
0833     return map_write(file, buf, size, ppos, CAP_SETGID,
0834              &ns->gid_map, &ns->parent->gid_map);
0835 }
0836 
0837 ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
0838                   size_t size, loff_t *ppos)
0839 {
0840     struct seq_file *seq = file->private_data;
0841     struct user_namespace *ns = seq->private;
0842     struct user_namespace *seq_ns = seq_user_ns(seq);
0843 
0844     if (!ns->parent)
0845         return -EPERM;
0846 
0847     if ((seq_ns != ns) && (seq_ns != ns->parent))
0848         return -EPERM;
0849 
0850     /* Anyone can set any valid project id; no capability needed */
0851     return map_write(file, buf, size, ppos, -1,
0852              &ns->projid_map, &ns->parent->projid_map);
0853 }
0854 
0855 static bool new_idmap_permitted(const struct file *file,
0856                 struct user_namespace *ns, int cap_setid,
0857                 struct uid_gid_map *new_map)
0858 {
0859     const struct cred *cred = file->f_cred;
0860     /* Don't allow mappings that would allow anything that wouldn't
0861      * be allowed without the establishment of unprivileged mappings.
0862      */
0863     if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
0864         uid_eq(ns->owner, cred->euid)) {
0865         u32 id = new_map->extent[0].lower_first;
0866         if (cap_setid == CAP_SETUID) {
0867             kuid_t uid = make_kuid(ns->parent, id);
0868             if (uid_eq(uid, cred->euid))
0869                 return true;
0870         } else if (cap_setid == CAP_SETGID) {
0871             kgid_t gid = make_kgid(ns->parent, id);
0872             if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
0873                 gid_eq(gid, cred->egid))
0874                 return true;
0875         }
0876     }
0877 
0878     /* Allow anyone to set a mapping that doesn't require privilege */
0879     if (!cap_valid(cap_setid))
0880         return true;
0881 
0882     /* Allow the specified ids if we have the appropriate capability
0883      * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
0884      * And the opener of the id file also had the appropriate capability.
0885      */
0886     if (ns_capable(ns->parent, cap_setid) &&
0887         file_ns_capable(file, ns->parent, cap_setid))
0888         return true;
0889 
0890     return false;
0891 }
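/*
 * Editor's note: a worked example of the unprivileged case above, with
 * assumed ids.  A process with euid 1000 that created the namespace may
 * write the single line "0 1000 1" to its uid_map without holding
 * CAP_SETUID in the parent namespace, because the one extent of count 1
 * maps back to the writer's own euid.  The analogous gid mapping also
 * requires setgroups to have been denied first.  Anything wider falls
 * through to the capability checks at the end of the function.
 */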
0892 
0893 int proc_setgroups_show(struct seq_file *seq, void *v)
0894 {
0895     struct user_namespace *ns = seq->private;
0896     unsigned long userns_flags = ACCESS_ONCE(ns->flags);
0897 
0898     seq_printf(seq, "%s\n",
0899            (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
0900            "allow" : "deny");
0901     return 0;
0902 }
0903 
0904 ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
0905                  size_t count, loff_t *ppos)
0906 {
0907     struct seq_file *seq = file->private_data;
0908     struct user_namespace *ns = seq->private;
0909     char kbuf[8], *pos;
0910     bool setgroups_allowed;
0911     ssize_t ret;
0912 
0913     /* Only allow a very narrow range of strings to be written */
0914     ret = -EINVAL;
0915     if ((*ppos != 0) || (count >= sizeof(kbuf)))
0916         goto out;
0917 
0918     /* What was written? */
0919     ret = -EFAULT;
0920     if (copy_from_user(kbuf, buf, count))
0921         goto out;
0922     kbuf[count] = '\0';
0923     pos = kbuf;
0924 
0925     /* What is being requested? */
0926     ret = -EINVAL;
0927     if (strncmp(pos, "allow", 5) == 0) {
0928         pos += 5;
0929         setgroups_allowed = true;
0930     }
0931     else if (strncmp(pos, "deny", 4) == 0) {
0932         pos += 4;
0933         setgroups_allowed = false;
0934     }
0935     else
0936         goto out;
0937 
0938     /* Verify there is no trailing junk on the line */
0939     pos = skip_spaces(pos);
0940     if (*pos != '\0')
0941         goto out;
0942 
0943     ret = -EPERM;
0944     mutex_lock(&userns_state_mutex);
0945     if (setgroups_allowed) {
0946         /* Enabling setgroups after setgroups has been disabled
0947          * is not allowed.
0948          */
0949         if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
0950             goto out_unlock;
0951     } else {
0952         /* Permanently disabling setgroups after setgroups has
0953          * been enabled by writing the gid_map is not allowed.
0954          */
0955         if (ns->gid_map.nr_extents != 0)
0956             goto out_unlock;
0957         ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
0958     }
0959     mutex_unlock(&userns_state_mutex);
0960 
0961     /* Report a successful write */
0962     *ppos = count;
0963     ret = count;
0964 out:
0965     return ret;
0966 out_unlock:
0967     mutex_unlock(&userns_state_mutex);
0968     goto out;
0969 }
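/*
 * Editor's note: an illustrative userspace sketch of the ordering enforced
 * above (the pid is an example).  For an unprivileged gid mapping, "deny"
 * must be written before gid_map, and neither step can be undone:
 *
 *     #include <fcntl.h>
 *     #include <unistd.h>
 *
 *     int fd = open("/proc/1234/setgroups", O_WRONLY);
 *     if (fd >= 0) {
 *         (void) write(fd, "deny", 4);  // must precede writing gid_map
 *         close(fd);
 *     }
 *     // Only now may an unprivileged opener write e.g. "0 1000 1"
 *     // to /proc/1234/gid_map.
 */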
0970 
0971 bool userns_may_setgroups(const struct user_namespace *ns)
0972 {
0973     bool allowed;
0974 
0975     mutex_lock(&userns_state_mutex);
0976     /* It is not safe to use setgroups until a gid mapping in
0977      * the user namespace has been established.
0978      */
0979     allowed = ns->gid_map.nr_extents != 0;
0980     /* Is setgroups allowed? */
0981     allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
0982     mutex_unlock(&userns_state_mutex);
0983 
0984     return allowed;
0985 }
0986 
0987 /*
0988  * Returns true if @ns is the same namespace as or a descendant of
0989  * @target_ns.
0990  */
0991 bool current_in_userns(const struct user_namespace *target_ns)
0992 {
0993     struct user_namespace *ns;
0994     for (ns = current_user_ns(); ns; ns = ns->parent) {
0995         if (ns == target_ns)
0996             return true;
0997     }
0998     return false;
0999 }
1000 
1001 static inline struct user_namespace *to_user_ns(struct ns_common *ns)
1002 {
1003     return container_of(ns, struct user_namespace, ns);
1004 }
1005 
1006 static struct ns_common *userns_get(struct task_struct *task)
1007 {
1008     struct user_namespace *user_ns;
1009 
1010     rcu_read_lock();
1011     user_ns = get_user_ns(__task_cred(task)->user_ns);
1012     rcu_read_unlock();
1013 
1014     return user_ns ? &user_ns->ns : NULL;
1015 }
1016 
1017 static void userns_put(struct ns_common *ns)
1018 {
1019     put_user_ns(to_user_ns(ns));
1020 }
1021 
1022 static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
1023 {
1024     struct user_namespace *user_ns = to_user_ns(ns);
1025     struct cred *cred;
1026 
1027     /* Don't allow gaining capabilities by reentering
1028      * the same user namespace.
1029      */
1030     if (user_ns == current_user_ns())
1031         return -EINVAL;
1032 
1033     /* Tasks that share a thread group must share a user namespace */
1034     if (!thread_group_empty(current))
1035         return -EINVAL;
1036 
1037     if (current->fs->users != 1)
1038         return -EINVAL;
1039 
1040     if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1041         return -EPERM;
1042 
1043     cred = prepare_creds();
1044     if (!cred)
1045         return -ENOMEM;
1046 
1047     put_user_ns(cred->user_ns);
1048     set_cred_user_ns(cred, get_user_ns(user_ns));
1049 
1050     return commit_creds(cred);
1051 }
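/*
 * Editor's note: an illustrative sketch of how userns_install() is reached
 * (the pid and path are examples).  setns(2) on a user-namespace file
 * descriptor lands here; the checks above require a single-threaded caller
 * whose fs_struct is not shared and which has CAP_SYS_ADMIN in the target
 * namespace:
 *
 *     #define _GNU_SOURCE
 *     #include <fcntl.h>
 *     #include <sched.h>
 *     #include <stdio.h>
 *     #include <unistd.h>
 *
 *     int fd = open("/proc/1234/ns/user", O_RDONLY);
 *     if (fd >= 0) {
 *         if (setns(fd, CLONE_NEWUSER) != 0)
 *             perror("setns");  // e.g. EPERM or EINVAL from the checks above
 *         close(fd);
 *     }
 */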
1052 
1053 struct ns_common *ns_get_owner(struct ns_common *ns)
1054 {
1055     struct user_namespace *my_user_ns = current_user_ns();
1056     struct user_namespace *owner, *p;
1057 
1058     /* See if the owner is in the current user namespace */
1059     owner = p = ns->ops->owner(ns);
1060     for (;;) {
1061         if (!p)
1062             return ERR_PTR(-EPERM);
1063         if (p == my_user_ns)
1064             break;
1065         p = p->parent;
1066     }
1067 
1068     return &get_user_ns(owner)->ns;
1069 }
1070 
1071 static struct user_namespace *userns_owner(struct ns_common *ns)
1072 {
1073     return to_user_ns(ns)->parent;
1074 }
1075 
1076 const struct proc_ns_operations userns_operations = {
1077     .name       = "user",
1078     .type       = CLONE_NEWUSER,
1079     .get        = userns_get,
1080     .put        = userns_put,
1081     .install    = userns_install,
1082     .owner      = userns_owner,
1083     .get_parent = ns_get_owner,
1084 };
1085 
1086 static __init int user_namespaces_init(void)
1087 {
1088     user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
1089     return 0;
1090 }
1091 subsys_initcall(user_namespaces_init);