Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 
0003 #include <linux/export.h>
0004 #include <linux/nsproxy.h>
0005 #include <linux/slab.h>
0006 #include <linux/sched/signal.h>
0007 #include <linux/user_namespace.h>
0008 #include <linux/proc_ns.h>
0009 #include <linux/highuid.h>
0010 #include <linux/cred.h>
0011 #include <linux/securebits.h>
0012 #include <linux/keyctl.h>
0013 #include <linux/key-type.h>
0014 #include <keys/user-type.h>
0015 #include <linux/seq_file.h>
0016 #include <linux/fs.h>
0017 #include <linux/uaccess.h>
0018 #include <linux/ctype.h>
0019 #include <linux/projid.h>
0020 #include <linux/fs_struct.h>
0021 #include <linux/bsearch.h>
0022 #include <linux/sort.h>
0023 
0024 static struct kmem_cache *user_ns_cachep __read_mostly;
0025 static DEFINE_MUTEX(userns_state_mutex);
0026 
0027 static bool new_idmap_permitted(const struct file *file,
0028                 struct user_namespace *ns, int cap_setid,
0029                 struct uid_gid_map *map);
0030 static void free_user_ns(struct work_struct *work);
0031 
/* Charge the creation of one user namespace against @uid's
 * UCOUNT_USER_NAMESPACES limit in @ns.  Returns the ucounts to release
 * later, or NULL if the limit would be exceeded.
 */
static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid)
{
    return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES);
}
0036 
0037 static void dec_user_namespaces(struct ucounts *ucounts)
0038 {
0039     return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES);
0040 }
0041 
/* Switch @cred into @user_ns, resetting the capability and securebits
 * state to the defaults a first process in a new user namespace gets.
 * The caller passes ownership of its user_ns reference to @cred.
 */
static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
{
    /* Start with the same capabilities as init but useless for doing
     * anything as the capabilities are bound to the new user namespace.
     */
    cred->securebits = SECUREBITS_DEFAULT;
    cred->cap_inheritable = CAP_EMPTY_SET;
    cred->cap_permitted = CAP_FULL_SET;
    cred->cap_effective = CAP_FULL_SET;
    cred->cap_ambient = CAP_EMPTY_SET;
    cred->cap_bset = CAP_FULL_SET;
#ifdef CONFIG_KEYS
    /* A request_key authorisation token from the old namespace must not
     * leak into the new one. */
    key_put(cred->request_key_auth);
    cred->request_key_auth = NULL;
#endif
    /* tgcred will be cleared in our caller because CLONE_THREAD won't be set */
    cred->user_ns = user_ns;
}
0060 
0061 static unsigned long enforced_nproc_rlimit(void)
0062 {
0063     unsigned long limit = RLIM_INFINITY;
0064 
0065     /* Is RLIMIT_NPROC currently enforced? */
0066     if (!uid_eq(current_uid(), GLOBAL_ROOT_UID) ||
0067         (current_user_ns() != &init_user_ns))
0068         limit = rlimit(RLIMIT_NPROC);
0069 
0070     return limit;
0071 }
0072 
0073 /*
0074  * Create a new user namespace, deriving the creator from the user in the
0075  * passed credentials, and replacing that user with the new root user for the
0076  * new namespace.
0077  *
0078  * This is called by copy_creds(), which will finish setting the target task's
0079  * credentials.
0080  */
int create_user_ns(struct cred *new)
{
    struct user_namespace *ns, *parent_ns = new->user_ns;
    kuid_t owner = new->euid;
    kgid_t group = new->egid;
    struct ucounts *ucounts;
    int ret, i;

    /* Cap the nesting depth of user namespaces. */
    ret = -ENOSPC;
    if (parent_ns->level > 32)
        goto fail;

    /* Charge against the creator's UCOUNT_USER_NAMESPACES limit. */
    ucounts = inc_user_namespaces(parent_ns, owner);
    if (!ucounts)
        goto fail;

    /*
     * Verify that we can not violate the policy of which files
     * may be accessed that is specified by the root directory,
     * by verifying that the root directory is at the root of the
     * mount namespace which allows all files to be accessed.
     */
    ret = -EPERM;
    if (current_chrooted())
        goto fail_dec;

    /* The creator needs a mapping in the parent user namespace
     * or else we won't be able to reasonably tell userspace who
     * created a user_namespace.
     */
    ret = -EPERM;
    if (!kuid_has_mapping(parent_ns, owner) ||
        !kgid_has_mapping(parent_ns, group))
        goto fail_dec;

    ret = -ENOMEM;
    ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
    if (!ns)
        goto fail_dec;

    /* Recorded so uid-0 map writes can later be vetted (verify_root_map). */
    ns->parent_could_setfcap = cap_raised(new->cap_effective, CAP_SETFCAP);
    ret = ns_alloc_inum(&ns->ns);
    if (ret)
        goto fail_free;
    ns->ns.ops = &userns_operations;

    refcount_set(&ns->ns.count, 1);
    /* Leave the new->user_ns reference with the new user namespace. */
    ns->parent = parent_ns;
    ns->level = parent_ns->level + 1;
    ns->owner = owner;
    ns->group = group;
    INIT_WORK(&ns->work, free_user_ns);
    /* Per-namespace ucount limits start unconstrained ... */
    for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) {
        ns->ucount_max[i] = INT_MAX;
    }
    /* ... but rlimit-backed ucounts inherit the creator's rlimits. */
    set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_NPROC, enforced_nproc_rlimit());
    set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MSGQUEUE, rlimit(RLIMIT_MSGQUEUE));
    set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_SIGPENDING, rlimit(RLIMIT_SIGPENDING));
    set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MEMLOCK, rlimit(RLIMIT_MEMLOCK));
    ns->ucounts = ucounts;

    /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
    mutex_lock(&userns_state_mutex);
    ns->flags = parent_ns->flags;
    mutex_unlock(&userns_state_mutex);

#ifdef CONFIG_KEYS
    INIT_LIST_HEAD(&ns->keyring_name_list);
    init_rwsem(&ns->keyring_sem);
#endif
    ret = -ENOMEM;
    if (!setup_userns_sysctls(ns))
        goto fail_keyring;

    set_cred_user_ns(new, ns);
    return 0;
fail_keyring:
#ifdef CONFIG_PERSISTENT_KEYRINGS
    key_put(ns->persistent_keyring_register);
#endif
    ns_free_inum(&ns->ns);
fail_free:
    kmem_cache_free(user_ns_cachep, ns);
fail_dec:
    dec_user_namespaces(ucounts);
fail:
    return ret;
}
0170 
0171 int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
0172 {
0173     struct cred *cred;
0174     int err = -ENOMEM;
0175 
0176     if (!(unshare_flags & CLONE_NEWUSER))
0177         return 0;
0178 
0179     cred = prepare_creds();
0180     if (cred) {
0181         err = create_user_ns(cred);
0182         if (err)
0183             put_cred(cred);
0184         else
0185             *new_cred = cred;
0186     }
0187 
0188     return err;
0189 }
0190 
/* Workqueue callback tearing down a dead user namespace.  Walks up the
 * parent chain, freeing each namespace whose last reference this
 * destruction drops, so deep chains are released iteratively rather
 * than by recursion.
 */
static void free_user_ns(struct work_struct *work)
{
    struct user_namespace *parent, *ns =
        container_of(work, struct user_namespace, work);

    do {
        /* Grab what we need from ns before it is freed below. */
        struct ucounts *ucounts = ns->ucounts;
        parent = ns->parent;
        /* Maps larger than the static extent array were kmalloc'ed
         * (see insert_extent()/sort_idmaps()) and must be freed. */
        if (ns->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
            kfree(ns->gid_map.forward);
            kfree(ns->gid_map.reverse);
        }
        if (ns->uid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
            kfree(ns->uid_map.forward);
            kfree(ns->uid_map.reverse);
        }
        if (ns->projid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
            kfree(ns->projid_map.forward);
            kfree(ns->projid_map.reverse);
        }
        retire_userns_sysctls(ns);
        key_free_user_ns(ns);
        ns_free_inum(&ns->ns);
        kmem_cache_free(user_ns_cachep, ns);
        dec_user_namespaces(ucounts);
        ns = parent;
        /* Continue only if we just dropped the parent's last reference. */
    } while (refcount_dec_and_test(&parent->ns.count));
}
0219 
/* Called when the last reference to @ns is dropped.  Destruction is
 * deferred to a workqueue (free_user_ns) so it can run in process
 * context and take sleeping locks.
 */
void __put_user_ns(struct user_namespace *ns)
{
    schedule_work(&ns->work);
}
EXPORT_SYMBOL(__put_user_ns);
0225 
/**
 * idmap_key struct holds the information necessary to find an idmapping in a
 * sorted idmap array. It is passed to cmp_map_id() as first argument.
 */
struct idmap_key {
    bool map_up; /* true  -> id from kid; false -> kid from id */
    u32 id; /* id to find */
    u32 count; /* == 0 unless used with map_id_range_down() */
};
0235 
0236 /**
0237  * cmp_map_id - Function to be passed to bsearch() to find the requested
0238  * idmapping. Expects struct idmap_key to be passed via @k.
0239  */
0240 static int cmp_map_id(const void *k, const void *e)
0241 {
0242     u32 first, last, id2;
0243     const struct idmap_key *key = k;
0244     const struct uid_gid_extent *el = e;
0245 
0246     id2 = key->id + key->count - 1;
0247 
0248     /* handle map_id_{down,up}() */
0249     if (key->map_up)
0250         first = el->lower_first;
0251     else
0252         first = el->first;
0253 
0254     last = first + el->count - 1;
0255 
0256     if (key->id >= first && key->id <= last &&
0257         (id2 >= first && id2 <= last))
0258         return 0;
0259 
0260     if (key->id < first || id2 < first)
0261         return -1;
0262 
0263     return 1;
0264 }
0265 
0266 /**
0267  * map_id_range_down_max - Find idmap via binary search in ordered idmap array.
0268  * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
0269  */
0270 static struct uid_gid_extent *
0271 map_id_range_down_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 count)
0272 {
0273     struct idmap_key key;
0274 
0275     key.map_up = false;
0276     key.count = count;
0277     key.id = id;
0278 
0279     return bsearch(&key, map->forward, extents,
0280                sizeof(struct uid_gid_extent), cmp_map_id);
0281 }
0282 
0283 /**
0284  * map_id_range_down_base - Find idmap via binary search in static extent array.
0285  * Can only be called if number of mappings is equal or less than
0286  * UID_GID_MAP_MAX_BASE_EXTENTS.
0287  */
0288 static struct uid_gid_extent *
0289 map_id_range_down_base(unsigned extents, struct uid_gid_map *map, u32 id, u32 count)
0290 {
0291     unsigned idx;
0292     u32 first, last, id2;
0293 
0294     id2 = id + count - 1;
0295 
0296     /* Find the matching extent */
0297     for (idx = 0; idx < extents; idx++) {
0298         first = map->extent[idx].first;
0299         last = first + map->extent[idx].count - 1;
0300         if (id >= first && id <= last &&
0301             (id2 >= first && id2 <= last))
0302             return &map->extent[idx];
0303     }
0304     return NULL;
0305 }
0306 
/* Map the range [id, id + count - 1] from this namespace down to the
 * kernel-internal id space.  Returns the mapped id, or (u32)-1 if any
 * part of the range is unmapped.
 */
static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
{
    struct uid_gid_extent *extent;
    unsigned extents = map->nr_extents;
    /* Read nr_extents before the extent contents; presumably pairs with
     * a smp_wmb() on the map-write side (see map_write's comment) —
     * keep the read and barrier order intact. */
    smp_rmb();

    /* Small maps live inline, large maps in the kmalloc'ed forward array. */
    if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
        extent = map_id_range_down_base(extents, map, id, count);
    else
        extent = map_id_range_down_max(extents, map, id, count);

    /* Map the id or note failure */
    if (extent)
        id = (id - extent->first) + extent->lower_first;
    else
        id = (u32) -1;

    return id;
}
0326 
/* Map a single id from this namespace down to the kernel-internal id
 * space; (u32)-1 on failure.
 */
static u32 map_id_down(struct uid_gid_map *map, u32 id)
{
    return map_id_range_down(map, id, 1);
}
0331 
0332 /**
0333  * map_id_up_base - Find idmap via binary search in static extent array.
0334  * Can only be called if number of mappings is equal or less than
0335  * UID_GID_MAP_MAX_BASE_EXTENTS.
0336  */
0337 static struct uid_gid_extent *
0338 map_id_up_base(unsigned extents, struct uid_gid_map *map, u32 id)
0339 {
0340     unsigned idx;
0341     u32 first, last;
0342 
0343     /* Find the matching extent */
0344     for (idx = 0; idx < extents; idx++) {
0345         first = map->extent[idx].lower_first;
0346         last = first + map->extent[idx].count - 1;
0347         if (id >= first && id <= last)
0348             return &map->extent[idx];
0349     }
0350     return NULL;
0351 }
0352 
0353 /**
0354  * map_id_up_max - Find idmap via binary search in ordered idmap array.
0355  * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
0356  */
0357 static struct uid_gid_extent *
0358 map_id_up_max(unsigned extents, struct uid_gid_map *map, u32 id)
0359 {
0360     struct idmap_key key;
0361 
0362     key.map_up = true;
0363     key.count = 1;
0364     key.id = id;
0365 
0366     return bsearch(&key, map->reverse, extents,
0367                sizeof(struct uid_gid_extent), cmp_map_id);
0368 }
0369 
/* Map a kernel-internal id up into this namespace's id space.
 * Returns the mapped id, or (u32)-1 if there is no mapping.
 */
static u32 map_id_up(struct uid_gid_map *map, u32 id)
{
    struct uid_gid_extent *extent;
    unsigned extents = map->nr_extents;
    /* Order the nr_extents read before reading the extents themselves;
     * see the ordering comment in map_id_range_down(). */
    smp_rmb();

    /* Small maps search the inline array, large maps the sorted
     * reverse array built by sort_idmaps(). */
    if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
        extent = map_id_up_base(extents, map, id);
    else
        extent = map_id_up_max(extents, map, id);

    /* Map the id or note failure */
    if (extent)
        id = (id - extent->lower_first) + extent->first;
    else
        id = (u32) -1;

    return id;
}
0389 
0390 /**
0391  *  make_kuid - Map a user-namespace uid pair into a kuid.
0392  *  @ns:  User namespace that the uid is in
0393  *  @uid: User identifier
0394  *
0395  *  Maps a user-namespace uid pair into a kernel internal kuid,
0396  *  and returns that kuid.
0397  *
0398  *  When there is no mapping defined for the user-namespace uid
0399  *  pair INVALID_UID is returned.  Callers are expected to test
0400  *  for and handle INVALID_UID being returned.  INVALID_UID
0401  *  may be tested for using uid_valid().
0402  */
kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
{
    /* Map the uid to a global kernel uid; INVALID_UID when unmapped
     * (map_id_down() returns (u32)-1 in that case). */
    return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
}
EXPORT_SYMBOL(make_kuid);
0409 
0410 /**
0411  *  from_kuid - Create a uid from a kuid user-namespace pair.
0412  *  @targ: The user namespace we want a uid in.
0413  *  @kuid: The kernel internal uid to start with.
0414  *
0415  *  Map @kuid into the user-namespace specified by @targ and
0416  *  return the resulting uid.
0417  *
0418  *  There is always a mapping into the initial user_namespace.
0419  *
0420  *  If @kuid has no mapping in @targ (uid_t)-1 is returned.
0421  */
uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
{
    /* Map the uid from a global kernel uid; (uid_t)-1 when unmapped. */
    return map_id_up(&targ->uid_map, __kuid_val(kuid));
}
EXPORT_SYMBOL(from_kuid);
0428 
0429 /**
0430  *  from_kuid_munged - Create a uid from a kuid user-namespace pair.
0431  *  @targ: The user namespace we want a uid in.
0432  *  @kuid: The kernel internal uid to start with.
0433  *
0434  *  Map @kuid into the user-namespace specified by @targ and
0435  *  return the resulting uid.
0436  *
0437  *  There is always a mapping into the initial user_namespace.
0438  *
0439  *  Unlike from_kuid from_kuid_munged never fails and always
0440  *  returns a valid uid.  This makes from_kuid_munged appropriate
0441  *  for use in syscalls like stat and getuid where failing the
0442  *  system call and failing to provide a valid uid are not
0443  *  options.
0444  *
0445  *  If @kuid has no mapping in @targ overflowuid is returned.
0446  */
0447 uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
0448 {
0449     uid_t uid;
0450     uid = from_kuid(targ, kuid);
0451 
0452     if (uid == (uid_t) -1)
0453         uid = overflowuid;
0454     return uid;
0455 }
0456 EXPORT_SYMBOL(from_kuid_munged);
0457 
0458 /**
0459  *  make_kgid - Map a user-namespace gid pair into a kgid.
0460  *  @ns:  User namespace that the gid is in
0461  *  @gid: group identifier
0462  *
0463  *  Maps a user-namespace gid pair into a kernel internal kgid,
0464  *  and returns that kgid.
0465  *
0466  *  When there is no mapping defined for the user-namespace gid
0467  *  pair INVALID_GID is returned.  Callers are expected to test
0468  *  for and handle INVALID_GID being returned.  INVALID_GID may be
0469  *  tested for using gid_valid().
0470  */
kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
{
    /* Map the gid to a global kernel gid; INVALID_GID when unmapped. */
    return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
}
EXPORT_SYMBOL(make_kgid);
0477 
0478 /**
0479  *  from_kgid - Create a gid from a kgid user-namespace pair.
0480  *  @targ: The user namespace we want a gid in.
0481  *  @kgid: The kernel internal gid to start with.
0482  *
0483  *  Map @kgid into the user-namespace specified by @targ and
0484  *  return the resulting gid.
0485  *
0486  *  There is always a mapping into the initial user_namespace.
0487  *
0488  *  If @kgid has no mapping in @targ (gid_t)-1 is returned.
0489  */
gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
{
    /* Map the gid from a global kernel gid; (gid_t)-1 when unmapped. */
    return map_id_up(&targ->gid_map, __kgid_val(kgid));
}
EXPORT_SYMBOL(from_kgid);
0496 
0497 /**
0498  *  from_kgid_munged - Create a gid from a kgid user-namespace pair.
0499  *  @targ: The user namespace we want a gid in.
0500  *  @kgid: The kernel internal gid to start with.
0501  *
0502  *  Map @kgid into the user-namespace specified by @targ and
0503  *  return the resulting gid.
0504  *
0505  *  There is always a mapping into the initial user_namespace.
0506  *
0507  *  Unlike from_kgid from_kgid_munged never fails and always
0508  *  returns a valid gid.  This makes from_kgid_munged appropriate
0509  *  for use in syscalls like stat and getgid where failing the
0510  *  system call and failing to provide a valid gid are not options.
0511  *
0512  *  If @kgid has no mapping in @targ overflowgid is returned.
0513  */
0514 gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
0515 {
0516     gid_t gid;
0517     gid = from_kgid(targ, kgid);
0518 
0519     if (gid == (gid_t) -1)
0520         gid = overflowgid;
0521     return gid;
0522 }
0523 EXPORT_SYMBOL(from_kgid_munged);
0524 
0525 /**
0526  *  make_kprojid - Map a user-namespace projid pair into a kprojid.
0527  *  @ns:  User namespace that the projid is in
0528  *  @projid: Project identifier
0529  *
0530  *  Maps a user-namespace projid pair into a kernel internal kprojid,
0531  *  and returns that kprojid.
0532  *
0533  *  When there is no mapping defined for the user-namespace projid
0534  *  pair INVALID_PROJID is returned.  Callers are expected to test
0535  *  for and handle INVALID_PROJID being returned.  INVALID_PROJID
0536  *  may be tested for using projid_valid().
0537  */
kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
{
    /* Map the projid to a global kernel projid; INVALID_PROJID when
     * unmapped. */
    return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
}
EXPORT_SYMBOL(make_kprojid);
0544 
0545 /**
0546  *  from_kprojid - Create a projid from a kprojid user-namespace pair.
0547  *  @targ: The user namespace we want a projid in.
0548  *  @kprojid: The kernel internal project identifier to start with.
0549  *
0550  *  Map @kprojid into the user-namespace specified by @targ and
0551  *  return the resulting projid.
0552  *
0553  *  There is always a mapping into the initial user_namespace.
0554  *
0555  *  If @kprojid has no mapping in @targ (projid_t)-1 is returned.
0556  */
projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
{
    /* Map the projid from a global kernel projid; (projid_t)-1 when
     * unmapped. */
    return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
}
EXPORT_SYMBOL(from_kprojid);
0563 
0564 /**
0565  *  from_kprojid_munged - Create a projid from a kprojid user-namespace pair.
0566  *  @targ: The user namespace we want a projid in.
0567  *  @kprojid: The kernel internal projid to start with.
0568  *
0569  *  Map @kprojid into the user-namespace specified by @targ and
0570  *  return the resulting projid.
0571  *
0572  *  There is always a mapping into the initial user_namespace.
0573  *
0574  *  Unlike from_kprojid from_kprojid_munged never fails and always
0575  *  returns a valid projid.  This makes from_kprojid_munged
0576  *  appropriate for use in syscalls like stat where failing the
0577  *  system call and failing to provide a valid projid are not
0578  *  options.
0579  *
0580  *  If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
0581  */
0582 projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
0583 {
0584     projid_t projid;
0585     projid = from_kprojid(targ, kprojid);
0586 
0587     if (projid == (projid_t) -1)
0588         projid = OVERFLOW_PROJID;
0589     return projid;
0590 }
0591 EXPORT_SYMBOL(from_kprojid_munged);
0592 
0593 
0594 static int uid_m_show(struct seq_file *seq, void *v)
0595 {
0596     struct user_namespace *ns = seq->private;
0597     struct uid_gid_extent *extent = v;
0598     struct user_namespace *lower_ns;
0599     uid_t lower;
0600 
0601     lower_ns = seq_user_ns(seq);
0602     if ((lower_ns == ns) && lower_ns->parent)
0603         lower_ns = lower_ns->parent;
0604 
0605     lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));
0606 
0607     seq_printf(seq, "%10u %10u %10u\n",
0608         extent->first,
0609         lower,
0610         extent->count);
0611 
0612     return 0;
0613 }
0614 
0615 static int gid_m_show(struct seq_file *seq, void *v)
0616 {
0617     struct user_namespace *ns = seq->private;
0618     struct uid_gid_extent *extent = v;
0619     struct user_namespace *lower_ns;
0620     gid_t lower;
0621 
0622     lower_ns = seq_user_ns(seq);
0623     if ((lower_ns == ns) && lower_ns->parent)
0624         lower_ns = lower_ns->parent;
0625 
0626     lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));
0627 
0628     seq_printf(seq, "%10u %10u %10u\n",
0629         extent->first,
0630         lower,
0631         extent->count);
0632 
0633     return 0;
0634 }
0635 
0636 static int projid_m_show(struct seq_file *seq, void *v)
0637 {
0638     struct user_namespace *ns = seq->private;
0639     struct uid_gid_extent *extent = v;
0640     struct user_namespace *lower_ns;
0641     projid_t lower;
0642 
0643     lower_ns = seq_user_ns(seq);
0644     if ((lower_ns == ns) && lower_ns->parent)
0645         lower_ns = lower_ns->parent;
0646 
0647     lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));
0648 
0649     seq_printf(seq, "%10u %10u %10u\n",
0650         extent->first,
0651         lower,
0652         extent->count);
0653 
0654     return 0;
0655 }
0656 
/* Common seq_file ->start helper: return the extent at position *ppos,
 * or NULL past the end.  Reads from the inline array for small maps
 * and the kmalloc'ed forward array for large ones.
 */
static void *m_start(struct seq_file *seq, loff_t *ppos,
             struct uid_gid_map *map)
{
    loff_t pos = *ppos;
    unsigned extents = map->nr_extents;
    /* Read nr_extents before the extent data; see map_id_range_down(). */
    smp_rmb();

    if (pos >= extents)
        return NULL;

    if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
        return &map->extent[pos];

    return &map->forward[pos];
}
0672 
/* seq_file ->start for /proc/<pid>/uid_map. */
static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
{
    struct user_namespace *ns = seq->private;

    return m_start(seq, ppos, &ns->uid_map);
}
0679 
/* seq_file ->start for /proc/<pid>/gid_map. */
static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
{
    struct user_namespace *ns = seq->private;

    return m_start(seq, ppos, &ns->gid_map);
}
0686 
/* seq_file ->start for /proc/<pid>/projid_map. */
static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
{
    struct user_namespace *ns = seq->private;

    return m_start(seq, ppos, &ns->projid_map);
}
0693 
/* seq_file ->next: advance the position and delegate to the map's own
 * ->start callback to fetch the next extent. */
static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
{
    (*pos)++;
    return seq->op->start(seq, pos);
}
0699 
/* seq_file ->stop: nothing to release, iteration state lives in *pos. */
static void m_stop(struct seq_file *seq, void *v)
{
}
0704 
/* seq_file operations backing /proc/<pid>/uid_map. */
const struct seq_operations proc_uid_seq_operations = {
    .start = uid_m_start,
    .stop = m_stop,
    .next = m_next,
    .show = uid_m_show,
};

/* seq_file operations backing /proc/<pid>/gid_map. */
const struct seq_operations proc_gid_seq_operations = {
    .start = gid_m_start,
    .stop = m_stop,
    .next = m_next,
    .show = gid_m_show,
};

/* seq_file operations backing /proc/<pid>/projid_map. */
const struct seq_operations proc_projid_seq_operations = {
    .start = projid_m_start,
    .stop = m_stop,
    .next = m_next,
    .show = projid_m_show,
};
0725 
/* Return true if @extent's upper or lower id range intersects any
 * extent already present in @new_map.  Used while parsing a map write
 * to reject overlapping mappings.
 */
static bool mappings_overlap(struct uid_gid_map *new_map,
                 struct uid_gid_extent *extent)
{
    u32 upper_first, lower_first, upper_last, lower_last;
    unsigned idx;

    upper_first = extent->first;
    lower_first = extent->lower_first;
    upper_last = upper_first + extent->count - 1;
    lower_last = lower_first + extent->count - 1;

    for (idx = 0; idx < new_map->nr_extents; idx++) {
        u32 prev_upper_first, prev_lower_first;
        u32 prev_upper_last, prev_lower_last;
        struct uid_gid_extent *prev;

        /* Extents live inline until the map outgrows the static
         * array, then in the kmalloc'ed forward array. */
        if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
            prev = &new_map->extent[idx];
        else
            prev = &new_map->forward[idx];

        prev_upper_first = prev->first;
        prev_lower_first = prev->lower_first;
        prev_upper_last = prev_upper_first + prev->count - 1;
        prev_lower_last = prev_lower_first + prev->count - 1;

        /* Does the upper range intersect a previous extent? */
        if ((prev_upper_first <= upper_last) &&
            (prev_upper_last >= upper_first))
            return true;

        /* Does the lower range intersect a previous extent? */
        if ((prev_lower_first <= lower_last) &&
            (prev_lower_last >= lower_first))
            return true;
    }
    return false;
}
0764 
0765 /**
0766  * insert_extent - Safely insert a new idmap extent into struct uid_gid_map.
0767  * Takes care to allocate a 4K block of memory if the number of mappings exceeds
0768  * UID_GID_MAP_MAX_BASE_EXTENTS.
0769  */
/**
 * insert_extent - Safely insert a new idmap extent into struct uid_gid_map.
 * Takes care to allocate a 4K block of memory if the number of mappings exceeds
 * UID_GID_MAP_MAX_BASE_EXTENTS.
 */
static int insert_extent(struct uid_gid_map *map, struct uid_gid_extent *extent)
{
    struct uid_gid_extent *dest;

    /* Exactly at the inline capacity: migrate to a heap array before
     * appending the extent that overflows it. */
    if (map->nr_extents == UID_GID_MAP_MAX_BASE_EXTENTS) {
        struct uid_gid_extent *forward;

        /* Allocate memory for 340 mappings. */
        forward = kmalloc_array(UID_GID_MAP_MAX_EXTENTS,
                    sizeof(struct uid_gid_extent),
                    GFP_KERNEL);
        if (!forward)
            return -ENOMEM;

        /* Copy over memory. Only set up memory for the forward pointer.
         * Defer the memory setup for the reverse pointer.
         */
        memcpy(forward, map->extent,
               map->nr_extents * sizeof(map->extent[0]));

        map->forward = forward;
        map->reverse = NULL;
    }

    /* Below the inline capacity append in place; at or above it (the
     * migration above has run) append to the forward array. */
    if (map->nr_extents < UID_GID_MAP_MAX_BASE_EXTENTS)
        dest = &map->extent[map->nr_extents];
    else
        dest = &map->forward[map->nr_extents];

    *dest = *extent;
    map->nr_extents++;
    return 0;
}
0803 
0804 /* cmp function to sort() forward mappings */
0805 static int cmp_extents_forward(const void *a, const void *b)
0806 {
0807     const struct uid_gid_extent *e1 = a;
0808     const struct uid_gid_extent *e2 = b;
0809 
0810     if (e1->first < e2->first)
0811         return -1;
0812 
0813     if (e1->first > e2->first)
0814         return 1;
0815 
0816     return 0;
0817 }
0818 
0819 /* cmp function to sort() reverse mappings */
0820 static int cmp_extents_reverse(const void *a, const void *b)
0821 {
0822     const struct uid_gid_extent *e1 = a;
0823     const struct uid_gid_extent *e2 = b;
0824 
0825     if (e1->lower_first < e2->lower_first)
0826         return -1;
0827 
0828     if (e1->lower_first > e2->lower_first)
0829         return 1;
0830 
0831     return 0;
0832 }
0833 
0834 /**
0835  * sort_idmaps - Sorts an array of idmap entries.
0836  * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
0837  */
/**
 * sort_idmaps - Sorts an array of idmap entries.
 * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
 */
static int sort_idmaps(struct uid_gid_map *map)
{
    /* Inline maps are searched linearly and need no sorting. */
    if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
        return 0;

    /* Sort forward array. */
    sort(map->forward, map->nr_extents, sizeof(struct uid_gid_extent),
         cmp_extents_forward, NULL);

    /* Only copy the memory from forward we actually need. */
    map->reverse = kmemdup(map->forward,
                   map->nr_extents * sizeof(struct uid_gid_extent),
                   GFP_KERNEL);
    if (!map->reverse)
        return -ENOMEM;

    /* Sort reverse array. */
    sort(map->reverse, map->nr_extents, sizeof(struct uid_gid_extent),
         cmp_extents_reverse, NULL);

    return 0;
}
0860 
0861 /**
0862  * verify_root_map() - check the uid 0 mapping
0863  * @file: idmapping file
0864  * @map_ns: user namespace of the target process
0865  * @new_map: requested idmap
0866  *
0867  * If a process requests mapping parent uid 0 into the new ns, verify that the
0868  * process writing the map had the CAP_SETFCAP capability as the target process
0869  * will be able to write fscaps that are valid in ancestor user namespaces.
0870  *
0871  * Return: true if the mapping is allowed, false if not.
0872  */
static bool verify_root_map(const struct file *file,
                struct user_namespace *map_ns,
                struct uid_gid_map *new_map)
{
    int idx;
    const struct user_namespace *file_ns = file->f_cred->user_ns;
    struct uid_gid_extent *extent0 = NULL;

    /* Find an extent that maps parent uid 0 into the new namespace. */
    for (idx = 0; idx < new_map->nr_extents; idx++) {
        if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
            extent0 = &new_map->extent[idx];
        else
            extent0 = &new_map->forward[idx];
        if (extent0->lower_first == 0)
            break;

        extent0 = NULL;
    }

    /* No uid-0 mapping requested: nothing extra to verify. */
    if (!extent0)
        return true;

    if (map_ns == file_ns) {
        /* The process unshared its ns and is writing to its own
         * /proc/self/uid_map.  User already has full capabilites in
         * the new namespace.  Verify that the parent had CAP_SETFCAP
         * when it unshared.
         * */
        if (!file_ns->parent_could_setfcap)
            return false;
    } else {
        /* Process p1 is writing to uid_map of p2, who is in a child
         * user namespace to p1's.  Verify that the opener of the map
         * file has CAP_SETFCAP against the parent of the new map
         * namespace */
        if (!file_ns_capable(file, map_ns->parent, CAP_SETFCAP))
            return false;
    }

    return true;
}
0914 
/**
 * map_write - parse and install a uid/gid/projid map for a user namespace
 * @file:       the opened /proc map file
 * @buf:        user buffer of "first lower_first count" lines
 * @count:      bytes in @buf; must be < PAGE_SIZE
 * @ppos:       file position; only a single write at offset 0 is accepted
 * @cap_setid:  capability guarding this map (CAP_SETUID/CAP_SETGID), or an
 *              invalid cap (-1) for projid maps which need no capability
 * @map:        the target map; may only ever be written once
 * @parent_map: the parent namespace's map, used to translate the lower
 *              ids into the kernel's global id space
 *
 * Returns @count on success or a negative errno on failure.
 */
static ssize_t map_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos,
			 int cap_setid,
			 struct uid_gid_map *map,
			 struct uid_gid_map *parent_map)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *map_ns = seq->private;
	struct uid_gid_map new_map;
	unsigned idx;
	struct uid_gid_extent extent;
	char *kbuf = NULL, *pos, *next_line;
	ssize_t ret;

	/* Only allow < page size writes at the beginning of the file */
	if ((*ppos != 0) || (count >= PAGE_SIZE))
		return -EINVAL;

	/* Slurp in the user data */
	kbuf = memdup_user_nul(buf, count);
	if (IS_ERR(kbuf))
		return PTR_ERR(kbuf);

	/*
	 * The userns_state_mutex serializes all writes to any given map.
	 *
	 * Any map is only ever written once.
	 *
	 * An id map fits within 1 cache line on most architectures.
	 *
	 * On read nothing needs to be done unless you are on an
	 * architecture with a crazy cache coherency model like alpha.
	 *
	 * There is a one time data dependency between reading the
	 * count of the extents and the values of the extents.  The
	 * desired behavior is to see the values of the extents that
	 * were written before the count of the extents.
	 *
	 * To achieve this smp_wmb() is used to guarantee the write
	 * order and smp_rmb() guarantees that we don't have crazy
	 * architectures returning stale data.
	 */
	mutex_lock(&userns_state_mutex);

	memset(&new_map, 0, sizeof(struct uid_gid_map));

	ret = -EPERM;
	/* Only allow one successful write to the map */
	if (map->nr_extents != 0)
		goto out;

	/*
	 * Adjusting namespace settings requires capabilities on the target.
	 */
	if (cap_valid(cap_setid) && !file_ns_capable(file, map_ns, CAP_SYS_ADMIN))
		goto out;

	/* Parse the user data */
	ret = -EINVAL;
	pos = kbuf;
	for (; pos; pos = next_line) {

		/* Find the end of line and ensure I don't look past it */
		next_line = strchr(pos, '\n');
		if (next_line) {
			*next_line = '\0';
			next_line++;
			if (*next_line == '\0')
				next_line = NULL;
		}

		/* Each line is three base-10 fields: first lower_first count */
		pos = skip_spaces(pos);
		extent.first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent.lower_first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent.count = simple_strtoul(pos, &pos, 10);
		if (*pos && !isspace(*pos))
			goto out;

		/* Verify there is not trailing junk on the line */
		pos = skip_spaces(pos);
		if (*pos != '\0')
			goto out;

		/* Verify we have been given valid starting values;
		 * (u32)-1 is the invalid-id sentinel.
		 */
		if ((extent.first == (u32) -1) ||
		    (extent.lower_first == (u32) -1))
			goto out;

		/* Verify count is not zero and does not cause the
		 * extent to wrap
		 */
		if ((extent.first + extent.count) <= extent.first)
			goto out;
		if ((extent.lower_first + extent.count) <=
		     extent.lower_first)
			goto out;

		/* Do the ranges in extent overlap any previous extents? */
		if (mappings_overlap(&new_map, &extent))
			goto out;

		/* Reject input that would need more than the maximum
		 * number of extents: the final extent may only be the
		 * last line written.
		 */
		if ((new_map.nr_extents + 1) == UID_GID_MAP_MAX_EXTENTS &&
		    (next_line != NULL))
			goto out;

		ret = insert_extent(&new_map, &extent);
		if (ret < 0)
			goto out;
		ret = -EINVAL;
	}
	/* Be very certain the new map actually exists */
	if (new_map.nr_extents == 0)
		goto out;

	ret = -EPERM;
	/* Validate the user is allowed to use user id's mapped to. */
	if (!new_idmap_permitted(file, map_ns, cap_setid, &new_map))
		goto out;

	ret = -EPERM;
	/* Map the lower ids from the parent user namespace to the
	 * kernel global id space.
	 */
	for (idx = 0; idx < new_map.nr_extents; idx++) {
		struct uid_gid_extent *e;
		u32 lower_first;

		/* Small maps store extents inline; larger maps use the
		 * separately allocated forward array.
		 */
		if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
			e = &new_map.extent[idx];
		else
			e = &new_map.forward[idx];

		lower_first = map_id_range_down(parent_map,
						e->lower_first,
						e->count);

		/* Fail if we can not map the specified extent to
		 * the kernel global id space.
		 */
		if (lower_first == (u32) -1)
			goto out;

		e->lower_first = lower_first;
	}

	/*
	 * If we want to use binary search for lookup, this clones the extent
	 * array and sorts both copies.
	 */
	ret = sort_idmaps(&new_map);
	if (ret < 0)
		goto out;

	/* Install the map */
	if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
		memcpy(map->extent, new_map.extent,
		       new_map.nr_extents * sizeof(new_map.extent[0]));
	} else {
		map->forward = new_map.forward;
		map->reverse = new_map.reverse;
	}
	/* Publish the extents before the count; readers pair with smp_rmb(). */
	smp_wmb();
	map->nr_extents = new_map.nr_extents;

	*ppos = count;
	ret = count;
out:
	/* On failure, free any out-of-line extent arrays we allocated. */
	if (ret < 0 && new_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
		kfree(new_map.forward);
		kfree(new_map.reverse);
		map->forward = NULL;
		map->reverse = NULL;
		map->nr_extents = 0;
	}

	mutex_unlock(&userns_state_mutex);
	kfree(kbuf);
	return ret;
}
1102 
1103 ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
1104                size_t size, loff_t *ppos)
1105 {
1106     struct seq_file *seq = file->private_data;
1107     struct user_namespace *ns = seq->private;
1108     struct user_namespace *seq_ns = seq_user_ns(seq);
1109 
1110     if (!ns->parent)
1111         return -EPERM;
1112 
1113     if ((seq_ns != ns) && (seq_ns != ns->parent))
1114         return -EPERM;
1115 
1116     return map_write(file, buf, size, ppos, CAP_SETUID,
1117              &ns->uid_map, &ns->parent->uid_map);
1118 }
1119 
1120 ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
1121                size_t size, loff_t *ppos)
1122 {
1123     struct seq_file *seq = file->private_data;
1124     struct user_namespace *ns = seq->private;
1125     struct user_namespace *seq_ns = seq_user_ns(seq);
1126 
1127     if (!ns->parent)
1128         return -EPERM;
1129 
1130     if ((seq_ns != ns) && (seq_ns != ns->parent))
1131         return -EPERM;
1132 
1133     return map_write(file, buf, size, ppos, CAP_SETGID,
1134              &ns->gid_map, &ns->parent->gid_map);
1135 }
1136 
1137 ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
1138                   size_t size, loff_t *ppos)
1139 {
1140     struct seq_file *seq = file->private_data;
1141     struct user_namespace *ns = seq->private;
1142     struct user_namespace *seq_ns = seq_user_ns(seq);
1143 
1144     if (!ns->parent)
1145         return -EPERM;
1146 
1147     if ((seq_ns != ns) && (seq_ns != ns->parent))
1148         return -EPERM;
1149 
1150     /* Anyone can set any valid project id no capability needed */
1151     return map_write(file, buf, size, ppos, -1,
1152              &ns->projid_map, &ns->parent->projid_map);
1153 }
1154 
/**
 * new_idmap_permitted - may the opener of @file install @new_map on @ns?
 * @ns:        the user namespace whose map is being written
 * @cap_setid: CAP_SETUID, CAP_SETGID, or an invalid cap for projid maps
 * @new_map:   the fully-parsed candidate map
 *
 * Returns true if the mapping may be installed.
 */
static bool new_idmap_permitted(const struct file *file,
				struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *new_map)
{
	const struct cred *cred = file->f_cred;

	/* uid maps additionally need the root-mapping / CAP_SETFCAP
	 * checks performed by verify_root_map(). */
	if (cap_setid == CAP_SETUID && !verify_root_map(file, ns, new_map))
		return false;

	/* Don't allow mappings that would allow anything that wouldn't
	 * be allowed without the establishment of unprivileged mappings.
	 */
	if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
	    uid_eq(ns->owner, cred->euid)) {
		u32 id = new_map->extent[0].lower_first;
		if (cap_setid == CAP_SETUID) {
			/* The owner may map its own euid without privilege. */
			kuid_t uid = make_kuid(ns->parent, id);
			if (uid_eq(uid, cred->euid))
				return true;
		} else if (cap_setid == CAP_SETGID) {
			/* The owner may map its own egid, but only after
			 * setgroups has been permanently disabled. */
			kgid_t gid = make_kgid(ns->parent, id);
			if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
			    gid_eq(gid, cred->egid))
				return true;
		}
	}

	/* Allow anyone to set a mapping that doesn't require privilege */
	if (!cap_valid(cap_setid))
		return true;

	/* Allow the specified ids if we have the appropriate capability
	 * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
	 * And the opener of the id file also has the appropriate capability.
	 */
	if (ns_capable(ns->parent, cap_setid) &&
	    file_ns_capable(file, ns->parent, cap_setid))
		return true;

	return false;
}
1196 
1197 int proc_setgroups_show(struct seq_file *seq, void *v)
1198 {
1199     struct user_namespace *ns = seq->private;
1200     unsigned long userns_flags = READ_ONCE(ns->flags);
1201 
1202     seq_printf(seq, "%s\n",
1203            (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
1204            "allow" : "deny");
1205     return 0;
1206 }
1207 
1208 ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
1209                  size_t count, loff_t *ppos)
1210 {
1211     struct seq_file *seq = file->private_data;
1212     struct user_namespace *ns = seq->private;
1213     char kbuf[8], *pos;
1214     bool setgroups_allowed;
1215     ssize_t ret;
1216 
1217     /* Only allow a very narrow range of strings to be written */
1218     ret = -EINVAL;
1219     if ((*ppos != 0) || (count >= sizeof(kbuf)))
1220         goto out;
1221 
1222     /* What was written? */
1223     ret = -EFAULT;
1224     if (copy_from_user(kbuf, buf, count))
1225         goto out;
1226     kbuf[count] = '\0';
1227     pos = kbuf;
1228 
1229     /* What is being requested? */
1230     ret = -EINVAL;
1231     if (strncmp(pos, "allow", 5) == 0) {
1232         pos += 5;
1233         setgroups_allowed = true;
1234     }
1235     else if (strncmp(pos, "deny", 4) == 0) {
1236         pos += 4;
1237         setgroups_allowed = false;
1238     }
1239     else
1240         goto out;
1241 
1242     /* Verify there is not trailing junk on the line */
1243     pos = skip_spaces(pos);
1244     if (*pos != '\0')
1245         goto out;
1246 
1247     ret = -EPERM;
1248     mutex_lock(&userns_state_mutex);
1249     if (setgroups_allowed) {
1250         /* Enabling setgroups after setgroups has been disabled
1251          * is not allowed.
1252          */
1253         if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
1254             goto out_unlock;
1255     } else {
1256         /* Permanently disabling setgroups after setgroups has
1257          * been enabled by writing the gid_map is not allowed.
1258          */
1259         if (ns->gid_map.nr_extents != 0)
1260             goto out_unlock;
1261         ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
1262     }
1263     mutex_unlock(&userns_state_mutex);
1264 
1265     /* Report a successful write */
1266     *ppos = count;
1267     ret = count;
1268 out:
1269     return ret;
1270 out_unlock:
1271     mutex_unlock(&userns_state_mutex);
1272     goto out;
1273 }
1274 
1275 bool userns_may_setgroups(const struct user_namespace *ns)
1276 {
1277     bool allowed;
1278 
1279     mutex_lock(&userns_state_mutex);
1280     /* It is not safe to use setgroups until a gid mapping in
1281      * the user namespace has been established.
1282      */
1283     allowed = ns->gid_map.nr_extents != 0;
1284     /* Is setgroups allowed? */
1285     allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
1286     mutex_unlock(&userns_state_mutex);
1287 
1288     return allowed;
1289 }
1290 
1291 /*
1292  * Returns true if @child is the same namespace or a descendant of
1293  * @ancestor.
1294  */
1295 bool in_userns(const struct user_namespace *ancestor,
1296            const struct user_namespace *child)
1297 {
1298     const struct user_namespace *ns;
1299     for (ns = child; ns->level > ancestor->level; ns = ns->parent)
1300         ;
1301     return (ns == ancestor);
1302 }
1303 
1304 bool current_in_userns(const struct user_namespace *target_ns)
1305 {
1306     return in_userns(target_ns, current_user_ns());
1307 }
1308 EXPORT_SYMBOL(current_in_userns);
1309 
/* Convert a generic ns_common pointer back to its containing user_namespace. */
static inline struct user_namespace *to_user_ns(struct ns_common *ns)
{
	return container_of(ns, struct user_namespace, ns);
}
1314 
1315 static struct ns_common *userns_get(struct task_struct *task)
1316 {
1317     struct user_namespace *user_ns;
1318 
1319     rcu_read_lock();
1320     user_ns = get_user_ns(__task_cred(task)->user_ns);
1321     rcu_read_unlock();
1322 
1323     return user_ns ? &user_ns->ns : NULL;
1324 }
1325 
/* Drop the reference taken by userns_get(). */
static void userns_put(struct ns_common *ns)
{
	put_user_ns(to_user_ns(ns));
}
1330 
/**
 * userns_install - switch the prepared credentials in @nsset to @ns
 *
 * Called from setns(CLONE_NEWUSER).  Returns 0 on success or a
 * negative errno.
 */
static int userns_install(struct nsset *nsset, struct ns_common *ns)
{
	struct user_namespace *user_ns = to_user_ns(ns);
	struct cred *cred;

	/* Don't allow gaining capabilities by reentering
	 * the same user namespace.
	 */
	if (user_ns == current_user_ns())
		return -EINVAL;

	/* Tasks that share a thread group must share a user namespace */
	if (!thread_group_empty(current))
		return -EINVAL;

	/* The fs_struct must not be shared with another task. */
	if (current->fs->users != 1)
		return -EINVAL;

	/* Joining requires full privilege over the target namespace. */
	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	cred = nsset_cred(nsset);
	if (!cred)
		return -EINVAL;

	/* Swap the credential's user namespace reference and reset its
	 * capability sets for the new namespace.
	 */
	put_user_ns(cred->user_ns);
	set_cred_user_ns(cred, get_user_ns(user_ns));

	if (set_cred_ucounts(cred) < 0)
		return -EINVAL;

	return 0;
}
1364 
1365 struct ns_common *ns_get_owner(struct ns_common *ns)
1366 {
1367     struct user_namespace *my_user_ns = current_user_ns();
1368     struct user_namespace *owner, *p;
1369 
1370     /* See if the owner is in the current user namespace */
1371     owner = p = ns->ops->owner(ns);
1372     for (;;) {
1373         if (!p)
1374             return ERR_PTR(-EPERM);
1375         if (p == my_user_ns)
1376             break;
1377         p = p->parent;
1378     }
1379 
1380     return &get_user_ns(owner)->ns;
1381 }
1382 
/* The user namespace that owns a user namespace is its parent. */
static struct user_namespace *userns_owner(struct ns_common *ns)
{
	return to_user_ns(ns)->parent;
}
1387 
/* proc_ns_operations wiring for /proc/<pid>/ns/user. */
const struct proc_ns_operations userns_operations = {
	.name	    = "user",
	.type	    = CLONE_NEWUSER,
	.get	    = userns_get,
	.put	    = userns_put,
	.install    = userns_install,
	.owner	    = userns_owner,
	.get_parent = ns_get_owner,
};
1397 
/* Create the slab cache used to allocate struct user_namespace. */
static __init int user_namespaces_init(void)
{
	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC | SLAB_ACCOUNT);
	return 0;
}
subsys_initcall(user_namespaces_init);