// SPDX-License-Identifier: GPL-2.0-only

#include <linux/stat.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/hash.h>
#include <linux/kmemleak.h>
#include <linux/user_namespace.h>

struct ucounts init_ucounts = {
    .ns    = &init_user_ns,
    .uid   = GLOBAL_ROOT_UID,
    .count = ATOMIC_INIT(1),
};

#define UCOUNTS_HASHTABLE_BITS 10
static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)];
static DEFINE_SPINLOCK(ucounts_lock);

#define ucounts_hashfn(ns, uid)                     \
    hash_long((unsigned long)__kuid_val(uid) + (unsigned long)(ns), \
          UCOUNTS_HASHTABLE_BITS)
#define ucounts_hashentry(ns, uid)  \
    (ucounts_hashtable + ucounts_hashfn(ns, uid))
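The table has 1 << UCOUNTS_HASHTABLE_BITS = 1024 buckets, and the hash key mixes the numeric uid with the address of the owning user namespace, so the same uid in different namespaces normally lands in different buckets. A minimal sketch of what ucounts_hashentry() boils down to (hypothetical helper, not part of this file):

static struct hlist_head *example_bucket(struct user_namespace *ns, kuid_t uid)
{
    unsigned long key = (unsigned long)__kuid_val(uid) + (unsigned long)ns;

    /* hash_long() folds the key down to UCOUNTS_HASHTABLE_BITS bits */
    return &ucounts_hashtable[hash_long(key, UCOUNTS_HASHTABLE_BITS)];
}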


#ifdef CONFIG_SYSCTL
static struct ctl_table_set *
set_lookup(struct ctl_table_root *root)
{
    return &current_user_ns()->set;
}

static int set_is_seen(struct ctl_table_set *set)
{
    return &current_user_ns()->set == set;
}

static int set_permissions(struct ctl_table_header *head,
                  struct ctl_table *table)
{
    struct user_namespace *user_ns =
        container_of(head->set, struct user_namespace, set);
    int mode;

    /* Allow users with CAP_SYS_RESOURCE unrestrained access */
    if (ns_capable(user_ns, CAP_SYS_RESOURCE))
        mode = (table->mode & S_IRWXU) >> 6;
    else
    /* Allow all others at most read-only access */
        mode = table->mode & S_IROTH;
    return (mode << 6) | (mode << 3) | mode;
}
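A worked example of the permission mapping, using the 0644 mode that the user_table entries below carry:

/*
 * mode 0644, caller has CAP_SYS_RESOURCE in the owning namespace:
 *     (0644 & S_IRWXU) >> 6 = 06  ->  (06 << 6) | (06 << 3) | 06 = 0666
 * mode 0644, caller lacks the capability:
 *      0644 & S_IROTH       = 04  ->  0444 (read-only for everyone)
 */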

static struct ctl_table_root set_root = {
    .lookup = set_lookup,
    .permissions = set_permissions,
};

static long ue_zero = 0;
static long ue_int_max = INT_MAX;

#define UCOUNT_ENTRY(name)                  \
    {                           \
        .procname   = name,             \
        .maxlen     = sizeof(long),         \
        .mode       = 0644,             \
        .proc_handler   = proc_doulongvec_minmax,   \
        .extra1     = &ue_zero,         \
        .extra2     = &ue_int_max,          \
    }
static struct ctl_table user_table[] = {
    UCOUNT_ENTRY("max_user_namespaces"),
    UCOUNT_ENTRY("max_pid_namespaces"),
    UCOUNT_ENTRY("max_uts_namespaces"),
    UCOUNT_ENTRY("max_ipc_namespaces"),
    UCOUNT_ENTRY("max_net_namespaces"),
    UCOUNT_ENTRY("max_mnt_namespaces"),
    UCOUNT_ENTRY("max_cgroup_namespaces"),
    UCOUNT_ENTRY("max_time_namespaces"),
#ifdef CONFIG_INOTIFY_USER
    UCOUNT_ENTRY("max_inotify_instances"),
    UCOUNT_ENTRY("max_inotify_watches"),
#endif
#ifdef CONFIG_FANOTIFY
    UCOUNT_ENTRY("max_fanotify_groups"),
    UCOUNT_ENTRY("max_fanotify_marks"),
#endif
    { },
    { },
    { },
    { },
    { }
};
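The trailing empty initializers are deliberate: setup_userns_sysctls() below insists (via its BUILD_BUG_ON) on exactly UCOUNT_COUNTS entries plus a terminating sentinel, and the counters without a sysctl knob of their own (the UCOUNT_RLIMIT_* ones, such as UCOUNT_RLIMIT_NPROC used at the end of this file) appear to be covered by the empty slots. For reference, a single UCOUNT_ENTRY() expands to:

/* UCOUNT_ENTRY("max_user_namespaces") */
{
    .procname     = "max_user_namespaces",
    .maxlen       = sizeof(long),
    .mode         = 0644,
    .proc_handler = proc_doulongvec_minmax,
    .extra1       = &ue_zero,     /* lower bound 0 */
    .extra2       = &ue_int_max,  /* upper bound INT_MAX */
}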
#endif /* CONFIG_SYSCTL */

bool setup_userns_sysctls(struct user_namespace *ns)
{
#ifdef CONFIG_SYSCTL
    struct ctl_table *tbl;

    BUILD_BUG_ON(ARRAY_SIZE(user_table) != UCOUNT_COUNTS + 1);
    setup_sysctl_set(&ns->set, &set_root, set_is_seen);
    tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL);
    if (tbl) {
        int i;
        for (i = 0; i < UCOUNT_COUNTS; i++) {
            tbl[i].data = &ns->ucount_max[i];
        }
        ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl);
    }
    if (!ns->sysctls) {
        kfree(tbl);
        retire_sysctl_set(&ns->set);
        return false;
    }
#endif
    return true;
}
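A rough sketch of the expected calling pattern, as seen from a hypothetical namespace setup/teardown path (illustrative only; the real callers are in the user-namespace creation and destruction code):

/* Hypothetical caller, not part of this file */
static int example_ns_init(struct user_namespace *ns)
{
    if (!setup_userns_sysctls(ns))      /* register the per-ns "user" sysctls */
        return -ENOMEM;
    return 0;
}

static void example_ns_exit(struct user_namespace *ns)
{
    retire_userns_sysctls(ns);          /* undo setup_userns_sysctls() */
}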

void retire_userns_sysctls(struct user_namespace *ns)
{
#ifdef CONFIG_SYSCTL
    struct ctl_table *tbl;

    tbl = ns->sysctls->ctl_table_arg;
    unregister_sysctl_table(ns->sysctls);
    retire_sysctl_set(&ns->set);
    kfree(tbl);
#endif
}

static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent)
{
    struct ucounts *ucounts;

    hlist_for_each_entry(ucounts, hashent, node) {
        if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns))
            return ucounts;
    }
    return NULL;
}

static void hlist_add_ucounts(struct ucounts *ucounts)
{
    struct hlist_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid);
    spin_lock_irq(&ucounts_lock);
    hlist_add_head(&ucounts->node, hashent);
    spin_unlock_irq(&ucounts_lock);
}

static inline bool get_ucounts_or_wrap(struct ucounts *ucounts)
{
    /* Returns true on a successful get, false if the count wraps. */
    return !atomic_add_negative(1, &ucounts->count);
}

struct ucounts *get_ucounts(struct ucounts *ucounts)
{
    if (!get_ucounts_or_wrap(ucounts)) {
        put_ucounts(ucounts);
        ucounts = NULL;
    }
    return ucounts;
}
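atomic_add_negative() increments the reference count and reports whether the result went negative, i.e. whether the counter wrapped past INT_MAX; on a wrap, get_ucounts() immediately drops the bogus reference again and returns NULL. A minimal, non-production illustration of the same check, assuming the atomic_t count field from struct ucounts:

/* Illustrative only, not part of this file */
static bool example_get_check(struct ucounts *ucounts)
{
    int new = atomic_add_return(1, &ucounts->count);

    return new >= 0;    /* a negative result means the counter wrapped */
}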

struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
{
    struct hlist_head *hashent = ucounts_hashentry(ns, uid);
    struct ucounts *ucounts, *new;
    bool wrapped;

    spin_lock_irq(&ucounts_lock);
    ucounts = find_ucounts(ns, uid, hashent);
    if (!ucounts) {
        spin_unlock_irq(&ucounts_lock);

        new = kzalloc(sizeof(*new), GFP_KERNEL);
        if (!new)
            return NULL;

        new->ns = ns;
        new->uid = uid;
        atomic_set(&new->count, 1);

        spin_lock_irq(&ucounts_lock);
        ucounts = find_ucounts(ns, uid, hashent);
        if (ucounts) {
            kfree(new);
        } else {
            hlist_add_head(&new->node, hashent);
            get_user_ns(new->ns);
            spin_unlock_irq(&ucounts_lock);
            return new;
        }
    }
    wrapped = !get_ucounts_or_wrap(ucounts);
    spin_unlock_irq(&ucounts_lock);
    if (wrapped) {
        put_ucounts(ucounts);
        return NULL;
    }
    return ucounts;
}
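alloc_ucounts() follows the usual optimistic-allocation pattern: ucounts_lock is dropped so the kzalloc(GFP_KERNEL) may sleep, then the lock is retaken and the (ns, uid) pair is looked up again in case another task inserted it in the meantime; the loser simply frees its freshly allocated node. A hedged sketch of pairing it with put_ucounts() (hypothetical function, not part of this file):

static int example_with_ucounts(struct user_namespace *ns, kuid_t uid)
{
    struct ucounts *uc = alloc_ucounts(ns, uid);  /* NULL on OOM or refcount wrap */

    if (!uc)
        return -ENOMEM;
    /* ... per-(ns, uid) bookkeeping goes here ... */
    put_ucounts(uc);    /* balance the reference taken above */
    return 0;
}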

void put_ucounts(struct ucounts *ucounts)
{
    unsigned long flags;

    if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) {
        hlist_del_init(&ucounts->node);
        spin_unlock_irqrestore(&ucounts_lock, flags);
        put_user_ns(ucounts->ns);
        kfree(ucounts);
    }
}

static inline bool atomic_long_inc_below(atomic_long_t *v, int u)
{
    long c, old;
    c = atomic_long_read(v);
    for (;;) {
        if (unlikely(c >= u))
            return false;
        old = atomic_long_cmpxchg(v, c, c+1);
        if (likely(old == c))
            return true;
        c = old;
    }
}
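This is a bounded increment: the counter is bumped only while its current value is strictly below the limit, and a lost compare-and-swap race simply reloads the value and retries. Spelled out with concrete numbers (illustrative comment only):

/*
 * With *v == 4 and u == 5: 4 < 5, so the cmpxchg installs 5 and we return true.
 * A second attempt then sees 5 >= 5 and returns false without touching *v.
 * If another CPU changed *v between the read and the cmpxchg, the loop retries.
 */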

struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid,
               enum ucount_type type)
{
    struct ucounts *ucounts, *iter, *bad;
    struct user_namespace *tns;
    ucounts = alloc_ucounts(ns, uid);
    for (iter = ucounts; iter; iter = tns->ucounts) {
        long max;
        tns = iter->ns;
        max = READ_ONCE(tns->ucount_max[type]);
        if (!atomic_long_inc_below(&iter->ucount[type], max))
            goto fail;
    }
    return ucounts;
fail:
    bad = iter;
    for (iter = ucounts; iter != bad; iter = iter->ns->ucounts)
        atomic_long_dec(&iter->ucount[type]);

    put_ucounts(ucounts);
    return NULL;
}

void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
{
    struct ucounts *iter;
    for (iter = ucounts; iter; iter = iter->ns->ucounts) {
        long dec = atomic_long_dec_if_positive(&iter->ucount[type]);
        WARN_ON_ONCE(dec < 0);
    }
    put_ucounts(ucounts);
}
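inc_ucount() charges one unit of the given type at every level of the namespace hierarchy (iter->ns->ucounts walks toward the initial namespace) and fully unwinds on failure, so a NULL return means nothing stays charged; dec_ucount() reverses one successful charge and drops the reference. A hedged consumer sketch, assuming the UCOUNT_USER_NAMESPACES counter from <linux/user_namespace.h> (hypothetical function, not part of this file):

static int example_charge_and_release(struct user_namespace *ns, kuid_t uid)
{
    struct ucounts *uc;

    uc = inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES);   /* charge every level */
    if (!uc)
        return -ENOSPC;         /* a limit was hit somewhere in the chain */
    /* ... the counted object lives here ... */
    dec_ucount(uc, UCOUNT_USER_NAMESPACES);             /* uncharge, drop the ref */
    return 0;
}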

long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
{
    struct ucounts *iter;
    long max = LONG_MAX;
    long ret = 0;

    for (iter = ucounts; iter; iter = iter->ns->ucounts) {
        long new = atomic_long_add_return(v, &iter->ucount[type]);
        if (new < 0 || new > max)
            ret = LONG_MAX;
        else if (iter == ucounts)
            ret = new;
        max = READ_ONCE(iter->ns->ucount_max[type]);
    }
    return ret;
}

bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
{
    struct ucounts *iter;
    long new = -1; /* Silence compiler warning */
    for (iter = ucounts; iter; iter = iter->ns->ucounts) {
        long dec = atomic_long_sub_return(v, &iter->ucount[type]);
        WARN_ON_ONCE(dec < 0);
        if (iter == ucounts)
            new = dec;
    }
    return (new == 0);
}
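inc_rlimit_ucounts() adds v at every level and returns the new value at the starting level, or LONG_MAX if the count went over a namespace cap (or overflowed) anywhere in the chain; callers are then expected to compare the result against the relevant RLIMIT_* value themselves. A hedged sketch of that pattern (hypothetical function and limit value, not part of this file):

static int example_rlimit_charge(struct ucounts *uc, long amount, long rlimit)
{
    long total = inc_rlimit_ucounts(uc, UCOUNT_RLIMIT_NPROC, amount);

    if (total == LONG_MAX || total > rlimit) {   /* over a namespace cap or the rlimit */
        dec_rlimit_ucounts(uc, UCOUNT_RLIMIT_NPROC, amount);
        return -EAGAIN;
    }
    return 0;
}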

static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts,
                struct ucounts *last, enum ucount_type type)
{
    struct ucounts *iter, *next;
    for (iter = ucounts; iter != last; iter = next) {
        long dec = atomic_long_sub_return(1, &iter->ucount[type]);
        WARN_ON_ONCE(dec < 0);
        next = iter->ns->ucounts;
        if (dec == 0)
            put_ucounts(iter);
    }
}

void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type)
{
    do_dec_rlimit_put_ucounts(ucounts, NULL, type);
}

long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type)
{
    /* Caller must hold a reference to ucounts */
    struct ucounts *iter;
    long max = LONG_MAX;
    long dec, ret = 0;

    for (iter = ucounts; iter; iter = iter->ns->ucounts) {
        long new = atomic_long_add_return(1, &iter->ucount[type]);
        if (new < 0 || new > max)
            goto unwind;
        if (iter == ucounts)
            ret = new;
        max = READ_ONCE(iter->ns->ucount_max[type]);
        /*
         * Grab an extra ucount reference for the caller when
         * the rlimit count was previously 0.
         */
        if (new != 1)
            continue;
        if (!get_ucounts(iter))
            goto dec_unwind;
    }
    return ret;
dec_unwind:
    dec = atomic_long_sub_return(1, &iter->ucount[type]);
    WARN_ON_ONCE(dec < 0);
unwind:
    do_dec_rlimit_put_ucounts(ucounts, iter, type);
    return 0;
}
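inc_rlimit_get_ucounts() is the variant used when the charged object may outlive the caller's reference: whenever a level's count goes from 0 to 1 it also takes an extra reference on that ucounts, and dec_rlimit_put_ucounts() drops the charge plus, once the count returns to 0, that reference. A hedged pairing sketch (hypothetical function; UCOUNT_RLIMIT_NPROC is used only as a placeholder type):

static int example_charge_one(struct ucounts *uc)
{
    if (!inc_rlimit_get_ucounts(uc, UCOUNT_RLIMIT_NPROC))
        return -EAGAIN;     /* over a limit somewhere in the chain */
    /* ... the object owning this charge lives here ... */
    dec_rlimit_put_ucounts(uc, UCOUNT_RLIMIT_NPROC);
    return 0;
}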

bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long rlimit)
{
    struct ucounts *iter;
    long max = rlimit;
    if (rlimit > LONG_MAX)
        max = LONG_MAX;
    for (iter = ucounts; iter; iter = iter->ns->ucounts) {
        long val = get_ucounts_value(iter, type);
        if (val < 0 || val > max)
            return true;
        max = READ_ONCE(iter->ns->ucount_max[type]);
    }
    return false;
}

static __init int user_namespace_sysctl_init(void)
{
#ifdef CONFIG_SYSCTL
    static struct ctl_table_header *user_header;
    static struct ctl_table empty[1];
    /*
     * It is necessary to register the user directory in the
     * default set so that registrations in the child sets work
     * properly.
     */
    user_header = register_sysctl("user", empty);
    kmemleak_ignore(user_header);
    BUG_ON(!user_header);
    BUG_ON(!setup_userns_sysctls(&init_user_ns));
#endif
    hlist_add_ucounts(&init_ucounts);
    inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1);
    return 0;
}
subsys_initcall(user_namespace_sysctl_init);