0001
0002
0003 #include <linux/stat.h>
0004 #include <linux/sysctl.h>
0005 #include <linux/slab.h>
0006 #include <linux/cred.h>
0007 #include <linux/hash.h>
0008 #include <linux/kmemleak.h>
0009 #include <linux/user_namespace.h>
0010
/*
 * The ucounts for GLOBAL_ROOT_UID in the initial user namespace.
 * Created with a count of 1 so it is always present and never freed.
 */
struct ucounts init_ucounts = {
	.ns = &init_user_ns,
	.uid = GLOBAL_ROOT_UID,
	.count = ATOMIC_INIT(1),
};
0016
#define UCOUNTS_HASHTABLE_BITS 10

/* Global hash table of every struct ucounts, keyed by (ns, uid). */
static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)];
/* Protects ucounts_hashtable and the final reference drop in put_ucounts(). */
static DEFINE_SPINLOCK(ucounts_lock);

/* Fold the uid value and the namespace pointer into one hash bucket index. */
#define ucounts_hashfn(ns, uid)						\
	hash_long((unsigned long)__kuid_val(uid) + (unsigned long)(ns), \
		  UCOUNTS_HASHTABLE_BITS)
/* Bucket head for the given (ns, uid) pair. */
#define ucounts_hashentry(ns, uid)					\
	(ucounts_hashtable + ucounts_hashfn(ns, uid))
0026
0027
0028 #ifdef CONFIG_SYSCTL
0029 static struct ctl_table_set *
0030 set_lookup(struct ctl_table_root *root)
0031 {
0032 return ¤t_user_ns()->set;
0033 }
0034
0035 static int set_is_seen(struct ctl_table_set *set)
0036 {
0037 return ¤t_user_ns()->set == set;
0038 }
0039
0040 static int set_permissions(struct ctl_table_header *head,
0041 struct ctl_table *table)
0042 {
0043 struct user_namespace *user_ns =
0044 container_of(head->set, struct user_namespace, set);
0045 int mode;
0046
0047
0048 if (ns_capable(user_ns, CAP_SYS_RESOURCE))
0049 mode = (table->mode & S_IRWXU) >> 6;
0050 else
0051
0052 mode = table->mode & S_IROTH;
0053 return (mode << 6) | (mode << 3) | mode;
0054 }
0055
/* Sysctl root for the per-user-namespace "user" directories. */
static struct ctl_table_root set_root = {
	.lookup = set_lookup,
	.permissions = set_permissions,
};
0060
/* Bounds for proc_doulongvec_minmax: limits are clamped to [0, INT_MAX]. */
static long ue_zero = 0;
static long ue_int_max = INT_MAX;

#define UCOUNT_ENTRY(name)				\
	{						\
		.procname = name,			\
		.maxlen = sizeof(long),			\
		.mode = 0644,				\
		.proc_handler = proc_doulongvec_minmax,	\
		.extra1 = &ue_zero,			\
		.extra2 = &ue_int_max,			\
	}
static struct ctl_table user_table[] = {
	UCOUNT_ENTRY("max_user_namespaces"),
	UCOUNT_ENTRY("max_pid_namespaces"),
	UCOUNT_ENTRY("max_uts_namespaces"),
	UCOUNT_ENTRY("max_ipc_namespaces"),
	UCOUNT_ENTRY("max_net_namespaces"),
	UCOUNT_ENTRY("max_mnt_namespaces"),
	UCOUNT_ENTRY("max_cgroup_namespaces"),
	UCOUNT_ENTRY("max_time_namespaces"),
#ifdef CONFIG_INOTIFY_USER
	UCOUNT_ENTRY("max_inotify_instances"),
	UCOUNT_ENTRY("max_inotify_watches"),
#endif
#ifdef CONFIG_FANOTIFY
	UCOUNT_ENTRY("max_fanotify_groups"),
	UCOUNT_ENTRY("max_fanotify_marks"),
#endif
	/*
	 * Empty padding entries so ARRAY_SIZE(user_table) equals
	 * UCOUNT_COUNTS + 1 (enforced by the BUILD_BUG_ON in
	 * setup_userns_sysctls()); presumably one per UCOUNT_RLIMIT_*
	 * counter, which get no sysctl, plus the table terminator.
	 */
	{ },
	{ },
	{ },
	{ },
	{ }
};
0096 #endif
0097
/*
 * Register the per-namespace "user" sysctl directory, wiring each
 * entry to the corresponding slot of ns->ucount_max[].  Returns false
 * (with the sysctl set retired) on allocation or registration failure.
 */
bool setup_userns_sysctls(struct user_namespace *ns)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;

	/* Table must have one entry per ucount type plus the terminator. */
	BUILD_BUG_ON(ARRAY_SIZE(user_table) != UCOUNT_COUNTS + 1);
	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
	tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL);
	if (tbl) {
		int i;
		for (i = 0; i < UCOUNT_COUNTS; i++) {
			/* Point each sysctl at this namespace's own limit. */
			tbl[i].data = &ns->ucount_max[i];
		}
		ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl);
	}
	/*
	 * NOTE(review): when kmemdup() fails this reads ns->sysctls
	 * without assigning it here — assumes callers pass a zeroed
	 * namespace; confirm at the call sites.
	 */
	if (!ns->sysctls) {
		kfree(tbl);
		retire_sysctl_set(&ns->set);
		return false;
	}
#endif
	return true;
}
0121
/*
 * Undo setup_userns_sysctls(): unregister the header first, then free
 * the duplicated table it still points at.
 */
void retire_userns_sysctls(struct user_namespace *ns)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;

	/* Grab the table pointer before the header is torn down. */
	tbl = ns->sysctls->ctl_table_arg;
	unregister_sysctl_table(ns->sysctls);
	retire_sysctl_set(&ns->set);
	kfree(tbl);
#endif
}
0133
0134 static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent)
0135 {
0136 struct ucounts *ucounts;
0137
0138 hlist_for_each_entry(ucounts, hashent, node) {
0139 if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns))
0140 return ucounts;
0141 }
0142 return NULL;
0143 }
0144
0145 static void hlist_add_ucounts(struct ucounts *ucounts)
0146 {
0147 struct hlist_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid);
0148 spin_lock_irq(&ucounts_lock);
0149 hlist_add_head(&ucounts->node, hashent);
0150 spin_unlock_irq(&ucounts_lock);
0151 }
0152
/*
 * Unconditionally take a reference; returns false when the increment
 * made the counter negative, i.e. the refcount wrapped.
 */
static inline bool get_ucounts_or_wrap(struct ucounts *ucounts)
{
	return !atomic_add_negative(1, &ucounts->count);
}
0158
0159 struct ucounts *get_ucounts(struct ucounts *ucounts)
0160 {
0161 if (!get_ucounts_or_wrap(ucounts)) {
0162 put_ucounts(ucounts);
0163 ucounts = NULL;
0164 }
0165 return ucounts;
0166 }
0167
/*
 * Find or create the ucounts for (ns, uid) and return it with a
 * reference held.  Returns NULL on allocation failure or if the
 * reference count would wrap.
 */
struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
{
	struct hlist_head *hashent = ucounts_hashentry(ns, uid);
	struct ucounts *ucounts, *new;
	bool wrapped;

	spin_lock_irq(&ucounts_lock);
	ucounts = find_ucounts(ns, uid, hashent);
	if (!ucounts) {
		/* Drop the lock to allocate, then re-check for a racing insert. */
		spin_unlock_irq(&ucounts_lock);

		new = kzalloc(sizeof(*new), GFP_KERNEL);
		if (!new)
			return NULL;

		new->ns = ns;
		new->uid = uid;
		atomic_set(&new->count, 1);

		spin_lock_irq(&ucounts_lock);
		ucounts = find_ucounts(ns, uid, hashent);
		if (ucounts) {
			/* Lost the race: free ours, take a ref on the winner below. */
			kfree(new);
		} else {
			hlist_add_head(&new->node, hashent);
			get_user_ns(new->ns);
			spin_unlock_irq(&ucounts_lock);
			/* The initial count of 1 is the caller's reference. */
			return new;
		}
	}
	wrapped = !get_ucounts_or_wrap(ucounts);
	spin_unlock_irq(&ucounts_lock);
	if (wrapped) {
		put_ucounts(ucounts);
		return NULL;
	}
	return ucounts;
}
0206
/*
 * Drop a reference.  The final drop takes ucounts_lock so the entry
 * can be unhashed atomically with the count reaching zero, then
 * releases the namespace reference and frees the entry.
 */
void put_ucounts(struct ucounts *ucounts)
{
	unsigned long flags;

	if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) {
		hlist_del_init(&ucounts->node);
		spin_unlock_irqrestore(&ucounts_lock, flags);
		put_user_ns(ucounts->ns);
		kfree(ucounts);
	}
}
0218
0219 static inline bool atomic_long_inc_below(atomic_long_t *v, int u)
0220 {
0221 long c, old;
0222 c = atomic_long_read(v);
0223 for (;;) {
0224 if (unlikely(c >= u))
0225 return false;
0226 old = atomic_long_cmpxchg(v, c, c+1);
0227 if (likely(old == c))
0228 return true;
0229 c = old;
0230 }
0231 }
0232
/*
 * Charge one @type count for (ns, uid) and at every ancestor level,
 * enforcing each level's ucount_max.  On success returns the ucounts
 * with a reference held; on failure all partial charges are rolled
 * back and NULL is returned.
 */
struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid,
			   enum ucount_type type)
{
	struct ucounts *ucounts, *iter, *bad;
	struct user_namespace *tns;

	ucounts = alloc_ucounts(ns, uid);
	for (iter = ucounts; iter; iter = tns->ucounts) {
		long max;
		tns = iter->ns;
		max = READ_ONCE(tns->ucount_max[type]);
		if (!atomic_long_inc_below(&iter->ucount[type], max))
			goto fail;
	}
	return ucounts;
fail:
	/* Undo the increments applied before the level that hit its limit. */
	bad = iter;
	for (iter = ucounts; iter != bad; iter = iter->ns->ucounts)
		atomic_long_dec(&iter->ucount[type]);

	put_ucounts(ucounts);
	return NULL;
}
0255
0256 void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
0257 {
0258 struct ucounts *iter;
0259 for (iter = ucounts; iter; iter = iter->ns->ucounts) {
0260 long dec = atomic_long_dec_if_positive(&iter->ucount[type]);
0261 WARN_ON_ONCE(dec < 0);
0262 }
0263 put_ucounts(ucounts);
0264 }
0265
/*
 * Add @v to the @type rlimit counter at every level of the namespace
 * chain.  Returns the new value at the @ucounts level, or LONG_MAX if
 * any level overflowed or exceeded the applicable limit.  Note the
 * charge is applied at every level regardless; callers detecting
 * LONG_MAX are expected to back the charge out.
 */
long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
{
	struct ucounts *iter;
	long max = LONG_MAX;
	long ret = 0;

	for (iter = ucounts; iter; iter = iter->ns->ucounts) {
		long new = atomic_long_add_return(v, &iter->ucount[type]);
		if (new < 0 || new > max)
			ret = LONG_MAX;
		else if (iter == ucounts)
			ret = new;
		/* Each level is checked against the previous level's ns limit. */
		max = READ_ONCE(iter->ns->ucount_max[type]);
	}
	return ret;
}
0282
0283 bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
0284 {
0285 struct ucounts *iter;
0286 long new = -1;
0287 for (iter = ucounts; iter; iter = iter->ns->ucounts) {
0288 long dec = atomic_long_sub_return(v, &iter->ucount[type]);
0289 WARN_ON_ONCE(dec < 0);
0290 if (iter == ucounts)
0291 new = dec;
0292 }
0293 return (new == 0);
0294 }
0295
/*
 * Uncharge one @type rlimit unit from @ucounts up to (but not
 * including) @last, dropping the per-level reference whenever a
 * level's count reaches zero.  @last == NULL walks the whole chain.
 */
static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts,
				      struct ucounts *last, enum ucount_type type)
{
	struct ucounts *iter, *next;
	for (iter = ucounts; iter != last; iter = next) {
		long dec = atomic_long_sub_return(1, &iter->ucount[type]);
		WARN_ON_ONCE(dec < 0);
		/* Read the next link before put_ucounts() can free iter. */
		next = iter->ns->ucounts;
		if (dec == 0)
			put_ucounts(iter);
	}
}
0308
/* Uncharge one @type unit along the entire chain (see helper above's semantics: walks until the end). */
void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type)
{
	do_dec_rlimit_put_ucounts(ucounts, NULL, type);
}
0313
/*
 * Charge one @type rlimit unit at every level, taking a reference on
 * a level the first time its count goes 0 -> 1 (so the entry stays
 * alive while it carries a charge).  Returns the new count at the
 * @ucounts level, or 0 on failure with all charges unwound.
 */
long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type)
{

	struct ucounts *iter;
	long max = LONG_MAX;
	long dec, ret = 0;

	for (iter = ucounts; iter; iter = iter->ns->ucounts) {
		long new = atomic_long_add_return(1, &iter->ucount[type]);
		if (new < 0 || new > max)
			goto unwind;
		if (iter == ucounts)
			ret = new;
		max = READ_ONCE(iter->ns->ucount_max[type]);
		/*
		 * Only the 0 -> 1 transition pins the entry; later
		 * charges ride on the reference taken then.
		 */
		if (new != 1)
			continue;
		if (!get_ucounts(iter))
			goto dec_unwind;
	}
	return ret;
dec_unwind:
	/* get_ucounts() failed: drop this level's charge by hand ... */
	dec = atomic_long_sub_return(1, &iter->ucount[type]);
	WARN_ON_ONCE(dec < 0);
unwind:
	/* ... then unwind every level charged before this one. */
	do_dec_rlimit_put_ucounts(ucounts, iter, type);
	return 0;
}
0345
0346 bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long rlimit)
0347 {
0348 struct ucounts *iter;
0349 long max = rlimit;
0350 if (rlimit > LONG_MAX)
0351 max = LONG_MAX;
0352 for (iter = ucounts; iter; iter = iter->ns->ucounts) {
0353 long val = get_ucounts_value(iter, type);
0354 if (val < 0 || val > max)
0355 return true;
0356 max = READ_ONCE(iter->ns->ucount_max[type]);
0357 }
0358 return false;
0359 }
0360
/*
 * Boot-time setup: register the top-level "user" sysctl directory,
 * wire up the init user namespace's tables, and hash the root ucounts
 * with its initial NPROC charge.
 */
static __init int user_namespace_sysctl_init(void)
{
#ifdef CONFIG_SYSCTL
	static struct ctl_table_header *user_header;
	static struct ctl_table empty[1];

	/*
	 * The "user" directory is registered once and never removed;
	 * kmemleak_ignore() keeps the永-held header out of leak reports.
	 * NOTE(review): kmemleak_ignore() runs before the BUG_ON NULL
	 * check — kmemleak_ignore(NULL) is presumably tolerated; verify.
	 */
	user_header = register_sysctl("user", empty);
	kmemleak_ignore(user_header);
	BUG_ON(!user_header);
	BUG_ON(!setup_userns_sysctls(&init_user_ns));
#endif
	hlist_add_ucounts(&init_ucounts);
	inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1);
	return 0;
}
subsys_initcall(user_namespace_sysctl_init);