// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook  Copyright (c) 2020 Google LLC. */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_local_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>

#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)

static struct bpf_local_storage_map_bucket *
select_bucket(struct bpf_local_storage_map *smap,
	      struct bpf_local_storage_elem *selem)
{
	return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
}
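
/* Illustration (not part of this file): the bucket index is derived from
 * the selem pointer itself, not from a user-supplied key, so it is stable
 * for the selem's lifetime and link/unlink resolve to the same bucket:
 *
 *	b = select_bucket(smap, selem);	// on bpf_selem_link_map()
 *	...
 *	b = select_bucket(smap, selem);	// same b on bpf_selem_unlink_map()
 *
 * Finding the elem of a particular owner never goes through the buckets;
 * it goes through owner_storage() below.
 */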

static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
{
	struct bpf_map *map = &smap->map;

	if (!map->ops->map_local_storage_charge)
		return 0;

	return map->ops->map_local_storage_charge(smap, owner, size);
}

static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
			 u32 size)
{
	struct bpf_map *map = &smap->map;

	if (map->ops->map_local_storage_uncharge)
		map->ops->map_local_storage_uncharge(smap, owner, size);
}

static struct bpf_local_storage __rcu **
owner_storage(struct bpf_local_storage_map *smap, void *owner)
{
	struct bpf_map *map = &smap->map;

	return map->ops->map_owner_storage_ptr(owner);
}
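
/* A minimal sketch (adapted from net/core/bpf_sk_storage.c, shown here
 * for illustration only) of how an owner-specific map type wires up the
 * ops used by the helpers above:
 *
 *	static struct bpf_local_storage __rcu **
 *	bpf_sk_storage_ptr(void *owner)
 *	{
 *		struct sock *sk = owner;
 *
 *		return &sk->sk_bpf_storage;
 *	}
 *
 *	static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
 *					 void *owner, u32 size)
 *	{
 *		return omem_charge(owner, size);
 *	}
 */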
0053
0054 static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
0055 {
0056 return !hlist_unhashed(&selem->snode);
0057 }
0058
0059 static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
0060 {
0061 return !hlist_unhashed(&selem->map_node);
0062 }
0063
0064 struct bpf_local_storage_elem *
0065 bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
0066 void *value, bool charge_mem, gfp_t gfp_flags)
0067 {
0068 struct bpf_local_storage_elem *selem;
0069
0070 if (charge_mem && mem_charge(smap, owner, smap->elem_size))
0071 return NULL;
0072
0073 selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
0074 gfp_flags | __GFP_NOWARN);
0075 if (selem) {
0076 if (value)
0077 memcpy(SDATA(selem)->data, value, smap->map.value_size);
0078 return selem;
0079 }
0080
0081 if (charge_mem)
0082 mem_uncharge(smap, owner, smap->elem_size);
0083
0084 return NULL;
0085 }
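
/* Layout sketch (illustrative): a selem is one allocation of
 * smap->elem_size = sizeof(struct bpf_local_storage_elem) +
 * attr->value_size (set in bpf_local_storage_map_alloc() below), so the
 * memcpy() above lands in the in-line value area:
 *
 *	+---------------------------------------+----------------------+
 *	| struct bpf_local_storage_elem         | value_size bytes     |
 *	| (map_node, snode, local_storage, rcu) | (SDATA(selem)->data) |
 *	+---------------------------------------+----------------------+
 */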

/* Called after an RCU tasks-trace grace period; chain into kfree_rcu()
 * so the object is not freed until a regular RCU grace period has also
 * elapsed (both sleepable and non-sleepable programs may reference it).
 */
void bpf_local_storage_free_rcu(struct rcu_head *rcu)
{
	struct bpf_local_storage *local_storage;

	local_storage = container_of(rcu, struct bpf_local_storage, rcu);
	kfree_rcu(local_storage, rcu);
}

static void bpf_selem_free_rcu(struct rcu_head *rcu)
{
	struct bpf_local_storage_elem *selem;

	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
	kfree_rcu(selem, rcu);
}

/* local_storage->lock must be held and selem->local_storage == local_storage.
 * The caller must ensure selem->smap is still valid to be
 * dereferenced for its smap->elem_size and smap->cache_idx.
 */
bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
				     struct bpf_local_storage_elem *selem,
				     bool uncharge_mem, bool use_trace_rcu)
{
	struct bpf_local_storage_map *smap;
	bool free_local_storage;
	void *owner;

	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
	owner = local_storage->owner;

	/* All uncharging on the owner must be done first.
	 * The owner may be freed once the last selem is unlinked
	 * from local_storage.
	 */
	if (uncharge_mem)
		mem_uncharge(smap, owner, smap->elem_size);

	free_local_storage = hlist_is_singular_node(&selem->snode,
						    &local_storage->list);
	if (free_local_storage) {
		mem_uncharge(smap, owner, sizeof(struct bpf_local_storage));
		local_storage->owner = NULL;

		/* After this RCU_INIT, owner may be freed and cannot be used */
		RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);

		/* local_storage is not freed now.  local_storage->lock is
		 * still held and raw_spin_unlock_irqrestore() on it
		 * will be done by the caller.
		 *
		 * Although the unlock will be done under
		 * rcu_read_lock(), it is more intuitive to
		 * read if the freeing of the local_storage is done
		 * after the unlock.
		 *
		 * Hence, a "bool free_local_storage" is returned
		 * to the caller, which then frees the local_storage
		 * after releasing the lock.
		 */
	}
	hlist_del_init_rcu(&selem->snode);
	if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
	    SDATA(selem))
		RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);

	if (use_trace_rcu)
		call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
	else
		kfree_rcu(selem, rcu);

	return free_local_storage;
}

static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
				       bool use_trace_rcu)
{
	struct bpf_local_storage *local_storage;
	bool free_local_storage = false;
	unsigned long flags;

	if (unlikely(!selem_linked_to_storage(selem)))
		/* selem has already been unlinked from its local_storage */
		return;

	local_storage = rcu_dereference_check(selem->local_storage,
					      bpf_rcu_lock_held());
	raw_spin_lock_irqsave(&local_storage->lock, flags);
	if (likely(selem_linked_to_storage(selem)))
		free_local_storage = bpf_selem_unlink_storage_nolock(
			local_storage, selem, true, use_trace_rcu);
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);

	if (free_local_storage) {
		if (use_trace_rcu)
			call_rcu_tasks_trace(&local_storage->rcu,
					     bpf_local_storage_free_rcu);
		else
			kfree_rcu(local_storage, rcu);
	}
}

void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
				   struct bpf_local_storage_elem *selem)
{
	RCU_INIT_POINTER(selem->local_storage, local_storage);
	hlist_add_head_rcu(&selem->snode, &local_storage->list);
}

void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_map *smap;
	struct bpf_local_storage_map_bucket *b;
	unsigned long flags;

	if (unlikely(!selem_linked_to_map(selem)))
		/* selem has already been unlinked from its smap */
		return;

	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
	b = select_bucket(smap, selem);
	raw_spin_lock_irqsave(&b->lock, flags);
	if (likely(selem_linked_to_map(selem)))
		hlist_del_init_rcu(&selem->map_node);
	raw_spin_unlock_irqrestore(&b->lock, flags);
}

void bpf_selem_link_map(struct bpf_local_storage_map *smap,
			struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
	unsigned long flags;

	raw_spin_lock_irqsave(&b->lock, flags);
	RCU_INIT_POINTER(SDATA(selem)->smap, smap);
	hlist_add_head_rcu(&selem->map_node, &b->list);
	raw_spin_unlock_irqrestore(&b->lock, flags);
}

void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu)
{
	/* Always unlink from map before unlinking from local_storage
	 * because selem will be freed after successfully unlinked from
	 * the local_storage.
	 */
	bpf_selem_unlink_map(selem);
	__bpf_selem_unlink_storage(selem, use_trace_rcu);
}

struct bpf_local_storage_data *
bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
			 struct bpf_local_storage_map *smap,
			 bool cacheit_lockit)
{
	struct bpf_local_storage_data *sdata;
	struct bpf_local_storage_elem *selem;

	/* Fast path (cache hit) */
	sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx],
				      bpf_rcu_lock_held());
	if (sdata && rcu_access_pointer(sdata->smap) == smap)
		return sdata;

	/* Slow path (cache miss) */
	hlist_for_each_entry_rcu(selem, &local_storage->list, snode,
				 rcu_read_lock_trace_held())
		if (rcu_access_pointer(SDATA(selem)->smap) == smap)
			break;

	if (!selem)
		return NULL;

	sdata = SDATA(selem);
	if (cacheit_lockit) {
		unsigned long flags;

		/* spinlock is needed to avoid racing with the
		 * parallel delete.  Otherwise, publishing an already
		 * deleted sdata to the cache will become a
		 * use-after-free problem in the next
		 * bpf_local_storage_lookup().
		 */
		raw_spin_lock_irqsave(&local_storage->lock, flags);
		if (selem_linked_to_storage(selem))
			rcu_assign_pointer(local_storage->cache[smap->cache_idx],
					   sdata);
		raw_spin_unlock_irqrestore(&local_storage->lock, flags);
	}

	return sdata;
}
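
/* Usage sketch (adapted and simplified from net/core/bpf_sk_storage.c,
 * illustrative only): an owner-specific lookup resolves the owner's
 * bpf_local_storage first, then defers to bpf_local_storage_lookup():
 *
 *	static struct bpf_local_storage_data *
 *	bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map,
 *			      bool cacheit_lockit)
 *	{
 *		struct bpf_local_storage *sk_storage;
 *		struct bpf_local_storage_map *smap;
 *
 *		sk_storage = rcu_dereference_check(sk->sk_bpf_storage,
 *						   bpf_rcu_lock_held());
 *		if (!sk_storage)
 *			return NULL;
 *
 *		smap = (struct bpf_local_storage_map *)map;
 *		return bpf_local_storage_lookup(sk_storage, smap,
 *						cacheit_lockit);
 *	}
 */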

static int check_flags(const struct bpf_local_storage_data *old_sdata,
		       u64 map_flags)
{
	if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
		/* elem already exists */
		return -EEXIST;

	if (!old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
		/* elem doesn't exist, cannot update it */
		return -ENOENT;

	return 0;
}
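
/* For reference, the resulting update semantics (with BPF_F_LOCK masked
 * out of map_flags):
 *
 *	map_flags	elem exists	elem missing
 *	BPF_ANY		update		create
 *	BPF_NOEXIST	-EEXIST		create
 *	BPF_EXIST	update		-ENOENT
 */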

int bpf_local_storage_alloc(void *owner,
			    struct bpf_local_storage_map *smap,
			    struct bpf_local_storage_elem *first_selem,
			    gfp_t gfp_flags)
{
	struct bpf_local_storage *prev_storage, *storage;
	struct bpf_local_storage **owner_storage_ptr;
	int err;

	err = mem_charge(smap, owner, sizeof(*storage));
	if (err)
		return err;

	storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
				  gfp_flags | __GFP_NOWARN);
	if (!storage) {
		err = -ENOMEM;
		goto uncharge;
	}

	INIT_HLIST_HEAD(&storage->list);
	raw_spin_lock_init(&storage->lock);
	storage->owner = owner;

	bpf_selem_link_storage_nolock(storage, first_selem);
	bpf_selem_link_map(smap, first_selem);

	owner_storage_ptr =
		(struct bpf_local_storage **)owner_storage(smap, owner);
	/* Publish storage to the owner.
	 * Instead of using any lock of the kernel object (i.e. owner),
	 * cmpxchg will work with any kernel object regardless what
	 * the running context is, bh, irq...etc.
	 *
	 * From now on, the owner->storage pointer (e.g. sk->sk_bpf_storage)
	 * is protected by the storage->lock.  Hence, when freeing
	 * the owner->storage, the storage->lock must be held before
	 * setting owner->storage ptr to NULL.
	 */
	prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
	if (unlikely(prev_storage)) {
		bpf_selem_unlink_map(first_selem);
		err = -EAGAIN;
		goto uncharge;

		/* Note that even though first_selem was linked to smap's
		 * bucket->list, first_selem can be freed immediately
		 * (instead of kfree_rcu) because
		 * bpf_local_storage_map_free() does a
		 * synchronize_rcu() before walking the bucket->list.
		 * Hence, no one is accessing selem from the
		 * bucket->list under rcu_read_lock().
		 */
	}

	return 0;

uncharge:
	kfree(storage);
	mem_uncharge(smap, owner, sizeof(*storage));
	return err;
}

/* The owner is guaranteed to be alive while this runs: the caller holds
 * a reference to it (e.g. sk->sk_refcnt for socket storage) since it is
 * linking a new elem to the owner's storage.
 */
struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
			 void *value, u64 map_flags, gfp_t gfp_flags)
{
	struct bpf_local_storage_data *old_sdata = NULL;
	struct bpf_local_storage_elem *selem = NULL;
	struct bpf_local_storage *local_storage;
	unsigned long flags;
	int err;

	/* BPF_EXIST and BPF_NOEXIST cannot be both set */
	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
	    /* BPF_F_LOCK can only be used in a value with spin_lock */
	    unlikely((map_flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(&smap->map)))
		return ERR_PTR(-EINVAL);

	/* GFP_KERNEL (sleepable) callers may only create a new elem */
	if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
		return ERR_PTR(-EINVAL);

	local_storage = rcu_dereference_check(*owner_storage(smap, owner),
					      bpf_rcu_lock_held());
	if (!local_storage || hlist_empty(&local_storage->list)) {
		/* Very first elem for the owner */
		err = check_flags(NULL, map_flags);
		if (err)
			return ERR_PTR(err);

		selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
		if (!selem)
			return ERR_PTR(-ENOMEM);

		err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
		if (err) {
			kfree(selem);
			mem_uncharge(smap, owner, smap->elem_size);
			return ERR_PTR(err);
		}

		return SDATA(selem);
	}

	if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
		/* Hoping to find an old_sdata to do inline update
		 * such that it can avoid taking the local_storage->lock
		 * and changing the lists.
		 */
		old_sdata =
			bpf_local_storage_lookup(local_storage, smap, false);
		err = check_flags(old_sdata, map_flags);
		if (err)
			return ERR_PTR(err);
		if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) {
			copy_map_value_locked(&smap->map, old_sdata->data,
					      value, false);
			return old_sdata;
		}
	}

	if (gfp_flags == GFP_KERNEL) {
		/* Sleepable path: allocate before taking the raw spinlock */
		selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
		if (!selem)
			return ERR_PTR(-ENOMEM);
	}

	raw_spin_lock_irqsave(&local_storage->lock, flags);

	/* Recheck local_storage->list under local_storage->lock */
	if (unlikely(hlist_empty(&local_storage->list))) {
		/* A parallel del is happening and local_storage is going
		 * away.  It has just been checked before, so very
		 * unlikely.  Return instead of retry to keep things
		 * simple.
		 */
		err = -EAGAIN;
		goto unlock_err;
	}

	old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
	err = check_flags(old_sdata, map_flags);
	if (err)
		goto unlock_err;

	if (old_sdata && (map_flags & BPF_F_LOCK)) {
		copy_map_value_locked(&smap->map, old_sdata->data, value,
				      false);
		selem = SELEM(old_sdata);
		goto unlock;
	}

	if (gfp_flags != GFP_KERNEL) {
		/* local_storage->lock is held.  Hence, we are sure
		 * we can unlink and uncharge the old_sdata successfully
		 * later.  Hence, instead of charging the new selem now
		 * and then uncharging the old selem later (which may cause
		 * a potential but unnecessary charge failure), avoid taking
		 * a charge at all here (the "!old_sdata" check) and the
		 * old_sdata will not be uncharged later during
		 * bpf_selem_unlink_storage_nolock().
		 */
		selem = bpf_selem_alloc(smap, owner, value, !old_sdata, gfp_flags);
		if (!selem) {
			err = -ENOMEM;
			goto unlock_err;
		}
	}

	/* First, link the new selem to the map */
	bpf_selem_link_map(smap, selem);

	/* Second, link (and publish) the new selem to local_storage */
	bpf_selem_link_storage_nolock(local_storage, selem);

	/* Third, remove old selem, SELEM(old_sdata) */
	if (old_sdata) {
		bpf_selem_unlink_map(SELEM(old_sdata));
		bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
						false, true);
	}

unlock:
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
	return SDATA(selem);

unlock_err:
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
	if (selem) {
		mem_uncharge(smap, owner, smap->elem_size);
		kfree(selem);
	}
	return ERR_PTR(err);
}
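
/* Usage sketch (simplified from net/core/bpf_sk_storage.c, illustrative
 * only): a map implementation's update_elem op is a thin wrapper that
 * resolves the owner and calls bpf_local_storage_update():
 *
 *	static int bpf_fd_sk_storage_update_elem(struct bpf_map *map,
 *						 void *key, void *value,
 *						 u64 map_flags)
 *	{
 *		struct bpf_local_storage_data *sdata;
 *		struct socket *sock;
 *		int fd = *(int *)key, err;
 *
 *		sock = sockfd_lookup(fd, &err);
 *		if (!sock)
 *			return err;
 *		sdata = bpf_local_storage_update(sock->sk,
 *				(struct bpf_local_storage_map *)map,
 *				value, map_flags, GFP_ATOMIC);
 *		sockfd_put(sock);
 *		return PTR_ERR_OR_ZERO(sdata);
 *	}
 */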

u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
{
	u64 min_usage = U64_MAX;
	u16 i, res = 0;

	spin_lock(&cache->idx_lock);

	for (i = 0; i < BPF_LOCAL_STORAGE_CACHE_SIZE; i++) {
		if (cache->idx_usage_counts[i] < min_usage) {
			min_usage = cache->idx_usage_counts[i];
			res = i;

			/* Found a free cache_idx */
			if (!min_usage)
				break;
		}
	}
	cache->idx_usage_counts[res]++;

	spin_unlock(&cache->idx_lock);

	return res;
}

void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
				      u16 idx)
{
	spin_lock(&cache->idx_lock);
	cache->idx_usage_counts[idx]--;
	spin_unlock(&cache->idx_lock);
}
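
/* Illustration: each local-storage map is assigned one of the
 * BPF_LOCAL_STORAGE_CACHE_SIZE slots at map creation
 * (bpf_local_storage_cache_idx_get()) and gives it back on map free
 * (bpf_local_storage_cache_idx_free()).  The least-used slot is picked,
 * and multiple maps may share a slot once more maps than slots exist,
 * which is why bpf_local_storage_lookup() must verify sdata->smap == smap
 * before trusting a cache hit.
 */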

void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
				int __percpu *busy_counter)
{
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage_map_bucket *b;
	unsigned int i;

	/* Note that this map might be concurrently cloned from
	 * bpf_sk_storage_clone.  Wait for any existing bpf_sk_storage_clone
	 * RCU read section to finish before proceeding.  New RCU
	 * read sections should be prevented via bpf_map_inc_not_zero.
	 */
	synchronize_rcu();

	/* bpf prog and the userspace can no longer access this map
	 * now.  No new selem (of this map) can be added
	 * to the owner->storage or to the map bucket's list.
	 *
	 * The elem of this map can be cleaned up here
	 * or when the storage is freed e.g.
	 * by bpf_sk_storage_free() during __sk_destruct().
	 */
	for (i = 0; i < (1U << smap->bucket_log); i++) {
		b = &smap->buckets[i];

		rcu_read_lock();
		/* No one is adding to b->list now */
		while ((selem = hlist_entry_safe(
				rcu_dereference_raw(hlist_first_rcu(&b->list)),
				struct bpf_local_storage_elem, map_node))) {
			if (busy_counter) {
				migrate_disable();
				__this_cpu_inc(*busy_counter);
			}
			bpf_selem_unlink(selem, false);
			if (busy_counter) {
				__this_cpu_dec(*busy_counter);
				migrate_enable();
			}
			cond_resched_rcu();
		}
		rcu_read_unlock();
	}

	/* While freeing the storage we may still need to access the map.
	 *
	 * e.g. when bpf_sk_storage_free() has unlinked selem from the map
	 * which then made the above while((selem = ...)) loop
	 * exit immediately.
	 *
	 * However, while freeing the storage one still needs to access the
	 * smap->elem_size to do the uncharging in
	 * bpf_selem_unlink_storage_nolock().
	 *
	 * Hence, wait another rcu grace period for the storage to be freed.
	 */
	synchronize_rcu();

	kvfree(smap->buckets);
	kfree(smap);
}

int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
{
	if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
	    !(attr->map_flags & BPF_F_NO_PREALLOC) ||
	    attr->max_entries ||
	    attr->key_size != sizeof(int) || !attr->value_size ||
	    /* BTF is mandatory so that the value can be dumped to userspace */
	    !attr->btf_key_type_id || !attr->btf_value_type_id)
		return -EINVAL;

	if (!bpf_capable())
		return -EPERM;

	if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE)
		return -E2BIG;

	return 0;
}
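
/* Example of an attr that passes the check above (illustrative values):
 *
 *	attr->map_flags		= BPF_F_NO_PREALLOC;
 *	attr->max_entries	= 0;		// unused for local storage
 *	attr->key_size		= sizeof(int);	// owner fd
 *	attr->value_size	= 64;		// <= BPF_LOCAL_STORAGE_MAX_VALUE_SIZE
 *	attr->btf_key_type_id	= ...;		// BTF ids are mandatory
 *	attr->btf_value_type_id	= ...;
 */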

struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
{
	struct bpf_local_storage_map *smap;
	unsigned int i;
	u32 nbuckets;

	smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
	if (!smap)
		return ERR_PTR(-ENOMEM);
	bpf_map_init_from_attr(&smap->map, attr);

	nbuckets = roundup_pow_of_two(num_possible_cpus());
	/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
	nbuckets = max_t(u32, 2, nbuckets);
	smap->bucket_log = ilog2(nbuckets);

	smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
				 GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
	if (!smap->buckets) {
		kfree(smap);
		return ERR_PTR(-ENOMEM);
	}

	for (i = 0; i < nbuckets; i++) {
		INIT_HLIST_HEAD(&smap->buckets[i].list);
		raw_spin_lock_init(&smap->buckets[i].lock);
	}

	smap->elem_size =
		sizeof(struct bpf_local_storage_elem) + attr->value_size;

	return smap;
}

int bpf_local_storage_map_check_btf(const struct bpf_map *map,
				    const struct btf *btf,
				    const struct btf_type *key_type,
				    const struct btf_type *value_type)
{
	u32 int_data;

	/* Key is expected to be a 32-bit int at offset 0 */
	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

	int_data = *(u32 *)(key_type + 1);
	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
		return -EINVAL;

	return 0;
}
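
/* For completeness, a matching libbpf-style map definition on the BPF
 * program side (illustrative; "struct my_value" and "sk_stg_map" are
 * hypothetical names):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 *		__uint(map_flags, BPF_F_NO_PREALLOC);
 *		__type(key, int);
 *		__type(value, struct my_value);
 *	} sk_stg_map SEC(".maps");
 *
 * The int key satisfies the 32-bit BTF_KIND_INT requirement checked
 * above.
 */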