// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 */
0005 #include <linux/bpf.h>
0006 #include <linux/btf.h>
0007 #include <linux/jhash.h>
0008 #include <linux/filter.h>
0009 #include <linux/rculist_nulls.h>
0010 #include <linux/random.h>
0011 #include <uapi/linux/btf.h>
0012 #include <linux/rcupdate_trace.h>
0013 #include <linux/btf_ids.h>
0014 #include "percpu_freelist.h"
0015 #include "bpf_lru_list.h"
0016 #include "map_in_map.h"
0017
0018 #define HTAB_CREATE_FLAG_MASK \
0019 (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \
0020 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)
0021
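/* Wire up the four generic batch operations for a given map flavour;
 * _name is the prefix of the per-flavour lookup(_and_delete)_batch
 * helpers defined later in this file.
 */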
0022 #define BATCH_OPS(_name) \
0023 .map_lookup_batch = \
0024 _name##_map_lookup_batch, \
0025 .map_lookup_and_delete_batch = \
0026 _name##_map_lookup_and_delete_batch, \
0027 .map_update_batch = \
0028 generic_map_update_batch, \
0029 .map_delete_batch = \
0030 generic_map_delete_batch
0031
/* Locking rules for this hash table:
 *
 * Every bucket has its own lock, which serializes concurrent updates and
 * deletions on that bucket regardless of whether they come from BPF
 * programs or from the bpf() syscall.
 *
 * On non-PREEMPT_RT kernels, and for preallocated maps in general, the
 * bucket lock is a raw spinlock: nothing executed under it can sleep.
 * On PREEMPT_RT, maps created with BPF_F_NO_PREALLOC allocate memory at
 * update time and the allocator may take RT-sleepable locks, which is not
 * allowed under a raw spinlock, so such maps use a regular spinlock
 * instead (see htab_use_raw_lock()).
 *
 * In addition, each map carries HASHTAB_MAP_LOCK_COUNT per-CPU counters
 * (map_locked).  htab_lock_bucket() raises the counter selected by the
 * low bits of the hash before taking the bucket lock and fails with
 * -EBUSY if it is already raised, which prevents deadlocks when a map
 * operation is re-entered on the same CPU, e.g. by a tracing program
 * firing in the middle of another update on the same map.
 */
0082 struct bucket {
0083 struct hlist_nulls_head head;
0084 union {
0085 raw_spinlock_t raw_lock;
0086 spinlock_t lock;
0087 };
0088 };
0089
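/* Per-CPU re-entrancy counters: htab_lock_bucket() picks one of the
 * HASHTAB_MAP_LOCK_COUNT counters using the low bits of the element hash
 * and refuses (with -EBUSY) to take the bucket lock if that counter is
 * already elevated on this CPU, i.e. if a map operation on the same lock
 * class is being re-entered on this CPU.
 */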
0090 #define HASHTAB_MAP_LOCK_COUNT 8
0091 #define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1)
0092
0093 struct bpf_htab {
0094 struct bpf_map map;
0095 struct bucket *buckets;
0096 void *elems;
0097 union {
0098 struct pcpu_freelist freelist;
0099 struct bpf_lru lru;
0100 };
0101 struct htab_elem *__percpu *extra_elems;
0102 atomic_t count;
0103 u32 n_buckets;
0104 u32 elem_size;
0105 u32 hashrnd;
0106 struct lock_class_key lockdep_key;
0107 int __percpu *map_locked[HASHTAB_MAP_LOCK_COUNT];
0108 };
0109
/* each htab element is struct htab_elem + key + value */
0111 struct htab_elem {
0112 union {
0113 struct hlist_nulls_node hash_node;
0114 struct {
0115 void *padding;
0116 union {
0117 struct bpf_htab *htab;
0118 struct pcpu_freelist_node fnode;
0119 struct htab_elem *batch_flink;
0120 };
0121 };
0122 };
0123 union {
0124 struct rcu_head rcu;
0125 struct bpf_lru_node lru_node;
0126 };
0127 u32 hash;
0128 char key[] __aligned(8);
0129 };
0130
0131 static inline bool htab_is_prealloc(const struct bpf_htab *htab)
0132 {
0133 return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
0134 }
0135
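/* Preallocated maps never allocate memory while holding the bucket lock,
 * so a raw spinlock is always safe for them.  On PREEMPT_RT, maps created
 * with BPF_F_NO_PREALLOC allocate during update and the allocator can take
 * RT-sleepable locks, which is not allowed under a raw spinlock, so those
 * maps use a regular spinlock instead.
 */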
0136 static inline bool htab_use_raw_lock(const struct bpf_htab *htab)
0137 {
0138 return (!IS_ENABLED(CONFIG_PREEMPT_RT) || htab_is_prealloc(htab));
0139 }
0140
0141 static void htab_init_buckets(struct bpf_htab *htab)
0142 {
0143 unsigned int i;
0144
0145 for (i = 0; i < htab->n_buckets; i++) {
0146 INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
0147 if (htab_use_raw_lock(htab)) {
0148 raw_spin_lock_init(&htab->buckets[i].raw_lock);
0149 lockdep_set_class(&htab->buckets[i].raw_lock,
0150 &htab->lockdep_key);
0151 } else {
0152 spin_lock_init(&htab->buckets[i].lock);
0153 lockdep_set_class(&htab->buckets[i].lock,
0154 &htab->lockdep_key);
0155 }
0156 cond_resched();
0157 }
0158 }
0159
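/* Take the bucket lock.  Before doing so, raise the per-CPU map_locked
 * counter that corresponds to this hash; if it is already non-zero, a map
 * operation on the same lock class is already running on this CPU (e.g. a
 * tracing program interrupted it) and taking the lock could deadlock, so
 * bail out with -EBUSY instead.  Migration stays disabled until
 * htab_unlock_bucket() so the counter is decremented on the same CPU.
 */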
0160 static inline int htab_lock_bucket(const struct bpf_htab *htab,
0161 struct bucket *b, u32 hash,
0162 unsigned long *pflags)
0163 {
0164 unsigned long flags;
0165
0166 hash = hash & HASHTAB_MAP_LOCK_MASK;
0167
0168 migrate_disable();
0169 if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
0170 __this_cpu_dec(*(htab->map_locked[hash]));
0171 migrate_enable();
0172 return -EBUSY;
0173 }
0174
0175 if (htab_use_raw_lock(htab))
0176 raw_spin_lock_irqsave(&b->raw_lock, flags);
0177 else
0178 spin_lock_irqsave(&b->lock, flags);
0179 *pflags = flags;
0180
0181 return 0;
0182 }
0183
0184 static inline void htab_unlock_bucket(const struct bpf_htab *htab,
0185 struct bucket *b, u32 hash,
0186 unsigned long flags)
0187 {
0188 hash = hash & HASHTAB_MAP_LOCK_MASK;
0189 if (htab_use_raw_lock(htab))
0190 raw_spin_unlock_irqrestore(&b->raw_lock, flags);
0191 else
0192 spin_unlock_irqrestore(&b->lock, flags);
0193 __this_cpu_dec(*(htab->map_locked[hash]));
0194 migrate_enable();
0195 }
0196
0197 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
0198
0199 static bool htab_is_lru(const struct bpf_htab *htab)
0200 {
0201 return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH ||
0202 htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
0203 }
0204
0205 static bool htab_is_percpu(const struct bpf_htab *htab)
0206 {
0207 return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH ||
0208 htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
0209 }
0210
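/* For per-CPU maps the element stores, right after the key, a pointer to
 * the per-CPU value area instead of the value itself.
 */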
0211 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
0212 void __percpu *pptr)
0213 {
0214 *(void __percpu **)(l->key + key_size) = pptr;
0215 }
0216
0217 static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
0218 {
0219 return *(void __percpu **)(l->key + key_size);
0220 }
0221
0222 static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
0223 {
0224 return *(void **)(l->key + roundup(map->key_size, 8));
0225 }
0226
0227 static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
0228 {
0229 return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size);
0230 }
0231
0232 static bool htab_has_extra_elems(struct bpf_htab *htab)
0233 {
0234 return !htab_is_percpu(htab) && !htab_is_lru(htab);
0235 }
0236
0237 static void htab_free_prealloced_timers(struct bpf_htab *htab)
0238 {
0239 u32 num_entries = htab->map.max_entries;
0240 int i;
0241
0242 if (!map_value_has_timer(&htab->map))
0243 return;
0244 if (htab_has_extra_elems(htab))
0245 num_entries += num_possible_cpus();
0246
0247 for (i = 0; i < num_entries; i++) {
0248 struct htab_elem *elem;
0249
0250 elem = get_htab_elem(htab, i);
0251 bpf_timer_cancel_and_free(elem->key +
0252 round_up(htab->map.key_size, 8) +
0253 htab->map.timer_off);
0254 cond_resched();
0255 }
0256 }
0257
0258 static void htab_free_prealloced_kptrs(struct bpf_htab *htab)
0259 {
0260 u32 num_entries = htab->map.max_entries;
0261 int i;
0262
0263 if (!map_value_has_kptrs(&htab->map))
0264 return;
0265 if (htab_has_extra_elems(htab))
0266 num_entries += num_possible_cpus();
0267
0268 for (i = 0; i < num_entries; i++) {
0269 struct htab_elem *elem;
0270
0271 elem = get_htab_elem(htab, i);
0272 bpf_map_free_kptrs(&htab->map, elem->key + round_up(htab->map.key_size, 8));
0273 cond_resched();
0274 }
0275 }
0276
0277 static void htab_free_elems(struct bpf_htab *htab)
0278 {
0279 int i;
0280
0281 if (!htab_is_percpu(htab))
0282 goto free_elems;
0283
0284 for (i = 0; i < htab->map.max_entries; i++) {
0285 void __percpu *pptr;
0286
0287 pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
0288 htab->map.key_size);
0289 free_percpu(pptr);
0290 cond_resched();
0291 }
0292 free_elems:
0293 bpf_map_area_free(htab->elems);
0294 }
0295
/* The LRU list has its own lock (lru_lock) and each hash bucket has a
 * bucket lock.  When both are needed, the order is always
 * lru_lock -> bucket_lock, and that only happens inside bpf_lru_list.c
 * (e.g. when the LRU evicts elements via htab_lru_map_delete_node()).
 *
 * Code in this file must therefore never take lru_lock while holding a
 * bucket lock; the bucket lock has to be dropped first.  This is why
 * htab_lru_push_free() is only ever called after htab_unlock_bucket().
 */
0307 static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
0308 u32 hash)
0309 {
0310 struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash);
0311 struct htab_elem *l;
0312
0313 if (node) {
0314 l = container_of(node, struct htab_elem, lru_node);
0315 memcpy(l->key, key, htab->map.key_size);
0316 return l;
0317 }
0318
0319 return NULL;
0320 }
0321
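/* Preallocate the element array up front (plus one spare element per CPU
 * for maps that use extra_elems).  For per-CPU maps every element also
 * gets its own per-CPU value area.  The elements are then handed either
 * to the LRU machinery (LRU maps) or to the per-CPU freelist (plain hash
 * maps), which is where updates will take them from.
 */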
0322 static int prealloc_init(struct bpf_htab *htab)
0323 {
0324 u32 num_entries = htab->map.max_entries;
0325 int err = -ENOMEM, i;
0326
0327 if (htab_has_extra_elems(htab))
0328 num_entries += num_possible_cpus();
0329
0330 htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries,
0331 htab->map.numa_node);
0332 if (!htab->elems)
0333 return -ENOMEM;
0334
0335 if (!htab_is_percpu(htab))
0336 goto skip_percpu_elems;
0337
0338 for (i = 0; i < num_entries; i++) {
0339 u32 size = round_up(htab->map.value_size, 8);
0340 void __percpu *pptr;
0341
0342 pptr = bpf_map_alloc_percpu(&htab->map, size, 8,
0343 GFP_USER | __GFP_NOWARN);
0344 if (!pptr)
0345 goto free_elems;
0346 htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
0347 pptr);
0348 cond_resched();
0349 }
0350
0351 skip_percpu_elems:
0352 if (htab_is_lru(htab))
0353 err = bpf_lru_init(&htab->lru,
0354 htab->map.map_flags & BPF_F_NO_COMMON_LRU,
0355 offsetof(struct htab_elem, hash) -
0356 offsetof(struct htab_elem, lru_node),
0357 htab_lru_map_delete_node,
0358 htab);
0359 else
0360 err = pcpu_freelist_init(&htab->freelist);
0361
0362 if (err)
0363 goto free_elems;
0364
0365 if (htab_is_lru(htab))
0366 bpf_lru_populate(&htab->lru, htab->elems,
0367 offsetof(struct htab_elem, lru_node),
0368 htab->elem_size, num_entries);
0369 else
0370 pcpu_freelist_populate(&htab->freelist,
0371 htab->elems + offsetof(struct htab_elem, fnode),
0372 htab->elem_size, num_entries);
0373
0374 return 0;
0375
0376 free_elems:
0377 htab_free_elems(htab);
0378 return err;
0379 }
0380
0381 static void prealloc_destroy(struct bpf_htab *htab)
0382 {
0383 htab_free_elems(htab);
0384
0385 if (htab_is_lru(htab))
0386 bpf_lru_destroy(&htab->lru);
0387 else
0388 pcpu_freelist_destroy(&htab->freelist);
0389 }
0390
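/* Plain (non-LRU, non-per-CPU) preallocated maps keep one spare element
 * per CPU.  When an update replaces an existing element, the new element
 * is taken from this per-CPU slot and the old one is parked there instead,
 * avoiding freelist traffic; see alloc_htab_elem().
 */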
0391 static int alloc_extra_elems(struct bpf_htab *htab)
0392 {
0393 struct htab_elem *__percpu *pptr, *l_new;
0394 struct pcpu_freelist_node *l;
0395 int cpu;
0396
0397 pptr = bpf_map_alloc_percpu(&htab->map, sizeof(struct htab_elem *), 8,
0398 GFP_USER | __GFP_NOWARN);
0399 if (!pptr)
0400 return -ENOMEM;
0401
0402 for_each_possible_cpu(cpu) {
0403 l = pcpu_freelist_pop(&htab->freelist);
		/* pop will succeed, since prealloc_init()
		 * preallocated extra num_possible_cpus elements
		 */
0407 l_new = container_of(l, struct htab_elem, fnode);
0408 *per_cpu_ptr(pptr, cpu) = l_new;
0409 }
0410 htab->extra_elems = pptr;
0411 return 0;
0412 }
0413
/* Called from syscall */
0415 static int htab_map_alloc_check(union bpf_attr *attr)
0416 {
0417 bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
0418 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
0419 bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
0420 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	/* percpu_lru means each cpu has its own LRU list.
	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
	 * the map's value itself is percpu.  percpu_lru has
	 * nothing to do with the map's value.
	 */
0426 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
0427 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
0428 bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
0429 int numa_node = bpf_map_attr_numa_node(attr);
0430
0431 BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
0432 offsetof(struct htab_elem, hash_node.pprev));
0433 BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
0434 offsetof(struct htab_elem, hash_node.pprev));
0435
0436 if (lru && !bpf_capable())
		/* The LRU implementation is considerably more complex than
		 * the other map types, hence restrict it to CAP_BPF.
		 */
0440 return -EPERM;
0441
0442 if (zero_seed && !capable(CAP_SYS_ADMIN))
		/* A predictable hash seed makes collision attacks easy, so
		 * guard against local privilege escalation.
		 */
0444 return -EPERM;
0445
0446 if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK ||
0447 !bpf_map_flags_access_ok(attr->map_flags))
0448 return -EINVAL;
0449
0450 if (!lru && percpu_lru)
0451 return -EINVAL;
0452
0453 if (lru && !prealloc)
0454 return -ENOTSUPP;
0455
0456 if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru))
0457 return -EINVAL;
0458
	/* check sanity of attributes.
	 * value_size == 0 may be allowed in the future to use map as a set
	 */
0462 if (attr->max_entries == 0 || attr->key_size == 0 ||
0463 attr->value_size == 0)
0464 return -EINVAL;
0465
0466 if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE -
0467 sizeof(struct htab_elem))
		/* if key_size + value_size is bigger, the user space won't be
		 * able to access the elements via bpf syscall. This check
		 * also makes sure that the elem_size doesn't overflow and it's
		 * kmalloc-able later in htab_map_update_elem()
		 */
0473 return -E2BIG;
0474
0475 return 0;
0476 }
0477
0478 static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
0479 {
0480 bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
0481 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
0482 bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
0483 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	/* percpu_lru means each cpu has its own LRU list.
	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
	 * the map's value itself is percpu.  percpu_lru has
	 * nothing to do with the map's value.
	 */
0489 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
0490 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
0491 struct bpf_htab *htab;
0492 int err, i;
0493
0494 htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT);
0495 if (!htab)
0496 return ERR_PTR(-ENOMEM);
0497
0498 lockdep_register_key(&htab->lockdep_key);
0499
0500 bpf_map_init_from_attr(&htab->map, attr);
0501
0502 if (percpu_lru) {
		/* ensure each CPU's lru list has >=1 elements.
		 * since we are at it, make each lru list have the same
		 * number of elements.
		 */
0507 htab->map.max_entries = roundup(attr->max_entries,
0508 num_possible_cpus());
0509 if (htab->map.max_entries < attr->max_entries)
0510 htab->map.max_entries = rounddown(attr->max_entries,
0511 num_possible_cpus());
0512 }
0513
	/* hash table size must be power of 2 */
0515 htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
0516
0517 htab->elem_size = sizeof(struct htab_elem) +
0518 round_up(htab->map.key_size, 8);
0519 if (percpu)
0520 htab->elem_size += sizeof(void *);
0521 else
0522 htab->elem_size += round_up(htab->map.value_size, 8);
0523
0524 err = -E2BIG;
0525
0526 if (htab->n_buckets == 0 ||
0527 htab->n_buckets > U32_MAX / sizeof(struct bucket))
0528 goto free_htab;
0529
0530 err = -ENOMEM;
0531 htab->buckets = bpf_map_area_alloc(htab->n_buckets *
0532 sizeof(struct bucket),
0533 htab->map.numa_node);
0534 if (!htab->buckets)
0535 goto free_htab;
0536
0537 for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) {
0538 htab->map_locked[i] = bpf_map_alloc_percpu(&htab->map,
0539 sizeof(int),
0540 sizeof(int),
0541 GFP_USER);
0542 if (!htab->map_locked[i])
0543 goto free_map_locked;
0544 }
0545
0546 if (htab->map.map_flags & BPF_F_ZERO_SEED)
0547 htab->hashrnd = 0;
0548 else
0549 htab->hashrnd = get_random_int();
0550
0551 htab_init_buckets(htab);
0552
0553 if (prealloc) {
0554 err = prealloc_init(htab);
0555 if (err)
0556 goto free_map_locked;
0557
0558 if (!percpu && !lru) {
			/* lru itself can remove the least used element, so
			 * there is no need for an extra elem during map_update.
			 */
0562 err = alloc_extra_elems(htab);
0563 if (err)
0564 goto free_prealloc;
0565 }
0566 }
0567
0568 return &htab->map;
0569
0570 free_prealloc:
0571 prealloc_destroy(htab);
0572 free_map_locked:
0573 for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
0574 free_percpu(htab->map_locked[i]);
0575 bpf_map_area_free(htab->buckets);
0576 free_htab:
0577 lockdep_unregister_key(&htab->lockdep_key);
0578 kfree(htab);
0579 return ERR_PTR(err);
0580 }
0581
0582 static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
0583 {
0584 return jhash(key, key_len, hashrnd);
0585 }
0586
0587 static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
0588 {
0589 return &htab->buckets[hash & (htab->n_buckets - 1)];
0590 }
0591
0592 static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
0593 {
0594 return &__select_bucket(htab, hash)->head;
0595 }
0596
/* this lookup function can only be called with bucket lock taken */
0598 static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
0599 void *key, u32 key_size)
0600 {
0601 struct hlist_nulls_node *n;
0602 struct htab_elem *l;
0603
0604 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
0605 if (l->hash == hash && !memcmp(&l->key, key, key_size))
0606 return l;
0607
0608 return NULL;
0609 }
0610
/* can be called without bucket lock. it will repeat the loop in
 * the unlikely event when elements moved from one bucket into another
 * while link list is being walked
 */
0615 static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
0616 u32 hash, void *key,
0617 u32 key_size, u32 n_buckets)
0618 {
0619 struct hlist_nulls_node *n;
0620 struct htab_elem *l;
0621
0622 again:
0623 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
0624 if (l->hash == hash && !memcmp(&l->key, key, key_size))
0625 return l;
0626
0627 if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
0628 goto again;
0629
0630 return NULL;
0631 }
0632
/* Called from syscall (via htab_map_lookup_elem) and inlined directly into
 * BPF programs by htab_map_gen_lookup(), so the prototype has to stay
 * compatible with bpf_map_lookup_elem().
 */
0638 static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
0639 {
0640 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
0641 struct hlist_nulls_head *head;
0642 struct htab_elem *l;
0643 u32 hash, key_size;
0644
0645 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
0646 !rcu_read_lock_bh_held());
0647
0648 key_size = map->key_size;
0649
0650 hash = htab_map_hash(key, key_size, htab->hashrnd);
0651
0652 head = select_bucket(htab, hash);
0653
0654 l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
0655
0656 return l;
0657 }
0658
0659 static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
0660 {
0661 struct htab_elem *l = __htab_map_lookup_elem(map, key);
0662
0663 if (l)
0664 return l->key + round_up(map->key_size, 8);
0665
0666 return NULL;
0667 }
0668
/* inline bpf_map_lookup_elem() call.
 * Instead of:
 * bpf_prog
 *   bpf_map_lookup_elem
 *     map->ops->map_lookup_elem
 *       htab_map_lookup_elem
 *         __htab_map_lookup_elem
 * do:
 * bpf_prog
 *   __htab_map_lookup_elem
 */
0680 static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
0681 {
0682 struct bpf_insn *insn = insn_buf;
0683 const int ret = BPF_REG_0;
0684
0685 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
0686 (void *(*)(struct bpf_map *map, void *key))NULL));
0687 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
0688 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
0689 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
0690 offsetof(struct htab_elem, key) +
0691 round_up(map->key_size, 8));
0692 return insn - insn_buf;
0693 }
0694
0695 static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map,
0696 void *key, const bool mark)
0697 {
0698 struct htab_elem *l = __htab_map_lookup_elem(map, key);
0699
0700 if (l) {
0701 if (mark)
0702 bpf_lru_node_set_ref(&l->lru_node);
0703 return l->key + round_up(map->key_size, 8);
0704 }
0705
0706 return NULL;
0707 }
0708
0709 static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
0710 {
0711 return __htab_lru_map_lookup_elem(map, key, true);
0712 }
0713
0714 static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
0715 {
0716 return __htab_lru_map_lookup_elem(map, key, false);
0717 }
0718
0719 static int htab_lru_map_gen_lookup(struct bpf_map *map,
0720 struct bpf_insn *insn_buf)
0721 {
0722 struct bpf_insn *insn = insn_buf;
0723 const int ret = BPF_REG_0;
0724 const int ref_reg = BPF_REG_1;
0725
0726 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
0727 (void *(*)(struct bpf_map *map, void *key))NULL));
0728 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
0729 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4);
0730 *insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret,
0731 offsetof(struct htab_elem, lru_node) +
0732 offsetof(struct bpf_lru_node, ref));
0733 *insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1);
0734 *insn++ = BPF_ST_MEM(BPF_B, ret,
0735 offsetof(struct htab_elem, lru_node) +
0736 offsetof(struct bpf_lru_node, ref),
0737 1);
0738 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
0739 offsetof(struct htab_elem, key) +
0740 round_up(map->key_size, 8));
0741 return insn - insn_buf;
0742 }
0743
0744 static void check_and_free_fields(struct bpf_htab *htab,
0745 struct htab_elem *elem)
0746 {
0747 void *map_value = elem->key + round_up(htab->map.key_size, 8);
0748
0749 if (map_value_has_timer(&htab->map))
0750 bpf_timer_cancel_and_free(map_value + htab->map.timer_off);
0751 if (map_value_has_kptrs(&htab->map))
0752 bpf_map_free_kptrs(&htab->map, map_value);
0753 }
0754
/* It is called from the bpf_lru_list when the LRU needs to delete
 * older elements from the htab.
 */
0758 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
0759 {
0760 struct bpf_htab *htab = arg;
0761 struct htab_elem *l = NULL, *tgt_l;
0762 struct hlist_nulls_head *head;
0763 struct hlist_nulls_node *n;
0764 unsigned long flags;
0765 struct bucket *b;
0766 int ret;
0767
0768 tgt_l = container_of(node, struct htab_elem, lru_node);
0769 b = __select_bucket(htab, tgt_l->hash);
0770 head = &b->head;
0771
0772 ret = htab_lock_bucket(htab, b, tgt_l->hash, &flags);
0773 if (ret)
0774 return false;
0775
0776 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
0777 if (l == tgt_l) {
0778 hlist_nulls_del_rcu(&l->hash_node);
0779 check_and_free_fields(htab, l);
0780 break;
0781 }
0782
0783 htab_unlock_bucket(htab, b, tgt_l->hash, flags);
0784
0785 return l == tgt_l;
0786 }
0787
/* Called from syscall */
0789 static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
0790 {
0791 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
0792 struct hlist_nulls_head *head;
0793 struct htab_elem *l, *next_l;
0794 u32 hash, key_size;
0795 int i = 0;
0796
0797 WARN_ON_ONCE(!rcu_read_lock_held());
0798
0799 key_size = map->key_size;
0800
0801 if (!key)
0802 goto find_first_elem;
0803
0804 hash = htab_map_hash(key, key_size, htab->hashrnd);
0805
0806 head = select_bucket(htab, hash);
0807
0808
0809 l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
0810
0811 if (!l)
0812 goto find_first_elem;
0813
0814
0815 next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
0816 struct htab_elem, hash_node);
0817
0818 if (next_l) {
0819
0820 memcpy(next_key, next_l->key, key_size);
0821 return 0;
0822 }
0823
0824
0825 i = hash & (htab->n_buckets - 1);
0826 i++;
0827
0828 find_first_elem:
0829
0830 for (; i < htab->n_buckets; i++) {
0831 head = select_bucket(htab, i);
0832
0833
0834 next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
0835 struct htab_elem, hash_node);
0836 if (next_l) {
0837
0838 memcpy(next_key, next_l->key, key_size);
0839 return 0;
0840 }
0841 }
0842
0843
0844 return -ENOENT;
0845 }
0846
0847 static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
0848 {
0849 if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
0850 free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
0851 check_and_free_fields(htab, l);
0852 kfree(l);
0853 }
0854
0855 static void htab_elem_free_rcu(struct rcu_head *head)
0856 {
0857 struct htab_elem *l = container_of(head, struct htab_elem, rcu);
0858 struct bpf_htab *htab = l->htab;
0859
0860 htab_elem_free(htab, l);
0861 }
0862
0863 static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
0864 {
0865 struct bpf_map *map = &htab->map;
0866 void *ptr;
0867
0868 if (map->ops->map_fd_put_ptr) {
0869 ptr = fd_htab_map_get_ptr(map, l);
0870 map->ops->map_fd_put_ptr(ptr);
0871 }
0872 }
0873
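/* Release an element that has been unlinked from its bucket.  Preallocated
 * elements have their special fields freed and go straight back to the
 * freelist; dynamically allocated elements are freed after an RCU grace
 * period so that concurrent lookups can still walk over them safely.
 */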
0874 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
0875 {
0876 htab_put_fd_value(htab, l);
0877
0878 if (htab_is_prealloc(htab)) {
0879 check_and_free_fields(htab, l);
0880 __pcpu_freelist_push(&htab->freelist, &l->fnode);
0881 } else {
0882 atomic_dec(&htab->count);
0883 l->htab = htab;
0884 call_rcu(&l->rcu, htab_elem_free_rcu);
0885 }
0886 }
0887
0888 static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
0889 void *value, bool onallcpus)
0890 {
0891 if (!onallcpus) {
0892
0893 memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
0894 } else {
0895 u32 size = round_up(htab->map.value_size, 8);
0896 int off = 0, cpu;
0897
0898 for_each_possible_cpu(cpu) {
0899 bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
0900 value + off, size);
0901 off += size;
0902 }
0903 }
0904 }
0905
0906 static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
0907 void *value, bool onallcpus)
0908 {
	/* When using prealloc and not setting the initial value on all cpus,
	 * zero-fill element values for other cpus (just as what happens when
	 * not using prealloc). Otherwise, bpf program has no way to ensure
	 * known initial values for cpus other than current one
	 * (onallcpus=false always when coming from bpf prog).
	 */
0915 if (htab_is_prealloc(htab) && !onallcpus) {
0916 u32 size = round_up(htab->map.value_size, 8);
0917 int current_cpu = raw_smp_processor_id();
0918 int cpu;
0919
0920 for_each_possible_cpu(cpu) {
0921 if (cpu == current_cpu)
0922 bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value,
0923 size);
0924 else
0925 memset(per_cpu_ptr(pptr, cpu), 0, size);
0926 }
0927 } else {
0928 pcpu_copy_value(htab, pptr, value, onallcpus);
0929 }
0930 }
0931
0932 static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
0933 {
0934 return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
0935 BITS_PER_LONG == 64;
0936 }
0937
0938 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
0939 void *value, u32 key_size, u32 hash,
0940 bool percpu, bool onallcpus,
0941 struct htab_elem *old_elem)
0942 {
0943 u32 size = htab->map.value_size;
0944 bool prealloc = htab_is_prealloc(htab);
0945 struct htab_elem *l_new, **pl_new;
0946 void __percpu *pptr;
0947
0948 if (prealloc) {
0949 if (old_elem) {
			/* if we're updating the existing element,
			 * use per-cpu extra elems to avoid freelist_pop/push
			 */
0953 pl_new = this_cpu_ptr(htab->extra_elems);
0954 l_new = *pl_new;
0955 htab_put_fd_value(htab, old_elem);
0956 *pl_new = old_elem;
0957 } else {
0958 struct pcpu_freelist_node *l;
0959
0960 l = __pcpu_freelist_pop(&htab->freelist);
0961 if (!l)
0962 return ERR_PTR(-E2BIG);
0963 l_new = container_of(l, struct htab_elem, fnode);
0964 }
0965 } else {
0966 if (atomic_inc_return(&htab->count) > htab->map.max_entries)
0967 if (!old_elem) {
				/* when map is full and update() is replacing
				 * old element, it's ok to allocate, since
				 * old element will be freed immediately.
				 * Otherwise return an error
				 */
0973 l_new = ERR_PTR(-E2BIG);
0974 goto dec_count;
0975 }
0976 l_new = bpf_map_kmalloc_node(&htab->map, htab->elem_size,
0977 GFP_NOWAIT | __GFP_NOWARN,
0978 htab->map.numa_node);
0979 if (!l_new) {
0980 l_new = ERR_PTR(-ENOMEM);
0981 goto dec_count;
0982 }
0983 check_and_init_map_value(&htab->map,
0984 l_new->key + round_up(key_size, 8));
0985 }
0986
0987 memcpy(l_new->key, key, key_size);
0988 if (percpu) {
0989 size = round_up(size, 8);
0990 if (prealloc) {
0991 pptr = htab_elem_get_ptr(l_new, key_size);
0992 } else {
0993
0994 pptr = bpf_map_alloc_percpu(&htab->map, size, 8,
0995 GFP_NOWAIT | __GFP_NOWARN);
0996 if (!pptr) {
0997 kfree(l_new);
0998 l_new = ERR_PTR(-ENOMEM);
0999 goto dec_count;
1000 }
1001 }
1002
1003 pcpu_init_value(htab, pptr, value, onallcpus);
1004
1005 if (!prealloc)
1006 htab_elem_set_ptr(l_new, key_size, pptr);
1007 } else if (fd_htab_map_needs_adjust(htab)) {
1008 size = round_up(size, 8);
1009 memcpy(l_new->key + round_up(key_size, 8), value, size);
1010 } else {
1011 copy_map_value(&htab->map,
1012 l_new->key + round_up(key_size, 8),
1013 value);
1014 }
1015
1016 l_new->hash = hash;
1017 return l_new;
1018 dec_count:
1019 atomic_dec(&htab->count);
1020 return l_new;
1021 }
1022
1023 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
1024 u64 map_flags)
1025 {
1026 if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
		/* elem already exists */
1028 return -EEXIST;
1029
1030 if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
		/* elem doesn't exist, cannot update it */
1032 return -ENOENT;
1033
1034 return 0;
1035 }
1036
/* Called from syscall or from eBPF program */
1038 static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
1039 u64 map_flags)
1040 {
1041 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1042 struct htab_elem *l_new = NULL, *l_old;
1043 struct hlist_nulls_head *head;
1044 unsigned long flags;
1045 struct bucket *b;
1046 u32 key_size, hash;
1047 int ret;
1048
1049 if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
1051 return -EINVAL;
1052
1053 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1054 !rcu_read_lock_bh_held());
1055
1056 key_size = map->key_size;
1057
1058 hash = htab_map_hash(key, key_size, htab->hashrnd);
1059
1060 b = __select_bucket(htab, hash);
1061 head = &b->head;
1062
1063 if (unlikely(map_flags & BPF_F_LOCK)) {
1064 if (unlikely(!map_value_has_spin_lock(map)))
1065 return -EINVAL;
1066
1067 l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
1068 htab->n_buckets);
1069 ret = check_flags(htab, l_old, map_flags);
1070 if (ret)
1071 return ret;
1072 if (l_old) {
1073
1074 copy_map_value_locked(map,
1075 l_old->key + round_up(key_size, 8),
1076 value, false);
1077 return 0;
1078 }
		/* fall through, grab the bucket lock and lookup again.
		 * 99.9% chance that the element won't be found,
		 * but second lookup under lock has to be done.
		 */
1083 }
1084
1085 ret = htab_lock_bucket(htab, b, hash, &flags);
1086 if (ret)
1087 return ret;
1088
1089 l_old = lookup_elem_raw(head, hash, key, key_size);
1090
1091 ret = check_flags(htab, l_old, map_flags);
1092 if (ret)
1093 goto err;
1094
1095 if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
		/* first lookup without the bucket lock didn't find the element,
		 * but second lookup with the bucket lock found it.
		 * This case is highly unlikely, but has to be dealt with:
		 * grab the element lock in addition to the bucket lock
		 * and update element in place
		 */
1102 copy_map_value_locked(map,
1103 l_old->key + round_up(key_size, 8),
1104 value, false);
1105 ret = 0;
1106 goto err;
1107 }
1108
1109 l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
1110 l_old);
1111 if (IS_ERR(l_new)) {
1112
1113 ret = PTR_ERR(l_new);
1114 goto err;
1115 }
1116
	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 */
1120 hlist_nulls_add_head_rcu(&l_new->hash_node, head);
1121 if (l_old) {
1122 hlist_nulls_del_rcu(&l_old->hash_node);
1123 if (!htab_is_prealloc(htab))
1124 free_htab_elem(htab, l_old);
1125 else
1126 check_and_free_fields(htab, l_old);
1127 }
1128 ret = 0;
1129 err:
1130 htab_unlock_bucket(htab, b, hash, flags);
1131 return ret;
1132 }
1133
1134 static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
1135 {
1136 check_and_free_fields(htab, elem);
1137 bpf_lru_push_free(&htab->lru, &elem->lru_node);
1138 }
1139
1140 static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
1141 u64 map_flags)
1142 {
1143 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1144 struct htab_elem *l_new, *l_old = NULL;
1145 struct hlist_nulls_head *head;
1146 unsigned long flags;
1147 struct bucket *b;
1148 u32 key_size, hash;
1149 int ret;
1150
1151 if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
1153 return -EINVAL;
1154
1155 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1156 !rcu_read_lock_bh_held());
1157
1158 key_size = map->key_size;
1159
1160 hash = htab_map_hash(key, key_size, htab->hashrnd);
1161
1162 b = __select_bucket(htab, hash);
1163 head = &b->head;
1164
	/* For LRU, we need to alloc before taking bucket's
	 * spinlock because getting free nodes from LRU may need
	 * to remove older elements from htab and this removal
	 * operation will need a bucket lock.
	 */
1170 l_new = prealloc_lru_pop(htab, key, hash);
1171 if (!l_new)
1172 return -ENOMEM;
1173 copy_map_value(&htab->map,
1174 l_new->key + round_up(map->key_size, 8), value);
1175
1176 ret = htab_lock_bucket(htab, b, hash, &flags);
1177 if (ret)
1178 return ret;
1179
1180 l_old = lookup_elem_raw(head, hash, key, key_size);
1181
1182 ret = check_flags(htab, l_old, map_flags);
1183 if (ret)
1184 goto err;
1185
1186
1187
1188
1189 hlist_nulls_add_head_rcu(&l_new->hash_node, head);
1190 if (l_old) {
1191 bpf_lru_node_set_ref(&l_new->lru_node);
1192 hlist_nulls_del_rcu(&l_old->hash_node);
1193 }
1194 ret = 0;
1195
1196 err:
1197 htab_unlock_bucket(htab, b, hash, flags);
1198
1199 if (ret)
1200 htab_lru_push_free(htab, l_new);
1201 else if (l_old)
1202 htab_lru_push_free(htab, l_old);
1203
1204 return ret;
1205 }
1206
1207 static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
1208 void *value, u64 map_flags,
1209 bool onallcpus)
1210 {
1211 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1212 struct htab_elem *l_new = NULL, *l_old;
1213 struct hlist_nulls_head *head;
1214 unsigned long flags;
1215 struct bucket *b;
1216 u32 key_size, hash;
1217 int ret;
1218
1219 if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
1221 return -EINVAL;
1222
1223 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1224 !rcu_read_lock_bh_held());
1225
1226 key_size = map->key_size;
1227
1228 hash = htab_map_hash(key, key_size, htab->hashrnd);
1229
1230 b = __select_bucket(htab, hash);
1231 head = &b->head;
1232
1233 ret = htab_lock_bucket(htab, b, hash, &flags);
1234 if (ret)
1235 return ret;
1236
1237 l_old = lookup_elem_raw(head, hash, key, key_size);
1238
1239 ret = check_flags(htab, l_old, map_flags);
1240 if (ret)
1241 goto err;
1242
1243 if (l_old) {
1244
1245 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
1246 value, onallcpus);
1247 } else {
1248 l_new = alloc_htab_elem(htab, key, value, key_size,
1249 hash, true, onallcpus, NULL);
1250 if (IS_ERR(l_new)) {
1251 ret = PTR_ERR(l_new);
1252 goto err;
1253 }
1254 hlist_nulls_add_head_rcu(&l_new->hash_node, head);
1255 }
1256 ret = 0;
1257 err:
1258 htab_unlock_bucket(htab, b, hash, flags);
1259 return ret;
1260 }
1261
1262 static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
1263 void *value, u64 map_flags,
1264 bool onallcpus)
1265 {
1266 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1267 struct htab_elem *l_new = NULL, *l_old;
1268 struct hlist_nulls_head *head;
1269 unsigned long flags;
1270 struct bucket *b;
1271 u32 key_size, hash;
1272 int ret;
1273
1274 if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
1276 return -EINVAL;
1277
1278 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1279 !rcu_read_lock_bh_held());
1280
1281 key_size = map->key_size;
1282
1283 hash = htab_map_hash(key, key_size, htab->hashrnd);
1284
1285 b = __select_bucket(htab, hash);
1286 head = &b->head;
1287
1288
1289
1290
1291
1292
1293 if (map_flags != BPF_EXIST) {
1294 l_new = prealloc_lru_pop(htab, key, hash);
1295 if (!l_new)
1296 return -ENOMEM;
1297 }
1298
1299 ret = htab_lock_bucket(htab, b, hash, &flags);
1300 if (ret)
1301 return ret;
1302
1303 l_old = lookup_elem_raw(head, hash, key, key_size);
1304
1305 ret = check_flags(htab, l_old, map_flags);
1306 if (ret)
1307 goto err;
1308
1309 if (l_old) {
1310 bpf_lru_node_set_ref(&l_old->lru_node);
1311
1312
1313 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
1314 value, onallcpus);
1315 } else {
1316 pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
1317 value, onallcpus);
1318 hlist_nulls_add_head_rcu(&l_new->hash_node, head);
1319 l_new = NULL;
1320 }
1321 ret = 0;
1322 err:
1323 htab_unlock_bucket(htab, b, hash, flags);
1324 if (l_new)
1325 bpf_lru_push_free(&htab->lru, &l_new->lru_node);
1326 return ret;
1327 }
1328
1329 static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
1330 void *value, u64 map_flags)
1331 {
1332 return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
1333 }
1334
1335 static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
1336 void *value, u64 map_flags)
1337 {
1338 return __htab_lru_percpu_map_update_elem(map, key, value, map_flags,
1339 false);
1340 }
1341
1342
1343 static int htab_map_delete_elem(struct bpf_map *map, void *key)
1344 {
1345 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1346 struct hlist_nulls_head *head;
1347 struct bucket *b;
1348 struct htab_elem *l;
1349 unsigned long flags;
1350 u32 hash, key_size;
1351 int ret;
1352
1353 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1354 !rcu_read_lock_bh_held());
1355
1356 key_size = map->key_size;
1357
1358 hash = htab_map_hash(key, key_size, htab->hashrnd);
1359 b = __select_bucket(htab, hash);
1360 head = &b->head;
1361
1362 ret = htab_lock_bucket(htab, b, hash, &flags);
1363 if (ret)
1364 return ret;
1365
1366 l = lookup_elem_raw(head, hash, key, key_size);
1367
1368 if (l) {
1369 hlist_nulls_del_rcu(&l->hash_node);
1370 free_htab_elem(htab, l);
1371 } else {
1372 ret = -ENOENT;
1373 }
1374
1375 htab_unlock_bucket(htab, b, hash, flags);
1376 return ret;
1377 }
1378
1379 static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
1380 {
1381 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1382 struct hlist_nulls_head *head;
1383 struct bucket *b;
1384 struct htab_elem *l;
1385 unsigned long flags;
1386 u32 hash, key_size;
1387 int ret;
1388
1389 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1390 !rcu_read_lock_bh_held());
1391
1392 key_size = map->key_size;
1393
1394 hash = htab_map_hash(key, key_size, htab->hashrnd);
1395 b = __select_bucket(htab, hash);
1396 head = &b->head;
1397
1398 ret = htab_lock_bucket(htab, b, hash, &flags);
1399 if (ret)
1400 return ret;
1401
1402 l = lookup_elem_raw(head, hash, key, key_size);
1403
1404 if (l)
1405 hlist_nulls_del_rcu(&l->hash_node);
1406 else
1407 ret = -ENOENT;
1408
1409 htab_unlock_bucket(htab, b, hash, flags);
1410 if (l)
1411 htab_lru_push_free(htab, l);
1412 return ret;
1413 }
1414
1415 static void delete_all_elements(struct bpf_htab *htab)
1416 {
1417 int i;
1418
1419 for (i = 0; i < htab->n_buckets; i++) {
1420 struct hlist_nulls_head *head = select_bucket(htab, i);
1421 struct hlist_nulls_node *n;
1422 struct htab_elem *l;
1423
1424 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
1425 hlist_nulls_del_rcu(&l->hash_node);
1426 htab_elem_free(htab, l);
1427 }
1428 }
1429 }
1430
1431 static void htab_free_malloced_timers(struct bpf_htab *htab)
1432 {
1433 int i;
1434
1435 rcu_read_lock();
1436 for (i = 0; i < htab->n_buckets; i++) {
1437 struct hlist_nulls_head *head = select_bucket(htab, i);
1438 struct hlist_nulls_node *n;
1439 struct htab_elem *l;
1440
1441 hlist_nulls_for_each_entry(l, n, head, hash_node) {
1442
1443
1444
1445 bpf_timer_cancel_and_free(l->key +
1446 round_up(htab->map.key_size, 8) +
1447 htab->map.timer_off);
1448 }
1449 cond_resched_rcu();
1450 }
1451 rcu_read_unlock();
1452 }
1453
1454 static void htab_map_free_timers(struct bpf_map *map)
1455 {
1456 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1457
1458
1459 if (!map_value_has_timer(&htab->map))
1460 return;
1461 if (!htab_is_prealloc(htab))
1462 htab_free_malloced_timers(htab);
1463 else
1464 htab_free_prealloced_timers(htab);
1465 }
1466
1467
1468 static void htab_map_free(struct bpf_map *map)
1469 {
1470 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1471 int i;
1472
	/* bpf_free_used_maps() or close(map_fd) will trigger this map_free
	 * callback; both happen after BPF programs can no longer be running
	 * lookups on this map, so no synchronize_rcu() is needed to protect
	 * the elements themselves.
	 */

	/* Some free_htab_elem() RCU callbacks for elements of this map may
	 * not have run yet.  Wait for them before tearing everything down.
	 */
1481 rcu_barrier();
1482 if (!htab_is_prealloc(htab)) {
1483 delete_all_elements(htab);
1484 } else {
1485 htab_free_prealloced_kptrs(htab);
1486 prealloc_destroy(htab);
1487 }
1488
1489 bpf_map_free_kptr_off_tab(map);
1490 free_percpu(htab->extra_elems);
1491 bpf_map_area_free(htab->buckets);
1492 for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
1493 free_percpu(htab->map_locked[i]);
1494 lockdep_unregister_key(&htab->lockdep_key);
1495 kfree(htab);
1496 }
1497
1498 static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
1499 struct seq_file *m)
1500 {
1501 void *value;
1502
1503 rcu_read_lock();
1504
1505 value = htab_map_lookup_elem(map, key);
1506 if (!value) {
1507 rcu_read_unlock();
1508 return;
1509 }
1510
1511 btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
1512 seq_puts(m, ": ");
1513 btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
1514 seq_puts(m, "\n");
1515
1516 rcu_read_unlock();
1517 }
1518
1519 static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
1520 void *value, bool is_lru_map,
1521 bool is_percpu, u64 flags)
1522 {
1523 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1524 struct hlist_nulls_head *head;
1525 unsigned long bflags;
1526 struct htab_elem *l;
1527 u32 hash, key_size;
1528 struct bucket *b;
1529 int ret;
1530
1531 key_size = map->key_size;
1532
1533 hash = htab_map_hash(key, key_size, htab->hashrnd);
1534 b = __select_bucket(htab, hash);
1535 head = &b->head;
1536
1537 ret = htab_lock_bucket(htab, b, hash, &bflags);
1538 if (ret)
1539 return ret;
1540
1541 l = lookup_elem_raw(head, hash, key, key_size);
1542 if (!l) {
1543 ret = -ENOENT;
1544 } else {
1545 if (is_percpu) {
1546 u32 roundup_value_size = round_up(map->value_size, 8);
1547 void __percpu *pptr;
1548 int off = 0, cpu;
1549
1550 pptr = htab_elem_get_ptr(l, key_size);
1551 for_each_possible_cpu(cpu) {
1552 bpf_long_memcpy(value + off,
1553 per_cpu_ptr(pptr, cpu),
1554 roundup_value_size);
1555 off += roundup_value_size;
1556 }
1557 } else {
1558 u32 roundup_key_size = round_up(map->key_size, 8);
1559
1560 if (flags & BPF_F_LOCK)
1561 copy_map_value_locked(map, value, l->key +
1562 roundup_key_size,
1563 true);
1564 else
1565 copy_map_value(map, value, l->key +
1566 roundup_key_size);
1567 check_and_init_map_value(map, value);
1568 }
1569
1570 hlist_nulls_del_rcu(&l->hash_node);
1571 if (!is_lru_map)
1572 free_htab_elem(htab, l);
1573 }
1574
1575 htab_unlock_bucket(htab, b, hash, bflags);
1576
1577 if (is_lru_map && l)
1578 htab_lru_push_free(htab, l);
1579
1580 return ret;
1581 }
1582
1583 static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
1584 void *value, u64 flags)
1585 {
1586 return __htab_map_lookup_and_delete_elem(map, key, value, false, false,
1587 flags);
1588 }
1589
1590 static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
1591 void *key, void *value,
1592 u64 flags)
1593 {
1594 return __htab_map_lookup_and_delete_elem(map, key, value, false, true,
1595 flags);
1596 }
1597
1598 static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
1599 void *value, u64 flags)
1600 {
1601 return __htab_map_lookup_and_delete_elem(map, key, value, true, false,
1602 flags);
1603 }
1604
1605 static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
1606 void *key, void *value,
1607 u64 flags)
1608 {
1609 return __htab_map_lookup_and_delete_elem(map, key, value, true, true,
1610 flags);
1611 }
1612
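/* Common implementation of the BPF_MAP_LOOKUP_BATCH and
 * BPF_MAP_LOOKUP_AND_DELETE_BATCH commands.  The batch cursor
 * (batch.in_batch/out_batch) is simply a bucket index: each iteration
 * copies one whole bucket into temporary kernel buffers and then out to
 * user space.  -ENOSPC is returned when a bucket does not fit into the
 * remaining user buffer and nothing has been copied yet; otherwise the
 * call stops early and reports what it did copy.  If a bucket holds more
 * entries than the current buffer estimate, the buffers are reallocated
 * and the bucket is retried.  -ENOENT signals that all buckets have been
 * visited.
 */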
1613 static int
1614 __htab_map_lookup_and_delete_batch(struct bpf_map *map,
1615 const union bpf_attr *attr,
1616 union bpf_attr __user *uattr,
1617 bool do_delete, bool is_lru_map,
1618 bool is_percpu)
1619 {
1620 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1621 u32 bucket_cnt, total, key_size, value_size, roundup_key_size;
1622 void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
1623 void __user *uvalues = u64_to_user_ptr(attr->batch.values);
1624 void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
1625 void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
1626 u32 batch, max_count, size, bucket_size, map_id;
1627 struct htab_elem *node_to_free = NULL;
1628 u64 elem_map_flags, map_flags;
1629 struct hlist_nulls_head *head;
1630 struct hlist_nulls_node *n;
1631 unsigned long flags = 0;
1632 bool locked = false;
1633 struct htab_elem *l;
1634 struct bucket *b;
1635 int ret = 0;
1636
1637 elem_map_flags = attr->batch.elem_flags;
1638 if ((elem_map_flags & ~BPF_F_LOCK) ||
1639 ((elem_map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
1640 return -EINVAL;
1641
1642 map_flags = attr->batch.flags;
1643 if (map_flags)
1644 return -EINVAL;
1645
1646 max_count = attr->batch.count;
1647 if (!max_count)
1648 return 0;
1649
1650 if (put_user(0, &uattr->batch.count))
1651 return -EFAULT;
1652
1653 batch = 0;
1654 if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch)))
1655 return -EFAULT;
1656
1657 if (batch >= htab->n_buckets)
1658 return -ENOENT;
1659
1660 key_size = htab->map.key_size;
1661 roundup_key_size = round_up(htab->map.key_size, 8);
1662 value_size = htab->map.value_size;
1663 size = round_up(value_size, 8);
1664 if (is_percpu)
1665 value_size = size * num_possible_cpus();
1666 total = 0;
	/* Start with a small per-bucket buffer; if a bucket turns out to
	 * hold more entries, the buffer is reallocated below (see the
	 * bucket_cnt > bucket_size check).
	 */
1670 bucket_size = 5;
1671
1672 alloc:
	/* We cannot do copy_from_user or copy_to_user inside
	 * the rcu_read_lock. Allocate enough space here.
	 */
1676 keys = kvmalloc_array(key_size, bucket_size, GFP_USER | __GFP_NOWARN);
1677 values = kvmalloc_array(value_size, bucket_size, GFP_USER | __GFP_NOWARN);
1678 if (!keys || !values) {
1679 ret = -ENOMEM;
1680 goto after_loop;
1681 }
1682
1683 again:
1684 bpf_disable_instrumentation();
1685 rcu_read_lock();
1686 again_nocopy:
1687 dst_key = keys;
1688 dst_val = values;
1689 b = &htab->buckets[batch];
1690 head = &b->head;
1691
1692 if (locked) {
1693 ret = htab_lock_bucket(htab, b, batch, &flags);
1694 if (ret)
1695 goto next_batch;
1696 }
1697
1698 bucket_cnt = 0;
1699 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
1700 bucket_cnt++;
1701
1702 if (bucket_cnt && !locked) {
1703 locked = true;
1704 goto again_nocopy;
1705 }
1706
1707 if (bucket_cnt > (max_count - total)) {
1708 if (total == 0)
1709 ret = -ENOSPC;
		/* Note that since bucket_cnt > 0 here, it is implicit
		 * that the lock was grabbed, so release it.
		 */
1713 htab_unlock_bucket(htab, b, batch, flags);
1714 rcu_read_unlock();
1715 bpf_enable_instrumentation();
1716 goto after_loop;
1717 }
1718
1719 if (bucket_cnt > bucket_size) {
1720 bucket_size = bucket_cnt;
1721
1722
1723
1724 htab_unlock_bucket(htab, b, batch, flags);
1725 rcu_read_unlock();
1726 bpf_enable_instrumentation();
1727 kvfree(keys);
1728 kvfree(values);
1729 goto alloc;
1730 }
1731
1732
1733 if (!locked)
1734 goto next_batch;
1735
1736 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
1737 memcpy(dst_key, l->key, key_size);
1738
1739 if (is_percpu) {
1740 int off = 0, cpu;
1741 void __percpu *pptr;
1742
1743 pptr = htab_elem_get_ptr(l, map->key_size);
1744 for_each_possible_cpu(cpu) {
1745 bpf_long_memcpy(dst_val + off,
1746 per_cpu_ptr(pptr, cpu), size);
1747 off += size;
1748 }
1749 } else {
1750 value = l->key + roundup_key_size;
1751 if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
1752 struct bpf_map **inner_map = value;
1753
1754
1755 map_id = map->ops->map_fd_sys_lookup_elem(*inner_map);
1756 value = &map_id;
1757 }
1758
1759 if (elem_map_flags & BPF_F_LOCK)
1760 copy_map_value_locked(map, dst_val, value,
1761 true);
1762 else
1763 copy_map_value(map, dst_val, value);
1764 check_and_init_map_value(map, dst_val);
1765 }
1766 if (do_delete) {
1767 hlist_nulls_del_rcu(&l->hash_node);
			/* bpf_lru_push_free() will acquire lru_lock, which
			 * may cause deadlock. See comments in function
			 * prealloc_lru_pop(). Let us do bpf_lru_push_free()
			 * after releasing the bucket lock.
			 */
1774 if (is_lru_map) {
1775 l->batch_flink = node_to_free;
1776 node_to_free = l;
1777 } else {
1778 free_htab_elem(htab, l);
1779 }
1780 }
1781 dst_key += key_size;
1782 dst_val += value_size;
1783 }
1784
1785 htab_unlock_bucket(htab, b, batch, flags);
1786 locked = false;
1787
1788 while (node_to_free) {
1789 l = node_to_free;
1790 node_to_free = node_to_free->batch_flink;
1791 htab_lru_push_free(htab, l);
1792 }
1793
1794 next_batch:
	/* If we are not copying data, we can go to the next bucket and
	 * avoid unlocking the rcu.
	 */
1798 if (!bucket_cnt && (batch + 1 < htab->n_buckets)) {
1799 batch++;
1800 goto again_nocopy;
1801 }
1802
1803 rcu_read_unlock();
1804 bpf_enable_instrumentation();
1805 if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
1806 key_size * bucket_cnt) ||
1807 copy_to_user(uvalues + total * value_size, values,
1808 value_size * bucket_cnt))) {
1809 ret = -EFAULT;
1810 goto after_loop;
1811 }
1812
1813 total += bucket_cnt;
1814 batch++;
1815 if (batch >= htab->n_buckets) {
1816 ret = -ENOENT;
1817 goto after_loop;
1818 }
1819 goto again;
1820
1821 after_loop:
1822 if (ret == -EFAULT)
1823 goto out;
1824
1825
1826 ubatch = u64_to_user_ptr(attr->batch.out_batch);
1827 if (copy_to_user(ubatch, &batch, sizeof(batch)) ||
1828 put_user(total, &uattr->batch.count))
1829 ret = -EFAULT;
1830
1831 out:
1832 kvfree(keys);
1833 kvfree(values);
1834 return ret;
1835 }
1836
1837 static int
1838 htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
1839 union bpf_attr __user *uattr)
1840 {
1841 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
1842 false, true);
1843 }
1844
1845 static int
1846 htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
1847 const union bpf_attr *attr,
1848 union bpf_attr __user *uattr)
1849 {
1850 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
1851 false, true);
1852 }
1853
1854 static int
1855 htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
1856 union bpf_attr __user *uattr)
1857 {
1858 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
1859 false, false);
1860 }
1861
1862 static int
1863 htab_map_lookup_and_delete_batch(struct bpf_map *map,
1864 const union bpf_attr *attr,
1865 union bpf_attr __user *uattr)
1866 {
1867 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
1868 false, false);
1869 }
1870
1871 static int
1872 htab_lru_percpu_map_lookup_batch(struct bpf_map *map,
1873 const union bpf_attr *attr,
1874 union bpf_attr __user *uattr)
1875 {
1876 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
1877 true, true);
1878 }
1879
1880 static int
1881 htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
1882 const union bpf_attr *attr,
1883 union bpf_attr __user *uattr)
1884 {
1885 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
1886 true, true);
1887 }
1888
1889 static int
1890 htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
1891 union bpf_attr __user *uattr)
1892 {
1893 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
1894 true, false);
1895 }
1896
1897 static int
1898 htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
1899 const union bpf_attr *attr,
1900 union bpf_attr __user *uattr)
1901 {
1902 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
1903 true, false);
1904 }
1905
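/* bpf_iter state for hash maps: bucket_id/skip_elems record where the
 * previous read stopped so the walk can resume, and percpu_value_buf is a
 * scratch buffer used to assemble the per-CPU values of per-CPU maps.
 */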
1906 struct bpf_iter_seq_hash_map_info {
1907 struct bpf_map *map;
1908 struct bpf_htab *htab;
1909 void *percpu_value_buf;
1910 u32 bucket_id;
1911 u32 skip_elems;
1912 };
1913
1914 static struct htab_elem *
1915 bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
1916 struct htab_elem *prev_elem)
1917 {
1918 const struct bpf_htab *htab = info->htab;
1919 u32 skip_elems = info->skip_elems;
1920 u32 bucket_id = info->bucket_id;
1921 struct hlist_nulls_head *head;
1922 struct hlist_nulls_node *n;
1923 struct htab_elem *elem;
1924 struct bucket *b;
1925 u32 i, count;
1926
1927 if (bucket_id >= htab->n_buckets)
1928 return NULL;
1929
1930
1931 if (prev_elem) {
1932
1933
1934
1935 n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node));
1936 elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node);
1937 if (elem)
1938 return elem;
1939
1940
1941 b = &htab->buckets[bucket_id++];
1942 rcu_read_unlock();
1943 skip_elems = 0;
1944 }
1945
1946 for (i = bucket_id; i < htab->n_buckets; i++) {
1947 b = &htab->buckets[i];
1948 rcu_read_lock();
1949
1950 count = 0;
1951 head = &b->head;
1952 hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
1953 if (count >= skip_elems) {
1954 info->bucket_id = i;
1955 info->skip_elems = count;
1956 return elem;
1957 }
1958 count++;
1959 }
1960
1961 rcu_read_unlock();
1962 skip_elems = 0;
1963 }
1964
1965 info->bucket_id = i;
1966 info->skip_elems = 0;
1967 return NULL;
1968 }
1969
1970 static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos)
1971 {
1972 struct bpf_iter_seq_hash_map_info *info = seq->private;
1973 struct htab_elem *elem;
1974
1975 elem = bpf_hash_map_seq_find_next(info, NULL);
1976 if (!elem)
1977 return NULL;
1978
1979 if (*pos == 0)
1980 ++*pos;
1981 return elem;
1982 }
1983
1984 static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1985 {
1986 struct bpf_iter_seq_hash_map_info *info = seq->private;
1987
1988 ++*pos;
1989 ++info->skip_elems;
1990 return bpf_hash_map_seq_find_next(info, v);
1991 }
1992
1993 static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
1994 {
1995 struct bpf_iter_seq_hash_map_info *info = seq->private;
1996 u32 roundup_key_size, roundup_value_size;
1997 struct bpf_iter__bpf_map_elem ctx = {};
1998 struct bpf_map *map = info->map;
1999 struct bpf_iter_meta meta;
2000 int ret = 0, off = 0, cpu;
2001 struct bpf_prog *prog;
2002 void __percpu *pptr;
2003
2004 meta.seq = seq;
2005 prog = bpf_iter_get_info(&meta, elem == NULL);
2006 if (prog) {
2007 ctx.meta = &meta;
2008 ctx.map = info->map;
2009 if (elem) {
2010 roundup_key_size = round_up(map->key_size, 8);
2011 ctx.key = elem->key;
2012 if (!info->percpu_value_buf) {
2013 ctx.value = elem->key + roundup_key_size;
2014 } else {
2015 roundup_value_size = round_up(map->value_size, 8);
2016 pptr = htab_elem_get_ptr(elem, map->key_size);
2017 for_each_possible_cpu(cpu) {
2018 bpf_long_memcpy(info->percpu_value_buf + off,
2019 per_cpu_ptr(pptr, cpu),
2020 roundup_value_size);
2021 off += roundup_value_size;
2022 }
2023 ctx.value = info->percpu_value_buf;
2024 }
2025 }
2026 ret = bpf_iter_run_prog(prog, &ctx);
2027 }
2028
2029 return ret;
2030 }
2031
2032 static int bpf_hash_map_seq_show(struct seq_file *seq, void *v)
2033 {
2034 return __bpf_hash_map_seq_show(seq, v);
2035 }
2036
2037 static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v)
2038 {
2039 if (!v)
2040 (void)__bpf_hash_map_seq_show(seq, NULL);
2041 else
2042 rcu_read_unlock();
2043 }
2044
2045 static int bpf_iter_init_hash_map(void *priv_data,
2046 struct bpf_iter_aux_info *aux)
2047 {
2048 struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
2049 struct bpf_map *map = aux->map;
2050 void *value_buf;
2051 u32 buf_size;
2052
2053 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
2054 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
2055 buf_size = round_up(map->value_size, 8) * num_possible_cpus();
2056 value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
2057 if (!value_buf)
2058 return -ENOMEM;
2059
2060 seq_info->percpu_value_buf = value_buf;
2061 }
2062
2063 bpf_map_inc_with_uref(map);
2064 seq_info->map = map;
2065 seq_info->htab = container_of(map, struct bpf_htab, map);
2066 return 0;
2067 }
2068
2069 static void bpf_iter_fini_hash_map(void *priv_data)
2070 {
2071 struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
2072
2073 bpf_map_put_with_uref(seq_info->map);
2074 kfree(seq_info->percpu_value_buf);
2075 }
2076
2077 static const struct seq_operations bpf_hash_map_seq_ops = {
2078 .start = bpf_hash_map_seq_start,
2079 .next = bpf_hash_map_seq_next,
2080 .stop = bpf_hash_map_seq_stop,
2081 .show = bpf_hash_map_seq_show,
2082 };
2083
2084 static const struct bpf_iter_seq_info iter_seq_info = {
2085 .seq_ops = &bpf_hash_map_seq_ops,
2086 .init_seq_private = bpf_iter_init_hash_map,
2087 .fini_seq_private = bpf_iter_fini_hash_map,
2088 .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
2089 };
2090
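/* Implementation of the bpf_for_each_map_elem() helper for hash maps:
 * walk every bucket under RCU and invoke callback_fn(map, key, value,
 * callback_ctx, 0) for each element.  A non-zero return value from the
 * callback stops the walk.  Returns the number of elements visited.
 */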
2091 static int bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn,
2092 void *callback_ctx, u64 flags)
2093 {
2094 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
2095 struct hlist_nulls_head *head;
2096 struct hlist_nulls_node *n;
2097 struct htab_elem *elem;
2098 u32 roundup_key_size;
2099 int i, num_elems = 0;
2100 void __percpu *pptr;
2101 struct bucket *b;
2102 void *key, *val;
2103 bool is_percpu;
2104 u64 ret = 0;
2105
2106 if (flags != 0)
2107 return -EINVAL;
2108
2109 is_percpu = htab_is_percpu(htab);
2110
2111 roundup_key_size = round_up(map->key_size, 8);
2112
2113
2114
2115 if (is_percpu)
2116 migrate_disable();
2117 for (i = 0; i < htab->n_buckets; i++) {
2118 b = &htab->buckets[i];
2119 rcu_read_lock();
2120 head = &b->head;
2121 hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
2122 key = elem->key;
2123 if (is_percpu) {
2124
2125 pptr = htab_elem_get_ptr(elem, map->key_size);
2126 val = this_cpu_ptr(pptr);
2127 } else {
2128 val = elem->key + roundup_key_size;
2129 }
2130 num_elems++;
2131 ret = callback_fn((u64)(long)map, (u64)(long)key,
2132 (u64)(long)val, (u64)(long)callback_ctx, 0);
2133
2134 if (ret) {
2135 rcu_read_unlock();
2136 goto out;
2137 }
2138 }
2139 rcu_read_unlock();
2140 }
2141 out:
2142 if (is_percpu)
2143 migrate_enable();
2144 return num_elems;
2145 }
2146
2147 BTF_ID_LIST_SINGLE(htab_map_btf_ids, struct, bpf_htab)
2148 const struct bpf_map_ops htab_map_ops = {
2149 .map_meta_equal = bpf_map_meta_equal,
2150 .map_alloc_check = htab_map_alloc_check,
2151 .map_alloc = htab_map_alloc,
2152 .map_free = htab_map_free,
2153 .map_get_next_key = htab_map_get_next_key,
2154 .map_release_uref = htab_map_free_timers,
2155 .map_lookup_elem = htab_map_lookup_elem,
2156 .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
2157 .map_update_elem = htab_map_update_elem,
2158 .map_delete_elem = htab_map_delete_elem,
2159 .map_gen_lookup = htab_map_gen_lookup,
2160 .map_seq_show_elem = htab_map_seq_show_elem,
2161 .map_set_for_each_callback_args = map_set_for_each_callback_args,
2162 .map_for_each_callback = bpf_for_each_hash_elem,
2163 BATCH_OPS(htab),
2164 .map_btf_id = &htab_map_btf_ids[0],
2165 .iter_seq_info = &iter_seq_info,
2166 };
2167
2168 const struct bpf_map_ops htab_lru_map_ops = {
2169 .map_meta_equal = bpf_map_meta_equal,
2170 .map_alloc_check = htab_map_alloc_check,
2171 .map_alloc = htab_map_alloc,
2172 .map_free = htab_map_free,
2173 .map_get_next_key = htab_map_get_next_key,
2174 .map_release_uref = htab_map_free_timers,
2175 .map_lookup_elem = htab_lru_map_lookup_elem,
2176 .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
2177 .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
2178 .map_update_elem = htab_lru_map_update_elem,
2179 .map_delete_elem = htab_lru_map_delete_elem,
2180 .map_gen_lookup = htab_lru_map_gen_lookup,
2181 .map_seq_show_elem = htab_map_seq_show_elem,
2182 .map_set_for_each_callback_args = map_set_for_each_callback_args,
2183 .map_for_each_callback = bpf_for_each_hash_elem,
2184 BATCH_OPS(htab_lru),
2185 .map_btf_id = &htab_map_btf_ids[0],
2186 .iter_seq_info = &iter_seq_info,
2187 };
2188
2189
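/* Per-CPU map lookups return a pointer to the value slot of the current
 * CPU (or of the requested CPU for the *_lookup_percpu_elem variants);
 * the LRU variants additionally mark the element as referenced.
 */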
2190 static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
2191 {
2192 struct htab_elem *l = __htab_map_lookup_elem(map, key);
2193
2194 if (l)
2195 return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
2196 else
2197 return NULL;
2198 }
2199
2200 static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
2201 {
2202 struct htab_elem *l;
2203
2204 if (cpu >= nr_cpu_ids)
2205 return NULL;
2206
2207 l = __htab_map_lookup_elem(map, key);
2208 if (l)
2209 return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu);
2210 else
2211 return NULL;
2212 }
2213
2214 static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
2215 {
2216 struct htab_elem *l = __htab_map_lookup_elem(map, key);
2217
2218 if (l) {
2219 bpf_lru_node_set_ref(&l->lru_node);
2220 return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
2221 }
2222
2223 return NULL;
2224 }
2225
2226 static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
2227 {
2228 struct htab_elem *l;
2229
2230 if (cpu >= nr_cpu_ids)
2231 return NULL;
2232
2233 l = __htab_map_lookup_elem(map, key);
2234 if (l) {
2235 bpf_lru_node_set_ref(&l->lru_node);
2236 return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu);
2237 }
2238
2239 return NULL;
2240 }
2241
2242 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
2243 {
2244 struct htab_elem *l;
2245 void __percpu *pptr;
2246 int ret = -ENOENT;
2247 int cpu, off = 0;
2248 u32 size;
2249
	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
2254 size = round_up(map->value_size, 8);
2255 rcu_read_lock();
2256 l = __htab_map_lookup_elem(map, key);
2257 if (!l)
2258 goto out;
2259
	/* We do not mark LRU map elements here, so that a user-space map
	 * walk does not distort the eviction heuristics.
	 */
2262 pptr = htab_elem_get_ptr(l, map->key_size);
2263 for_each_possible_cpu(cpu) {
2264 bpf_long_memcpy(value + off,
2265 per_cpu_ptr(pptr, cpu), size);
2266 off += size;
2267 }
2268 ret = 0;
2269 out:
2270 rcu_read_unlock();
2271 return ret;
2272 }
2273
2274 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
2275 u64 map_flags)
2276 {
2277 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
2278 int ret;
2279
2280 rcu_read_lock();
2281 if (htab_is_lru(htab))
2282 ret = __htab_lru_percpu_map_update_elem(map, key, value,
2283 map_flags, true);
2284 else
2285 ret = __htab_percpu_map_update_elem(map, key, value, map_flags,
2286 true);
2287 rcu_read_unlock();
2288
2289 return ret;
2290 }
2291
2292 static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
2293 struct seq_file *m)
2294 {
2295 struct htab_elem *l;
2296 void __percpu *pptr;
2297 int cpu;
2298
2299 rcu_read_lock();
2300
2301 l = __htab_map_lookup_elem(map, key);
2302 if (!l) {
2303 rcu_read_unlock();
2304 return;
2305 }
2306
2307 btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
2308 seq_puts(m, ": {\n");
2309 pptr = htab_elem_get_ptr(l, map->key_size);
2310 for_each_possible_cpu(cpu) {
2311 seq_printf(m, "\tcpu%d: ", cpu);
2312 btf_type_seq_show(map->btf, map->btf_value_type_id,
2313 per_cpu_ptr(pptr, cpu), m);
2314 seq_puts(m, "\n");
2315 }
2316 seq_puts(m, "}\n");
2317
2318 rcu_read_unlock();
2319 }
2320
2321 const struct bpf_map_ops htab_percpu_map_ops = {
2322 .map_meta_equal = bpf_map_meta_equal,
2323 .map_alloc_check = htab_map_alloc_check,
2324 .map_alloc = htab_map_alloc,
2325 .map_free = htab_map_free,
2326 .map_get_next_key = htab_map_get_next_key,
2327 .map_lookup_elem = htab_percpu_map_lookup_elem,
2328 .map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
2329 .map_update_elem = htab_percpu_map_update_elem,
2330 .map_delete_elem = htab_map_delete_elem,
2331 .map_lookup_percpu_elem = htab_percpu_map_lookup_percpu_elem,
2332 .map_seq_show_elem = htab_percpu_map_seq_show_elem,
2333 .map_set_for_each_callback_args = map_set_for_each_callback_args,
2334 .map_for_each_callback = bpf_for_each_hash_elem,
2335 BATCH_OPS(htab_percpu),
2336 .map_btf_id = &htab_map_btf_ids[0],
2337 .iter_seq_info = &iter_seq_info,
2338 };
2339
2340 const struct bpf_map_ops htab_lru_percpu_map_ops = {
2341 .map_meta_equal = bpf_map_meta_equal,
2342 .map_alloc_check = htab_map_alloc_check,
2343 .map_alloc = htab_map_alloc,
2344 .map_free = htab_map_free,
2345 .map_get_next_key = htab_map_get_next_key,
2346 .map_lookup_elem = htab_lru_percpu_map_lookup_elem,
2347 .map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
2348 .map_update_elem = htab_lru_percpu_map_update_elem,
2349 .map_delete_elem = htab_lru_map_delete_elem,
2350 .map_lookup_percpu_elem = htab_lru_percpu_map_lookup_percpu_elem,
2351 .map_seq_show_elem = htab_percpu_map_seq_show_elem,
2352 .map_set_for_each_callback_args = map_set_for_each_callback_args,
2353 .map_for_each_callback = bpf_for_each_hash_elem,
2354 BATCH_OPS(htab_lru_percpu),
2355 .map_btf_id = &htab_map_btf_ids[0],
2356 .iter_seq_info = &iter_seq_info,
2357 };
2358
2359 static int fd_htab_map_alloc_check(union bpf_attr *attr)
2360 {
2361 if (attr->value_size != sizeof(u32))
2362 return -EINVAL;
2363 return htab_map_alloc_check(attr);
2364 }
2365
2366 static void fd_htab_map_free(struct bpf_map *map)
2367 {
2368 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
2369 struct hlist_nulls_node *n;
2370 struct hlist_nulls_head *head;
2371 struct htab_elem *l;
2372 int i;
2373
2374 for (i = 0; i < htab->n_buckets; i++) {
2375 head = select_bucket(htab, i);
2376
2377 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
2378 void *ptr = fd_htab_map_get_ptr(map, l);
2379
2380 map->ops->map_fd_put_ptr(ptr);
2381 }
2382 }
2383
2384 htab_map_free(map);
2385 }
2386
/* only called from syscall */
2388 int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
2389 {
2390 void **ptr;
2391 int ret = 0;
2392
2393 if (!map->ops->map_fd_sys_lookup_elem)
2394 return -ENOTSUPP;
2395
2396 rcu_read_lock();
2397 ptr = htab_map_lookup_elem(map, key);
2398 if (ptr)
2399 *value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr));
2400 else
2401 ret = -ENOENT;
2402 rcu_read_unlock();
2403
2404 return ret;
2405 }
2406
/* only called from syscall */
2408 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
2409 void *key, void *value, u64 map_flags)
2410 {
2411 void *ptr;
2412 int ret;
2413 u32 ufd = *(u32 *)value;
2414
2415 ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
2416 if (IS_ERR(ptr))
2417 return PTR_ERR(ptr);
2418
2419 ret = htab_map_update_elem(map, key, &ptr, map_flags);
2420 if (ret)
2421 map->ops->map_fd_put_ptr(ptr);
2422
2423 return ret;
2424 }
2425
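/* hash-of-maps: the map's values are pointers to inner bpf_maps.  A
 * metadata copy of the inner map is validated and stashed first so that
 * programs can be checked against it; user space only ever sees inner map
 * ids (see bpf_fd_htab_map_lookup_elem()).
 */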
2426 static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
2427 {
2428 struct bpf_map *map, *inner_map_meta;
2429
2430 inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
2431 if (IS_ERR(inner_map_meta))
2432 return inner_map_meta;
2433
2434 map = htab_map_alloc(attr);
2435 if (IS_ERR(map)) {
2436 bpf_map_meta_free(inner_map_meta);
2437 return map;
2438 }
2439
2440 map->inner_map_meta = inner_map_meta;
2441
2442 return map;
2443 }
2444
2445 static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
2446 {
2447 struct bpf_map **inner_map = htab_map_lookup_elem(map, key);
2448
2449 if (!inner_map)
2450 return NULL;
2451
2452 return READ_ONCE(*inner_map);
2453 }
2454
2455 static int htab_of_map_gen_lookup(struct bpf_map *map,
2456 struct bpf_insn *insn_buf)
2457 {
2458 struct bpf_insn *insn = insn_buf;
2459 const int ret = BPF_REG_0;
2460
2461 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
2462 (void *(*)(struct bpf_map *map, void *key))NULL));
2463 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
2464 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2);
2465 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
2466 offsetof(struct htab_elem, key) +
2467 round_up(map->key_size, 8));
2468 *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
2469
2470 return insn - insn_buf;
2471 }
2472
2473 static void htab_of_map_free(struct bpf_map *map)
2474 {
2475 bpf_map_meta_free(map->inner_map_meta);
2476 fd_htab_map_free(map);
2477 }
2478
2479 const struct bpf_map_ops htab_of_maps_map_ops = {
2480 .map_alloc_check = fd_htab_map_alloc_check,
2481 .map_alloc = htab_of_map_alloc,
2482 .map_free = htab_of_map_free,
2483 .map_get_next_key = htab_map_get_next_key,
2484 .map_lookup_elem = htab_of_map_lookup_elem,
2485 .map_delete_elem = htab_map_delete_elem,
2486 .map_fd_get_ptr = bpf_map_fd_get_ptr,
2487 .map_fd_put_ptr = bpf_map_fd_put_ptr,
2488 .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
2489 .map_gen_lookup = htab_of_map_gen_lookup,
2490 .map_check_btf = map_check_no_btf,
2491 BATCH_OPS(htab),
2492 .map_btf_id = &htab_map_btf_ids[0],
2493 };