// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016,2017 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/btf_ids.h>

#include "map_in_map.h"

#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
	 BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP)

static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		free_percpu(array->pptrs[i]);
		cond_resched();
	}
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8,
					   GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
		cond_resched();
	}

	return 0;
}

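/* Called from syscall */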
int array_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);

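	/* check sanity of attributes */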
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags) ||
	    (percpu && numa_node != NUMA_NO_NODE))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
	    attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
	    attr->map_flags & BPF_F_PRESERVE_ELEMS)
		return -EINVAL;

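	/* avoid overflow on round_up(map->value_size) */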
	if (attr->value_size > INT_MAX)
		return -E2BIG;

	return 0;
}

static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool bypass_spec_v1 = bpf_bypass_spec_v1();
	u64 array_size, mask64;
	struct bpf_array *array;

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;

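	/* On 32 bit archs roundup_pow_of_two() with max_entries that has
	 * upper most bit set in u32 space is undefined behavior due to
	 * resulting 1U << 32, so do it manually here in u64 space.
	 */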
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;

	index_mask = mask64;
	if (!bypass_spec_v1) {
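		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */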
		max_entries = index_mask + 1;

		if (max_entries < attr->max_entries)
			return ERR_PTR(-E2BIG);
	}

	array_size = sizeof(*array);
	if (percpu) {
		array_size += (u64) max_entries * sizeof(void *);
	} else {
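		/* rely on vmalloc() to return page-aligned memory and
		 * ensure array->value is exactly page-aligned
		 */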
		if (attr->map_flags & BPF_F_MMAPABLE) {
			array_size = PAGE_ALIGN(array_size);
			array_size += PAGE_ALIGN((u64) max_entries * elem_size);
		} else {
			array_size += (u64) max_entries * elem_size;
		}
	}

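	/* allocate all map elements and zero-initialize them */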
	if (attr->map_flags & BPF_F_MMAPABLE) {
		void *data;

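		/* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */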
		data = bpf_map_area_mmapable_alloc(array_size, numa_node);
		if (!data)
			return ERR_PTR(-ENOMEM);
		array = data + PAGE_ALIGN(sizeof(struct bpf_array))
			- offsetof(struct bpf_array, value);
	} else {
		array = bpf_map_area_alloc(array_size, numa_node);
	}
	if (!array)
		return ERR_PTR(-ENOMEM);
	array->index_mask = index_mask;
	array->map.bypass_spec_v1 = bypass_spec_v1;

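	/* copy mandatory map attributes */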
	bpf_map_init_from_attr(&array->map, attr);
	array->elem_size = elem_size;

	if (percpu && bpf_array_alloc_percpu(array)) {
		bpf_map_area_free(array);
		return ERR_PTR(-ENOMEM);
	}

	return &array->map;
}

static void *array_map_elem_ptr(struct bpf_array *array, u32 index)
{
	return array->value + (u64)array->elem_size * index;
}

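/* Called from syscall or from eBPF program */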
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + (u64)array->elem_size * (index & array->index_mask);
}

static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
				       u32 off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (off >= map->value_size)
		return -EINVAL;

	*imm = (unsigned long)array->value;
	return 0;
}

static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
				       u32 *off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u64 base = (unsigned long)array->value;
	u64 range = array->elem_size;

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (imm < base || imm >= base + range)
		return -ENOENT;

	*off = imm - base;
	return 0;
}

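/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */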
static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = array->elem_size;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	if (map->map_flags & BPF_F_INNER_MAP)
		return -EOPNOTSUPP;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	} else {
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	}
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}

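/* Called from eBPF program */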
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (cpu >= nr_cpu_ids)
		return NULL;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

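	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */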
	size = array->elem_size;
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

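/* Called from syscall */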
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}

static void check_and_free_fields(struct bpf_array *arr, void *val)
{
	if (map_value_has_timer(&arr->map))
		bpf_timer_cancel_and_free(val + arr->map.timer_off);
	if (map_value_has_kptrs(&arr->map))
		bpf_map_free_kptrs(&arr->map, val);
}

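/* Called from syscall or from eBPF program */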
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	char *val;

	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags & BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (unlikely((map_flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(map)))
		return -EINVAL;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
		       value, map->value_size);
	} else {
		val = array->value +
			(u64)array->elem_size * (index & array->index_mask);
		if (map_flags & BPF_F_LOCK)
			copy_map_value_locked(map, val, value, false);
		else
			copy_map_value(map, val, value);
		check_and_free_fields(array, val);
	}
	return 0;
}

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

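	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */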
	size = array->elem_size;
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

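/* Called from syscall or from eBPF program */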
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

static void *array_map_vmalloc_addr(struct bpf_array *array)
{
	return (void *)round_down((unsigned long)array, PAGE_SIZE);
}

static void array_map_free_timers(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	/* We don't reset or free kptr on uref dropping to zero. */
	if (!map_value_has_timer(map))
		return;

	for (i = 0; i < array->map.max_entries; i++)
		bpf_timer_cancel_and_free(array_map_elem_ptr(array, i) + map->timer_off);
}

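/* Called when map->refcnt goes to zero, either from workqueue or from syscall */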
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	if (map_value_has_kptrs(map)) {
		for (i = 0; i < array->map.max_entries; i++)
			bpf_map_free_kptrs(map, array_map_elem_ptr(array, i));
		bpf_map_free_kptr_off_tab(map);
	}

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	if (array->map.map_flags & BPF_F_MMAPABLE)
		bpf_map_area_free(array_map_vmalloc_addr(array));
	else
		bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
				    struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = array_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	if (map->btf_key_type_id)
		seq_printf(m, "%u: ", *(u32 *)key);
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();
}

static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
					   struct seq_file *m)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu;

	rcu_read_lock();

	seq_printf(m, "%u: {\n", *(u32 *)key);
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		seq_printf(m, "\tcpu%d: ", cpu);
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  per_cpu_ptr(pptr, cpu), m);
		seq_puts(m, "\n");
	}
	seq_puts(m, "}\n");

	rcu_read_unlock();
}

static int array_map_check_btf(const struct bpf_map *map,
			       const struct btf *btf,
			       const struct btf_type *key_type,
			       const struct btf_type *value_type)
{
	u32 int_data;

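	/* One exception for keyless BTF: .bss/.data/.rodata map */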
	if (btf_type_is_void(key_type)) {
		if (map->map_type != BPF_MAP_TYPE_ARRAY ||
		    map->max_entries != 1)
			return -EINVAL;

		if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
			return -EINVAL;

		return 0;
	}

	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

	int_data = *(u32 *)(key_type + 1);
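	/* bpf array can only take a u32 key. This check makes sure
	 * that the btf matches the attr used during map_create.
	 */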
	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
		return -EINVAL;

	return 0;
}

static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;

	if (!(map->map_flags & BPF_F_MMAPABLE))
		return -EINVAL;

	if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) >
	    PAGE_ALIGN((u64)array->map.max_entries * array->elem_size))
		return -EINVAL;

	return remap_vmalloc_range(vma, array_map_vmalloc_addr(array),
				   vma->vm_pgoff + pgoff);
}

static bool array_map_meta_equal(const struct bpf_map *meta0,
				 const struct bpf_map *meta1)
{
	if (!bpf_map_meta_equal(meta0, meta1))
		return false;
	return meta0->map_flags & BPF_F_INNER_MAP ? true :
	       meta0->max_entries == meta1->max_entries;
}

struct bpf_iter_seq_array_map_info {
	struct bpf_map *map;
	void *percpu_value_buf;
	u32 index;
};

static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	if (info->index >= map->max_entries)
		return NULL;

	if (*pos == 0)
		++*pos;
	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return array->pptrs[index];
	return array_map_elem_ptr(array, index);
}

static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	++*pos;
	++info->index;
	if (info->index >= map->max_entries)
		return NULL;

	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return array->pptrs[index];
	return array_map_elem_ptr(array, index);
}

static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_iter__bpf_map_elem ctx = {};
	struct bpf_map *map = info->map;
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int off = 0, cpu = 0;
	void __percpu **pptr;
	u32 size;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, v == NULL);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.map = info->map;
	if (v) {
		ctx.key = &info->index;

		if (!info->percpu_value_buf) {
			ctx.value = v;
		} else {
			pptr = v;
			size = array->elem_size;
			for_each_possible_cpu(cpu) {
				bpf_long_memcpy(info->percpu_value_buf + off,
						per_cpu_ptr(pptr, cpu),
						size);
				off += size;
			}
			ctx.value = info->percpu_value_buf;
		}
	}

	return bpf_iter_run_prog(prog, &ctx);
}

static int bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_array_map_seq_show(seq, v);
}

static void bpf_array_map_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		(void)__bpf_array_map_seq_show(seq, NULL);
}

static int bpf_iter_init_array_map(void *priv_data,
				   struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;
	struct bpf_map *map = aux->map;
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *value_buf;
	u32 buf_size;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		buf_size = array->elem_size * num_possible_cpus();
		value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
		if (!value_buf)
			return -ENOMEM;

		seq_info->percpu_value_buf = value_buf;
	}

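	/* bpf_iter_attach_map() acquires a map uref, and the uref may be
	 * released before or in the middle of iterating map elements, so
	 * acquire an extra map uref for iterator.
	 */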
	bpf_map_inc_with_uref(map);
	seq_info->map = map;
	return 0;
}

static void bpf_iter_fini_array_map(void *priv_data)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;

	bpf_map_put_with_uref(seq_info->map);
	kfree(seq_info->percpu_value_buf);
}

static const struct seq_operations bpf_array_map_seq_ops = {
	.start = bpf_array_map_seq_start,
	.next = bpf_array_map_seq_next,
	.stop = bpf_array_map_seq_stop,
	.show = bpf_array_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops = &bpf_array_map_seq_ops,
	.init_seq_private = bpf_iter_init_array_map,
	.fini_seq_private = bpf_iter_fini_array_map,
	.seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
};

static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn,
				   void *callback_ctx, u64 flags)
{
	u32 i, key, num_elems = 0;
	struct bpf_array *array;
	bool is_percpu;
	u64 ret = 0;
	void *val;

	if (flags != 0)
		return -EINVAL;

	is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	array = container_of(map, struct bpf_array, map);
	if (is_percpu)
		migrate_disable();
	for (i = 0; i < map->max_entries; i++) {
		if (is_percpu)
			val = this_cpu_ptr(array->pptrs[i]);
		else
			val = array_map_elem_ptr(array, i);
		num_elems++;
		key = i;
		ret = callback_fn((u64)(long)map, (u64)(long)&key,
				  (u64)(long)val, (u64)(long)callback_ctx, 0);
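		/* return value: 0 - continue, 1 - stop and return */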
		if (ret)
			break;
	}

	if (is_percpu)
		migrate_enable();
	return num_elems;
}

BTF_ID_LIST_SINGLE(array_map_btf_ids, struct, bpf_array)
const struct bpf_map_ops array_map_ops = {
	.map_meta_equal = array_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_release_uref = array_map_free_timers,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_gen_lookup = array_map_gen_lookup,
	.map_direct_value_addr = array_map_direct_value_addr,
	.map_direct_value_meta = array_map_direct_value_meta,
	.map_mmap = array_map_mmap,
	.map_seq_show_elem = array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_array_elem,
	.map_btf_id = &array_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};

const struct bpf_map_ops percpu_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
	.map_seq_show_elem = percpu_array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_array_elem,
	.map_btf_id = &array_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};

static int fd_array_map_alloc_check(union bpf_attr *attr)
{
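	/* only file descriptors can be stored in this type of map */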
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	/* Program read-only/write-only not supported for special maps yet. */
	if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
		return -EINVAL;
	return array_map_alloc_check(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

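	/* make sure it's empty */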
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);

	bpf_map_area_free(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

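/* only called from syscall */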
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **elem, *ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	elem = array_map_lookup_elem(map, key);
	if (elem && (ptr = READ_ONCE(*elem)))
		*value = map->ops->map_fd_sys_lookup_elem(ptr);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

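/* only called from syscall */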
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, new_ptr);
		map->ops->map_poke_run(map, index, old_ptr, new_ptr);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, new_ptr);
	}

	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);
	return 0;
}

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, NULL);
		map->ops->map_poke_run(map, index, old_ptr, NULL);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, NULL);
	}

	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}

static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

	if (!bpf_prog_map_compatible(map, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
	bpf_prog_put(ptr);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	return ((struct bpf_prog *)ptr)->aux->id;
}

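/* decrement refcnt of all bpf_progs that are stored in this map */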
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
					 struct seq_file *m)
{
	void **elem, *ptr;
	u32 prog_id;

	rcu_read_lock();

	elem = array_map_lookup_elem(map, key);
	if (elem) {
		ptr = READ_ONCE(*elem);
		if (ptr) {
			seq_printf(m, "%u: ", *(u32 *)key);
			prog_id = prog_fd_array_sys_lookup_elem(ptr);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  &prog_id, m);
			seq_puts(m, "\n");
		}
	}

	rcu_read_unlock();
}

struct prog_poke_elem {
	struct list_head list;
	struct bpf_prog_aux *aux;
};

static int prog_array_map_poke_track(struct bpf_map *map,
				     struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;
	int ret = 0;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry(elem, &aux->poke_progs, list) {
		if (elem->aux == prog_aux)
			goto out;
	}

	elem = kmalloc(sizeof(*elem), GFP_KERNEL);
	if (!elem) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&elem->list);
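	/* We must track the program's aux info at this point in time
	 * since the program pointer itself may not be stable yet, see
	 * also comment in prog_array_map_poke_run().
	 */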
	elem->aux = prog_aux;

	list_add_tail(&elem->list, &aux->poke_progs);
out:
	mutex_unlock(&aux->poke_mutex);
	return ret;
}

static void prog_array_map_poke_untrack(struct bpf_map *map,
					struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		if (elem->aux == prog_aux) {
			list_del_init(&elem->list);
			kfree(elem);
			break;
		}
	}
	mutex_unlock(&aux->poke_mutex);
}

static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
				    struct bpf_prog *old,
				    struct bpf_prog *new)
{
	u8 *old_addr, *new_addr, *old_bypass_addr;
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex));

	list_for_each_entry(elem, &aux->poke_progs, list) {
		struct bpf_jit_poke_descriptor *poke;
		int i, ret;

		for (i = 0; i < elem->aux->size_poke_tab; i++) {
			poke = &elem->aux->poke_tab[i];
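			/* Only elem->aux can be safely dereferenced here; the
			 * bpf_prog it belongs to may not be stable yet. Skip
			 * poke descriptors whose tailcall target has not been
			 * activated by the JIT yet, descriptors that are not
			 * tail call pokes, and descriptors that do not match
			 * this map/key.
			 */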
			if (!READ_ONCE(poke->tailcall_target_stable))
				continue;
			if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
				continue;
			if (poke->tail_call.map != map ||
			    poke->tail_call.key != key)
				continue;

			old_bypass_addr = old ? NULL : poke->bypass_addr;
			old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
			new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;

			if (new) {
				ret = bpf_arch_text_poke(poke->tailcall_target,
							 BPF_MOD_JUMP,
							 old_addr, new_addr);
				BUG_ON(ret < 0 && ret != -EINVAL);
				if (!old) {
					ret = bpf_arch_text_poke(poke->tailcall_bypass,
								 BPF_MOD_JUMP,
								 poke->bypass_addr,
								 NULL);
					BUG_ON(ret < 0 && ret != -EINVAL);
				}
			} else {
				ret = bpf_arch_text_poke(poke->tailcall_bypass,
							 BPF_MOD_JUMP,
							 old_bypass_addr,
							 poke->bypass_addr);
				BUG_ON(ret < 0 && ret != -EINVAL);

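				/* let other CPUs finish the execution of the
				 * program so that they are not exposed to a
				 * partially patched nop/jump sequence
				 */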
				if (!ret)
					synchronize_rcu();
				ret = bpf_arch_text_poke(poke->tailcall_target,
							 BPF_MOD_JUMP,
							 old_addr, NULL);
				BUG_ON(ret < 0 && ret != -EINVAL);
			}
		}
	}
}

static void prog_array_map_clear_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_array_aux,
					   work)->map;
	bpf_fd_array_map_clear(map);
	bpf_map_put(map);
}

static void prog_array_map_clear(struct bpf_map *map)
{
	struct bpf_array_aux *aux = container_of(map, struct bpf_array,
						 map)->aux;
	bpf_map_inc(map);
	schedule_work(&aux->work);
}

static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
{
	struct bpf_array_aux *aux;
	struct bpf_map *map;

	aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT);
	if (!aux)
		return ERR_PTR(-ENOMEM);

	INIT_WORK(&aux->work, prog_array_map_clear_deferred);
	INIT_LIST_HEAD(&aux->poke_progs);
	mutex_init(&aux->poke_mutex);

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		kfree(aux);
		return map;
	}

	container_of(map, struct bpf_array, map)->aux = aux;
	aux->map = map;

	return map;
}

static void prog_array_map_free(struct bpf_map *map)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		list_del_init(&elem->list);
		kfree(elem);
	}
	kfree(aux);
	fd_array_map_free(map);
}

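/* prog_array->aux->{type,jited} is a runtime binding.
 * Doing static check alone in the verifier is not enough.
 * Thus, prog_array_map cannot be used as an inner_map
 * and map_meta_equal is not implemented.
 */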
const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = prog_array_map_alloc,
	.map_free = prog_array_map_free,
	.map_poke_track = prog_array_map_poke_track,
	.map_poke_untrack = prog_array_map_poke_untrack,
	.map_poke_run = prog_array_map_poke_run,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	.map_release_uref = prog_array_map_clear,
	.map_seq_show_elem = prog_array_map_seq_show_elem,
	.map_btf_id = &array_map_btf_ids[0],
};

static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
						   struct file *map_file)
{
	struct bpf_event_entry *ee;

	ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
	if (ee) {
		ee->event = perf_file->private_data;
		ee->perf_file = perf_file;
		ee->map_file = map_file;
	}

	return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *ee;

	ee = container_of(rcu, struct bpf_event_entry, rcu);
	fput(ee->perf_file);
	kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
	call_rcu(&ee->rcu, __bpf_event_entry_free);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *ee;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

	ee = ERR_PTR(-EOPNOTSUPP);
	event = perf_file->private_data;
	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
		goto err_out;

	ee = bpf_event_entry_gen(perf_file, map_file);
	if (ee)
		return ee;
	ee = ERR_PTR(-ENOMEM);
err_out:
	fput(perf_file);
	return ee;
}

static void perf_event_fd_array_put_ptr(void *ptr)
{
	bpf_event_entry_free_rcu(ptr);
}

static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *ee;
	int i;

	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		return;

	rcu_read_lock();
	for (i = 0; i < array->map.max_entries; i++) {
		ee = READ_ONCE(array->ptrs[i]);
		if (ee && ee->map_file == map_file)
			fd_array_map_delete_elem(map, &i);
	}
	rcu_read_unlock();
}

static void perf_event_fd_array_map_free(struct bpf_map *map)
{
	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

const struct bpf_map_ops perf_event_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = perf_event_fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	.map_release = perf_event_fd_array_release,
	.map_check_btf = map_check_no_btf,
	.map_btf_id = &array_map_btf_ids[0],
};

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(void *ptr)
{
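	/* cgroup_put() frees cgrp after a RCU grace period */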
	cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

const struct bpf_map_ops cgroup_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
	.map_check_btf = map_check_no_btf,
	.map_btf_id = &array_map_btf_ids[0],
};
#endif

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

static void array_of_map_free(struct bpf_map *map)
{
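	/* Free the inner map template first, then drop the references
	 * still held on inner maps stored in the array before freeing
	 * the fd array itself.
	 */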
	bpf_map_meta_free(map->inner_map_meta);
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = array_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}

static int array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = array->elem_size;
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);

	return insn - insn_buf;
}

const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = array_of_map_gen_lookup,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_check_btf = map_check_no_btf,
	.map_btf_id = &array_map_btf_ids[0],
};