/* devmap - BPF_MAP_TYPE_DEVMAP and BPF_MAP_TYPE_DEVMAP_HASH
 *
 * Device maps are the backend for the bpf_redirect_map() XDP helper: each
 * entry references a net_device, optionally together with a second XDP
 * program that is run on the egress path. Redirected frames are bulk-queued
 * per CPU (struct xdp_dev_bulk_queue) and flushed to the target device's
 * ndo_xdp_xmit() from __dev_flush() at the end of the NAPI poll.
 */

#include <linux/bpf.h>
#include <net/xdp.h>
#include <linux/filter.h>
#include <trace/events/xdp.h>
#include <linux/btf_ids.h>

#define DEV_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

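/* Per-CPU bulk queue of frames destined for one net_device. Frames are
 * queued from bq_enqueue() and pushed to the device's ndo_xdp_xmit() in
 * bq_xmit_all(), either when the queue fills up or when the flush list is
 * drained in __dev_flush().
 */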
struct xdp_dev_bulk_queue {
	struct xdp_frame *q[DEV_MAP_BULK_SIZE];
	struct list_head flush_node;
	struct net_device *dev;
	struct net_device *dev_rx;
	struct bpf_prog *xdp_prog;
	unsigned int count;
};

struct bpf_dtab_netdev {
	struct net_device *dev; /* offset must stay in sync with _bpf_dtab_netdev, see dev_map_init() */
	struct hlist_node index_hlist;
	struct bpf_dtab *dtab;
	struct bpf_prog *xdp_prog;
	struct rcu_head rcu;
	unsigned int idx;
	struct bpf_devmap_val val;
};

struct bpf_dtab {
	struct bpf_map map;
	struct bpf_dtab_netdev __rcu **netdev_map; /* DEVMAP type only */
	struct list_head list;

	/* these are only used for DEVMAP_HASH type maps */
	struct hlist_head *dev_index_head;
	spinlock_t index_lock;
	unsigned int items;
	u32 n_buckets;
};

static DEFINE_PER_CPU(struct list_head, dev_flush_list);
static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list);

static struct hlist_head *dev_map_create_hash(unsigned int entries,
					      int numa_node)
{
	int i;
	struct hlist_head *hash;

	hash = bpf_map_area_alloc((u64) entries * sizeof(*hash), numa_node);
	if (hash != NULL)
		for (i = 0; i < entries; i++)
			INIT_HLIST_HEAD(&hash[i]);

	return hash;
}

static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
						    int idx)
{
	return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)];
}

static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
{
	u32 valsize = attr->value_size;

	/* check sanity of attributes. 2 value sizes are supported:
	 * 4 bytes: ifindex
	 * 8 bytes: ifindex + prog fd
	 */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    (valsize != offsetofend(struct bpf_devmap_val, ifindex) &&
	     valsize != offsetofend(struct bpf_devmap_val, bpf_prog.fd)) ||
	    attr->map_flags & ~DEV_CREATE_FLAG_MASK)
		return -EINVAL;

	/* Lookup returns a pointer straight to dev->ifindex, so make sure the
	 * verifier rejects writes from BPF programs.
	 */
	attr->map_flags |= BPF_F_RDONLY_PROG;

	bpf_map_init_from_attr(&dtab->map, attr);

	if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
		/* hash table size must be a power of two;
		 * roundup_pow_of_two() can overflow into zero
		 */
		dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);
		if (!dtab->n_buckets)
			return -EINVAL;

		dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets,
							   dtab->map.numa_node);
		if (!dtab->dev_index_head)
			return -ENOMEM;

		spin_lock_init(&dtab->index_lock);
	} else {
		dtab->netdev_map = bpf_map_area_alloc((u64) dtab->map.max_entries *
						      sizeof(struct bpf_dtab_netdev *),
						      dtab->map.numa_node);
		if (!dtab->netdev_map)
			return -ENOMEM;
	}

	return 0;
}

static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
{
	struct bpf_dtab *dtab;
	int err;

	if (!capable(CAP_NET_ADMIN))
		return ERR_PTR(-EPERM);

	dtab = kzalloc(sizeof(*dtab), GFP_USER | __GFP_ACCOUNT);
	if (!dtab)
		return ERR_PTR(-ENOMEM);

	err = dev_map_init_map(dtab, attr);
	if (err) {
		kfree(dtab);
		return ERR_PTR(err);
	}

	spin_lock(&dev_map_lock);
	list_add_tail_rcu(&dtab->list, &dev_map_list);
	spin_unlock(&dev_map_lock);

	return &dtab->map;
}

static void dev_map_free(struct bpf_map *map)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	int i;

	/* At this point the programs that used this map have been
	 * disconnected (map->refcnt == 0), so no new lookups can start. The
	 * synchronize_rcu() below waits for outstanding RCU read sections and
	 * NAPI (preempt-disabled) regions, after which no further reads
	 * against netdev_map and no in-flight flush operations can exist.
	 */
	spin_lock(&dev_map_lock);
	list_del_rcu(&dtab->list);
	spin_unlock(&dev_map_lock);

	bpf_clear_redirect_map(map);
	synchronize_rcu();

	/* Make sure prior __dev_map_entry_free() RCU callbacks have completed */
	rcu_barrier();

	if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
		for (i = 0; i < dtab->n_buckets; i++) {
			struct bpf_dtab_netdev *dev;
			struct hlist_head *head;
			struct hlist_node *next;

			head = dev_map_index_hash(dtab, i);

			hlist_for_each_entry_safe(dev, next, head, index_hlist) {
				hlist_del_rcu(&dev->index_hlist);
				if (dev->xdp_prog)
					bpf_prog_put(dev->xdp_prog);
				dev_put(dev->dev);
				kfree(dev);
			}
		}

		bpf_map_area_free(dtab->dev_index_head);
	} else {
		for (i = 0; i < dtab->map.max_entries; i++) {
			struct bpf_dtab_netdev *dev;

			dev = rcu_dereference_raw(dtab->netdev_map[i]);
			if (!dev)
				continue;

			if (dev->xdp_prog)
				bpf_prog_put(dev->xdp_prog);
			dev_put(dev->dev);
			kfree(dev);
		}

		bpf_map_area_free(dtab->netdev_map);
	}

	kfree(dtab);
}

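/* Array-map style key iteration: a key past the end restarts iteration at
 * index 0, and the last valid index reports -ENOENT to end the walk.
 */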
static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = next_key;

	if (index >= dtab->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == dtab->map.max_entries - 1)
		return -ENOENT;
	*next = index + 1;
	return 0;
}

/* Elements are kept alive by RCU; lookups run either under rcu_read_lock()
 * (syscall path) or with bottom halves disabled (XDP inside NAPI), while
 * updaters hold dtab->index_lock, which the lockdep expression below accepts.
 */
static void *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct hlist_head *head = dev_map_index_hash(dtab, key);
	struct bpf_dtab_netdev *dev;

	hlist_for_each_entry_rcu(dev, head, index_hlist,
				 lockdep_is_held(&dtab->index_lock))
		if (dev->idx == key)
			return dev;

	return NULL;
}

static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
				     void *next_key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	u32 idx, *next = next_key;
	struct bpf_dtab_netdev *dev, *next_dev;
	struct hlist_head *head;
	int i = 0;

	if (!key)
		goto find_first;

	idx = *(u32 *)key;

	dev = __dev_map_hash_lookup_elem(map, idx);
	if (!dev)
		goto find_first;

	next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&dev->index_hlist)),
				    struct bpf_dtab_netdev, index_hlist);

	if (next_dev) {
		*next = next_dev->idx;
		return 0;
	}

	i = idx & (dtab->n_buckets - 1);
	i++;

find_first:
	for (; i < dtab->n_buckets; i++) {
		head = dev_map_index_hash(dtab, i);

		next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
					    struct bpf_dtab_netdev,
					    index_hlist);
		if (next_dev) {
			*next = next_dev->idx;
			return 0;
		}
	}

	return -ENOENT;
}

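/* Run the second, devmap-attached XDP program over a bulk of frames before
 * they are handed to ndo_xdp_xmit(). Frames that return XDP_PASS are packed
 * to the front of the array; everything else is dropped. Returns the number
 * of frames left to transmit.
 */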
static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
				struct xdp_frame **frames, int n,
				struct net_device *dev)
{
	struct xdp_txq_info txq = { .dev = dev };
	struct xdp_buff xdp;
	int i, nframes = 0;

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];
		u32 act;
		int err;

		xdp_convert_frame_to_buff(xdpf, &xdp);
		xdp.txq = &txq;

		act = bpf_prog_run_xdp(xdp_prog, &xdp);
		switch (act) {
		case XDP_PASS:
			err = xdp_update_frame_from_buff(&xdp, xdpf);
			if (unlikely(err < 0))
				xdp_return_frame_rx_napi(xdpf);
			else
				frames[nframes++] = xdpf;
			break;
		default:
			bpf_warn_invalid_xdp_action(NULL, xdp_prog, act);
			fallthrough;
		case XDP_ABORTED:
			trace_xdp_exception(dev, xdp_prog, act);
			fallthrough;
		case XDP_DROP:
			xdp_return_frame_rx_napi(xdpf);
			break;
		}
	}
	return nframes;
}

static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
{
	struct net_device *dev = bq->dev;
	unsigned int cnt = bq->count;
	int sent = 0, err = 0;
	int to_send = cnt;
	int i;

	if (unlikely(!cnt))
		return;

	for (i = 0; i < cnt; i++) {
		struct xdp_frame *xdpf = bq->q[i];

		prefetch(xdpf);
	}

	if (bq->xdp_prog) {
		to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
		if (!to_send)
			goto out;
	}

	sent = dev->netdev_ops->ndo_xdp_xmit(dev, to_send, bq->q, flags);
	if (sent < 0) {
		/* If ndo_xdp_xmit fails with an errno, no frames have
		 * been xmit'ed.
		 */
		err = sent;
		sent = 0;
	}

	/* If not all frames have been transmitted, it is our
	 * responsibility to free them.
	 */
	for (i = sent; unlikely(i < to_send); i++)
		xdp_return_frame_rx_napi(bq->q[i]);

out:
	bq->count = 0;
	trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, cnt - sent, err);
}

/* Flush all pending bulk queues on this CPU. __dev_flush() is called from
 * xdp_do_flush(), which drivers must invoke before returning from their
 * napi->poll() routine, so every frame queued during a NAPI cycle is
 * transmitted within that same cycle.
 */
void __dev_flush(void)
{
	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
	struct xdp_dev_bulk_queue *bq, *tmp;

	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
		bq_xmit_all(bq, XDP_XMIT_FLUSH);
		bq->dev_rx = NULL;
		bq->xdp_prog = NULL;
		__list_del_clearprev(&bq->flush_node);
	}
}

/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
 * by local_bh_disable() (from XDP calls inside NAPI). The
 * rcu_read_lock_bh_held() below makes lockdep accept both.
 */
static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *obj;

	if (key >= map->max_entries)
		return NULL;

	obj = rcu_dereference_check(dtab->netdev_map[key],
				    rcu_read_lock_bh_held());
	return obj;
}

/* Runs in NAPI, i.e. in softirq with bottom halves disabled, so the per-CPU
 * bulk queue and flush list can be used without further locking.
 */
static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
		       struct net_device *dev_rx, struct bpf_prog *xdp_prog)
{
	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
	struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);

	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
		bq_xmit_all(bq, 0);

	/* All frames in this bulk queue come from the same NAPI poll and thus
	 * from the same ingress device, so dev_rx, xdp_prog and flush_node
	 * only need to be set up once, on the first enqueue after a flush.
	 */
	if (!bq->dev_rx) {
		bq->dev_rx = dev_rx;
		bq->xdp_prog = xdp_prog;
		list_add(&bq->flush_node, flush_list);
	}

	bq->q[bq->count++] = xdpf;
}

static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
				struct net_device *dev_rx,
				struct bpf_prog *xdp_prog)
{
	int err;

	if (!dev->netdev_ops->ndo_xdp_xmit)
		return -EOPNOTSUPP;

	err = xdp_ok_fwd_dev(dev, xdp_get_frame_len(xdpf));
	if (unlikely(err))
		return err;

	bq_enqueue(dev, xdpf, dev_rx, xdp_prog);
	return 0;
}

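/* Run the devmap entry's egress program on an skb (generic XDP path). The
 * verdict is returned to the caller; on any verdict other than XDP_PASS the
 * skb has already been freed here.
 */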
static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev *dst)
{
	struct xdp_txq_info txq = { .dev = dst->dev };
	struct xdp_buff xdp;
	u32 act;

	if (!dst->xdp_prog)
		return XDP_PASS;

	__skb_pull(skb, skb->mac_len);
	xdp.txq = &txq;

	act = bpf_prog_run_generic_xdp(skb, &xdp, dst->xdp_prog);
	switch (act) {
	case XDP_PASS:
		__skb_push(skb, skb->mac_len);
		break;
	default:
		bpf_warn_invalid_xdp_action(NULL, dst->xdp_prog, act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(dst->dev, dst->xdp_prog, act);
		fallthrough;
	case XDP_DROP:
		kfree_skb(skb);
		break;
	}

	return act;
}

int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
		    struct net_device *dev_rx)
{
	return __xdp_enqueue(dev, xdpf, dev_rx, NULL);
}

int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
		    struct net_device *dev_rx)
{
	struct net_device *dev = dst->dev;

	return __xdp_enqueue(dev, xdpf, dev_rx, dst->xdp_prog);
}

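/* A map entry is a usable broadcast destination only if it exists, its device
 * implements ndo_xdp_xmit, and the frame passes the xdp_ok_fwd_dev() sanity
 * check against that device.
 */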
static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf)
{
	if (!obj ||
	    !obj->dev->netdev_ops->ndo_xdp_xmit)
		return false;

	if (xdp_ok_fwd_dev(obj->dev, xdp_get_frame_len(xdpf)))
		return false;

	return true;
}

static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj,
				 struct net_device *dev_rx,
				 struct xdp_frame *xdpf)
{
	struct xdp_frame *nxdpf;

	nxdpf = xdpf_clone(xdpf);
	if (!nxdpf)
		return -ENOMEM;

	bq_enqueue(obj->dev, nxdpf, dev_rx, obj->xdp_prog);

	return 0;
}

static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifindex)
{
	while (num_excluded--) {
		if (ifindex == excluded[num_excluded])
			return true;
	}
	return false;
}

/* Get the ifindexes of the devices stacked on top of dev (e.g. a bond or
 * bridge master). The array must have room for MAX_NEST_DEV entries; the
 * number of entries filled in is returned.
 */
static int get_upper_ifindexes(struct net_device *dev, int *indexes)
{
	struct net_device *upper;
	struct list_head *iter;
	int n = 0;

	netdev_for_each_upper_dev_rcu(dev, upper, iter) {
		indexes[n++] = upper->ifindex;
	}
	return n;
}

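/* Broadcast an xdp_frame to every valid entry in the map (BPF_F_BROADCAST).
 * Every destination except the last gets a clone of the frame; the last one
 * consumes the original. With BPF_F_EXCLUDE_INGRESS, the ingress device and
 * its upper devices are skipped. Returns 0 unless cloning fails.
 */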
int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
			  struct bpf_map *map, bool exclude_ingress)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *dst, *last_dst = NULL;
	int excluded_devices[1+MAX_NEST_DEV];
	struct hlist_head *head;
	int num_excluded = 0;
	unsigned int i;
	int err;

	if (exclude_ingress) {
		num_excluded = get_upper_ifindexes(dev_rx, excluded_devices);
		excluded_devices[num_excluded++] = dev_rx->ifindex;
	}

	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
		for (i = 0; i < map->max_entries; i++) {
			dst = rcu_dereference_check(dtab->netdev_map[i],
						    rcu_read_lock_bh_held());
			if (!is_valid_dst(dst, xdpf))
				continue;

			if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
				continue;

			/* we only need n-1 clones; last_dst enqueued below */
			if (!last_dst) {
				last_dst = dst;
				continue;
			}

			err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf);
			if (err)
				return err;

			last_dst = dst;
		}
	} else { /* BPF_MAP_TYPE_DEVMAP_HASH */
		for (i = 0; i < dtab->n_buckets; i++) {
			head = dev_map_index_hash(dtab, i);
			hlist_for_each_entry_rcu(dst, head, index_hlist,
						 lockdep_is_held(&dtab->index_lock)) {
				if (!is_valid_dst(dst, xdpf))
					continue;

				if (is_ifindex_excluded(excluded_devices, num_excluded,
							dst->dev->ifindex))
					continue;

				/* we only need n-1 clones; last_dst enqueued below */
				if (!last_dst) {
					last_dst = dst;
					continue;
				}

				err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf);
				if (err)
					return err;

				last_dst = dst;
			}
		}
	}

	/* consume the last copy of the frame */
	if (last_dst)
		bq_enqueue(last_dst->dev, xdpf, dev_rx, last_dst->xdp_prog);
	else
		xdp_return_frame_rx_napi(xdpf); /* no valid destination */

	return 0;
}

int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
			     struct bpf_prog *xdp_prog)
{
	int err;

	err = xdp_ok_fwd_dev(dst->dev, skb->len);
	if (unlikely(err))
		return err;

	/* Redirect has already succeeded semantically at this point, so we just
	 * return 0 even if the packet is dropped. The helper below handles any
	 * errors and exceptions.
	 */
	if (dev_map_bpf_prog_run_skb(skb, dst) != XDP_PASS)
		return 0;

	skb->dev = dst->dev;
	generic_xdp_tx(skb, xdp_prog);

	return 0;
}

static int dev_map_redirect_clone(struct bpf_dtab_netdev *dst,
				  struct sk_buff *skb,
				  struct bpf_prog *xdp_prog)
{
	struct sk_buff *nskb;
	int err;

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (!nskb)
		return -ENOMEM;

	err = dev_map_generic_redirect(dst, nskb, xdp_prog);
	if (unlikely(err)) {
		consume_skb(nskb);
		return err;
	}

	return 0;
}

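/* skb counterpart of dev_map_enqueue_multi(): broadcast an skb to every entry
 * in the map via generic XDP. All but the last destination receive a clone;
 * the last one gets the original skb, which is consumed here if no
 * destination is found.
 */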
int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
			   struct bpf_prog *xdp_prog, struct bpf_map *map,
			   bool exclude_ingress)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *dst, *last_dst = NULL;
	int excluded_devices[1+MAX_NEST_DEV];
	struct hlist_head *head;
	struct hlist_node *next;
	int num_excluded = 0;
	unsigned int i;
	int err;

	if (exclude_ingress) {
		num_excluded = get_upper_ifindexes(dev, excluded_devices);
		excluded_devices[num_excluded++] = dev->ifindex;
	}

	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
		for (i = 0; i < map->max_entries; i++) {
			dst = rcu_dereference_check(dtab->netdev_map[i],
						    rcu_read_lock_bh_held());
			if (!dst)
				continue;

			if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
				continue;

			/* we only need n-1 clones; last_dst redirected below */
			if (!last_dst) {
				last_dst = dst;
				continue;
			}

			err = dev_map_redirect_clone(last_dst, skb, xdp_prog);
			if (err)
				return err;

			last_dst = dst;
		}
	} else { /* BPF_MAP_TYPE_DEVMAP_HASH */
		for (i = 0; i < dtab->n_buckets; i++) {
			head = dev_map_index_hash(dtab, i);
			hlist_for_each_entry_safe(dst, next, head, index_hlist) {
				if (!dst)
					continue;

				if (is_ifindex_excluded(excluded_devices, num_excluded,
							dst->dev->ifindex))
					continue;

				/* we only need n-1 clones; last_dst redirected below */
				if (!last_dst) {
					last_dst = dst;
					continue;
				}

				err = dev_map_redirect_clone(last_dst, skb, xdp_prog);
				if (err)
					return err;

				last_dst = dst;
			}
		}
	}

	/* consume the original skb for the last destination */
	if (last_dst)
		return dev_map_generic_redirect(last_dst, skb, xdp_prog);

	/* no valid destination: map is empty or everything was excluded */
	consume_skb(skb);
	return 0;
}

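/* Lookups through the map ops return a pointer to the entry's bpf_devmap_val
 * (ifindex plus the id of any attached egress program) rather than to the
 * internal bpf_dtab_netdev.
 */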
static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);

	return obj ? &obj->val : NULL;
}

static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map,
								 *(u32 *)key);
	return obj ? &obj->val : NULL;
}

static void __dev_map_entry_free(struct rcu_head *rcu)
{
	struct bpf_dtab_netdev *dev;

	dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
	if (dev->xdp_prog)
		bpf_prog_put(dev->xdp_prog);
	dev_put(dev->dev);
	kfree(dev);
}

static int dev_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *old_dev;
	int k = *(u32 *)key;

	if (k >= map->max_entries)
		return -EINVAL;

	old_dev = unrcu_pointer(xchg(&dtab->netdev_map[k], NULL));
	if (old_dev)
		call_rcu(&old_dev->rcu, __dev_map_entry_free);
	return 0;
}

static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *old_dev;
	int k = *(u32 *)key;
	unsigned long flags;
	int ret = -ENOENT;

	spin_lock_irqsave(&dtab->index_lock, flags);

	old_dev = __dev_map_hash_lookup_elem(map, k);
	if (old_dev) {
		dtab->items--;
		hlist_del_init_rcu(&old_dev->index_hlist);
		call_rcu(&old_dev->rcu, __dev_map_entry_free);
		ret = 0;
	}
	spin_unlock_irqrestore(&dtab->index_lock, flags);

	return ret;
}

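/* Allocate and initialize one map entry: take a reference on the target
 * net_device and, if a program fd was supplied in the value, on the XDP
 * program, which must have expected_attach_type BPF_XDP_DEVMAP and be
 * compatible with this map.
 */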
static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
						    struct bpf_dtab *dtab,
						    struct bpf_devmap_val *val,
						    unsigned int idx)
{
	struct bpf_prog *prog = NULL;
	struct bpf_dtab_netdev *dev;

	dev = bpf_map_kmalloc_node(&dtab->map, sizeof(*dev),
				   GFP_NOWAIT | __GFP_NOWARN,
				   dtab->map.numa_node);
	if (!dev)
		return ERR_PTR(-ENOMEM);

	dev->dev = dev_get_by_index(net, val->ifindex);
	if (!dev->dev)
		goto err_out;

	if (val->bpf_prog.fd > 0) {
		prog = bpf_prog_get_type_dev(val->bpf_prog.fd,
					     BPF_PROG_TYPE_XDP, false);
		if (IS_ERR(prog))
			goto err_put_dev;
		if (prog->expected_attach_type != BPF_XDP_DEVMAP ||
		    !bpf_prog_map_compatible(&dtab->map, prog))
			goto err_put_prog;
	}

	dev->idx = idx;
	dev->dtab = dtab;
	if (prog) {
		dev->xdp_prog = prog;
		dev->val.bpf_prog.id = prog->aux->id;
	} else {
		dev->xdp_prog = NULL;
		dev->val.bpf_prog.id = 0;
	}
	dev->val.ifindex = val->ifindex;

	return dev;
err_put_prog:
	bpf_prog_put(prog);
err_put_dev:
	dev_put(dev->dev);
err_out:
	kfree(dev);
	return ERR_PTR(-EINVAL);
}

static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *dev, *old_dev;
	struct bpf_devmap_val val = {};
	u32 i = *(u32 *)key;

	if (unlikely(map_flags > BPF_EXIST))
		return -EINVAL;
	if (unlikely(i >= dtab->map.max_entries))
		return -E2BIG;
	if (unlikely(map_flags == BPF_NOEXIST))
		return -EEXIST;

	/* already verified value_size <= sizeof val */
	memcpy(&val, value, map->value_size);

	if (!val.ifindex) {
		dev = NULL;
		/* can not specify fd if ifindex is not set */
		if (val.bpf_prog.fd > 0)
			return -EINVAL;
	} else {
		dev = __dev_map_alloc_node(net, dtab, &val, i);
		if (IS_ERR(dev))
			return PTR_ERR(dev);
	}

	/* Use call_rcu() here to ensure RCU critical sections have completed
	 * before the old entry is freed; any driver-side flush operation will
	 * finish before the net_device reference is finally dropped.
	 */
	old_dev = unrcu_pointer(xchg(&dtab->netdev_map[i], RCU_INITIALIZER(dev)));
	if (old_dev)
		call_rcu(&old_dev->rcu, __dev_map_entry_free);

	return 0;
}

static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
			       u64 map_flags)
{
	return __dev_map_update_elem(current->nsproxy->net_ns,
				     map, key, value, map_flags);
}

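/* DEVMAP_HASH update: insert or replace the entry for the key under
 * dtab->index_lock. The items counter enforces max_entries for new
 * insertions; replaced entries are freed via RCU after the lock is dropped.
 */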
static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
				      void *key, void *value, u64 map_flags)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *dev, *old_dev;
	struct bpf_devmap_val val = {};
	u32 idx = *(u32 *)key;
	unsigned long flags;
	int err = -EEXIST;

	/* already verified value_size <= sizeof val */
	memcpy(&val, value, map->value_size);

	if (unlikely(map_flags > BPF_EXIST || !val.ifindex))
		return -EINVAL;

	spin_lock_irqsave(&dtab->index_lock, flags);

	old_dev = __dev_map_hash_lookup_elem(map, idx);
	if (old_dev && (map_flags & BPF_NOEXIST))
		goto out_err;

	dev = __dev_map_alloc_node(net, dtab, &val, idx);
	if (IS_ERR(dev)) {
		err = PTR_ERR(dev);
		goto out_err;
	}

	if (old_dev) {
		hlist_del_rcu(&old_dev->index_hlist);
	} else {
		if (dtab->items >= dtab->map.max_entries) {
			spin_unlock_irqrestore(&dtab->index_lock, flags);
			call_rcu(&dev->rcu, __dev_map_entry_free);
			return -E2BIG;
		}
		dtab->items++;
	}

	hlist_add_head_rcu(&dev->index_hlist,
			   dev_map_index_hash(dtab, idx));
	spin_unlock_irqrestore(&dtab->index_lock, flags);

	if (old_dev)
		call_rcu(&old_dev->rcu, __dev_map_entry_free);

	return 0;

out_err:
	spin_unlock_irqrestore(&dtab->index_lock, flags);
	return err;
}

static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
				    u64 map_flags)
{
	return __dev_map_hash_update_elem(current->nsproxy->net_ns,
					  map, key, value, map_flags);
}

static int dev_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
{
	return __bpf_xdp_redirect_map(map, ifindex, flags,
				      BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS,
				      __dev_map_lookup_elem);
}

static int dev_hash_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
{
	return __bpf_xdp_redirect_map(map, ifindex, flags,
				      BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS,
				      __dev_map_hash_lookup_elem);
}

BTF_ID_LIST_SINGLE(dev_map_btf_ids, struct, bpf_dtab)
const struct bpf_map_ops dev_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc = dev_map_alloc,
	.map_free = dev_map_free,
	.map_get_next_key = dev_map_get_next_key,
	.map_lookup_elem = dev_map_lookup_elem,
	.map_update_elem = dev_map_update_elem,
	.map_delete_elem = dev_map_delete_elem,
	.map_check_btf = map_check_no_btf,
	.map_btf_id = &dev_map_btf_ids[0],
	.map_redirect = dev_map_redirect,
};

const struct bpf_map_ops dev_map_hash_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc = dev_map_alloc,
	.map_free = dev_map_free,
	.map_get_next_key = dev_map_hash_get_next_key,
	.map_lookup_elem = dev_map_hash_lookup_elem,
	.map_update_elem = dev_map_hash_update_elem,
	.map_delete_elem = dev_map_hash_delete_elem,
	.map_check_btf = map_check_no_btf,
	.map_btf_id = &dev_map_btf_ids[0],
	.map_redirect = dev_hash_map_redirect,
};

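/* Drop every DEVMAP_HASH entry that points at a net_device which is being
 * unregistered. Called from the NETDEV_UNREGISTER notifier below.
 */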
static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab,
				       struct net_device *netdev)
{
	unsigned long flags;
	u32 i;

	spin_lock_irqsave(&dtab->index_lock, flags);
	for (i = 0; i < dtab->n_buckets; i++) {
		struct bpf_dtab_netdev *dev;
		struct hlist_head *head;
		struct hlist_node *next;

		head = dev_map_index_hash(dtab, i);

		hlist_for_each_entry_safe(dev, next, head, index_hlist) {
			if (netdev != dev->dev)
				continue;

			dtab->items--;
			hlist_del_rcu(&dev->index_hlist);
			call_rcu(&dev->rcu, __dev_map_entry_free);
		}
	}
	spin_unlock_irqrestore(&dtab->index_lock, flags);
}

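/* Netdevice notifier: on NETDEV_REGISTER, allocate the per-CPU bulk queues
 * for devices that support ndo_xdp_xmit; on NETDEV_UNREGISTER, purge all
 * devmap entries that still reference the device.
 */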
static int dev_map_notification(struct notifier_block *notifier,
				ulong event, void *ptr)
{
	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
	struct bpf_dtab *dtab;
	int i, cpu;

	switch (event) {
	case NETDEV_REGISTER:
		if (!netdev->netdev_ops->ndo_xdp_xmit || netdev->xdp_bulkq)
			break;

		/* will be freed in free_netdev() */
		netdev->xdp_bulkq = alloc_percpu(struct xdp_dev_bulk_queue);
		if (!netdev->xdp_bulkq)
			return NOTIFY_BAD;

		for_each_possible_cpu(cpu)
			per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev;
		break;
	case NETDEV_UNREGISTER:
		/* This rcu_read_lock/unlock pair is needed because
		 * dev_map_list is an RCU list AND to ensure a delete
		 * operation does not free a netdev_map entry while we
		 * are comparing it against the netdev being unregistered.
		 */
		rcu_read_lock();
		list_for_each_entry_rcu(dtab, &dev_map_list, list) {
			if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
				dev_map_hash_remove_netdev(dtab, netdev);
				continue;
			}

			for (i = 0; i < dtab->map.max_entries; i++) {
				struct bpf_dtab_netdev *dev, *odev;

				dev = rcu_dereference(dtab->netdev_map[i]);
				if (!dev || netdev != dev->dev)
					continue;
				odev = unrcu_pointer(cmpxchg(&dtab->netdev_map[i], RCU_INITIALIZER(dev), NULL));
				if (dev == odev)
					call_rcu(&dev->rcu,
						 __dev_map_entry_free);
			}
		}
		rcu_read_unlock();
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block dev_map_notifier = {
	.notifier_call = dev_map_notification,
};

static int __init dev_map_init(void)
{
	int cpu;

	/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
	BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
		     offsetof(struct _bpf_dtab_netdev, dev));
	register_netdevice_notifier(&dev_map_notifier);

	for_each_possible_cpu(cpu)
		INIT_LIST_HEAD(&per_cpu(dev_flush_list, cpu));
	return 0;
}

subsys_initcall(dev_map_init);