0001
0002
0003 #include <linux/bpf.h>
0004 #include <linux/bpf-netns.h>
0005 #include <linux/filter.h>
0006 #include <net/net_namespace.h>
0007
0008
0009
0010
0011
/* A BPF link attached to a network namespace: the generic bpf_link plus
 * the bookkeeping needed to locate it on the namespace's per-type list.
 */
struct bpf_netns_link {
	struct bpf_link link;	/* embedded generic link; container_of() anchor */
	enum bpf_attach_type type;	/* attach type as given at link creation */
	enum netns_bpf_attach_type netns_type;	/* index into the net->bpf.* arrays */

	/* Namespace this link is attached to. netns_bpf_link_create()
	 * drops its namespace reference once the link is attached, so no
	 * reference is kept through this pointer. It is reset to NULL by
	 * the link release path and by the pernet pre_exit callback, and
	 * is read and cleared under netns_bpf_mutex.
	 */
	struct net *net;
	struct list_head node;	/* entry in net->bpf.links[netns_type] */
};
0025
0026
/* Serializes every update and query of per-netns BPF attachment state
 * (net->bpf.progs, net->bpf.links, net->bpf.run_array) in this file.
 */
DEFINE_MUTEX(netns_bpf_mutex);
0028
0029 static void netns_bpf_attach_type_unneed(enum netns_bpf_attach_type type)
0030 {
0031 switch (type) {
0032 #ifdef CONFIG_INET
0033 case NETNS_BPF_SK_LOOKUP:
0034 static_branch_dec(&bpf_sk_lookup_enabled);
0035 break;
0036 #endif
0037 default:
0038 break;
0039 }
0040 }
0041
0042 static void netns_bpf_attach_type_need(enum netns_bpf_attach_type type)
0043 {
0044 switch (type) {
0045 #ifdef CONFIG_INET
0046 case NETNS_BPF_SK_LOOKUP:
0047 static_branch_inc(&bpf_sk_lookup_enabled);
0048 break;
0049 #endif
0050 default:
0051 break;
0052 }
0053 }
0054
0055
0056 static void netns_bpf_run_array_detach(struct net *net,
0057 enum netns_bpf_attach_type type)
0058 {
0059 struct bpf_prog_array *run_array;
0060
0061 run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL,
0062 lockdep_is_held(&netns_bpf_mutex));
0063 bpf_prog_array_free(run_array);
0064 }
0065
0066 static int link_index(struct net *net, enum netns_bpf_attach_type type,
0067 struct bpf_netns_link *link)
0068 {
0069 struct bpf_netns_link *pos;
0070 int i = 0;
0071
0072 list_for_each_entry(pos, &net->bpf.links[type], node) {
0073 if (pos == link)
0074 return i;
0075 i++;
0076 }
0077 return -ENOENT;
0078 }
0079
0080 static int link_count(struct net *net, enum netns_bpf_attach_type type)
0081 {
0082 struct list_head *pos;
0083 int i = 0;
0084
0085 list_for_each(pos, &net->bpf.links[type])
0086 i++;
0087 return i;
0088 }
0089
0090 static void fill_prog_array(struct net *net, enum netns_bpf_attach_type type,
0091 struct bpf_prog_array *prog_array)
0092 {
0093 struct bpf_netns_link *pos;
0094 unsigned int i = 0;
0095
0096 list_for_each_entry(pos, &net->bpf.links[type], node) {
0097 prog_array->items[i].prog = pos->link.prog;
0098 i++;
0099 }
0100 }
0101
0102 static void bpf_netns_link_release(struct bpf_link *link)
0103 {
0104 struct bpf_netns_link *net_link =
0105 container_of(link, struct bpf_netns_link, link);
0106 enum netns_bpf_attach_type type = net_link->netns_type;
0107 struct bpf_prog_array *old_array, *new_array;
0108 struct net *net;
0109 int cnt, idx;
0110
0111 mutex_lock(&netns_bpf_mutex);
0112
0113
0114
0115
0116
0117 net = net_link->net;
0118 if (!net)
0119 goto out_unlock;
0120
0121
0122 netns_bpf_attach_type_unneed(type);
0123
0124
0125 idx = link_index(net, type, net_link);
0126 list_del(&net_link->node);
0127
0128 cnt = link_count(net, type);
0129 if (!cnt) {
0130 netns_bpf_run_array_detach(net, type);
0131 goto out_unlock;
0132 }
0133
0134 old_array = rcu_dereference_protected(net->bpf.run_array[type],
0135 lockdep_is_held(&netns_bpf_mutex));
0136 new_array = bpf_prog_array_alloc(cnt, GFP_KERNEL);
0137 if (!new_array) {
0138 WARN_ON(bpf_prog_array_delete_safe_at(old_array, idx));
0139 goto out_unlock;
0140 }
0141 fill_prog_array(net, type, new_array);
0142 rcu_assign_pointer(net->bpf.run_array[type], new_array);
0143 bpf_prog_array_free(old_array);
0144
0145 out_unlock:
0146 net_link->net = NULL;
0147 mutex_unlock(&netns_bpf_mutex);
0148 }
0149
/* bpf_link_ops.detach: same work as the release callback.
 *
 * Always returns 0; bpf_netns_link_release() has no failure result to
 * propagate.
 */
static int bpf_netns_link_detach(struct bpf_link *link)
{
	bpf_netns_link_release(link);
	return 0;
}
0155
0156 static void bpf_netns_link_dealloc(struct bpf_link *link)
0157 {
0158 struct bpf_netns_link *net_link =
0159 container_of(link, struct bpf_netns_link, link);
0160
0161 kfree(net_link);
0162 }
0163
0164 static int bpf_netns_link_update_prog(struct bpf_link *link,
0165 struct bpf_prog *new_prog,
0166 struct bpf_prog *old_prog)
0167 {
0168 struct bpf_netns_link *net_link =
0169 container_of(link, struct bpf_netns_link, link);
0170 enum netns_bpf_attach_type type = net_link->netns_type;
0171 struct bpf_prog_array *run_array;
0172 struct net *net;
0173 int idx, ret;
0174
0175 if (old_prog && old_prog != link->prog)
0176 return -EPERM;
0177 if (new_prog->type != link->prog->type)
0178 return -EINVAL;
0179
0180 mutex_lock(&netns_bpf_mutex);
0181
0182 net = net_link->net;
0183 if (!net || !check_net(net)) {
0184
0185 ret = -ENOLINK;
0186 goto out_unlock;
0187 }
0188
0189 run_array = rcu_dereference_protected(net->bpf.run_array[type],
0190 lockdep_is_held(&netns_bpf_mutex));
0191 idx = link_index(net, type, net_link);
0192 ret = bpf_prog_array_update_at(run_array, idx, new_prog);
0193 if (ret)
0194 goto out_unlock;
0195
0196 old_prog = xchg(&link->prog, new_prog);
0197 bpf_prog_put(old_prog);
0198
0199 out_unlock:
0200 mutex_unlock(&netns_bpf_mutex);
0201 return ret;
0202 }
0203
0204 static int bpf_netns_link_fill_info(const struct bpf_link *link,
0205 struct bpf_link_info *info)
0206 {
0207 const struct bpf_netns_link *net_link =
0208 container_of(link, struct bpf_netns_link, link);
0209 unsigned int inum = 0;
0210 struct net *net;
0211
0212 mutex_lock(&netns_bpf_mutex);
0213 net = net_link->net;
0214 if (net && check_net(net))
0215 inum = net->ns.inum;
0216 mutex_unlock(&netns_bpf_mutex);
0217
0218 info->netns.netns_ino = inum;
0219 info->netns.attach_type = net_link->type;
0220 return 0;
0221 }
0222
0223 static void bpf_netns_link_show_fdinfo(const struct bpf_link *link,
0224 struct seq_file *seq)
0225 {
0226 struct bpf_link_info info = {};
0227
0228 bpf_netns_link_fill_info(link, &info);
0229 seq_printf(seq,
0230 "netns_ino:\t%u\n"
0231 "attach_type:\t%u\n",
0232 info.netns.netns_ino,
0233 info.netns.attach_type);
0234 }
0235
0236 static const struct bpf_link_ops bpf_netns_link_ops = {
0237 .release = bpf_netns_link_release,
0238 .dealloc = bpf_netns_link_dealloc,
0239 .detach = bpf_netns_link_detach,
0240 .update_prog = bpf_netns_link_update_prog,
0241 .fill_link_info = bpf_netns_link_fill_info,
0242 .show_fdinfo = bpf_netns_link_show_fdinfo,
0243 };
0244
0245
0246 static int __netns_bpf_prog_query(const union bpf_attr *attr,
0247 union bpf_attr __user *uattr,
0248 struct net *net,
0249 enum netns_bpf_attach_type type)
0250 {
0251 __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
0252 struct bpf_prog_array *run_array;
0253 u32 prog_cnt = 0, flags = 0;
0254
0255 run_array = rcu_dereference_protected(net->bpf.run_array[type],
0256 lockdep_is_held(&netns_bpf_mutex));
0257 if (run_array)
0258 prog_cnt = bpf_prog_array_length(run_array);
0259
0260 if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
0261 return -EFAULT;
0262 if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
0263 return -EFAULT;
0264 if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
0265 return 0;
0266
0267 return bpf_prog_array_copy_to_user(run_array, prog_ids,
0268 attr->query.prog_cnt);
0269 }
0270
0271 int netns_bpf_prog_query(const union bpf_attr *attr,
0272 union bpf_attr __user *uattr)
0273 {
0274 enum netns_bpf_attach_type type;
0275 struct net *net;
0276 int ret;
0277
0278 if (attr->query.query_flags)
0279 return -EINVAL;
0280
0281 type = to_netns_bpf_attach_type(attr->query.attach_type);
0282 if (type < 0)
0283 return -EINVAL;
0284
0285 net = get_net_ns_by_fd(attr->query.target_fd);
0286 if (IS_ERR(net))
0287 return PTR_ERR(net);
0288
0289 mutex_lock(&netns_bpf_mutex);
0290 ret = __netns_bpf_prog_query(attr, uattr, net, type);
0291 mutex_unlock(&netns_bpf_mutex);
0292
0293 put_net(net);
0294 return ret;
0295 }
0296
0297 int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
0298 {
0299 struct bpf_prog_array *run_array;
0300 enum netns_bpf_attach_type type;
0301 struct bpf_prog *attached;
0302 struct net *net;
0303 int ret;
0304
0305 if (attr->target_fd || attr->attach_flags || attr->replace_bpf_fd)
0306 return -EINVAL;
0307
0308 type = to_netns_bpf_attach_type(attr->attach_type);
0309 if (type < 0)
0310 return -EINVAL;
0311
0312 net = current->nsproxy->net_ns;
0313 mutex_lock(&netns_bpf_mutex);
0314
0315
0316 if (!list_empty(&net->bpf.links[type])) {
0317 ret = -EEXIST;
0318 goto out_unlock;
0319 }
0320
0321 switch (type) {
0322 case NETNS_BPF_FLOW_DISSECTOR:
0323 ret = flow_dissector_bpf_prog_attach_check(net, prog);
0324 break;
0325 default:
0326 ret = -EINVAL;
0327 break;
0328 }
0329 if (ret)
0330 goto out_unlock;
0331
0332 attached = net->bpf.progs[type];
0333 if (attached == prog) {
0334
0335 ret = -EINVAL;
0336 goto out_unlock;
0337 }
0338
0339 run_array = rcu_dereference_protected(net->bpf.run_array[type],
0340 lockdep_is_held(&netns_bpf_mutex));
0341 if (run_array) {
0342 WRITE_ONCE(run_array->items[0].prog, prog);
0343 } else {
0344 run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
0345 if (!run_array) {
0346 ret = -ENOMEM;
0347 goto out_unlock;
0348 }
0349 run_array->items[0].prog = prog;
0350 rcu_assign_pointer(net->bpf.run_array[type], run_array);
0351 }
0352
0353 net->bpf.progs[type] = prog;
0354 if (attached)
0355 bpf_prog_put(attached);
0356
0357 out_unlock:
0358 mutex_unlock(&netns_bpf_mutex);
0359
0360 return ret;
0361 }
0362
0363
0364 static int __netns_bpf_prog_detach(struct net *net,
0365 enum netns_bpf_attach_type type,
0366 struct bpf_prog *old)
0367 {
0368 struct bpf_prog *attached;
0369
0370
0371 if (!list_empty(&net->bpf.links[type]))
0372 return -EINVAL;
0373
0374 attached = net->bpf.progs[type];
0375 if (!attached || attached != old)
0376 return -ENOENT;
0377 netns_bpf_run_array_detach(net, type);
0378 net->bpf.progs[type] = NULL;
0379 bpf_prog_put(attached);
0380 return 0;
0381 }
0382
0383 int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
0384 {
0385 enum netns_bpf_attach_type type;
0386 struct bpf_prog *prog;
0387 int ret;
0388
0389 if (attr->target_fd)
0390 return -EINVAL;
0391
0392 type = to_netns_bpf_attach_type(attr->attach_type);
0393 if (type < 0)
0394 return -EINVAL;
0395
0396 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
0397 if (IS_ERR(prog))
0398 return PTR_ERR(prog);
0399
0400 mutex_lock(&netns_bpf_mutex);
0401 ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type, prog);
0402 mutex_unlock(&netns_bpf_mutex);
0403
0404 bpf_prog_put(prog);
0405
0406 return ret;
0407 }
0408
0409 static int netns_bpf_max_progs(enum netns_bpf_attach_type type)
0410 {
0411 switch (type) {
0412 case NETNS_BPF_FLOW_DISSECTOR:
0413 return 1;
0414 case NETNS_BPF_SK_LOOKUP:
0415 return 64;
0416 default:
0417 return 0;
0418 }
0419 }
0420
0421 static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
0422 enum netns_bpf_attach_type type)
0423 {
0424 struct bpf_netns_link *net_link =
0425 container_of(link, struct bpf_netns_link, link);
0426 struct bpf_prog_array *run_array;
0427 int cnt, err;
0428
0429 mutex_lock(&netns_bpf_mutex);
0430
0431 cnt = link_count(net, type);
0432 if (cnt >= netns_bpf_max_progs(type)) {
0433 err = -E2BIG;
0434 goto out_unlock;
0435 }
0436
0437 if (net->bpf.progs[type]) {
0438 err = -EEXIST;
0439 goto out_unlock;
0440 }
0441
0442 switch (type) {
0443 case NETNS_BPF_FLOW_DISSECTOR:
0444 err = flow_dissector_bpf_prog_attach_check(net, link->prog);
0445 break;
0446 case NETNS_BPF_SK_LOOKUP:
0447 err = 0;
0448 break;
0449 default:
0450 err = -EINVAL;
0451 break;
0452 }
0453 if (err)
0454 goto out_unlock;
0455
0456 run_array = bpf_prog_array_alloc(cnt + 1, GFP_KERNEL);
0457 if (!run_array) {
0458 err = -ENOMEM;
0459 goto out_unlock;
0460 }
0461
0462 list_add_tail(&net_link->node, &net->bpf.links[type]);
0463
0464 fill_prog_array(net, type, run_array);
0465 run_array = rcu_replace_pointer(net->bpf.run_array[type], run_array,
0466 lockdep_is_held(&netns_bpf_mutex));
0467 bpf_prog_array_free(run_array);
0468
0469
0470 netns_bpf_attach_type_need(type);
0471
0472 out_unlock:
0473 mutex_unlock(&netns_bpf_mutex);
0474 return err;
0475 }
0476
0477 int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
0478 {
0479 enum netns_bpf_attach_type netns_type;
0480 struct bpf_link_primer link_primer;
0481 struct bpf_netns_link *net_link;
0482 enum bpf_attach_type type;
0483 struct net *net;
0484 int err;
0485
0486 if (attr->link_create.flags)
0487 return -EINVAL;
0488
0489 type = attr->link_create.attach_type;
0490 netns_type = to_netns_bpf_attach_type(type);
0491 if (netns_type < 0)
0492 return -EINVAL;
0493
0494 net = get_net_ns_by_fd(attr->link_create.target_fd);
0495 if (IS_ERR(net))
0496 return PTR_ERR(net);
0497
0498 net_link = kzalloc(sizeof(*net_link), GFP_USER);
0499 if (!net_link) {
0500 err = -ENOMEM;
0501 goto out_put_net;
0502 }
0503 bpf_link_init(&net_link->link, BPF_LINK_TYPE_NETNS,
0504 &bpf_netns_link_ops, prog);
0505 net_link->net = net;
0506 net_link->type = type;
0507 net_link->netns_type = netns_type;
0508
0509 err = bpf_link_prime(&net_link->link, &link_primer);
0510 if (err) {
0511 kfree(net_link);
0512 goto out_put_net;
0513 }
0514
0515 err = netns_bpf_link_attach(net, &net_link->link, netns_type);
0516 if (err) {
0517 bpf_link_cleanup(&link_primer);
0518 goto out_put_net;
0519 }
0520
0521 put_net(net);
0522 return bpf_link_settle(&link_primer);
0523
0524 out_put_net:
0525 put_net(net);
0526 return err;
0527 }
0528
0529 static int __net_init netns_bpf_pernet_init(struct net *net)
0530 {
0531 int type;
0532
0533 for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
0534 INIT_LIST_HEAD(&net->bpf.links[type]);
0535
0536 return 0;
0537 }
0538
0539 static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
0540 {
0541 enum netns_bpf_attach_type type;
0542 struct bpf_netns_link *net_link;
0543
0544 mutex_lock(&netns_bpf_mutex);
0545 for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
0546 netns_bpf_run_array_detach(net, type);
0547 list_for_each_entry(net_link, &net->bpf.links[type], node) {
0548 net_link->net = NULL;
0549 netns_bpf_attach_type_unneed(type);
0550 }
0551 if (net->bpf.progs[type])
0552 bpf_prog_put(net->bpf.progs[type]);
0553 }
0554 mutex_unlock(&netns_bpf_mutex);
0555 }
0556
/* Per-namespace hooks: set up the link lists when a netns is created and
 * tear down all BPF attachments in its pre_exit stage.
 */
static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
	.init = netns_bpf_pernet_init,
	.pre_exit = netns_bpf_pernet_pre_exit,
};
0561
/* Register the per-namespace hooks above; returns the result of
 * register_pernet_subsys().
 */
static int __init netns_bpf_init(void)
{
	return register_pernet_subsys(&netns_bpf_pernet_ops);
}

/* Run netns_bpf_init() at the subsys initcall level. */
subsys_initcall(netns_bpf_init);