Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 
0003 #include <linux/bpf.h>
0004 #include <linux/bpf-netns.h>
0005 #include <linux/filter.h>
0006 #include <net/net_namespace.h>
0007 
0008 /*
0009  * Functions to manage BPF programs attached to netns
0010  */
0011 
0012 struct bpf_netns_link {
0013     struct bpf_link link;
0014     enum bpf_attach_type type;
0015     enum netns_bpf_attach_type netns_type;
0016 
0017     /* We don't hold a ref to net in order to auto-detach the link
0018      * when netns is going away. Instead we rely on pernet
0019      * pre_exit callback to clear this pointer. Must be accessed
0020      * with netns_bpf_mutex held.
0021      */
0022     struct net *net;
0023     struct list_head node; /* node in list of links attached to net */
0024 };
0025 
/* Guards every update to the per-netns BPF state (net->bpf). */
DEFINE_MUTEX(netns_bpf_mutex);
0028 
0029 static void netns_bpf_attach_type_unneed(enum netns_bpf_attach_type type)
0030 {
0031     switch (type) {
0032 #ifdef CONFIG_INET
0033     case NETNS_BPF_SK_LOOKUP:
0034         static_branch_dec(&bpf_sk_lookup_enabled);
0035         break;
0036 #endif
0037     default:
0038         break;
0039     }
0040 }
0041 
0042 static void netns_bpf_attach_type_need(enum netns_bpf_attach_type type)
0043 {
0044     switch (type) {
0045 #ifdef CONFIG_INET
0046     case NETNS_BPF_SK_LOOKUP:
0047         static_branch_inc(&bpf_sk_lookup_enabled);
0048         break;
0049 #endif
0050     default:
0051         break;
0052     }
0053 }
0054 
0055 /* Must be called with netns_bpf_mutex held. */
0056 static void netns_bpf_run_array_detach(struct net *net,
0057                        enum netns_bpf_attach_type type)
0058 {
0059     struct bpf_prog_array *run_array;
0060 
0061     run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL,
0062                     lockdep_is_held(&netns_bpf_mutex));
0063     bpf_prog_array_free(run_array);
0064 }
0065 
0066 static int link_index(struct net *net, enum netns_bpf_attach_type type,
0067               struct bpf_netns_link *link)
0068 {
0069     struct bpf_netns_link *pos;
0070     int i = 0;
0071 
0072     list_for_each_entry(pos, &net->bpf.links[type], node) {
0073         if (pos == link)
0074             return i;
0075         i++;
0076     }
0077     return -ENOENT;
0078 }
0079 
0080 static int link_count(struct net *net, enum netns_bpf_attach_type type)
0081 {
0082     struct list_head *pos;
0083     int i = 0;
0084 
0085     list_for_each(pos, &net->bpf.links[type])
0086         i++;
0087     return i;
0088 }
0089 
0090 static void fill_prog_array(struct net *net, enum netns_bpf_attach_type type,
0091                 struct bpf_prog_array *prog_array)
0092 {
0093     struct bpf_netns_link *pos;
0094     unsigned int i = 0;
0095 
0096     list_for_each_entry(pos, &net->bpf.links[type], node) {
0097         prog_array->items[i].prog = pos->link.prog;
0098         i++;
0099     }
0100 }
0101 
0102 static void bpf_netns_link_release(struct bpf_link *link)
0103 {
0104     struct bpf_netns_link *net_link =
0105         container_of(link, struct bpf_netns_link, link);
0106     enum netns_bpf_attach_type type = net_link->netns_type;
0107     struct bpf_prog_array *old_array, *new_array;
0108     struct net *net;
0109     int cnt, idx;
0110 
0111     mutex_lock(&netns_bpf_mutex);
0112 
0113     /* We can race with cleanup_net, but if we see a non-NULL
0114      * struct net pointer, pre_exit has not run yet and wait for
0115      * netns_bpf_mutex.
0116      */
0117     net = net_link->net;
0118     if (!net)
0119         goto out_unlock;
0120 
0121     /* Mark attach point as unused */
0122     netns_bpf_attach_type_unneed(type);
0123 
0124     /* Remember link position in case of safe delete */
0125     idx = link_index(net, type, net_link);
0126     list_del(&net_link->node);
0127 
0128     cnt = link_count(net, type);
0129     if (!cnt) {
0130         netns_bpf_run_array_detach(net, type);
0131         goto out_unlock;
0132     }
0133 
0134     old_array = rcu_dereference_protected(net->bpf.run_array[type],
0135                           lockdep_is_held(&netns_bpf_mutex));
0136     new_array = bpf_prog_array_alloc(cnt, GFP_KERNEL);
0137     if (!new_array) {
0138         WARN_ON(bpf_prog_array_delete_safe_at(old_array, idx));
0139         goto out_unlock;
0140     }
0141     fill_prog_array(net, type, new_array);
0142     rcu_assign_pointer(net->bpf.run_array[type], new_array);
0143     bpf_prog_array_free(old_array);
0144 
0145 out_unlock:
0146     net_link->net = NULL;
0147     mutex_unlock(&netns_bpf_mutex);
0148 }
0149 
/* bpf_link_ops.detach: run the release path for @link.
 *
 * Always returns 0.
 */
static int bpf_netns_link_detach(struct bpf_link *link)
{
    bpf_netns_link_release(link);

    return 0;
}
0155 
0156 static void bpf_netns_link_dealloc(struct bpf_link *link)
0157 {
0158     struct bpf_netns_link *net_link =
0159         container_of(link, struct bpf_netns_link, link);
0160 
0161     kfree(net_link);
0162 }
0163 
0164 static int bpf_netns_link_update_prog(struct bpf_link *link,
0165                       struct bpf_prog *new_prog,
0166                       struct bpf_prog *old_prog)
0167 {
0168     struct bpf_netns_link *net_link =
0169         container_of(link, struct bpf_netns_link, link);
0170     enum netns_bpf_attach_type type = net_link->netns_type;
0171     struct bpf_prog_array *run_array;
0172     struct net *net;
0173     int idx, ret;
0174 
0175     if (old_prog && old_prog != link->prog)
0176         return -EPERM;
0177     if (new_prog->type != link->prog->type)
0178         return -EINVAL;
0179 
0180     mutex_lock(&netns_bpf_mutex);
0181 
0182     net = net_link->net;
0183     if (!net || !check_net(net)) {
0184         /* Link auto-detached or netns dying */
0185         ret = -ENOLINK;
0186         goto out_unlock;
0187     }
0188 
0189     run_array = rcu_dereference_protected(net->bpf.run_array[type],
0190                           lockdep_is_held(&netns_bpf_mutex));
0191     idx = link_index(net, type, net_link);
0192     ret = bpf_prog_array_update_at(run_array, idx, new_prog);
0193     if (ret)
0194         goto out_unlock;
0195 
0196     old_prog = xchg(&link->prog, new_prog);
0197     bpf_prog_put(old_prog);
0198 
0199 out_unlock:
0200     mutex_unlock(&netns_bpf_mutex);
0201     return ret;
0202 }
0203 
0204 static int bpf_netns_link_fill_info(const struct bpf_link *link,
0205                     struct bpf_link_info *info)
0206 {
0207     const struct bpf_netns_link *net_link =
0208         container_of(link, struct bpf_netns_link, link);
0209     unsigned int inum = 0;
0210     struct net *net;
0211 
0212     mutex_lock(&netns_bpf_mutex);
0213     net = net_link->net;
0214     if (net && check_net(net))
0215         inum = net->ns.inum;
0216     mutex_unlock(&netns_bpf_mutex);
0217 
0218     info->netns.netns_ino = inum;
0219     info->netns.attach_type = net_link->type;
0220     return 0;
0221 }
0222 
0223 static void bpf_netns_link_show_fdinfo(const struct bpf_link *link,
0224                        struct seq_file *seq)
0225 {
0226     struct bpf_link_info info = {};
0227 
0228     bpf_netns_link_fill_info(link, &info);
0229     seq_printf(seq,
0230            "netns_ino:\t%u\n"
0231            "attach_type:\t%u\n",
0232            info.netns.netns_ino,
0233            info.netns.attach_type);
0234 }
0235 
0236 static const struct bpf_link_ops bpf_netns_link_ops = {
0237     .release = bpf_netns_link_release,
0238     .dealloc = bpf_netns_link_dealloc,
0239     .detach = bpf_netns_link_detach,
0240     .update_prog = bpf_netns_link_update_prog,
0241     .fill_link_info = bpf_netns_link_fill_info,
0242     .show_fdinfo = bpf_netns_link_show_fdinfo,
0243 };
0244 
0245 /* Must be called with netns_bpf_mutex held. */
0246 static int __netns_bpf_prog_query(const union bpf_attr *attr,
0247                   union bpf_attr __user *uattr,
0248                   struct net *net,
0249                   enum netns_bpf_attach_type type)
0250 {
0251     __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
0252     struct bpf_prog_array *run_array;
0253     u32 prog_cnt = 0, flags = 0;
0254 
0255     run_array = rcu_dereference_protected(net->bpf.run_array[type],
0256                           lockdep_is_held(&netns_bpf_mutex));
0257     if (run_array)
0258         prog_cnt = bpf_prog_array_length(run_array);
0259 
0260     if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
0261         return -EFAULT;
0262     if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
0263         return -EFAULT;
0264     if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
0265         return 0;
0266 
0267     return bpf_prog_array_copy_to_user(run_array, prog_ids,
0268                        attr->query.prog_cnt);
0269 }
0270 
0271 int netns_bpf_prog_query(const union bpf_attr *attr,
0272              union bpf_attr __user *uattr)
0273 {
0274     enum netns_bpf_attach_type type;
0275     struct net *net;
0276     int ret;
0277 
0278     if (attr->query.query_flags)
0279         return -EINVAL;
0280 
0281     type = to_netns_bpf_attach_type(attr->query.attach_type);
0282     if (type < 0)
0283         return -EINVAL;
0284 
0285     net = get_net_ns_by_fd(attr->query.target_fd);
0286     if (IS_ERR(net))
0287         return PTR_ERR(net);
0288 
0289     mutex_lock(&netns_bpf_mutex);
0290     ret = __netns_bpf_prog_query(attr, uattr, net, type);
0291     mutex_unlock(&netns_bpf_mutex);
0292 
0293     put_net(net);
0294     return ret;
0295 }
0296 
0297 int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
0298 {
0299     struct bpf_prog_array *run_array;
0300     enum netns_bpf_attach_type type;
0301     struct bpf_prog *attached;
0302     struct net *net;
0303     int ret;
0304 
0305     if (attr->target_fd || attr->attach_flags || attr->replace_bpf_fd)
0306         return -EINVAL;
0307 
0308     type = to_netns_bpf_attach_type(attr->attach_type);
0309     if (type < 0)
0310         return -EINVAL;
0311 
0312     net = current->nsproxy->net_ns;
0313     mutex_lock(&netns_bpf_mutex);
0314 
0315     /* Attaching prog directly is not compatible with links */
0316     if (!list_empty(&net->bpf.links[type])) {
0317         ret = -EEXIST;
0318         goto out_unlock;
0319     }
0320 
0321     switch (type) {
0322     case NETNS_BPF_FLOW_DISSECTOR:
0323         ret = flow_dissector_bpf_prog_attach_check(net, prog);
0324         break;
0325     default:
0326         ret = -EINVAL;
0327         break;
0328     }
0329     if (ret)
0330         goto out_unlock;
0331 
0332     attached = net->bpf.progs[type];
0333     if (attached == prog) {
0334         /* The same program cannot be attached twice */
0335         ret = -EINVAL;
0336         goto out_unlock;
0337     }
0338 
0339     run_array = rcu_dereference_protected(net->bpf.run_array[type],
0340                           lockdep_is_held(&netns_bpf_mutex));
0341     if (run_array) {
0342         WRITE_ONCE(run_array->items[0].prog, prog);
0343     } else {
0344         run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
0345         if (!run_array) {
0346             ret = -ENOMEM;
0347             goto out_unlock;
0348         }
0349         run_array->items[0].prog = prog;
0350         rcu_assign_pointer(net->bpf.run_array[type], run_array);
0351     }
0352 
0353     net->bpf.progs[type] = prog;
0354     if (attached)
0355         bpf_prog_put(attached);
0356 
0357 out_unlock:
0358     mutex_unlock(&netns_bpf_mutex);
0359 
0360     return ret;
0361 }
0362 
0363 /* Must be called with netns_bpf_mutex held. */
0364 static int __netns_bpf_prog_detach(struct net *net,
0365                    enum netns_bpf_attach_type type,
0366                    struct bpf_prog *old)
0367 {
0368     struct bpf_prog *attached;
0369 
0370     /* Progs attached via links cannot be detached */
0371     if (!list_empty(&net->bpf.links[type]))
0372         return -EINVAL;
0373 
0374     attached = net->bpf.progs[type];
0375     if (!attached || attached != old)
0376         return -ENOENT;
0377     netns_bpf_run_array_detach(net, type);
0378     net->bpf.progs[type] = NULL;
0379     bpf_prog_put(attached);
0380     return 0;
0381 }
0382 
0383 int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
0384 {
0385     enum netns_bpf_attach_type type;
0386     struct bpf_prog *prog;
0387     int ret;
0388 
0389     if (attr->target_fd)
0390         return -EINVAL;
0391 
0392     type = to_netns_bpf_attach_type(attr->attach_type);
0393     if (type < 0)
0394         return -EINVAL;
0395 
0396     prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
0397     if (IS_ERR(prog))
0398         return PTR_ERR(prog);
0399 
0400     mutex_lock(&netns_bpf_mutex);
0401     ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type, prog);
0402     mutex_unlock(&netns_bpf_mutex);
0403 
0404     bpf_prog_put(prog);
0405 
0406     return ret;
0407 }
0408 
0409 static int netns_bpf_max_progs(enum netns_bpf_attach_type type)
0410 {
0411     switch (type) {
0412     case NETNS_BPF_FLOW_DISSECTOR:
0413         return 1;
0414     case NETNS_BPF_SK_LOOKUP:
0415         return 64;
0416     default:
0417         return 0;
0418     }
0419 }
0420 
0421 static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
0422                  enum netns_bpf_attach_type type)
0423 {
0424     struct bpf_netns_link *net_link =
0425         container_of(link, struct bpf_netns_link, link);
0426     struct bpf_prog_array *run_array;
0427     int cnt, err;
0428 
0429     mutex_lock(&netns_bpf_mutex);
0430 
0431     cnt = link_count(net, type);
0432     if (cnt >= netns_bpf_max_progs(type)) {
0433         err = -E2BIG;
0434         goto out_unlock;
0435     }
0436     /* Links are not compatible with attaching prog directly */
0437     if (net->bpf.progs[type]) {
0438         err = -EEXIST;
0439         goto out_unlock;
0440     }
0441 
0442     switch (type) {
0443     case NETNS_BPF_FLOW_DISSECTOR:
0444         err = flow_dissector_bpf_prog_attach_check(net, link->prog);
0445         break;
0446     case NETNS_BPF_SK_LOOKUP:
0447         err = 0; /* nothing to check */
0448         break;
0449     default:
0450         err = -EINVAL;
0451         break;
0452     }
0453     if (err)
0454         goto out_unlock;
0455 
0456     run_array = bpf_prog_array_alloc(cnt + 1, GFP_KERNEL);
0457     if (!run_array) {
0458         err = -ENOMEM;
0459         goto out_unlock;
0460     }
0461 
0462     list_add_tail(&net_link->node, &net->bpf.links[type]);
0463 
0464     fill_prog_array(net, type, run_array);
0465     run_array = rcu_replace_pointer(net->bpf.run_array[type], run_array,
0466                     lockdep_is_held(&netns_bpf_mutex));
0467     bpf_prog_array_free(run_array);
0468 
0469     /* Mark attach point as used */
0470     netns_bpf_attach_type_need(type);
0471 
0472 out_unlock:
0473     mutex_unlock(&netns_bpf_mutex);
0474     return err;
0475 }
0476 
0477 int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
0478 {
0479     enum netns_bpf_attach_type netns_type;
0480     struct bpf_link_primer link_primer;
0481     struct bpf_netns_link *net_link;
0482     enum bpf_attach_type type;
0483     struct net *net;
0484     int err;
0485 
0486     if (attr->link_create.flags)
0487         return -EINVAL;
0488 
0489     type = attr->link_create.attach_type;
0490     netns_type = to_netns_bpf_attach_type(type);
0491     if (netns_type < 0)
0492         return -EINVAL;
0493 
0494     net = get_net_ns_by_fd(attr->link_create.target_fd);
0495     if (IS_ERR(net))
0496         return PTR_ERR(net);
0497 
0498     net_link = kzalloc(sizeof(*net_link), GFP_USER);
0499     if (!net_link) {
0500         err = -ENOMEM;
0501         goto out_put_net;
0502     }
0503     bpf_link_init(&net_link->link, BPF_LINK_TYPE_NETNS,
0504               &bpf_netns_link_ops, prog);
0505     net_link->net = net;
0506     net_link->type = type;
0507     net_link->netns_type = netns_type;
0508 
0509     err = bpf_link_prime(&net_link->link, &link_primer);
0510     if (err) {
0511         kfree(net_link);
0512         goto out_put_net;
0513     }
0514 
0515     err = netns_bpf_link_attach(net, &net_link->link, netns_type);
0516     if (err) {
0517         bpf_link_cleanup(&link_primer);
0518         goto out_put_net;
0519     }
0520 
0521     put_net(net);
0522     return bpf_link_settle(&link_primer);
0523 
0524 out_put_net:
0525     put_net(net);
0526     return err;
0527 }
0528 
0529 static int __net_init netns_bpf_pernet_init(struct net *net)
0530 {
0531     int type;
0532 
0533     for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
0534         INIT_LIST_HEAD(&net->bpf.links[type]);
0535 
0536     return 0;
0537 }
0538 
0539 static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
0540 {
0541     enum netns_bpf_attach_type type;
0542     struct bpf_netns_link *net_link;
0543 
0544     mutex_lock(&netns_bpf_mutex);
0545     for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
0546         netns_bpf_run_array_detach(net, type);
0547         list_for_each_entry(net_link, &net->bpf.links[type], node) {
0548             net_link->net = NULL; /* auto-detach link */
0549             netns_bpf_attach_type_unneed(type);
0550         }
0551         if (net->bpf.progs[type])
0552             bpf_prog_put(net->bpf.progs[type]);
0553     }
0554     mutex_unlock(&netns_bpf_mutex);
0555 }
0556 
0557 static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
0558     .init = netns_bpf_pernet_init,
0559     .pre_exit = netns_bpf_pernet_pre_exit,
0560 };
0561 
0562 static int __init netns_bpf_init(void)
0563 {
0564     return register_pernet_subsys(&netns_bpf_pernet_ops);
0565 }
0566 
0567 subsys_initcall(netns_bpf_init);